Actual source code: ex5.c
1: static char help[] = "Tests MatMult(), MatMultAdd(), MatMultTranspose().\n\
2: Also MatMultTransposeAdd(), MatScale(), MatGetDiagonal(), MatDiagonalScale(), MatZeroEntries() and MatDuplicate().\n\n";
4: #include <petscmat.h>
6: int main(int argc, char **args)
7: {
8: Mat C;
9: Vec s, u, w, x, y, z;
10: PetscInt i, j, m = 8, n, rstart, rend, vstart, vend;
11: PetscScalar one = 1.0, negone = -1.0, v, alpha = 0.1;
12: PetscReal norm, tol = PETSC_SQRT_MACHINE_EPSILON;
13: PetscBool flg;
15: PetscFunctionBeginUser;
16: PetscCall(PetscInitialize(&argc, &args, NULL, help));
17: PetscCall(PetscViewerPushFormat(PETSC_VIEWER_STDOUT_WORLD, PETSC_VIEWER_ASCII_COMMON));
18: PetscCall(PetscOptionsGetInt(NULL, NULL, "-m", &m, NULL));
19: n = m;
20: PetscCall(PetscOptionsHasName(NULL, NULL, "-rectA", &flg));
21: if (flg) n += 2;
22: PetscCall(PetscOptionsHasName(NULL, NULL, "-rectB", &flg));
23: if (flg) n -= 2;
25: /* ---------- Assemble matrix and vectors ----------- */
27: PetscCall(MatCreate(PETSC_COMM_WORLD, &C));
28: PetscCall(MatSetSizes(C, PETSC_DECIDE, PETSC_DECIDE, m, n));
29: PetscCall(MatSetFromOptions(C));
30: PetscCall(MatSetUp(C));
31: PetscCall(MatGetOwnershipRange(C, &rstart, &rend));
32: PetscCall(VecCreate(PETSC_COMM_WORLD, &x));
33: PetscCall(VecSetSizes(x, PETSC_DECIDE, m));
34: PetscCall(VecSetFromOptions(x));
35: PetscCall(VecDuplicate(x, &z));
36: PetscCall(VecDuplicate(x, &w));
37: PetscCall(VecCreate(PETSC_COMM_WORLD, &y));
38: PetscCall(VecSetSizes(y, PETSC_DECIDE, n));
39: PetscCall(VecSetFromOptions(y));
40: PetscCall(VecDuplicate(y, &u));
41: PetscCall(VecDuplicate(y, &s));
42: PetscCall(VecGetOwnershipRange(y, &vstart, &vend));
44: /* Assembly */
45: for (i = rstart; i < rend; i++) {
46: v = 100 * (i + 1);
47: PetscCall(VecSetValues(z, 1, &i, &v, INSERT_VALUES));
48: for (j = 0; j < n; j++) {
49: v = 10 * (i + 1) + j + 1;
50: PetscCall(MatSetValues(C, 1, &i, 1, &j, &v, INSERT_VALUES));
51: }
52: }
54: /* Flush off proc Vec values and do more assembly */
55: PetscCall(VecAssemblyBegin(z));
56: for (i = vstart; i < vend; i++) {
57: v = one * ((PetscReal)i);
58: PetscCall(VecSetValues(y, 1, &i, &v, INSERT_VALUES));
59: v = 100.0 * i;
60: PetscCall(VecSetValues(u, 1, &i, &v, INSERT_VALUES));
61: }
63: /* Flush off proc Mat values and do more assembly */
64: PetscCall(MatAssemblyBegin(C, MAT_FLUSH_ASSEMBLY));
65: for (i = rstart; i < rend; i++) {
66: for (j = 0; j < n; j++) {
67: v = 10 * (i + 1) + j + 1;
68: PetscCall(MatSetValues(C, 1, &i, 1, &j, &v, INSERT_VALUES));
69: }
70: }
71: /* Try overlap Coomunication with the next stage XXXSetValues */
72: PetscCall(VecAssemblyEnd(z));
74: PetscCall(MatAssemblyEnd(C, MAT_FLUSH_ASSEMBLY));
75: CHKMEMQ;
76: /* The Assembly for the second Stage */
77: PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
78: PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
79: PetscCall(VecAssemblyBegin(y));
80: PetscCall(VecAssemblyEnd(y));
81: PetscCall(MatScale(C, alpha));
82: PetscCall(VecAssemblyBegin(u));
83: PetscCall(VecAssemblyEnd(u));
84: PetscCall(PetscPrintf(PETSC_COMM_WORLD, "testing MatMult()\n"));
85: CHKMEMQ;
86: PetscCall(MatMult(C, y, x));
87: CHKMEMQ;
88: PetscCall(VecView(x, PETSC_VIEWER_STDOUT_WORLD));
89: PetscCall(PetscPrintf(PETSC_COMM_WORLD, "testing MatMultAdd()\n"));
90: PetscCall(MatMultAdd(C, y, z, w));
91: PetscCall(VecAXPY(x, one, z));
92: PetscCall(VecAXPY(x, negone, w));
93: PetscCall(VecNorm(x, NORM_2, &norm));
94: if (norm > tol) PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Norm of error difference = %g\n", (double)norm));
96: /* ------- Test MatMultTranspose(), MatMultTransposeAdd() ------- */
98: for (i = rstart; i < rend; i++) {
99: v = one * ((PetscReal)i);
100: PetscCall(VecSetValues(x, 1, &i, &v, INSERT_VALUES));
101: }
102: PetscCall(VecAssemblyBegin(x));
103: PetscCall(VecAssemblyEnd(x));
104: PetscCall(PetscPrintf(PETSC_COMM_WORLD, "testing MatMultTranspose()\n"));
105: PetscCall(MatMultTranspose(C, x, y));
106: PetscCall(VecView(y, PETSC_VIEWER_STDOUT_WORLD));
108: PetscCall(PetscPrintf(PETSC_COMM_WORLD, "testing MatMultTransposeAdd()\n"));
109: PetscCall(MatMultTransposeAdd(C, x, u, s));
110: PetscCall(VecAXPY(y, one, u));
111: PetscCall(VecAXPY(y, negone, s));
112: PetscCall(VecNorm(y, NORM_2, &norm));
113: if (norm > tol) PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Norm of error difference = %g\n", (double)norm));
115: /* -------------------- Test MatGetDiagonal() ------------------ */
117: PetscCall(PetscPrintf(PETSC_COMM_WORLD, "testing MatGetDiagonal(), MatDiagonalScale()\n"));
118: PetscCall(MatView(C, PETSC_VIEWER_STDOUT_WORLD));
119: PetscCall(VecSet(x, one));
120: PetscCall(MatGetDiagonal(C, x));
121: PetscCall(VecView(x, PETSC_VIEWER_STDOUT_WORLD));
122: for (i = vstart; i < vend; i++) {
123: v = one * ((PetscReal)(i + 1));
124: PetscCall(VecSetValues(y, 1, &i, &v, INSERT_VALUES));
125: }
127: /* -------------------- Test () MatDiagonalScale ------------------ */
128: PetscCall(PetscOptionsHasName(NULL, NULL, "-test_diagonalscale", &flg));
129: if (flg) {
130: PetscCall(MatDiagonalScale(C, x, y));
131: PetscCall(MatView(C, PETSC_VIEWER_STDOUT_WORLD));
132: }
133: /* -------------------- Test () MatZeroEntries() and MatDuplicate() ------------------ */
134: PetscCall(PetscOptionsHasName(NULL, NULL, "-test_zeroentries", &flg));
135: if (flg) {
136: Mat D;
137: PetscCall(MatDuplicate(C, MAT_COPY_VALUES, &D));
138: PetscCall(MatZeroEntries(D));
139: PetscCall(MatView(D, PETSC_VIEWER_STDOUT_WORLD));
140: PetscCall(MatDestroy(&D));
141: }
142: /* Free data structures */
143: PetscCall(VecDestroy(&u));
144: PetscCall(VecDestroy(&s));
145: PetscCall(VecDestroy(&w));
146: PetscCall(VecDestroy(&x));
147: PetscCall(VecDestroy(&y));
148: PetscCall(VecDestroy(&z));
149: PetscCall(MatDestroy(&C));
151: PetscCall(PetscFinalize());
152: return 0;
153: }
155: /*TEST
157: test:
158: suffix: 11_A
159: args: -mat_type seqaij -rectA
160: filter: grep -v type
162: test:
163: args: -mat_type seqdense -rectA
164: suffix: 12_A
166: test:
167: args: -mat_type seqaij -rectB
168: suffix: 11_B
169: filter: grep -v type
171: test:
172: args: -mat_type seqdense -rectB
173: suffix: 12_B
175: test:
176: suffix: 21
177: args: -mat_type mpiaij
178: filter: grep -v type
180: test:
181: suffix: 22
182: args: -mat_type mpidense
184: test:
185: suffix: 23
186: nsize: 3
187: args: -mat_type mpiaij
188: filter: grep -v type
190: test:
191: suffix: 24
192: nsize: 3
193: args: -mat_type mpidense
195: test:
196: suffix: 2_aijcusparse_1
197: args: -mat_type mpiaijcusparse -vec_type cuda
198: filter: grep -v type
199: output_file: output/ex5_21.out
200: requires: cuda
202: test:
203: nsize: 3
204: suffix: 2_aijcusparse_2
205: filter: grep -v type
206: args: -mat_type mpiaijcusparse -vec_type cuda
207: args: -sf_type {{basic neighbor}}
208: output_file: output/ex5_23.out
209: requires: cuda
211: test:
212: nsize: 3
213: suffix: 2_aijcusparse_3
214: filter: grep -v type
215: args: -mat_type mpiaijcusparse -vec_type cuda
216: args: -sf_type {{basic neighbor}}
217: output_file: output/ex5_23.out
218: requires: cuda defined(PETSC_HAVE_MPI_GPU_AWARE)
220: test:
221: suffix: 31
222: args: -mat_type mpiaij -test_diagonalscale
223: filter: grep -v type
225: test:
226: suffix: 32
227: args: -mat_type mpibaij -test_diagonalscale
229: test:
230: suffix: 33
231: nsize: 3
232: args: -mat_type mpiaij -test_diagonalscale
233: filter: grep -v type
235: test:
236: suffix: 34
237: nsize: 3
238: args: -mat_type mpibaij -test_diagonalscale
240: test:
241: suffix: 3_aijcusparse_1
242: args: -mat_type mpiaijcusparse -vec_type cuda -test_diagonalscale
243: filter: grep -v type
244: output_file: output/ex5_31.out
245: requires: cuda
247: test:
248: suffix: 3_aijcusparse_2
249: nsize: 3
250: args: -mat_type mpiaijcusparse -vec_type cuda -test_diagonalscale
251: filter: grep -v type
252: output_file: output/ex5_33.out
253: requires: cuda
255: test:
256: suffix: 3_kokkos
257: nsize: 3
258: args: -mat_type mpiaijkokkos -vec_type kokkos -test_diagonalscale
259: filter: grep -v type
260: output_file: output/ex5_33.out
261: requires: kokkos_kernels
263: test:
264: suffix: aijcusparse_1
265: args: -mat_type seqaijcusparse -vec_type cuda -rectA
266: filter: grep -v type
267: output_file: output/ex5_11_A.out
268: requires: cuda
270: test:
271: suffix: aijcusparse_2
272: args: -mat_type seqaijcusparse -vec_type cuda -rectB
273: filter: grep -v type
274: output_file: output/ex5_11_B.out
275: requires: cuda
277: test:
278: suffix: sell_1
279: args: -mat_type sell -mat_sell_slice_height 8
280: output_file: output/ex5_41.out
282: test:
283: suffix: sell_2
284: nsize: 3
285: args: -mat_type sell -mat_sell_slice_height 8
286: output_file: output/ex5_43.out
288: test:
289: suffix: sell_3
290: args: -mat_type sell -test_diagonalscale -mat_sell_slice_height 8
291: output_file: output/ex5_51.out
293: test:
294: suffix: sell_4
295: nsize: 3
296: args: -mat_type sell -test_diagonalscale -mat_sell_slice_height 8
297: output_file: output/ex5_53.out
299: test:
300: suffix: sell_5
301: nsize: 3
302: args: -mat_type sellcuda -vec_type cuda -test_diagonalscale -test_zeroentries
303: output_file: output/ex5_55.out
304: requires: cuda !complex
306: test:
307: suffix: sell_6
308: nsize: 3
309: args: -mat_type sellcuda -vec_type cuda -mat_sell_spmv_cuda_kernel {{1 2 3 4 5 6}}
310: output_file: output/ex5_56.out
311: requires: cuda !complex
313: test:
314: suffix: sell_7
315: args: -m 32 -mat_type sellcuda -vec_type cuda -mat_sell_spmv_cuda_kernel {{0 7 9}} -mat_sell_spmv_cuda_blocky {{2 4 8 16 32}}
316: output_file: output/ex5_57.out
317: requires: cuda !complex !single
319: test:
320: suffix: sell_8
321: nsize: 3
322: args: -mat_type sellhip -vec_type hip -test_diagonalscale -test_zeroentries
323: filter: sed -e "s/hip/cuda/g"
324: output_file: output/ex5_55.out
325: requires: hip !complex
327: test:
328: suffix: sell_9
329: nsize: 3
330: args: -mat_type sellhip -vec_type hip -mat_sell_spmv_hip_kernel {{1 2 3 4 5 6}}
331: filter: sed -e "s/hip/cuda/g"
332: output_file: output/ex5_56.out
333: requires: hip !complex
335: test:
336: suffix: sell_10
337: args: -m 32 -mat_type sellhip -vec_type hip -mat_sell_spmv_hip_kernel {{0 7 9}} -mat_sell_spmv_hip_blocky {{2 4 8 16 32}}
338: filter: sed -e "s/hip/cuda/g"
339: output_file: output/ex5_57.out
340: requires: hip !complex !single
341: TEST*/