Actual source code: ex226.c
1: static char help[] = "Benchmark for MatMatMult() of AIJ matrices using different 2d finite-difference stencils.\n\n";
3: #include <petscmat.h>
5: /* Converts 3d grid coordinates (i,j,k) for a grid of size m \times n to global indexing. Pass k = 0 for a 2d grid. */
6: PetscInt global_index(PetscInt i, PetscInt j, PetscInt k, PetscInt m, PetscInt n)
7: {
8: return i + j * m + k * m * n;
9: }
11: int main(int argc, char **argv)
12: {
13: Mat A, B, C, PtAP, PtAP_copy, PtAP_squared;
14: PetscInt i, M, N, Istart, Iend, n = 7, j, J, Ii, m = 8, k, o = 1;
15: PetscScalar v;
16: PetscBool equal = PETSC_FALSE, mat_view = PETSC_FALSE;
17: char stencil[PETSC_MAX_PATH_LEN];
18: PetscLogStage fullMatMatMultStage;
20: PetscFunctionBeginUser;
21: PetscCall(PetscInitialize(&argc, &argv, NULL, help));
22: PetscCall(PetscOptionsGetInt(NULL, NULL, "-m", &m, NULL));
23: PetscCall(PetscOptionsGetInt(NULL, NULL, "-n", &n, NULL));
24: PetscCall(PetscOptionsGetInt(NULL, NULL, "-o", &o, NULL));
25: PetscCall(PetscOptionsHasName(NULL, NULL, "-result_view", &mat_view));
26: PetscCall(PetscOptionsGetString(NULL, NULL, "-stencil", stencil, sizeof(stencil), NULL));
28: /* Create a aij matrix A */
29: M = N = m * n * o;
30: PetscCall(MatCreate(PETSC_COMM_WORLD, &A));
31: PetscCall(MatSetSizes(A, PETSC_DECIDE, PETSC_DECIDE, M, N));
32: PetscCall(MatSetType(A, MATAIJ));
33: PetscCall(MatSetFromOptions(A));
35: /* Consistency checks */
36: PetscCheck(o >= 1 && m > 1 && n >= 1, PETSC_COMM_WORLD, PETSC_ERR_USER, "Dimensions need to be larger than zero!");
38: /************ 2D stencils ***************/
39: PetscCall(PetscStrcmp(stencil, "2d5point", &equal));
40: if (equal) { /* 5-point stencil, 2D */
41: PetscCall(MatMPIAIJSetPreallocation(A, 5, NULL, 5, NULL));
42: PetscCall(MatSeqAIJSetPreallocation(A, 5, NULL));
43: PetscCall(MatGetOwnershipRange(A, &Istart, &Iend));
44: for (Ii = Istart; Ii < Iend; Ii++) {
45: v = -1.0;
46: k = Ii / (m * n);
47: j = (Ii - k * m * n) / m;
48: i = (Ii - k * m * n - j * m);
49: if (i > 0) {
50: J = global_index(i - 1, j, k, m, n);
51: PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
52: }
53: if (i < m - 1) {
54: J = global_index(i + 1, j, k, m, n);
55: PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
56: }
57: if (j > 0) {
58: J = global_index(i, j - 1, k, m, n);
59: PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
60: }
61: if (j < n - 1) {
62: J = global_index(i, j + 1, k, m, n);
63: PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
64: }
65: v = 4.0;
66: PetscCall(MatSetValues(A, 1, &Ii, 1, &Ii, &v, INSERT_VALUES));
67: }
68: }
69: PetscCall(PetscStrcmp(stencil, "2d9point", &equal));
70: if (equal) { /* 9-point stencil, 2D */
71: PetscCall(MatMPIAIJSetPreallocation(A, 9, NULL, 9, NULL));
72: PetscCall(MatSeqAIJSetPreallocation(A, 9, NULL));
73: PetscCall(MatGetOwnershipRange(A, &Istart, &Iend));
74: for (Ii = Istart; Ii < Iend; Ii++) {
75: v = -1.0;
76: k = Ii / (m * n);
77: j = (Ii - k * m * n) / m;
78: i = (Ii - k * m * n - j * m);
79: if (i > 0) {
80: J = global_index(i - 1, j, k, m, n);
81: PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
82: }
83: if (i > 0 && j > 0) {
84: J = global_index(i - 1, j - 1, k, m, n);
85: PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
86: }
87: if (j > 0) {
88: J = global_index(i, j - 1, k, m, n);
89: PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
90: }
91: if (i < m - 1 && j > 0) {
92: J = global_index(i + 1, j - 1, k, m, n);
93: PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
94: }
95: if (i < m - 1) {
96: J = global_index(i + 1, j, k, m, n);
97: PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
98: }
99: if (i < m - 1 && j < n - 1) {
100: J = global_index(i + 1, j + 1, k, m, n);
101: PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
102: }
103: if (j < n - 1) {
104: J = global_index(i, j + 1, k, m, n);
105: PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
106: }
107: if (i > 0 && j < n - 1) {
108: J = global_index(i - 1, j + 1, k, m, n);
109: PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
110: }
111: v = 8.0;
112: PetscCall(MatSetValues(A, 1, &Ii, 1, &Ii, &v, INSERT_VALUES));
113: }
114: }
115: PetscCall(PetscStrcmp(stencil, "2d9point2", &equal));
116: if (equal) { /* 9-point Cartesian stencil (width 2 per coordinate), 2D */
117: PetscCall(MatMPIAIJSetPreallocation(A, 9, NULL, 9, NULL));
118: PetscCall(MatSeqAIJSetPreallocation(A, 9, NULL));
119: PetscCall(MatGetOwnershipRange(A, &Istart, &Iend));
120: for (Ii = Istart; Ii < Iend; Ii++) {
121: v = -1.0;
122: k = Ii / (m * n);
123: j = (Ii - k * m * n) / m;
124: i = (Ii - k * m * n - j * m);
125: if (i > 0) {
126: J = global_index(i - 1, j, k, m, n);
127: PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
128: }
129: if (i > 1) {
130: J = global_index(i - 2, j, k, m, n);
131: PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
132: }
133: if (i < m - 1) {
134: J = global_index(i + 1, j, k, m, n);
135: PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
136: }
137: if (i < m - 2) {
138: J = global_index(i + 2, j, k, m, n);
139: PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
140: }
141: if (j > 0) {
142: J = global_index(i, j - 1, k, m, n);
143: PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
144: }
145: if (j > 1) {
146: J = global_index(i, j - 2, k, m, n);
147: PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
148: }
149: if (j < n - 1) {
150: J = global_index(i, j + 1, k, m, n);
151: PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
152: }
153: if (j < n - 2) {
154: J = global_index(i, j + 2, k, m, n);
155: PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
156: }
157: v = 8.0;
158: PetscCall(MatSetValues(A, 1, &Ii, 1, &Ii, &v, INSERT_VALUES));
159: }
160: }
161: PetscCall(PetscStrcmp(stencil, "2d13point", &equal));
162: if (equal) { /* 13-point Cartesian stencil (width 3 per coordinate), 2D */
163: PetscCall(MatMPIAIJSetPreallocation(A, 13, NULL, 13, NULL));
164: PetscCall(MatSeqAIJSetPreallocation(A, 13, NULL));
165: PetscCall(MatGetOwnershipRange(A, &Istart, &Iend));
166: for (Ii = Istart; Ii < Iend; Ii++) {
167: v = -1.0;
168: k = Ii / (m * n);
169: j = (Ii - k * m * n) / m;
170: i = (Ii - k * m * n - j * m);
171: if (i > 0) {
172: J = global_index(i - 1, j, k, m, n);
173: PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
174: }
175: if (i > 1) {
176: J = global_index(i - 2, j, k, m, n);
177: PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
178: }
179: if (i > 2) {
180: J = global_index(i - 3, j, k, m, n);
181: PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
182: }
183: if (i < m - 1) {
184: J = global_index(i + 1, j, k, m, n);
185: PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
186: }
187: if (i < m - 2) {
188: J = global_index(i + 2, j, k, m, n);
189: PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
190: }
191: if (i < m - 3) {
192: J = global_index(i + 3, j, k, m, n);
193: PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
194: }
195: if (j > 0) {
196: J = global_index(i, j - 1, k, m, n);
197: PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
198: }
199: if (j > 1) {
200: J = global_index(i, j - 2, k, m, n);
201: PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
202: }
203: if (j > 2) {
204: J = global_index(i, j - 3, k, m, n);
205: PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
206: }
207: if (j < n - 1) {
208: J = global_index(i, j + 1, k, m, n);
209: PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
210: }
211: if (j < n - 2) {
212: J = global_index(i, j + 2, k, m, n);
213: PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
214: }
215: if (j < n - 3) {
216: J = global_index(i, j + 3, k, m, n);
217: PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
218: }
219: v = 12.0;
220: PetscCall(MatSetValues(A, 1, &Ii, 1, &Ii, &v, INSERT_VALUES));
221: }
222: }
223: /************ 3D stencils ***************/
224: PetscCall(PetscStrcmp(stencil, "3d7point", &equal));
225: if (equal) { /* 7-point stencil, 3D */
226: PetscCall(MatMPIAIJSetPreallocation(A, 7, NULL, 7, NULL));
227: PetscCall(MatSeqAIJSetPreallocation(A, 7, NULL));
228: PetscCall(MatGetOwnershipRange(A, &Istart, &Iend));
229: for (Ii = Istart; Ii < Iend; Ii++) {
230: v = -1.0;
231: k = Ii / (m * n);
232: j = (Ii - k * m * n) / m;
233: i = (Ii - k * m * n - j * m);
234: if (i > 0) {
235: J = global_index(i - 1, j, k, m, n);
236: PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
237: }
238: if (i < m - 1) {
239: J = global_index(i + 1, j, k, m, n);
240: PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
241: }
242: if (j > 0) {
243: J = global_index(i, j - 1, k, m, n);
244: PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
245: }
246: if (j < n - 1) {
247: J = global_index(i, j + 1, k, m, n);
248: PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
249: }
250: if (k > 0) {
251: J = global_index(i, j, k - 1, m, n);
252: PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
253: }
254: if (k < o - 1) {
255: J = global_index(i, j, k + 1, m, n);
256: PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
257: }
258: v = 6.0;
259: PetscCall(MatSetValues(A, 1, &Ii, 1, &Ii, &v, INSERT_VALUES));
260: }
261: }
262: PetscCall(PetscStrcmp(stencil, "3d13point", &equal));
263: if (equal) { /* 13-point stencil, 3D */
264: PetscCall(MatMPIAIJSetPreallocation(A, 13, NULL, 13, NULL));
265: PetscCall(MatSeqAIJSetPreallocation(A, 13, NULL));
266: PetscCall(MatGetOwnershipRange(A, &Istart, &Iend));
267: for (Ii = Istart; Ii < Iend; Ii++) {
268: v = -1.0;
269: k = Ii / (m * n);
270: j = (Ii - k * m * n) / m;
271: i = (Ii - k * m * n - j * m);
272: if (i > 0) {
273: J = global_index(i - 1, j, k, m, n);
274: PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
275: }
276: if (i > 1) {
277: J = global_index(i - 2, j, k, m, n);
278: PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
279: }
280: if (i < m - 1) {
281: J = global_index(i + 1, j, k, m, n);
282: PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
283: }
284: if (i < m - 2) {
285: J = global_index(i + 2, j, k, m, n);
286: PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
287: }
288: if (j > 0) {
289: J = global_index(i, j - 1, k, m, n);
290: PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
291: }
292: if (j > 1) {
293: J = global_index(i, j - 2, k, m, n);
294: PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
295: }
296: if (j < n - 1) {
297: J = global_index(i, j + 1, k, m, n);
298: PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
299: }
300: if (j < n - 2) {
301: J = global_index(i, j + 2, k, m, n);
302: PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
303: }
304: if (k > 0) {
305: J = global_index(i, j, k - 1, m, n);
306: PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
307: }
308: if (k > 1) {
309: J = global_index(i, j, k - 2, m, n);
310: PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
311: }
312: if (k < o - 1) {
313: J = global_index(i, j, k + 1, m, n);
314: PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
315: }
316: if (k < o - 2) {
317: J = global_index(i, j, k + 2, m, n);
318: PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
319: }
320: v = 12.0;
321: PetscCall(MatSetValues(A, 1, &Ii, 1, &Ii, &v, INSERT_VALUES));
322: }
323: }
324: PetscCall(PetscStrcmp(stencil, "3d19point", &equal));
325: if (equal) { /* 19-point stencil, 3D */
326: PetscCall(MatMPIAIJSetPreallocation(A, 19, NULL, 19, NULL));
327: PetscCall(MatSeqAIJSetPreallocation(A, 19, NULL));
328: PetscCall(MatGetOwnershipRange(A, &Istart, &Iend));
329: for (Ii = Istart; Ii < Iend; Ii++) {
330: v = -1.0;
331: k = Ii / (m * n);
332: j = (Ii - k * m * n) / m;
333: i = (Ii - k * m * n - j * m);
334: /* one hop */
335: if (i > 0) {
336: J = global_index(i - 1, j, k, m, n);
337: PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
338: }
339: if (i < m - 1) {
340: J = global_index(i + 1, j, k, m, n);
341: PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
342: }
343: if (j > 0) {
344: J = global_index(i, j - 1, k, m, n);
345: PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
346: }
347: if (j < n - 1) {
348: J = global_index(i, j + 1, k, m, n);
349: PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
350: }
351: if (k > 0) {
352: J = global_index(i, j, k - 1, m, n);
353: PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
354: }
355: if (k < o - 1) {
356: J = global_index(i, j, k + 1, m, n);
357: PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
358: }
359: /* two hops */
360: if (i > 0 && j > 0) {
361: J = global_index(i - 1, j - 1, k, m, n);
362: PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
363: }
364: if (i > 0 && k > 0) {
365: J = global_index(i - 1, j, k - 1, m, n);
366: PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
367: }
368: if (i > 0 && j < n - 1) {
369: J = global_index(i - 1, j + 1, k, m, n);
370: PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
371: }
372: if (i > 0 && k < o - 1) {
373: J = global_index(i - 1, j, k + 1, m, n);
374: PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
375: }
376: if (i < m - 1 && j > 0) {
377: J = global_index(i + 1, j - 1, k, m, n);
378: PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
379: }
380: if (i < m - 1 && k > 0) {
381: J = global_index(i + 1, j, k - 1, m, n);
382: PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
383: }
384: if (i < m - 1 && j < n - 1) {
385: J = global_index(i + 1, j + 1, k, m, n);
386: PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
387: }
388: if (i < m - 1 && k < o - 1) {
389: J = global_index(i + 1, j, k + 1, m, n);
390: PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
391: }
392: if (j > 0 && k > 0) {
393: J = global_index(i, j - 1, k - 1, m, n);
394: PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
395: }
396: if (j > 0 && k < o - 1) {
397: J = global_index(i, j - 1, k + 1, m, n);
398: PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
399: }
400: if (j < n - 1 && k > 0) {
401: J = global_index(i, j + 1, k - 1, m, n);
402: PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
403: }
404: if (j < n - 1 && k < o - 1) {
405: J = global_index(i, j + 1, k + 1, m, n);
406: PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
407: }
408: v = 18.0;
409: PetscCall(MatSetValues(A, 1, &Ii, 1, &Ii, &v, INSERT_VALUES));
410: }
411: }
412: PetscCall(PetscStrcmp(stencil, "3d27point", &equal));
413: if (equal) { /* 27-point stencil, 3D */
414: PetscCall(MatMPIAIJSetPreallocation(A, 27, NULL, 27, NULL));
415: PetscCall(MatSeqAIJSetPreallocation(A, 27, NULL));
416: PetscCall(MatGetOwnershipRange(A, &Istart, &Iend));
417: for (Ii = Istart; Ii < Iend; Ii++) {
418: v = -1.0;
419: k = Ii / (m * n);
420: j = (Ii - k * m * n) / m;
421: i = (Ii - k * m * n - j * m);
422: if (k > 0) {
423: if (j > 0) {
424: if (i > 0) {
425: J = global_index(i - 1, j - 1, k - 1, m, n);
426: PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
427: }
428: J = global_index(i, j - 1, k - 1, m, n);
429: PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
430: if (i < m - 1) {
431: J = global_index(i + 1, j - 1, k - 1, m, n);
432: PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
433: }
434: }
435: {
436: if (i > 0) {
437: J = global_index(i - 1, j, k - 1, m, n);
438: PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
439: }
440: J = global_index(i, j, k - 1, m, n);
441: PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
442: if (i < m - 1) {
443: J = global_index(i + 1, j, k - 1, m, n);
444: PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
445: }
446: }
447: if (j < n - 1) {
448: if (i > 0) {
449: J = global_index(i - 1, j + 1, k - 1, m, n);
450: PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
451: }
452: J = global_index(i, j + 1, k - 1, m, n);
453: PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
454: if (i < m - 1) {
455: J = global_index(i + 1, j + 1, k - 1, m, n);
456: PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
457: }
458: }
459: }
460: {
461: if (j > 0) {
462: if (i > 0) {
463: J = global_index(i - 1, j - 1, k, m, n);
464: PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
465: }
466: J = global_index(i, j - 1, k, m, n);
467: PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
468: if (i < m - 1) {
469: J = global_index(i + 1, j - 1, k, m, n);
470: PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
471: }
472: }
473: {
474: if (i > 0) {
475: J = global_index(i - 1, j, k, m, n);
476: PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
477: }
478: J = global_index(i, j, k, m, n);
479: PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
480: if (i < m - 1) {
481: J = global_index(i + 1, j, k, m, n);
482: PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
483: }
484: }
485: if (j < n - 1) {
486: if (i > 0) {
487: J = global_index(i - 1, j + 1, k, m, n);
488: PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
489: }
490: J = global_index(i, j + 1, k, m, n);
491: PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
492: if (i < m - 1) {
493: J = global_index(i + 1, j + 1, k, m, n);
494: PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
495: }
496: }
497: }
498: if (k < o - 1) {
499: if (j > 0) {
500: if (i > 0) {
501: J = global_index(i - 1, j - 1, k + 1, m, n);
502: PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
503: }
504: J = global_index(i, j - 1, k + 1, m, n);
505: PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
506: if (i < m - 1) {
507: J = global_index(i + 1, j - 1, k + 1, m, n);
508: PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
509: }
510: }
511: {
512: if (i > 0) {
513: J = global_index(i - 1, j, k + 1, m, n);
514: PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
515: }
516: J = global_index(i, j, k + 1, m, n);
517: PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
518: if (i < m - 1) {
519: J = global_index(i + 1, j, k + 1, m, n);
520: PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
521: }
522: }
523: if (j < n - 1) {
524: if (i > 0) {
525: J = global_index(i - 1, j + 1, k + 1, m, n);
526: PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
527: }
528: J = global_index(i, j + 1, k + 1, m, n);
529: PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
530: if (i < m - 1) {
531: J = global_index(i + 1, j + 1, k + 1, m, n);
532: PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
533: }
534: }
535: }
536: v = 26.0;
537: PetscCall(MatSetValues(A, 1, &Ii, 1, &Ii, &v, INSERT_VALUES));
538: }
539: }
540: PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
541: PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
543: /* Copy A into B in order to have a more representative benchmark (A*A has more cache hits than A*B) */
544: PetscCall(MatDuplicate(A, MAT_COPY_VALUES, &B));
546: PetscCall(PetscLogStageRegister("Full MatMatMult", &fullMatMatMultStage));
548: /* Test C = A*B */
549: PetscCall(PetscLogStagePush(fullMatMatMultStage));
550: PetscCall(MatMatMult(A, B, MAT_INITIAL_MATRIX, PETSC_DETERMINE, &C));
552: /* Test PtAP_squared = PtAP(C,C)*PtAP(C,C) */
553: PetscCall(MatPtAP(C, C, MAT_INITIAL_MATRIX, PETSC_DETERMINE, &PtAP));
554: PetscCall(MatDuplicate(PtAP, MAT_COPY_VALUES, &PtAP_copy));
555: PetscCall(MatMatMult(PtAP, PtAP_copy, MAT_INITIAL_MATRIX, PETSC_DETERMINE, &PtAP_squared));
557: PetscCall(MatView(C, PETSC_VIEWER_STDOUT_WORLD));
558: PetscCall(MatView(PtAP_squared, PETSC_VIEWER_STDOUT_WORLD));
560: PetscCall(MatDestroy(&PtAP_squared));
561: PetscCall(MatDestroy(&PtAP_copy));
562: PetscCall(MatDestroy(&PtAP));
563: PetscCall(MatDestroy(&C));
564: PetscCall(MatDestroy(&B));
565: PetscCall(MatDestroy(&A));
566: PetscCall(PetscFinalize());
567: return 0;
568: }
570: /*TEST
572: test:
573: suffix: 1
574: nsize: 1
575: args: -m 8 -n 8 -stencil 2d5point -matmatmult_via sorted
577: test:
578: suffix: 2
579: nsize: 1
580: args: -m 5 -n 5 -o 5 -stencil 3d27point -matmatmult_via rowmerge
582: test:
583: suffix: 3
584: nsize: 4
585: args: -m 6 -n 6 -stencil 2d5point -matmatmult_via seqmpi
587: TEST*/