Actual source code: ex226.c

  1: static char help[] = "Benchmark for MatMatMult() of AIJ matrices using different 2d finite-difference stencils.\n\n";

  3: #include <petscmat.h>

  5: /* Converts 3d grid coordinates (i,j,k) for a grid of size m \times n to global indexing. Pass k = 0 for a 2d grid. */
  6: PetscInt global_index(PetscInt i, PetscInt j, PetscInt k, PetscInt m, PetscInt n)
  7: {
  8:   return i + j * m + k * m * n;
  9: }

 11: int main(int argc, char **argv)
 12: {
 13:   Mat           A, B, C, PtAP, PtAP_copy, PtAP_squared;
 14:   PetscInt      i, M, N, Istart, Iend, n = 7, j, J, Ii, m = 8, k, o = 1;
 15:   PetscScalar   v;
 16:   PetscBool     equal = PETSC_FALSE, mat_view = PETSC_FALSE;
 17:   char          stencil[PETSC_MAX_PATH_LEN];
 18:   PetscLogStage fullMatMatMultStage;

 20:   PetscFunctionBeginUser;
 21:   PetscCall(PetscInitialize(&argc, &argv, NULL, help));
 22:   PetscCall(PetscOptionsGetInt(NULL, NULL, "-m", &m, NULL));
 23:   PetscCall(PetscOptionsGetInt(NULL, NULL, "-n", &n, NULL));
 24:   PetscCall(PetscOptionsGetInt(NULL, NULL, "-o", &o, NULL));
 25:   PetscCall(PetscOptionsHasName(NULL, NULL, "-result_view", &mat_view));
 26:   PetscCall(PetscOptionsGetString(NULL, NULL, "-stencil", stencil, sizeof(stencil), NULL));

 28:   /* Create a aij matrix A */
 29:   M = N = m * n * o;
 30:   PetscCall(MatCreate(PETSC_COMM_WORLD, &A));
 31:   PetscCall(MatSetSizes(A, PETSC_DECIDE, PETSC_DECIDE, M, N));
 32:   PetscCall(MatSetType(A, MATAIJ));
 33:   PetscCall(MatSetFromOptions(A));

 35:   /* Consistency checks */
 36:   PetscCheck(o >= 1 && m > 1 && n >= 1, PETSC_COMM_WORLD, PETSC_ERR_USER, "Dimensions need to be larger than zero!");

 38:   /************ 2D stencils ***************/
 39:   PetscCall(PetscStrcmp(stencil, "2d5point", &equal));
 40:   if (equal) { /* 5-point stencil, 2D */
 41:     PetscCall(MatMPIAIJSetPreallocation(A, 5, NULL, 5, NULL));
 42:     PetscCall(MatSeqAIJSetPreallocation(A, 5, NULL));
 43:     PetscCall(MatGetOwnershipRange(A, &Istart, &Iend));
 44:     for (Ii = Istart; Ii < Iend; Ii++) {
 45:       v = -1.0;
 46:       k = Ii / (m * n);
 47:       j = (Ii - k * m * n) / m;
 48:       i = (Ii - k * m * n - j * m);
 49:       if (i > 0) {
 50:         J = global_index(i - 1, j, k, m, n);
 51:         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
 52:       }
 53:       if (i < m - 1) {
 54:         J = global_index(i + 1, j, k, m, n);
 55:         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
 56:       }
 57:       if (j > 0) {
 58:         J = global_index(i, j - 1, k, m, n);
 59:         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
 60:       }
 61:       if (j < n - 1) {
 62:         J = global_index(i, j + 1, k, m, n);
 63:         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
 64:       }
 65:       v = 4.0;
 66:       PetscCall(MatSetValues(A, 1, &Ii, 1, &Ii, &v, INSERT_VALUES));
 67:     }
 68:   }
 69:   PetscCall(PetscStrcmp(stencil, "2d9point", &equal));
 70:   if (equal) { /* 9-point stencil, 2D */
 71:     PetscCall(MatMPIAIJSetPreallocation(A, 9, NULL, 9, NULL));
 72:     PetscCall(MatSeqAIJSetPreallocation(A, 9, NULL));
 73:     PetscCall(MatGetOwnershipRange(A, &Istart, &Iend));
 74:     for (Ii = Istart; Ii < Iend; Ii++) {
 75:       v = -1.0;
 76:       k = Ii / (m * n);
 77:       j = (Ii - k * m * n) / m;
 78:       i = (Ii - k * m * n - j * m);
 79:       if (i > 0) {
 80:         J = global_index(i - 1, j, k, m, n);
 81:         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
 82:       }
 83:       if (i > 0 && j > 0) {
 84:         J = global_index(i - 1, j - 1, k, m, n);
 85:         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
 86:       }
 87:       if (j > 0) {
 88:         J = global_index(i, j - 1, k, m, n);
 89:         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
 90:       }
 91:       if (i < m - 1 && j > 0) {
 92:         J = global_index(i + 1, j - 1, k, m, n);
 93:         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
 94:       }
 95:       if (i < m - 1) {
 96:         J = global_index(i + 1, j, k, m, n);
 97:         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
 98:       }
 99:       if (i < m - 1 && j < n - 1) {
100:         J = global_index(i + 1, j + 1, k, m, n);
101:         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
102:       }
103:       if (j < n - 1) {
104:         J = global_index(i, j + 1, k, m, n);
105:         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
106:       }
107:       if (i > 0 && j < n - 1) {
108:         J = global_index(i - 1, j + 1, k, m, n);
109:         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
110:       }
111:       v = 8.0;
112:       PetscCall(MatSetValues(A, 1, &Ii, 1, &Ii, &v, INSERT_VALUES));
113:     }
114:   }
115:   PetscCall(PetscStrcmp(stencil, "2d9point2", &equal));
116:   if (equal) { /* 9-point Cartesian stencil (width 2 per coordinate), 2D */
117:     PetscCall(MatMPIAIJSetPreallocation(A, 9, NULL, 9, NULL));
118:     PetscCall(MatSeqAIJSetPreallocation(A, 9, NULL));
119:     PetscCall(MatGetOwnershipRange(A, &Istart, &Iend));
120:     for (Ii = Istart; Ii < Iend; Ii++) {
121:       v = -1.0;
122:       k = Ii / (m * n);
123:       j = (Ii - k * m * n) / m;
124:       i = (Ii - k * m * n - j * m);
125:       if (i > 0) {
126:         J = global_index(i - 1, j, k, m, n);
127:         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
128:       }
129:       if (i > 1) {
130:         J = global_index(i - 2, j, k, m, n);
131:         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
132:       }
133:       if (i < m - 1) {
134:         J = global_index(i + 1, j, k, m, n);
135:         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
136:       }
137:       if (i < m - 2) {
138:         J = global_index(i + 2, j, k, m, n);
139:         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
140:       }
141:       if (j > 0) {
142:         J = global_index(i, j - 1, k, m, n);
143:         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
144:       }
145:       if (j > 1) {
146:         J = global_index(i, j - 2, k, m, n);
147:         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
148:       }
149:       if (j < n - 1) {
150:         J = global_index(i, j + 1, k, m, n);
151:         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
152:       }
153:       if (j < n - 2) {
154:         J = global_index(i, j + 2, k, m, n);
155:         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
156:       }
157:       v = 8.0;
158:       PetscCall(MatSetValues(A, 1, &Ii, 1, &Ii, &v, INSERT_VALUES));
159:     }
160:   }
161:   PetscCall(PetscStrcmp(stencil, "2d13point", &equal));
162:   if (equal) { /* 13-point Cartesian stencil (width 3 per coordinate), 2D */
163:     PetscCall(MatMPIAIJSetPreallocation(A, 13, NULL, 13, NULL));
164:     PetscCall(MatSeqAIJSetPreallocation(A, 13, NULL));
165:     PetscCall(MatGetOwnershipRange(A, &Istart, &Iend));
166:     for (Ii = Istart; Ii < Iend; Ii++) {
167:       v = -1.0;
168:       k = Ii / (m * n);
169:       j = (Ii - k * m * n) / m;
170:       i = (Ii - k * m * n - j * m);
171:       if (i > 0) {
172:         J = global_index(i - 1, j, k, m, n);
173:         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
174:       }
175:       if (i > 1) {
176:         J = global_index(i - 2, j, k, m, n);
177:         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
178:       }
179:       if (i > 2) {
180:         J = global_index(i - 3, j, k, m, n);
181:         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
182:       }
183:       if (i < m - 1) {
184:         J = global_index(i + 1, j, k, m, n);
185:         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
186:       }
187:       if (i < m - 2) {
188:         J = global_index(i + 2, j, k, m, n);
189:         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
190:       }
191:       if (i < m - 3) {
192:         J = global_index(i + 3, j, k, m, n);
193:         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
194:       }
195:       if (j > 0) {
196:         J = global_index(i, j - 1, k, m, n);
197:         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
198:       }
199:       if (j > 1) {
200:         J = global_index(i, j - 2, k, m, n);
201:         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
202:       }
203:       if (j > 2) {
204:         J = global_index(i, j - 3, k, m, n);
205:         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
206:       }
207:       if (j < n - 1) {
208:         J = global_index(i, j + 1, k, m, n);
209:         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
210:       }
211:       if (j < n - 2) {
212:         J = global_index(i, j + 2, k, m, n);
213:         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
214:       }
215:       if (j < n - 3) {
216:         J = global_index(i, j + 3, k, m, n);
217:         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
218:       }
219:       v = 12.0;
220:       PetscCall(MatSetValues(A, 1, &Ii, 1, &Ii, &v, INSERT_VALUES));
221:     }
222:   }
223:   /************ 3D stencils ***************/
224:   PetscCall(PetscStrcmp(stencil, "3d7point", &equal));
225:   if (equal) { /* 7-point stencil, 3D */
226:     PetscCall(MatMPIAIJSetPreallocation(A, 7, NULL, 7, NULL));
227:     PetscCall(MatSeqAIJSetPreallocation(A, 7, NULL));
228:     PetscCall(MatGetOwnershipRange(A, &Istart, &Iend));
229:     for (Ii = Istart; Ii < Iend; Ii++) {
230:       v = -1.0;
231:       k = Ii / (m * n);
232:       j = (Ii - k * m * n) / m;
233:       i = (Ii - k * m * n - j * m);
234:       if (i > 0) {
235:         J = global_index(i - 1, j, k, m, n);
236:         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
237:       }
238:       if (i < m - 1) {
239:         J = global_index(i + 1, j, k, m, n);
240:         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
241:       }
242:       if (j > 0) {
243:         J = global_index(i, j - 1, k, m, n);
244:         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
245:       }
246:       if (j < n - 1) {
247:         J = global_index(i, j + 1, k, m, n);
248:         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
249:       }
250:       if (k > 0) {
251:         J = global_index(i, j, k - 1, m, n);
252:         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
253:       }
254:       if (k < o - 1) {
255:         J = global_index(i, j, k + 1, m, n);
256:         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
257:       }
258:       v = 6.0;
259:       PetscCall(MatSetValues(A, 1, &Ii, 1, &Ii, &v, INSERT_VALUES));
260:     }
261:   }
262:   PetscCall(PetscStrcmp(stencil, "3d13point", &equal));
263:   if (equal) { /* 13-point stencil, 3D */
264:     PetscCall(MatMPIAIJSetPreallocation(A, 13, NULL, 13, NULL));
265:     PetscCall(MatSeqAIJSetPreallocation(A, 13, NULL));
266:     PetscCall(MatGetOwnershipRange(A, &Istart, &Iend));
267:     for (Ii = Istart; Ii < Iend; Ii++) {
268:       v = -1.0;
269:       k = Ii / (m * n);
270:       j = (Ii - k * m * n) / m;
271:       i = (Ii - k * m * n - j * m);
272:       if (i > 0) {
273:         J = global_index(i - 1, j, k, m, n);
274:         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
275:       }
276:       if (i > 1) {
277:         J = global_index(i - 2, j, k, m, n);
278:         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
279:       }
280:       if (i < m - 1) {
281:         J = global_index(i + 1, j, k, m, n);
282:         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
283:       }
284:       if (i < m - 2) {
285:         J = global_index(i + 2, j, k, m, n);
286:         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
287:       }
288:       if (j > 0) {
289:         J = global_index(i, j - 1, k, m, n);
290:         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
291:       }
292:       if (j > 1) {
293:         J = global_index(i, j - 2, k, m, n);
294:         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
295:       }
296:       if (j < n - 1) {
297:         J = global_index(i, j + 1, k, m, n);
298:         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
299:       }
300:       if (j < n - 2) {
301:         J = global_index(i, j + 2, k, m, n);
302:         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
303:       }
304:       if (k > 0) {
305:         J = global_index(i, j, k - 1, m, n);
306:         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
307:       }
308:       if (k > 1) {
309:         J = global_index(i, j, k - 2, m, n);
310:         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
311:       }
312:       if (k < o - 1) {
313:         J = global_index(i, j, k + 1, m, n);
314:         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
315:       }
316:       if (k < o - 2) {
317:         J = global_index(i, j, k + 2, m, n);
318:         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
319:       }
320:       v = 12.0;
321:       PetscCall(MatSetValues(A, 1, &Ii, 1, &Ii, &v, INSERT_VALUES));
322:     }
323:   }
324:   PetscCall(PetscStrcmp(stencil, "3d19point", &equal));
325:   if (equal) { /* 19-point stencil, 3D */
326:     PetscCall(MatMPIAIJSetPreallocation(A, 19, NULL, 19, NULL));
327:     PetscCall(MatSeqAIJSetPreallocation(A, 19, NULL));
328:     PetscCall(MatGetOwnershipRange(A, &Istart, &Iend));
329:     for (Ii = Istart; Ii < Iend; Ii++) {
330:       v = -1.0;
331:       k = Ii / (m * n);
332:       j = (Ii - k * m * n) / m;
333:       i = (Ii - k * m * n - j * m);
334:       /* one hop */
335:       if (i > 0) {
336:         J = global_index(i - 1, j, k, m, n);
337:         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
338:       }
339:       if (i < m - 1) {
340:         J = global_index(i + 1, j, k, m, n);
341:         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
342:       }
343:       if (j > 0) {
344:         J = global_index(i, j - 1, k, m, n);
345:         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
346:       }
347:       if (j < n - 1) {
348:         J = global_index(i, j + 1, k, m, n);
349:         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
350:       }
351:       if (k > 0) {
352:         J = global_index(i, j, k - 1, m, n);
353:         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
354:       }
355:       if (k < o - 1) {
356:         J = global_index(i, j, k + 1, m, n);
357:         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
358:       }
359:       /* two hops */
360:       if (i > 0 && j > 0) {
361:         J = global_index(i - 1, j - 1, k, m, n);
362:         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
363:       }
364:       if (i > 0 && k > 0) {
365:         J = global_index(i - 1, j, k - 1, m, n);
366:         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
367:       }
368:       if (i > 0 && j < n - 1) {
369:         J = global_index(i - 1, j + 1, k, m, n);
370:         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
371:       }
372:       if (i > 0 && k < o - 1) {
373:         J = global_index(i - 1, j, k + 1, m, n);
374:         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
375:       }
376:       if (i < m - 1 && j > 0) {
377:         J = global_index(i + 1, j - 1, k, m, n);
378:         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
379:       }
380:       if (i < m - 1 && k > 0) {
381:         J = global_index(i + 1, j, k - 1, m, n);
382:         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
383:       }
384:       if (i < m - 1 && j < n - 1) {
385:         J = global_index(i + 1, j + 1, k, m, n);
386:         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
387:       }
388:       if (i < m - 1 && k < o - 1) {
389:         J = global_index(i + 1, j, k + 1, m, n);
390:         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
391:       }
392:       if (j > 0 && k > 0) {
393:         J = global_index(i, j - 1, k - 1, m, n);
394:         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
395:       }
396:       if (j > 0 && k < o - 1) {
397:         J = global_index(i, j - 1, k + 1, m, n);
398:         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
399:       }
400:       if (j < n - 1 && k > 0) {
401:         J = global_index(i, j + 1, k - 1, m, n);
402:         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
403:       }
404:       if (j < n - 1 && k < o - 1) {
405:         J = global_index(i, j + 1, k + 1, m, n);
406:         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
407:       }
408:       v = 18.0;
409:       PetscCall(MatSetValues(A, 1, &Ii, 1, &Ii, &v, INSERT_VALUES));
410:     }
411:   }
412:   PetscCall(PetscStrcmp(stencil, "3d27point", &equal));
413:   if (equal) { /* 27-point stencil, 3D */
414:     PetscCall(MatMPIAIJSetPreallocation(A, 27, NULL, 27, NULL));
415:     PetscCall(MatSeqAIJSetPreallocation(A, 27, NULL));
416:     PetscCall(MatGetOwnershipRange(A, &Istart, &Iend));
417:     for (Ii = Istart; Ii < Iend; Ii++) {
418:       v = -1.0;
419:       k = Ii / (m * n);
420:       j = (Ii - k * m * n) / m;
421:       i = (Ii - k * m * n - j * m);
422:       if (k > 0) {
423:         if (j > 0) {
424:           if (i > 0) {
425:             J = global_index(i - 1, j - 1, k - 1, m, n);
426:             PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
427:           }
428:           J = global_index(i, j - 1, k - 1, m, n);
429:           PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
430:           if (i < m - 1) {
431:             J = global_index(i + 1, j - 1, k - 1, m, n);
432:             PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
433:           }
434:         }
435:         {
436:           if (i > 0) {
437:             J = global_index(i - 1, j, k - 1, m, n);
438:             PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
439:           }
440:           J = global_index(i, j, k - 1, m, n);
441:           PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
442:           if (i < m - 1) {
443:             J = global_index(i + 1, j, k - 1, m, n);
444:             PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
445:           }
446:         }
447:         if (j < n - 1) {
448:           if (i > 0) {
449:             J = global_index(i - 1, j + 1, k - 1, m, n);
450:             PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
451:           }
452:           J = global_index(i, j + 1, k - 1, m, n);
453:           PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
454:           if (i < m - 1) {
455:             J = global_index(i + 1, j + 1, k - 1, m, n);
456:             PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
457:           }
458:         }
459:       }
460:       {
461:         if (j > 0) {
462:           if (i > 0) {
463:             J = global_index(i - 1, j - 1, k, m, n);
464:             PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
465:           }
466:           J = global_index(i, j - 1, k, m, n);
467:           PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
468:           if (i < m - 1) {
469:             J = global_index(i + 1, j - 1, k, m, n);
470:             PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
471:           }
472:         }
473:         {
474:           if (i > 0) {
475:             J = global_index(i - 1, j, k, m, n);
476:             PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
477:           }
478:           J = global_index(i, j, k, m, n);
479:           PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
480:           if (i < m - 1) {
481:             J = global_index(i + 1, j, k, m, n);
482:             PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
483:           }
484:         }
485:         if (j < n - 1) {
486:           if (i > 0) {
487:             J = global_index(i - 1, j + 1, k, m, n);
488:             PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
489:           }
490:           J = global_index(i, j + 1, k, m, n);
491:           PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
492:           if (i < m - 1) {
493:             J = global_index(i + 1, j + 1, k, m, n);
494:             PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
495:           }
496:         }
497:       }
498:       if (k < o - 1) {
499:         if (j > 0) {
500:           if (i > 0) {
501:             J = global_index(i - 1, j - 1, k + 1, m, n);
502:             PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
503:           }
504:           J = global_index(i, j - 1, k + 1, m, n);
505:           PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
506:           if (i < m - 1) {
507:             J = global_index(i + 1, j - 1, k + 1, m, n);
508:             PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
509:           }
510:         }
511:         {
512:           if (i > 0) {
513:             J = global_index(i - 1, j, k + 1, m, n);
514:             PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
515:           }
516:           J = global_index(i, j, k + 1, m, n);
517:           PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
518:           if (i < m - 1) {
519:             J = global_index(i + 1, j, k + 1, m, n);
520:             PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
521:           }
522:         }
523:         if (j < n - 1) {
524:           if (i > 0) {
525:             J = global_index(i - 1, j + 1, k + 1, m, n);
526:             PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
527:           }
528:           J = global_index(i, j + 1, k + 1, m, n);
529:           PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
530:           if (i < m - 1) {
531:             J = global_index(i + 1, j + 1, k + 1, m, n);
532:             PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
533:           }
534:         }
535:       }
536:       v = 26.0;
537:       PetscCall(MatSetValues(A, 1, &Ii, 1, &Ii, &v, INSERT_VALUES));
538:     }
539:   }
540:   PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
541:   PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));

543:   /* Copy A into B in order to have a more representative benchmark (A*A has more cache hits than A*B) */
544:   PetscCall(MatDuplicate(A, MAT_COPY_VALUES, &B));

546:   PetscCall(PetscLogStageRegister("Full MatMatMult", &fullMatMatMultStage));

548:   /* Test C = A*B */
549:   PetscCall(PetscLogStagePush(fullMatMatMultStage));
550:   PetscCall(MatMatMult(A, B, MAT_INITIAL_MATRIX, PETSC_DETERMINE, &C));

552:   /* Test PtAP_squared = PtAP(C,C)*PtAP(C,C)  */
553:   PetscCall(MatPtAP(C, C, MAT_INITIAL_MATRIX, PETSC_DETERMINE, &PtAP));
554:   PetscCall(MatDuplicate(PtAP, MAT_COPY_VALUES, &PtAP_copy));
555:   PetscCall(MatMatMult(PtAP, PtAP_copy, MAT_INITIAL_MATRIX, PETSC_DETERMINE, &PtAP_squared));

557:   PetscCall(MatView(C, PETSC_VIEWER_STDOUT_WORLD));
558:   PetscCall(MatView(PtAP_squared, PETSC_VIEWER_STDOUT_WORLD));

560:   PetscCall(MatDestroy(&PtAP_squared));
561:   PetscCall(MatDestroy(&PtAP_copy));
562:   PetscCall(MatDestroy(&PtAP));
563:   PetscCall(MatDestroy(&C));
564:   PetscCall(MatDestroy(&B));
565:   PetscCall(MatDestroy(&A));
566:   PetscCall(PetscFinalize());
567:   return 0;
568: }

570: /*TEST

572:  test:
573:       suffix: 1
574:       nsize: 1
575:       args: -m 8 -n 8 -stencil 2d5point -matmatmult_via sorted

577:  test:
578:        suffix: 2
579:        nsize: 1
580:        args: -m 5 -n 5 -o 5 -stencil 3d27point -matmatmult_via rowmerge

582:  test:
583:       suffix: 3
584:       nsize: 4
585:       args: -m 6 -n 6 -stencil 2d5point -matmatmult_via seqmpi

587: TEST*/