Actual source code: matstash.c

  1: #include <petsc/private/matimpl.h>

  3: #define DEFAULT_STASH_SIZE 10000

  5: static PetscErrorCode       MatStashScatterBegin_Ref(Mat, MatStash *, PetscInt *);
  6: PETSC_INTERN PetscErrorCode MatStashScatterGetMesg_Ref(MatStash *, PetscMPIInt *, PetscInt **, PetscInt **, PetscScalar **, PetscInt *);
  7: PETSC_INTERN PetscErrorCode MatStashScatterEnd_Ref(MatStash *);
  8: #if !defined(PETSC_HAVE_MPIUNI)
  9: static PetscErrorCode MatStashScatterBegin_BTS(Mat, MatStash *, PetscInt *);
 10: static PetscErrorCode MatStashScatterGetMesg_BTS(MatStash *, PetscMPIInt *, PetscInt **, PetscInt **, PetscScalar **, PetscInt *);
 11: static PetscErrorCode MatStashScatterEnd_BTS(MatStash *);
 12: #endif

 14: /*
 15:   MatStashCreate_Private - Creates a stash, currently used for all the parallel
 16:   matrix implementations. The stash is where elements of a matrix destined
 17:   to be stored on other processors are kept until matrix assembly is done.

 19:   This is a simple-minded stash; it simply adds entries to the end of the stash.

 21:   Input Parameters:
 22:   comm - communicator, required for scatters.
 23:   bs   - stash block size, used when stashing blocks of values

 25:   Output Parameter:
 26:   stash    - the newly created stash
 27: */
 28: PetscErrorCode MatStashCreate_Private(MPI_Comm comm, PetscInt bs, MatStash *stash)
 29: {
 30:   PetscInt  max, *opt, nopt, i;
 31:   PetscBool flg;

 33:   PetscFunctionBegin;
 34:   /* Require 2 tags, get the second using PetscCommGetNewTag() */
 35:   stash->comm = comm;

 37:   PetscCall(PetscCommGetNewTag(stash->comm, &stash->tag1));
 38:   PetscCall(PetscCommGetNewTag(stash->comm, &stash->tag2));
 39:   PetscCallMPI(MPI_Comm_size(stash->comm, &stash->size));
 40:   PetscCallMPI(MPI_Comm_rank(stash->comm, &stash->rank));
 41:   PetscCall(PetscMalloc1(2 * stash->size, &stash->flg_v));
 42:   for (i = 0; i < 2 * stash->size; i++) stash->flg_v[i] = -1;

 44:   nopt = stash->size;
 45:   PetscCall(PetscMalloc1(nopt, &opt));
 46:   PetscCall(PetscOptionsGetIntArray(NULL, NULL, "-matstash_initial_size", opt, &nopt, &flg));
 47:   if (flg) {
 48:     if (nopt == 1) max = opt[0];
 49:     else if (nopt == stash->size) max = opt[stash->rank];
 50:     else if (stash->rank < nopt) max = opt[stash->rank];
 51:     else max = 0; /* Use default */
 52:     stash->umax = max;
 53:   } else {
 54:     stash->umax = 0;
 55:   }
 56:   PetscCall(PetscFree(opt));
 57:   if (bs <= 0) bs = 1;

 59:   stash->bs         = bs;
 60:   stash->nmax       = 0;
 61:   stash->oldnmax    = 0;
 62:   stash->n          = 0;
 63:   stash->reallocs   = -1;
 64:   stash->space_head = NULL;
 65:   stash->space      = NULL;

 67:   stash->send_waits  = NULL;
 68:   stash->recv_waits  = NULL;
 69:   stash->send_status = NULL;
 70:   stash->nsends      = 0;
 71:   stash->nrecvs      = 0;
 72:   stash->svalues     = NULL;
 73:   stash->rvalues     = NULL;
 74:   stash->rindices    = NULL;
 75:   stash->nprocessed  = 0;
 76:   stash->reproduce   = PETSC_FALSE;
 77:   stash->blocktype   = MPI_DATATYPE_NULL;

 79:   PetscCall(PetscOptionsGetBool(NULL, NULL, "-matstash_reproduce", &stash->reproduce, NULL));
 80: #if !defined(PETSC_HAVE_MPIUNI)
 81:   flg = PETSC_FALSE;
 82:   PetscCall(PetscOptionsGetBool(NULL, NULL, "-matstash_legacy", &flg, NULL));
 83:   if (!flg) {
 84:     stash->ScatterBegin   = MatStashScatterBegin_BTS;
 85:     stash->ScatterGetMesg = MatStashScatterGetMesg_BTS;
 86:     stash->ScatterEnd     = MatStashScatterEnd_BTS;
 87:     stash->ScatterDestroy = MatStashScatterDestroy_BTS;
 88:   } else {
 89: #endif
 90:     stash->ScatterBegin   = MatStashScatterBegin_Ref;
 91:     stash->ScatterGetMesg = MatStashScatterGetMesg_Ref;
 92:     stash->ScatterEnd     = MatStashScatterEnd_Ref;
 93:     stash->ScatterDestroy = NULL;
 94: #if !defined(PETSC_HAVE_MPIUNI)
 95:   }
 96: #endif
 97:   PetscFunctionReturn(PETSC_SUCCESS);
 98: }
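
/*
   Illustrative example (not part of the original source): the -matstash_initial_size
   option parsed above accepts either a single value, applied on every rank, or a
   comma-separated array giving one value per rank (a 0 entry falls back to the default).
   The executable name below is made up; only the option name comes from this file.

     mpiexec -n 4 ./app -matstash_initial_size 5000            (same size on all ranks)
     mpiexec -n 4 ./app -matstash_initial_size 8000,0,0,2000   (per-rank sizes; 0 = default)
*/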

100: /*
101:    MatStashDestroy_Private - Destroy the stash
102: */
103: PetscErrorCode MatStashDestroy_Private(MatStash *stash)
104: {
105:   PetscFunctionBegin;
106:   PetscCall(PetscMatStashSpaceDestroy(&stash->space_head));
107:   if (stash->ScatterDestroy) PetscCall((*stash->ScatterDestroy)(stash));
108:   stash->space = NULL;
109:   PetscCall(PetscFree(stash->flg_v));
110:   PetscFunctionReturn(PETSC_SUCCESS);
111: }

113: /*
114:    MatStashScatterEnd_Private - This is called as the final stage of
115:    scatter. The final stages of message passing is done here, and
116:    all the memory used for message passing is cleaned up. This
117:    routine also resets the stash, and deallocates the memory used
118:    for the stash. It also keeps track of the current memory usage
119:    so that the same value can be used the next time through.
120: */
121: PetscErrorCode MatStashScatterEnd_Private(MatStash *stash)
122: {
123:   PetscFunctionBegin;
124:   PetscCall((*stash->ScatterEnd)(stash));
125:   PetscFunctionReturn(PETSC_SUCCESS);
126: }

128: PETSC_INTERN PetscErrorCode MatStashScatterEnd_Ref(MatStash *stash)
129: {
130:   PetscMPIInt nsends = stash->nsends;
131:   PetscInt    bs2, oldnmax;
132:   MPI_Status *send_status;

134:   PetscFunctionBegin;
135:   for (PetscMPIInt i = 0; i < 2 * stash->size; i++) stash->flg_v[i] = -1;
136:   /* wait on sends */
137:   if (nsends) {
138:     PetscCall(PetscMalloc1(2 * nsends, &send_status));
139:     PetscCallMPI(MPI_Waitall(2 * nsends, stash->send_waits, send_status));
140:     PetscCall(PetscFree(send_status));
141:   }

143:   /* Now update oldnmax to be approximately 10% more than the maximum n used, so that
144:      the wasted space is reduced the next time this stash is used.
145:      Only update oldnmax if it increases */
146:   if (stash->n) {
147:     bs2     = stash->bs * stash->bs;
148:     oldnmax = ((int)(stash->n * 1.1) + 5) * bs2;
149:     if (oldnmax > stash->oldnmax) stash->oldnmax = oldnmax;
150:   }

152:   stash->nmax       = 0;
153:   stash->n          = 0;
154:   stash->reallocs   = -1;
155:   stash->nprocessed = 0;

157:   PetscCall(PetscMatStashSpaceDestroy(&stash->space_head));

159:   stash->space = NULL;

161:   PetscCall(PetscFree(stash->send_waits));
162:   PetscCall(PetscFree(stash->recv_waits));
163:   PetscCall(PetscFree2(stash->svalues, stash->sindices));
164:   PetscCall(PetscFree(stash->rvalues[0]));
165:   PetscCall(PetscFree(stash->rvalues));
166:   PetscCall(PetscFree(stash->rindices[0]));
167:   PetscCall(PetscFree(stash->rindices));
168:   PetscFunctionReturn(PETSC_SUCCESS);
169: }

171: /*
172:    MatStashGetInfo_Private - Gets the relevant statistics of the stash

174:    Input Parameter:
175:    stash    - the stash

176:    Output Parameters:
177:    nstash   - the number of values currently stored in the stash
178:    reallocs - the number of additional mallocs incurred

179: */
180: PetscErrorCode MatStashGetInfo_Private(MatStash *stash, PetscInt *nstash, PetscInt *reallocs)
181: {
182:   PetscInt bs2 = stash->bs * stash->bs;

184:   PetscFunctionBegin;
185:   if (nstash) *nstash = stash->n * bs2;
186:   if (reallocs) {
187:     if (stash->reallocs < 0) *reallocs = 0;
188:     else *reallocs = stash->reallocs;
189:   }
190:   PetscFunctionReturn(PETSC_SUCCESS);
191: }
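
/*
   Illustrative sketch (not part of the original source): querying these statistics after
   communication, e.g. to log how much data was stashed.  The Mat pointer `mat` and its
   `stash` member are assumptions here; only MatStashGetInfo_Private() itself comes from
   this file.

     PetscInt nstash, reallocs;
     PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
     PetscCall(PetscInfo(mat, "Stash holds %" PetscInt_FMT " values and needed %" PetscInt_FMT " extra mallocs\n", nstash, reallocs));
*/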

193: /*
194:    MatStashSetInitialSize_Private - Sets the initial size of the stash

196:    Input Parameters:
197:    stash  - the stash
198:    max    - the value that is used as the max size of the stash.
199:             this value is used while allocating memory.
200: */
201: PetscErrorCode MatStashSetInitialSize_Private(MatStash *stash, PetscInt max)
202: {
203:   PetscFunctionBegin;
204:   stash->umax = max;
205:   PetscFunctionReturn(PETSC_SUCCESS);
206: }

208: /* MatStashExpand_Private - Expand the stash. This function is called
209:    when the space in the stash is not sufficient to add the new values
210:    being inserted into the stash.

212:    Input Parameters:
213:    stash - the stash
214:    incr  - the minimum increase requested

216:    Note:
217:    This routine doubles the currently used memory.
218:  */
219: static PetscErrorCode MatStashExpand_Private(MatStash *stash, PetscInt incr)
220: {
221:   PetscInt   newnmax, bs2 = stash->bs * stash->bs;
222:   PetscCount cnewnmax;

224:   PetscFunctionBegin;
225:   /* allocate a larger stash */
226:   if (!stash->oldnmax && !stash->nmax) { /* new stash */
227:     if (stash->umax) cnewnmax = stash->umax / bs2;
228:     else cnewnmax = DEFAULT_STASH_SIZE / bs2;
229:   } else if (!stash->nmax) { /* reusing stash */
230:     if (stash->umax > stash->oldnmax) cnewnmax = stash->umax / bs2;
231:     else cnewnmax = stash->oldnmax / bs2;
232:   } else cnewnmax = stash->nmax * 2;
233:   if (cnewnmax < (stash->nmax + incr)) cnewnmax += 2 * incr;
234:   PetscCall(PetscIntCast(cnewnmax, &newnmax));

236:   /* Get a MatStashSpace and attach it to stash */
237:   PetscCall(PetscMatStashSpaceGet(bs2, newnmax, &stash->space));
238:   if (!stash->space_head) { /* new stash or reusing stash->oldnmax */
239:     stash->space_head = stash->space;
240:   }

242:   stash->reallocs++;
243:   stash->nmax = newnmax;
244:   PetscFunctionReturn(PETSC_SUCCESS);
245: }
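
/*
   Worked example (illustrative) of the growth policy above, with bs = 1 so bs2 = 1:
   a brand-new stash with no user-supplied size gets newnmax = DEFAULT_STASH_SIZE;
   a reused stash gets the larger of umax and oldnmax; an active stash that runs out
   of space doubles, e.g. nmax = 100 becomes 200, and if even the doubled size cannot
   hold nmax + incr entries another 2*incr is added on top.
*/
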
246: /*
247:   MatStashValuesRow_Private - inserts values into the stash. This function
248:   expects the values to be row-oriented. Multiple columns belonging to the same row
249:   can be inserted with a single call to this function.

251:   Input Parameters:
252:   stash  - the stash
253:   row    - the global row corresponding to the values
254:   n      - the number of elements inserted. All elements belong to the above row.
255:   idxn   - the global column indices corresponding to each of the values.
256:   values - the values inserted
257: */
258: PetscErrorCode MatStashValuesRow_Private(MatStash *stash, PetscInt row, PetscInt n, const PetscInt idxn[], const PetscScalar values[], PetscBool ignorezeroentries)
259: {
260:   PetscInt           i, k, cnt = 0;
261:   PetscMatStashSpace space = stash->space;

263:   PetscFunctionBegin;
264:   /* Check and see if we have sufficient memory */
265:   if (!space || space->local_remaining < n) PetscCall(MatStashExpand_Private(stash, n));
266:   space = stash->space;
267:   k     = space->local_used;
268:   for (i = 0; i < n; i++) {
269:     if (ignorezeroentries && values && values[i] == 0.0) continue;
270:     space->idx[k] = row;
271:     space->idy[k] = idxn[i];
272:     space->val[k] = values ? values[i] : 0.0;
273:     k++;
274:     cnt++;
275:   }
276:   stash->n += cnt;
277:   space->local_used += cnt;
278:   space->local_remaining -= cnt;
279:   PetscFunctionReturn(PETSC_SUCCESS);
280: }
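
/*
   Illustrative sketch (not part of the original source): how a parallel MatSetValues()
   back end might use this routine for a row owned by another process.  The variables
   rstart, rend, n, cols and vals are assumptions; only the call signature comes from
   this file.

     if (row < rstart || row >= rend) {
       // off-process row: stash it, to be communicated at assembly time
       PetscCall(MatStashValuesRow_Private(&mat->stash, row, n, cols, vals, PETSC_FALSE));
     }
*/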

282: /*
283:   MatStashValuesCol_Private - inserts values into the stash. This function
284:   expects the values to be column-oriented. Multiple columns belonging to the same row
285:   can be inserted with a single call to this function.

287:   Input Parameters:
288:   stash   - the stash
289:   row     - the global row corresponding to the values
290:   n       - the number of elements inserted. All elements belong to the above row.
291:   idxn    - the global column indices corresponding to each of the values.
292:   values  - the values inserted
293:   stepval - consecutive values are separated by a distance of stepval.
294:             This happens because the input is column-oriented.
295: */
296: PetscErrorCode MatStashValuesCol_Private(MatStash *stash, PetscInt row, PetscInt n, const PetscInt idxn[], const PetscScalar values[], PetscInt stepval, PetscBool ignorezeroentries)
297: {
298:   PetscInt           i, k, cnt = 0;
299:   PetscMatStashSpace space = stash->space;

301:   PetscFunctionBegin;
302:   /* Check and see if we have sufficient memory */
303:   if (!space || space->local_remaining < n) PetscCall(MatStashExpand_Private(stash, n));
304:   space = stash->space;
305:   k     = space->local_used;
306:   for (i = 0; i < n; i++) {
307:     if (ignorezeroentries && values && values[i * stepval] == 0.0) continue;
308:     space->idx[k] = row;
309:     space->idy[k] = idxn[i];
310:     space->val[k] = values ? values[i * stepval] : 0.0;
311:     k++;
312:     cnt++;
313:   }
314:   stash->n += cnt;
315:   space->local_used += cnt;
316:   space->local_remaining -= cnt;
317:   PetscFunctionReturn(PETSC_SUCCESS);
318: }
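
/*
   Worked example (illustrative) of stepval: if the caller holds an m-by-n dense block in
   column-oriented (column-major) order, the n values belonging to one row are not
   contiguous; consecutive values of that row sit m scalars apart, so the caller passes
   stepval = m and the loop above reads values[0], values[stepval], values[2*stepval], ...
*/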

320: /*
321:   MatStashValuesRowBlocked_Private - inserts blocks of values into the stash.
322:   This function expects the values to be row-oriented. Multiple block-columns belonging
323:   to the same block-row can be inserted with a single call to this function.
324:   This function extracts the sub-block of values based on the dimensions of
325:   the original input block, and the row,col values corresponding to the blocks.

327:   Input Parameters:
328:   stash  - the stash
329:   row    - the global block-row corresponding to the values
330:   n      - the number of blocks inserted. All blocks belong to the above block-row.
331:   idxn   - the global block-column indices corresponding to each of the blocks of
332:            values. Each block is of size bs*bs.
333:   values - the values inserted
334:   rmax   - the number of block-rows in the original block.
335:   cmax   - the number of block-columns in the original block.
336:   idx    - the index of the current block-row in the original block.
337: */
338: PetscErrorCode MatStashValuesRowBlocked_Private(MatStash *stash, PetscInt row, PetscInt n, const PetscInt idxn[], const PetscScalar values[], PetscInt rmax, PetscInt cmax, PetscInt idx)
339: {
340:   PetscInt           i, j, k, bs2, bs = stash->bs, l;
341:   const PetscScalar *vals;
342:   PetscScalar       *array;
343:   PetscMatStashSpace space = stash->space;

345:   PetscFunctionBegin;
346:   if (!space || space->local_remaining < n) PetscCall(MatStashExpand_Private(stash, n));
347:   space = stash->space;
348:   l     = space->local_used;
349:   bs2   = bs * bs;
350:   for (i = 0; i < n; i++) {
351:     space->idx[l] = row;
352:     space->idy[l] = idxn[i];
353:     /* Now copy over the block of values. Store the values column-oriented.
354:        This enables inserting multiple blocks belonging to a row with a single
355:        function call */
356:     array = space->val + bs2 * l;
357:     vals  = values + idx * bs2 * n + bs * i;
358:     for (j = 0; j < bs; j++) {
359:       for (k = 0; k < bs; k++) array[k * bs] = values ? vals[k] : 0.0;
360:       array++;
361:       vals += cmax * bs;
362:     }
363:     l++;
364:   }
365:   stash->n += n;
366:   space->local_used += n;
367:   space->local_remaining -= n;
368:   PetscFunctionReturn(PETSC_SUCCESS);
369: }
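
/*
   Worked example (illustrative) of the addressing above, assuming the usual case n == cmax:
   with bs = 2, cmax = 3 and block-row idx = 1, the block for block-column i = 2 starts at
   vals = values + idx*bs2*n + bs*i = values + 16, each of its bs scalar rows lies cmax*bs = 6
   scalars further on, and the bs*bs block is written into the stash column-oriented so that
   several blocks of one block-row pack contiguously.
*/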

371: /*
372:   MatStashValuesColBlocked_Private - inserts blocks of values into the stash.
373:   This function expects the values to be column-oriented. Multiple block-columns belonging
374:   to the same block-row can be inserted with a single call to this function.
375:   This function extracts the sub-block of values based on the dimensions of
376:   the original input block, and the row,col values corresponding to the blocks.

378:   Input Parameters:
379:   stash  - the stash
380:   row    - the global block-row corresponding to the values
381:   n      - the number of blocks inserted. All blocks belong to the above block-row.
382:   idxn   - the global block-column indices corresponding to each of the blocks of
383:            values. Each block is of size bs*bs.
384:   values - the values inserted
385:   rmax   - the number of block-rows in the original block.
386:   cmax   - the number of block-columns in the original block.
387:   idx    - the index of the current block-row in the original block.
388: */
389: PetscErrorCode MatStashValuesColBlocked_Private(MatStash *stash, PetscInt row, PetscInt n, const PetscInt idxn[], const PetscScalar values[], PetscInt rmax, PetscInt cmax, PetscInt idx)
390: {
391:   PetscInt           i, j, k, bs2, bs = stash->bs, l;
392:   const PetscScalar *vals;
393:   PetscScalar       *array;
394:   PetscMatStashSpace space = stash->space;

396:   PetscFunctionBegin;
397:   if (!space || space->local_remaining < n) PetscCall(MatStashExpand_Private(stash, n));
398:   space = stash->space;
399:   l     = space->local_used;
400:   bs2   = bs * bs;
401:   for (i = 0; i < n; i++) {
402:     space->idx[l] = row;
403:     space->idy[l] = idxn[i];
404:     /* Now copy over the block of values. Store the values column-oriented.
405:      This enables inserting multiple blocks belonging to a row with a single
406:      function call */
407:     array = space->val + bs2 * l;
408:     vals  = values + idx * bs2 * n + bs * i;
409:     for (j = 0; j < bs; j++) {
410:       for (k = 0; k < bs; k++) array[k] = values ? vals[k] : 0.0;
411:       array += bs;
412:       vals += rmax * bs;
413:     }
414:     l++;
415:   }
416:   stash->n += n;
417:   space->local_used += n;
418:   space->local_remaining -= n;
419:   PetscFunctionReturn(PETSC_SUCCESS);
420: }
421: /*
422:   MatStashScatterBegin_Private - Initiates the transfer of values to the
423:   correct owners. This function goes through the stash, checks the
424:   owner of each stashed value, and sends the values off to the owning
425:   processes.

427:   Input Parameters:
428:   stash  - the stash
429:   owners - an array of size 'no-of-procs' which gives the ownership range
430:            for each node.

432:   Note:
433:     In the case of the blocked stash the 'owners' array specifies the
434:   ownership ranges in blocked global indices, while for the regular stash
435:   it uses the regular global indices.
436: */
437: PetscErrorCode MatStashScatterBegin_Private(Mat mat, MatStash *stash, PetscInt *owners)
438: {
439:   PetscFunctionBegin;
440:   PetscCall((*stash->ScatterBegin)(mat, stash, owners));
441:   PetscFunctionReturn(PETSC_SUCCESS);
442: }

444: static PetscErrorCode MatStashScatterBegin_Ref(Mat mat, MatStash *stash, PetscInt *owners)
445: {
446:   PetscInt          *owner, *startv, *starti, bs2;
447:   PetscInt           size = stash->size;
448:   PetscInt          *sindices, **rindices, j, ii, idx, lastidx, l;
449:   PetscScalar      **rvalues, *svalues;
450:   MPI_Comm           comm = stash->comm;
451:   MPI_Request       *send_waits, *recv_waits, *recv_waits1, *recv_waits2;
452:   PetscMPIInt       *sizes, *nlengths, nreceives, nsends, tag1 = stash->tag1, tag2 = stash->tag2;
453:   PetscInt          *sp_idx, *sp_idy;
454:   PetscScalar       *sp_val;
455:   PetscMatStashSpace space, space_next;

457:   PetscFunctionBegin;
458:   { /* make sure all processes are either in INSERT_VALUES or ADD_VALUES mode */
459:     InsertMode addv;
460:     PetscCallMPI(MPIU_Allreduce((PetscEnum *)&mat->insertmode, (PetscEnum *)&addv, 1, MPIU_ENUM, MPI_BOR, PetscObjectComm((PetscObject)mat)));
461:     PetscCheck(addv != (ADD_VALUES | INSERT_VALUES), PetscObjectComm((PetscObject)mat), PETSC_ERR_ARG_WRONGSTATE, "Some processors inserted others added");
462:     mat->insertmode = addv; /* in case this processor had no cache */
463:   }

465:   bs2 = stash->bs * stash->bs;

467:   /*  first count number of contributors to each processor */
468:   PetscCall(PetscCalloc1(size, &nlengths));
469:   PetscCall(PetscMalloc1(stash->n + 1, &owner));

471:   ii = j  = 0;
472:   lastidx = -1;
473:   space   = stash->space_head;
474:   while (space) {
475:     space_next = space->next;
476:     sp_idx     = space->idx;
477:     for (l = 0; l < space->local_used; l++) {
478:       /* if indices are NOT locally sorted, need to start search at the beginning */
479:       if (lastidx > (idx = sp_idx[l])) j = 0;
480:       lastidx = idx;
481:       for (; j < size; j++) {
482:         if (idx >= owners[j] && idx < owners[j + 1]) {
483:           nlengths[j]++;
484:           owner[ii] = j;
485:           break;
486:         }
487:       }
488:       ii++;
489:     }
490:     space = space_next;
491:   }

493:   /* Now check what procs get messages - and compute nsends. */
494:   PetscCall(PetscCalloc1(size, &sizes));
495:   nsends = 0;
496:   for (PetscMPIInt i = 0; i < size; i++) {
497:     if (nlengths[i]) {
498:       sizes[i] = 1;
499:       nsends++;
500:     }
501:   }

503:   {
504:     PetscMPIInt *onodes, *olengths;
505:     /* Determine the number of messages to expect, their lengths, and their from-ids */
506:     PetscCall(PetscGatherNumberOfMessages(comm, sizes, nlengths, &nreceives));
507:     PetscCall(PetscGatherMessageLengths(comm, nsends, nreceives, nlengths, &onodes, &olengths));
508:     /* row and column indices are sent together, so the lengths are multiplied by 2 */
509:     for (PetscMPIInt i = 0; i < nreceives; i++) olengths[i] *= 2;
510:     PetscCall(PetscPostIrecvInt(comm, tag1, nreceives, onodes, olengths, &rindices, &recv_waits1));
511:     /* values have length bs2 per entry (and remove the earlier factor of 2) */
512:     for (PetscMPIInt i = 0; i < nreceives; i++) PetscCall(PetscMPIIntCast(olengths[i] * bs2 / 2, &olengths[i]));
513:     PetscCall(PetscPostIrecvScalar(comm, tag2, nreceives, onodes, olengths, &rvalues, &recv_waits2));
514:     PetscCall(PetscFree(onodes));
515:     PetscCall(PetscFree(olengths));
516:   }

518:   /* do sends:
519:       1) startv[i] gives the starting index in svalues for the values going to
520:          the ith processor
521:   */
522:   PetscCall(PetscMalloc2(bs2 * stash->n, &svalues, 2 * (stash->n + 1), &sindices));
523:   PetscCall(PetscMalloc1(2 * nsends, &send_waits));
524:   PetscCall(PetscMalloc2(size, &startv, size, &starti));
525:   /* use 2 sends per destination: one with the indices (all_i and all_j), the other with the values (all_a) */
526:   startv[0] = 0;
527:   starti[0] = 0;
528:   for (PetscMPIInt i = 1; i < size; i++) {
529:     startv[i] = startv[i - 1] + nlengths[i - 1];
530:     starti[i] = starti[i - 1] + 2 * nlengths[i - 1];
531:   }

533:   ii    = 0;
534:   space = stash->space_head;
535:   while (space) {
536:     space_next = space->next;
537:     sp_idx     = space->idx;
538:     sp_idy     = space->idy;
539:     sp_val     = space->val;
540:     for (l = 0; l < space->local_used; l++) {
541:       j = owner[ii];
542:       if (bs2 == 1) {
543:         svalues[startv[j]] = sp_val[l];
544:       } else {
545:         PetscInt     k;
546:         PetscScalar *buf1, *buf2;
547:         buf1 = svalues + bs2 * startv[j];
548:         buf2 = space->val + bs2 * l;
549:         for (k = 0; k < bs2; k++) buf1[k] = buf2[k];
550:       }
551:       sindices[starti[j]]               = sp_idx[l];
552:       sindices[starti[j] + nlengths[j]] = sp_idy[l];
553:       startv[j]++;
554:       starti[j]++;
555:       ii++;
556:     }
557:     space = space_next;
558:   }
559:   startv[0] = 0;
560:   for (PetscMPIInt i = 1; i < size; i++) startv[i] = startv[i - 1] + nlengths[i - 1];

562:   for (PetscMPIInt i = 0, count = 0; i < size; i++) {
563:     if (sizes[i]) {
564:       PetscCallMPI(MPIU_Isend(sindices + 2 * startv[i], 2 * nlengths[i], MPIU_INT, i, tag1, comm, send_waits + count++));
565:       PetscCallMPI(MPIU_Isend(svalues + bs2 * startv[i], bs2 * nlengths[i], MPIU_SCALAR, i, tag2, comm, send_waits + count++));
566:     }
567:   }
568: #if defined(PETSC_USE_INFO)
569:   PetscCall(PetscInfo(NULL, "No of messages: %d \n", nsends));
570:   for (PetscMPIInt i = 0; i < size; i++) {
571:     if (sizes[i]) PetscCall(PetscInfo(NULL, "Mesg_to: %d: size: %zu bytes\n", i, (size_t)(nlengths[i] * (bs2 * sizeof(PetscScalar) + 2 * sizeof(PetscInt)))));
572:   }
573: #endif
574:   PetscCall(PetscFree(nlengths));
575:   PetscCall(PetscFree(owner));
576:   PetscCall(PetscFree2(startv, starti));
577:   PetscCall(PetscFree(sizes));

579:   /* recv_waits need to be contiguous for MatStashScatterGetMesg_Private() */
580:   PetscCall(PetscMalloc1(2 * nreceives, &recv_waits));

582:   for (PetscMPIInt i = 0; i < nreceives; i++) {
583:     recv_waits[2 * i]     = recv_waits1[i];
584:     recv_waits[2 * i + 1] = recv_waits2[i];
585:   }
586:   stash->recv_waits = recv_waits;

588:   PetscCall(PetscFree(recv_waits1));
589:   PetscCall(PetscFree(recv_waits2));

591:   stash->svalues         = svalues;
592:   stash->sindices        = sindices;
593:   stash->rvalues         = rvalues;
594:   stash->rindices        = rindices;
595:   stash->send_waits      = send_waits;
596:   stash->nsends          = nsends;
597:   stash->nrecvs          = nreceives;
598:   stash->reproduce_count = 0;
599:   PetscFunctionReturn(PETSC_SUCCESS);
600: }

602: /*
603:    MatStashScatterGetMesg_Private - This function waits on the receives posted
604:    in the function MatStashScatterBegin_Private() and returns one message at
605:    a time to the calling function. If no messages are left, it indicates this
606:    by setting flg = 0, else it sets flg = 1.

608:    Input Parameter:
609:    stash - the stash

611:    Output Parameters:
612:    nvals - the number of entries in the current message.
613:    rows  - an array of row indices (or blocked indices) corresponding to the values
614:    cols  - an array of column indices (or blocked indices) corresponding to the values
615:    vals  - the values
616:    flg   - 0 indicates no more messages are left, and the current call has no values associated.
617:            1 indicates that the current call successfully received a message, and the
618:              other output parameters nvals,rows,cols,vals are set appropriately.
619: */
620: PetscErrorCode MatStashScatterGetMesg_Private(MatStash *stash, PetscMPIInt *nvals, PetscInt **rows, PetscInt **cols, PetscScalar **vals, PetscInt *flg)
621: {
622:   PetscFunctionBegin;
623:   PetscCall((*stash->ScatterGetMesg)(stash, nvals, rows, cols, vals, flg));
624:   PetscFunctionReturn(PETSC_SUCCESS);
625: }
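
/*
   Illustrative sketch (not part of the original source): the receive loop a parallel
   assembly routine runs on top of the three scatter calls in this file.  The owners
   array and the handling of the received entries are assumptions; the call sequence is
   the point.

     PetscMPIInt n;
     PetscInt    flg, *rows, *cols;
     PetscScalar *vals;

     PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, owners));
     for (;;) {
       PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &rows, &cols, &vals, &flg));
       if (!flg) break;                           // no messages left
       // insert the n received (rows[], cols[], vals) entries into the local matrix here
     }
     PetscCall(MatStashScatterEnd_Private(&mat->stash));
*/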

627: PETSC_INTERN PetscErrorCode MatStashScatterGetMesg_Ref(MatStash *stash, PetscMPIInt *nvals, PetscInt **rows, PetscInt **cols, PetscScalar **vals, PetscInt *flg)
628: {
629:   PetscMPIInt i, *flg_v = stash->flg_v, i1, i2;
630:   PetscInt    bs2;
631:   MPI_Status  recv_status;
632:   PetscBool   match_found = PETSC_FALSE;

634:   PetscFunctionBegin;
635:   *flg = 0; /* When a message is discovered this is reset to 1 */
636:   /* Return if no more messages to process */
637:   if (stash->nprocessed == stash->nrecvs) PetscFunctionReturn(PETSC_SUCCESS);

639:   bs2 = stash->bs * stash->bs;
640:   /* If a matching pair of receives is found, process them and return the data to
641:      the calling function. Until then keep receiving messages */
642:   while (!match_found) {
643:     if (stash->reproduce) {
644:       i = stash->reproduce_count++;
645:       PetscCallMPI(MPI_Wait(stash->recv_waits + i, &recv_status));
646:     } else {
647:       PetscCallMPI(MPI_Waitany(2 * stash->nrecvs, stash->recv_waits, &i, &recv_status));
648:     }
649:     PetscCheck(recv_status.MPI_SOURCE >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Negative MPI source!");

651:     /* Now pack the received message into a structure which is usable by others */
652:     if (i % 2) {
653:       PetscCallMPI(MPI_Get_count(&recv_status, MPIU_SCALAR, nvals));
654:       flg_v[2 * recv_status.MPI_SOURCE] = i / 2;
655:       *nvals                            = *nvals / bs2;
656:     } else {
657:       PetscCallMPI(MPI_Get_count(&recv_status, MPIU_INT, nvals));
658:       flg_v[2 * recv_status.MPI_SOURCE + 1] = i / 2;
659:       *nvals                                = *nvals / 2; /* This message has both row indices and col indices */
660:     }

662:     /* Check if we have both messages from this proc */
663:     i1 = flg_v[2 * recv_status.MPI_SOURCE];
664:     i2 = flg_v[2 * recv_status.MPI_SOURCE + 1];
665:     if (i1 != -1 && i2 != -1) {
666:       *rows = stash->rindices[i2];
667:       *cols = *rows + *nvals;
668:       *vals = stash->rvalues[i1];
669:       *flg  = 1;
670:       stash->nprocessed++;
671:       match_found = PETSC_TRUE;
672:     }
673:   }
674:   PetscFunctionReturn(PETSC_SUCCESS);
675: }

677: #if !defined(PETSC_HAVE_MPIUNI)
678: typedef struct {
679:   PetscInt    row;
680:   PetscInt    col;
681:   PetscScalar vals[1]; /* Actually an array of length bs2 */
682: } MatStashBlock;

684: static PetscErrorCode MatStashSortCompress_Private(MatStash *stash, InsertMode insertmode)
685: {
686:   PetscMatStashSpace space;
687:   PetscInt           n = stash->n, bs = stash->bs, bs2 = bs * bs, cnt, *row, *col, *perm, rowstart, i;
688:   PetscScalar      **valptr;

690:   PetscFunctionBegin;
691:   PetscCall(PetscMalloc4(n, &row, n, &col, n, &valptr, n, &perm));
692:   for (space = stash->space_head, cnt = 0; space; space = space->next) {
693:     for (i = 0; i < space->local_used; i++) {
694:       row[cnt]    = space->idx[i];
695:       col[cnt]    = space->idy[i];
696:       valptr[cnt] = &space->val[i * bs2];
697:       perm[cnt]   = cnt; /* Will tell us where to find valptr after sorting row[] and col[] */
698:       cnt++;
699:     }
700:   }
701:   PetscCheck(cnt == n, PETSC_COMM_SELF, PETSC_ERR_PLIB, "MatStash n %" PetscInt_FMT ", but counted %" PetscInt_FMT " entries", n, cnt);
702:   PetscCall(PetscSortIntWithArrayPair(n, row, col, perm));
703:   /* Scan through the rows, sorting each one, combining duplicates, and packing send buffers */
704:   for (rowstart = 0, cnt = 0, i = 1; i <= n; i++) {
705:     if (i == n || row[i] != row[rowstart]) { /* Sort the last row. */
706:       PetscInt colstart;
707:       PetscCall(PetscSortIntWithArray(i - rowstart, &col[rowstart], &perm[rowstart]));
708:       for (colstart = rowstart; colstart < i;) { /* Compress multiple insertions to the same location */
709:         PetscInt       j, l;
710:         MatStashBlock *block;
711:         PetscCall(PetscSegBufferGet(stash->segsendblocks, 1, &block));
712:         block->row = row[rowstart];
713:         block->col = col[colstart];
714:         PetscCall(PetscArraycpy(block->vals, valptr[perm[colstart]], bs2));
715:         for (j = colstart + 1; j < i && col[j] == col[colstart]; j++) { /* Add any extra stashed blocks at the same (row,col) */
716:           if (insertmode == ADD_VALUES) {
717:             for (l = 0; l < bs2; l++) block->vals[l] += valptr[perm[j]][l];
718:           } else {
719:             PetscCall(PetscArraycpy(block->vals, valptr[perm[j]], bs2));
720:           }
721:         }
722:         colstart = j;
723:       }
724:       rowstart = i;
725:     }
726:   }
727:   PetscCall(PetscFree4(row, col, valptr, perm));
728:   PetscFunctionReturn(PETSC_SUCCESS);
729: }
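
/*
   Worked example (illustrative) of the compression above with bs = 1: if the stash holds
   (5,2)=1.0, (5,2)=3.0 and (5,7)=4.0, the send buffer ends up with two blocks, (5,2) and
   (5,7).  Under ADD_VALUES the (5,2) block carries 1.0 + 3.0 = 4.0; under INSERT_VALUES
   only one of the duplicate values survives (the last one in the sorted order).
*/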

731: static PetscErrorCode MatStashBlockTypeSetUp(MatStash *stash)
732: {
733:   PetscFunctionBegin;
734:   if (stash->blocktype == MPI_DATATYPE_NULL) {
735:     PetscInt     bs2 = PetscSqr(stash->bs);
736:     PetscMPIInt  blocklens[2];
737:     MPI_Aint     displs[2];
738:     MPI_Datatype types[2], stype;
739:     /*
740:         DummyBlock is a type having standard layout, even when PetscScalar is C++ std::complex.
741:        Since std::complex itself has standard layout, so does DummyBlock, recursively.
742:        To be compatible with C++ std::complex, complex implementations on GPUs must also have standard layout,
744:        though they can have different alignment, e.g., 16 bytes for double complex, instead of 8 bytes as in GCC's libstdc++.
744:        offsetof(type, member) only requires type to have standard layout. Ref. https://en.cppreference.com/w/cpp/types/offsetof.

746:        We can test if std::complex has standard layout with the following code:
747:        #include <iostream>
748:        #include <type_traits>
749:        #include <complex>
750:        int main() {
751:          std::cout << std::boolalpha;
752:          std::cout << std::is_standard_layout<std::complex<double> >::value << '\n';
753:        }
754:        Output: true
755:      */
756:     struct DummyBlock {
757:       PetscInt    row, col;
758:       PetscScalar vals;
759:     };

761:     stash->blocktype_size = offsetof(struct DummyBlock, vals) + bs2 * sizeof(PetscScalar);
762:     if (stash->blocktype_size % sizeof(PetscInt)) { /* Implies that PetscInt is larger and does not satisfy alignment without padding */
763:       stash->blocktype_size += sizeof(PetscInt) - stash->blocktype_size % sizeof(PetscInt);
764:     }
765:     PetscCall(PetscSegBufferCreate(stash->blocktype_size, 1, &stash->segsendblocks));
766:     PetscCall(PetscSegBufferCreate(stash->blocktype_size, 1, &stash->segrecvblocks));
767:     PetscCall(PetscSegBufferCreate(sizeof(MatStashFrame), 1, &stash->segrecvframe));
768:     blocklens[0] = 2;
769:     blocklens[1] = (PetscMPIInt)bs2;
770:     displs[0]    = offsetof(struct DummyBlock, row);
771:     displs[1]    = offsetof(struct DummyBlock, vals);
772:     types[0]     = MPIU_INT;
773:     types[1]     = MPIU_SCALAR;
774:     PetscCallMPI(MPI_Type_create_struct(2, blocklens, displs, types, &stype));
775:     PetscCallMPI(MPI_Type_commit(&stype));
776:     PetscCallMPI(MPI_Type_create_resized(stype, 0, stash->blocktype_size, &stash->blocktype));
777:     PetscCallMPI(MPI_Type_commit(&stash->blocktype));
778:     PetscCallMPI(MPI_Type_free(&stype));
779:   }
780:   PetscFunctionReturn(PETSC_SUCCESS);
781: }
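
/*
   Worked example (illustrative), assuming an 8-byte PetscInt and a real 8-byte PetscScalar:
   for bs = 2, offsetof(struct DummyBlock, vals) = 16 and bs2 = 4, so blocktype_size =
   16 + 4*8 = 48 bytes, already a multiple of sizeof(PetscInt) and hence needing no extra
   padding; resizing the datatype to that extent lets consecutive blocks in a PetscSegBuffer
   be sent with a plain count.
*/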

783: /* Callback invoked after target rank has initiated receive of rendezvous message.
784:  * Here we post the main sends.
785:  */
786: static PetscErrorCode MatStashBTSSend_Private(MPI_Comm comm, const PetscMPIInt tag[], PetscMPIInt rankid, PetscMPIInt rank, void *sdata, MPI_Request req[], void *ctx)
787: {
788:   MatStash       *stash = (MatStash *)ctx;
789:   MatStashHeader *hdr   = (MatStashHeader *)sdata;

791:   PetscFunctionBegin;
792:   PetscCheck(rank == stash->sendranks[rankid], comm, PETSC_ERR_PLIB, "BTS Send rank %d does not match sendranks[%d] %d", rank, rankid, stash->sendranks[rankid]);
793:   PetscCallMPI(MPIU_Isend(stash->sendframes[rankid].buffer, hdr->count, stash->blocktype, rank, tag[0], comm, &req[0]));
794:   stash->sendframes[rankid].count   = hdr->count;
795:   stash->sendframes[rankid].pending = 1;
796:   PetscFunctionReturn(PETSC_SUCCESS);
797: }

799: /*
800:     Callback invoked by target after receiving rendezvous message.
801:     Here we post the main recvs.
802:  */
803: static PetscErrorCode MatStashBTSRecv_Private(MPI_Comm comm, const PetscMPIInt tag[], PetscMPIInt rank, void *rdata, MPI_Request req[], void *ctx)
804: {
805:   MatStash       *stash = (MatStash *)ctx;
806:   MatStashHeader *hdr   = (MatStashHeader *)rdata;
807:   MatStashFrame  *frame;

809:   PetscFunctionBegin;
810:   PetscCall(PetscSegBufferGet(stash->segrecvframe, 1, &frame));
811:   PetscCall(PetscSegBufferGet(stash->segrecvblocks, hdr->count, &frame->buffer));
812:   PetscCallMPI(MPIU_Irecv(frame->buffer, hdr->count, stash->blocktype, rank, tag[0], comm, &req[0]));
813:   frame->count   = hdr->count;
814:   frame->pending = 1;
815:   PetscFunctionReturn(PETSC_SUCCESS);
816: }

818: /*
819:  * owners[] contains the ownership ranges; may be indexed by either blocks or scalars
820:  */
821: static PetscErrorCode MatStashScatterBegin_BTS(Mat mat, MatStash *stash, PetscInt owners[])
822: {
823:   PetscCount nblocks;
824:   char      *sendblocks;

826:   PetscFunctionBegin;
827:   if (PetscDefined(USE_DEBUG)) { /* make sure all processes are either in INSERT_VALUES or ADD_VALUES mode */
828:     InsertMode addv;
829:     PetscCallMPI(MPIU_Allreduce((PetscEnum *)&mat->insertmode, (PetscEnum *)&addv, 1, MPIU_ENUM, MPI_BOR, PetscObjectComm((PetscObject)mat)));
830:     PetscCheck(addv != (ADD_VALUES | INSERT_VALUES), PetscObjectComm((PetscObject)mat), PETSC_ERR_ARG_WRONGSTATE, "Some processors inserted others added");
831:   }

833:   PetscCall(MatStashBlockTypeSetUp(stash));
834:   PetscCall(MatStashSortCompress_Private(stash, mat->insertmode));
835:   PetscCall(PetscSegBufferGetSize(stash->segsendblocks, &nblocks));
836:   PetscCall(PetscSegBufferExtractInPlace(stash->segsendblocks, &sendblocks));
837:   if (stash->first_assembly_done) { /* Set up sendhdrs and sendframes for each rank that we sent before */
838:     PetscInt   i;
839:     PetscCount b;
840:     for (i = 0, b = 0; i < stash->nsendranks; i++) {
841:       stash->sendframes[i].buffer = &sendblocks[b * stash->blocktype_size];
842:       /* sendhdr is never actually sent, but the count is used by MatStashBTSSend_Private */
843:       stash->sendhdr[i].count = 0; /* Might remain empty (in which case we send a zero-sized message) if no values are communicated to that process */
844:       for (; b < nblocks; b++) {
845:         MatStashBlock *sendblock_b = (MatStashBlock *)&sendblocks[b * stash->blocktype_size];
846:         PetscCheck(sendblock_b->row >= owners[stash->sendranks[i]], stash->comm, PETSC_ERR_ARG_WRONG, "MAT_SUBSET_OFF_PROC_ENTRIES set, but row %" PetscInt_FMT " owned by %d not communicated in initial assembly", sendblock_b->row, stash->sendranks[i]);
847:         if (sendblock_b->row >= owners[stash->sendranks[i] + 1]) break;
848:         stash->sendhdr[i].count++;
849:       }
850:     }
851:   } else { /* Dynamically count and pack (first time) */
852:     PetscInt   sendno;
853:     PetscCount i, rowstart;

855:     /* Count number of send ranks and allocate for sends */
856:     stash->nsendranks = 0;
857:     for (rowstart = 0; rowstart < nblocks;) {
858:       PetscInt       owner;
859:       MatStashBlock *sendblock_rowstart = (MatStashBlock *)&sendblocks[rowstart * stash->blocktype_size];

861:       PetscCall(PetscFindInt(sendblock_rowstart->row, stash->size + 1, owners, &owner));
862:       if (owner < 0) owner = -(owner + 2);
863:       for (i = rowstart + 1; i < nblocks; i++) { /* Move forward through a run of blocks with the same owner */
864:         MatStashBlock *sendblock_i = (MatStashBlock *)&sendblocks[i * stash->blocktype_size];

866:         if (sendblock_i->row >= owners[owner + 1]) break;
867:       }
868:       stash->nsendranks++;
869:       rowstart = i;
870:     }
871:     PetscCall(PetscMalloc3(stash->nsendranks, &stash->sendranks, stash->nsendranks, &stash->sendhdr, stash->nsendranks, &stash->sendframes));

873:     /* Set up sendhdrs and sendframes */
874:     sendno = 0;
875:     for (rowstart = 0; rowstart < nblocks;) {
876:       PetscInt       iowner;
877:       PetscMPIInt    owner;
878:       MatStashBlock *sendblock_rowstart = (MatStashBlock *)&sendblocks[rowstart * stash->blocktype_size];

880:       PetscCall(PetscFindInt(sendblock_rowstart->row, stash->size + 1, owners, &iowner));
881:       PetscCall(PetscMPIIntCast(iowner, &owner));
882:       if (owner < 0) owner = -(owner + 2);
883:       stash->sendranks[sendno] = owner;
884:       for (i = rowstart + 1; i < nblocks; i++) { /* Move forward through a run of blocks with the same owner */
885:         MatStashBlock *sendblock_i = (MatStashBlock *)&sendblocks[i * stash->blocktype_size];

887:         if (sendblock_i->row >= owners[owner + 1]) break;
888:       }
889:       stash->sendframes[sendno].buffer  = sendblock_rowstart;
890:       stash->sendframes[sendno].pending = 0;
891:       PetscCall(PetscIntCast(i - rowstart, &stash->sendhdr[sendno].count));
892:       sendno++;
893:       rowstart = i;
894:     }
895:     PetscCheck(sendno == stash->nsendranks, stash->comm, PETSC_ERR_PLIB, "BTS counted %d sendranks, but %" PetscInt_FMT " sends", stash->nsendranks, sendno);
896:   }

898:   /* Encode insertmode on the outgoing messages. If we want to support more than two options, we would need a new
899:    * message or a dummy entry of some sort. */
900:   if (mat->insertmode == INSERT_VALUES) {
901:     for (PetscCount i = 0; i < nblocks; i++) {
902:       MatStashBlock *sendblock_i = (MatStashBlock *)&sendblocks[i * stash->blocktype_size];
903:       sendblock_i->row           = -(sendblock_i->row + 1);
904:     }
905:   }

907:   if (stash->first_assembly_done) {
908:     PetscMPIInt i, tag;

910:     PetscCall(PetscCommGetNewTag(stash->comm, &tag));
911:     for (i = 0; i < stash->nrecvranks; i++) PetscCall(MatStashBTSRecv_Private(stash->comm, &tag, stash->recvranks[i], &stash->recvhdr[i], &stash->recvreqs[i], stash));
912:     for (i = 0; i < stash->nsendranks; i++) PetscCall(MatStashBTSSend_Private(stash->comm, &tag, i, stash->sendranks[i], &stash->sendhdr[i], &stash->sendreqs[i], stash));
913:     stash->use_status = PETSC_TRUE; /* Use count from message status. */
914:   } else {
915:     PetscCall(PetscCommBuildTwoSidedFReq(stash->comm, 1, MPIU_INT, stash->nsendranks, stash->sendranks, (PetscInt *)stash->sendhdr, &stash->nrecvranks, &stash->recvranks, (PetscInt *)&stash->recvhdr, 1, &stash->sendreqs, &stash->recvreqs, MatStashBTSSend_Private, MatStashBTSRecv_Private, stash));
916:     PetscCall(PetscMalloc2(stash->nrecvranks, &stash->some_indices, stash->nrecvranks, &stash->some_statuses));
917:     stash->use_status = PETSC_FALSE; /* Use count from header instead of from message. */
918:   }

920:   PetscCall(PetscSegBufferExtractInPlace(stash->segrecvframe, &stash->recvframes));
921:   stash->recvframe_active    = NULL;
922:   stash->recvframe_i         = 0;
923:   stash->some_i              = 0;
924:   stash->some_count          = 0;
925:   stash->recvcount           = 0;
926:   stash->first_assembly_done = mat->assembly_subset; /* See the same logic in VecAssemblyBegin_MPI_BTS */
927:   stash->insertmode          = &mat->insertmode;
928:   PetscFunctionReturn(PETSC_SUCCESS);
929: }
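
/*
   Illustrative note on the sign encoding above: under INSERT_VALUES every outgoing row
   index r is sent as -(r + 1), so row 0 travels as -1 and row 12 as -13; the receiver in
   MatStashScatterGetMesg_BTS() uses the sign to check InsertMode consistency and restores
   the index with row = -(row + 1) before handing the block to the caller.
*/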

931: static PetscErrorCode MatStashScatterGetMesg_BTS(MatStash *stash, PetscMPIInt *n, PetscInt **row, PetscInt **col, PetscScalar **val, PetscInt *flg)
932: {
933:   MatStashBlock *block;

935:   PetscFunctionBegin;
936:   *flg = 0;
937:   while (!stash->recvframe_active || stash->recvframe_i == stash->recvframe_count) {
938:     if (stash->some_i == stash->some_count) {
939:       if (stash->recvcount == stash->nrecvranks) PetscFunctionReturn(PETSC_SUCCESS); /* Done */
940:       PetscCallMPI(MPI_Waitsome(stash->nrecvranks, stash->recvreqs, &stash->some_count, stash->some_indices, stash->use_status ? stash->some_statuses : MPI_STATUSES_IGNORE));
941:       stash->some_i = 0;
942:     }
943:     stash->recvframe_active = &stash->recvframes[stash->some_indices[stash->some_i]];
944:     stash->recvframe_count  = stash->recvframe_active->count; /* From header; maximum count */
945:     if (stash->use_status) {                                  /* Count what was actually sent */
946:       PetscMPIInt ic;

948:       PetscCallMPI(MPI_Get_count(&stash->some_statuses[stash->some_i], stash->blocktype, &ic));
949:       stash->recvframe_count = ic;
950:     }
951:     if (stash->recvframe_count > 0) { /* Check for InsertMode consistency */
952:       block = (MatStashBlock *)&((char *)stash->recvframe_active->buffer)[0];
953:       if (PetscUnlikely(*stash->insertmode == NOT_SET_VALUES)) *stash->insertmode = block->row < 0 ? INSERT_VALUES : ADD_VALUES;
954:       PetscCheck(*stash->insertmode != INSERT_VALUES || block->row < 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Assembling INSERT_VALUES, but rank %d requested ADD_VALUES", stash->recvranks[stash->some_indices[stash->some_i]]);
955:       PetscCheck(*stash->insertmode != ADD_VALUES || block->row >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Assembling ADD_VALUES, but rank %d requested INSERT_VALUES", stash->recvranks[stash->some_indices[stash->some_i]]);
956:     }
957:     stash->some_i++;
958:     stash->recvcount++;
959:     stash->recvframe_i = 0;
960:   }
961:   *n    = 1;
962:   block = (MatStashBlock *)&((char *)stash->recvframe_active->buffer)[stash->recvframe_i * stash->blocktype_size];
963:   if (block->row < 0) block->row = -(block->row + 1);
964:   *row = &block->row;
965:   *col = &block->col;
966:   *val = block->vals;
967:   stash->recvframe_i++;
968:   *flg = 1;
969:   PetscFunctionReturn(PETSC_SUCCESS);
970: }

972: static PetscErrorCode MatStashScatterEnd_BTS(MatStash *stash)
973: {
974:   PetscFunctionBegin;
975:   PetscCallMPI(MPI_Waitall(stash->nsendranks, stash->sendreqs, MPI_STATUSES_IGNORE));
976:   if (stash->first_assembly_done) { /* Reuse the communication contexts, so consolidate and reset segrecvblocks  */
977:     PetscCall(PetscSegBufferExtractInPlace(stash->segrecvblocks, NULL));
978:   } else { /* No reuse, so collect everything. */
979:     PetscCall(MatStashScatterDestroy_BTS(stash));
980:   }

982:   /* Now update oldnmax to be approximately 10% more than the maximum n used, so that
983:      the wasted space is reduced the next time this stash is used.
984:      Only update oldnmax if it increases */
985:   if (stash->n) {
986:     PetscInt bs2     = stash->bs * stash->bs;
987:     PetscInt oldnmax = ((int)(stash->n * 1.1) + 5) * bs2;
988:     if (oldnmax > stash->oldnmax) stash->oldnmax = oldnmax;
989:   }

991:   stash->nmax       = 0;
992:   stash->n          = 0;
993:   stash->reallocs   = -1;
994:   stash->nprocessed = 0;

996:   PetscCall(PetscMatStashSpaceDestroy(&stash->space_head));

998:   stash->space = NULL;
999:   PetscFunctionReturn(PETSC_SUCCESS);
1000: }

1002: PetscErrorCode MatStashScatterDestroy_BTS(MatStash *stash)
1003: {
1004:   PetscFunctionBegin;
1005:   PetscCall(PetscSegBufferDestroy(&stash->segsendblocks));
1006:   PetscCall(PetscSegBufferDestroy(&stash->segrecvframe));
1007:   stash->recvframes = NULL;
1008:   PetscCall(PetscSegBufferDestroy(&stash->segrecvblocks));
1009:   if (stash->blocktype != MPI_DATATYPE_NULL) PetscCallMPI(MPI_Type_free(&stash->blocktype));
1010:   stash->nsendranks = 0;
1011:   stash->nrecvranks = 0;
1012:   PetscCall(PetscFree3(stash->sendranks, stash->sendhdr, stash->sendframes));
1013:   PetscCall(PetscFree(stash->sendreqs));
1014:   PetscCall(PetscFree(stash->recvreqs));
1015:   PetscCall(PetscFree(stash->recvranks));
1016:   PetscCall(PetscFree(stash->recvhdr));
1017:   PetscCall(PetscFree2(stash->some_indices, stash->some_statuses));
1018:   PetscFunctionReturn(PETSC_SUCCESS);
1019: }
1020: #endif