Actual source code: vecstash.c

  1: #include <petsc/private/vecimpl.h>

  3: #define DEFAULT_STASH_SIZE 100

  5: /*
  6:   VecStashCreate_Private - Creates a stash, currently used for all the parallel
  7:   vector implementations. The stash is where elements of a vector destined
  8:   to be stored on other processors are kept until vector assembly is done.

 10:   This is a simple-minded stash: entries are simply appended to the end of the stash.

 12:   Input Parameters:
 13:   comm - communicator, required for scatters.
 14:   bs   - stash block size, used when stashing blocks of values

 16:   Output Parameter:
 17: . stash    - the newly created stash
 18: */
 19: PetscErrorCode VecStashCreate_Private(MPI_Comm comm, PetscInt bs, VecStash *stash)
 20: {
 21:   PetscInt  max, *opt, nopt;
 22:   PetscBool flg;

 24:   PetscFunctionBegin;
 25:   /* Require 2 tags; get them using PetscCommGetNewTag() */
 26:   stash->comm = comm;
 27:   PetscCall(PetscCommGetNewTag(stash->comm, &stash->tag1));
 28:   PetscCall(PetscCommGetNewTag(stash->comm, &stash->tag2));
 29:   PetscCallMPI(MPI_Comm_size(stash->comm, &stash->size));
 30:   PetscCallMPI(MPI_Comm_rank(stash->comm, &stash->rank));

 32:   nopt = stash->size;
 33:   PetscCall(PetscMalloc1(nopt, &opt));
 34:   PetscCall(PetscOptionsGetIntArray(NULL, NULL, "-vecstash_initial_size", opt, &nopt, &flg));
 35:   if (flg) {
 36:     if (nopt == 1) max = opt[0];
 37:     else if (nopt == stash->size) max = opt[stash->rank];
 38:     else if (stash->rank < nopt) max = opt[stash->rank];
 39:     else max = 0; /* use default */
 40:     stash->umax = max;
 41:   } else {
 42:     stash->umax = 0;
 43:   }
 44:   PetscCall(PetscFree(opt));

 46:   if (bs <= 0) bs = 1;

 48:   stash->bs       = bs;
 49:   stash->nmax     = 0;
 50:   stash->oldnmax  = 0;
 51:   stash->n        = 0;
 52:   stash->reallocs = -1;
 53:   stash->idx      = NULL;
 54:   stash->array    = NULL;

 56:   stash->send_waits   = NULL;
 57:   stash->recv_waits   = NULL;
 58:   stash->send_status  = NULL;
 59:   stash->nsends       = 0;
 60:   stash->nrecvs       = 0;
 61:   stash->svalues      = NULL;
 62:   stash->rvalues      = NULL;
 63:   stash->rmax         = 0;
 64:   stash->nprocs       = NULL;
 65:   stash->nprocessed   = 0;
 66:   stash->donotstash   = PETSC_FALSE;
 67:   stash->ignorenegidx = PETSC_FALSE;
 68:   PetscFunctionReturn(PETSC_SUCCESS);
 69: }
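
For orientation, here is a minimal sketch (not part of vecstash.c) of how a parallel vector implementation might create and pre-size its stash; only the VecStash*_Private() calls come from this file, and the helper name is hypothetical.

static PetscErrorCode SetUpVecStash_Example(MPI_Comm comm, PetscInt bs, PetscInt sizehint, VecStash *stash)
{
  PetscFunctionBegin;
  /* Illustrative sketch, not part of vecstash.c */
  /* creates the stash; also reads -vecstash_initial_size if it was given */
  PetscCall(VecStashCreate_Private(comm, bs, stash));
  /* optionally record a user-supplied maximum; it is consulted on the first expansion */
  if (sizehint > 0) PetscCall(VecStashSetInitialSize_Private(stash, sizehint));
  PetscFunctionReturn(PETSC_SUCCESS);
}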

 71: /*
 72:    VecStashDestroy_Private - Destroy the stash
 73: */
 74: PetscErrorCode VecStashDestroy_Private(VecStash *stash)
 75: {
 76:   PetscFunctionBegin;
 77:   PetscCall(PetscFree2(stash->array, stash->idx));
 78:   PetscCall(PetscFree(stash->bowners));
 79:   PetscFunctionReturn(PETSC_SUCCESS);
 80: }

 82: /*
 83:    VecStashScatterEnd_Private - This is called as the final stage of the
 84:    scatter. The final stages of message passing are completed here, and
 85:    all the memory used for message passing is cleaned up. This
 86:    routine also resets the stash, and deallocates the memory used
 87:    for the stash. It also keeps track of the current memory usage
 88:    so that the same value can be used the next time through.
 89: */
 90: PetscErrorCode VecStashScatterEnd_Private(VecStash *stash)
 91: {
 92:   PetscMPIInt nsends = stash->nsends;
 93:   PetscInt    oldnmax;
 94:   MPI_Status *send_status;

 96:   PetscFunctionBegin;
 97:   /* wait on sends */
 98:   if (nsends) {
 99:     PetscCall(PetscMalloc1(2 * nsends, &send_status));
100:     PetscCallMPI(MPI_Waitall(2 * nsends, stash->send_waits, send_status));
101:     PetscCall(PetscFree(send_status));
102:   }

104:   /* Now update oldnmax to be approximately 10% more than the maximum n used, so that
105:      the wastage of space is reduced the next time this stash is used.
106:      Also update oldnmax, only if it increases */
107:   if (stash->n) {
108:     oldnmax = ((PetscInt)(stash->n * 1.1) + 5) * stash->bs;
109:     if (oldnmax > stash->oldnmax) stash->oldnmax = oldnmax;
110:   }

112:   stash->nmax       = 0;
113:   stash->n          = 0;
114:   stash->reallocs   = -1;
115:   stash->rmax       = 0;
116:   stash->nprocessed = 0;

118:   PetscCall(PetscFree2(stash->array, stash->idx));
119:   stash->array = NULL;
120:   stash->idx   = NULL;
121:   PetscCall(PetscFree(stash->send_waits));
122:   PetscCall(PetscFree(stash->recv_waits));
123:   PetscCall(PetscFree2(stash->svalues, stash->sindices));
124:   PetscCall(PetscFree2(stash->rvalues, stash->rindices));
125:   PetscCall(PetscFree(stash->nprocs));
126:   PetscFunctionReturn(PETSC_SUCCESS);
127: }
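
As a concrete example of the resizing heuristic above: with stash->n = 1000 stashed entries and bs = 2, oldnmax becomes ((PetscInt)(1000 * 1.1) + 5) * 2 = 2210 individual values, so the next assembly that reuses this stash starts with roughly 10% more room than the previous one actually needed.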

129: /*
130:    VecStashGetInfo_Private - Gets the relevant statistics of the stash

132:    Input Parameter:
133:    stash    - the stash
134:    Output Parameters:
135:    nstash   - the size of the stash
136:    reallocs - the number of additional mallocs incurred.
137: */
138: PetscErrorCode VecStashGetInfo_Private(VecStash *stash, PetscInt *nstash, PetscInt *reallocs)
139: {
140:   PetscFunctionBegin;
141:   if (nstash) *nstash = stash->n * stash->bs;
142:   if (reallocs) {
143:     if (stash->reallocs < 0) *reallocs = 0;
144:     else *reallocs = stash->reallocs;
145:   }
146:   PetscFunctionReturn(PETSC_SUCCESS);
147: }

149: /*
150:    VecStashSetInitialSize_Private - Sets the initial size of the stash

152:    Input Parameters:
153:    stash  - the stash
154:    max    - the value that is used as the max size of the stash.
155:             This value is used while allocating memory, and it specifies
156:             the number of individual values stored, even for the block stash
157: */
158: PetscErrorCode VecStashSetInitialSize_Private(VecStash *stash, PetscInt max)
159: {
160:   PetscFunctionBegin;
161:   stash->umax = max;
162:   PetscFunctionReturn(PETSC_SUCCESS);
163: }

165: /* VecStashExpand_Private - Expand the stash. This function is called
166:    when the space in the stash is not sufficient to add the new values
167:    being inserted into the stash.

169:    Input Parameters:
170:    stash - the stash
171:    incr  - the minimum increase requested

173:    Notes:
174:    For an already-allocated stash this routine at least doubles the current capacity; a new or reused stash starts from umax, oldnmax, or the default size.
175: */
176: PetscErrorCode VecStashExpand_Private(VecStash *stash, PetscInt incr)
177: {
178:   PetscInt    *n_idx, newnmax, bs = stash->bs;
179:   PetscScalar *n_array;

181:   PetscFunctionBegin;
182:   /* allocate a larger stash. */
183:   if (!stash->oldnmax && !stash->nmax) { /* new stash */
184:     if (stash->umax) newnmax = stash->umax / bs;
185:     else newnmax = DEFAULT_STASH_SIZE / bs;
186:   } else if (!stash->nmax) { /* reusing stash */
187:     if (stash->umax > stash->oldnmax) newnmax = stash->umax / bs;
188:     else newnmax = stash->oldnmax / bs;
189:   } else newnmax = stash->nmax * 2;

191:   if (newnmax < (stash->nmax + incr)) newnmax += 2 * incr;

193:   PetscCall(PetscMalloc2(bs * newnmax, &n_array, newnmax, &n_idx));
194:   PetscCall(PetscMemcpy(n_array, stash->array, bs * stash->nmax * sizeof(PetscScalar)));
195:   PetscCall(PetscMemcpy(n_idx, stash->idx, stash->nmax * sizeof(PetscInt)));
196:   PetscCall(PetscFree2(stash->array, stash->idx));

198:   stash->array = n_array;
199:   stash->idx   = n_idx;
200:   stash->nmax  = newnmax;
201:   stash->reallocs++;
202:   PetscFunctionReturn(PETSC_SUCCESS);
203: }
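
To show how VecStashExpand_Private() is typically driven, here is a sketch (not part of vecstash.c, and assuming bs == 1) of appending one (index, value) pair; the helper name is hypothetical, but the pattern follows directly from the fields used above.

static PetscErrorCode StashOneValue_Example(VecStash *stash, PetscInt row, PetscScalar value)
{
  PetscFunctionBegin;
  /* Illustrative sketch, not part of vecstash.c */
  /* grow the stash only when it is full; VecStashExpand_Private() preserves the existing entries */
  if (stash->n + 1 > stash->nmax) PetscCall(VecStashExpand_Private(stash, 1));
  stash->idx[stash->n]   = row;   /* global index of the entry */
  stash->array[stash->n] = value; /* assumes bs == 1; a block stash stores bs values per entry */
  stash->n++;
  PetscFunctionReturn(PETSC_SUCCESS);
}
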
204: /*
205:   VecStashScatterBegin_Private - Initiates the transfer of values to the
206:   correct owners. This function goes through the stash, checks the
207:   owners of each stashed value, and sends the values off to the owner
208:   processors.

210:   Input Parameters:
211:   stash  - the stash
212:   owners - an array of size 'no-of-procs' which gives the ownership range
213:            for each process.

215:   Notes:
216:     The 'owners' array, in the case of the blocked stash, has the
217:   ranges specified in blocked global indices, and for the regular stash
218:   in regular global indices.
219: */
220: PetscErrorCode VecStashScatterBegin_Private(VecStash *stash, const PetscInt *owners)
221: {
222:   PetscMPIInt  size = stash->size, tag1 = stash->tag1, tag2 = stash->tag2, j, *owner, nsends, nreceives;
223:   PetscInt    *start, *nprocs, inreceives;
224:   PetscInt     nmax, *sindices, *rindices, idx, bs = stash->bs, lastidx;
225:   PetscScalar *rvalues, *svalues;
226:   MPI_Comm     comm = stash->comm;
227:   MPI_Request *send_waits, *recv_waits;

229:   PetscFunctionBegin;
230:   /*  first count number of contributors to each processor */
231:   PetscCall(PetscCalloc1(2 * size, &nprocs));
232:   PetscCall(PetscMalloc1(stash->n, &owner));

234:   j       = 0;
235:   lastidx = -1;
236:   for (PetscInt i = 0; i < stash->n; i++) {
237:     /* if indices are NOT locally sorted, need to start search at the beginning */
238:     if (lastidx > (idx = stash->idx[i])) j = 0;
239:     lastidx = idx;
240:     for (; j < size; j++) {
241:       if (idx >= owners[j] && idx < owners[j + 1]) {
242:         nprocs[2 * j]++;
243:         nprocs[2 * j + 1] = 1;
244:         owner[i]          = j;
245:         break;
246:       }
247:     }
248:   }
249:   nsends = 0;
250:   for (PetscMPIInt i = 0; i < size; i++) nsends += nprocs[2 * i + 1];

252:   /* inform other processors of number of messages and max length */
253:   PetscCall(PetscMaxSum(comm, nprocs, &nmax, &inreceives));
254:   PetscCall(PetscMPIIntCast(inreceives, &nreceives));

256:   /* post receives:
257:      since we don't know how long each individual message is we
258:      allocate the largest needed buffer for each receive. Potentially
259:      this is a lot of wasted space.
260:   */
261:   PetscCall(PetscMalloc2(nreceives * nmax * bs, &rvalues, nreceives * nmax, &rindices));
262:   PetscCall(PetscMalloc1(2 * nreceives, &recv_waits));
263:   for (PetscMPIInt i = 0, count = 0; i < nreceives; i++) {
264:     PetscCallMPI(MPIU_Irecv(rvalues + bs * nmax * i, bs * nmax, MPIU_SCALAR, MPI_ANY_SOURCE, tag1, comm, recv_waits + count++));
265:     PetscCallMPI(MPIU_Irecv(rindices + nmax * i, nmax, MPIU_INT, MPI_ANY_SOURCE, tag2, comm, recv_waits + count++));
266:   }

268:   /* do sends:
269:       1) start[i] gives the starting index in svalues for the values going to
270:          the ith processor
271:   */
272:   PetscCall(PetscMalloc2(stash->n * bs, &svalues, stash->n, &sindices));
273:   PetscCall(PetscMalloc1(2 * nsends, &send_waits));
274:   PetscCall(PetscMalloc1(size, &start));
275:   /* use 2 sends per destination: the first with the values (svalues), the second with the indices (sindices) */
276:   start[0] = 0;
277:   for (PetscMPIInt i = 1; i < size; i++) start[i] = start[i - 1] + nprocs[2 * i - 2];

279:   for (PetscInt i = 0; i < stash->n; i++) {
280:     j = owner[i];
281:     if (bs == 1) svalues[start[j]] = stash->array[i];
282:     else PetscCall(PetscMemcpy(svalues + bs * start[j], stash->array + bs * i, bs * sizeof(PetscScalar)));
283:     sindices[start[j]] = stash->idx[i];
284:     start[j]++;
285:   }
286:   start[0] = 0;
287:   for (PetscMPIInt i = 1; i < size; i++) start[i] = start[i - 1] + nprocs[2 * i - 2];

289:   for (PetscMPIInt i = 0, count = 0; i < size; i++) {
290:     if (nprocs[2 * i + 1]) {
291:       PetscCallMPI(MPIU_Isend(svalues + bs * start[i], bs * nprocs[2 * i], MPIU_SCALAR, i, tag1, comm, send_waits + count++));
292:       PetscCallMPI(MPIU_Isend(sindices + start[i], nprocs[2 * i], MPIU_INT, i, tag2, comm, send_waits + count++));
293:     }
294:   }
295:   PetscCall(PetscFree(owner));
296:   PetscCall(PetscFree(start));
297:   /* This memory is reused in scatter end for a different purpose */
298:   for (PetscMPIInt i = 0; i < 2 * size; i++) nprocs[i] = -1;

300:   stash->nprocs     = nprocs;
301:   stash->svalues    = svalues;
302:   stash->sindices   = sindices;
303:   stash->rvalues    = rvalues;
304:   stash->rindices   = rindices;
305:   stash->nsends     = nsends;
306:   stash->nrecvs     = nreceives;
307:   stash->send_waits = send_waits;
308:   stash->recv_waits = recv_waits;
309:   stash->rmax       = nmax;
310:   PetscFunctionReturn(PETSC_SUCCESS);
311: }
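
A sketch (not part of vecstash.c) of a typical call site: the 'owners' argument is the ownership-range array of the vector's layout, so that owners[j] <= idx < owners[j+1] identifies rank j as the owner. This assumes bs == 1; per the Notes above, a block stash needs the ranges converted to blocked indices first. The helper name is hypothetical.

static PetscErrorCode BeginStashScatter_Example(VecStash *stash, PetscLayout map)
{
  PetscFunctionBegin;
  /* Illustrative sketch, not part of vecstash.c; assumes bs == 1 */
  /* map->range has size+1 entries giving each rank's ownership range in global indices */
  PetscCall(VecStashScatterBegin_Private(stash, map->range));
  PetscFunctionReturn(PETSC_SUCCESS);
}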

313: /*
314:    VecStashScatterGetMesg_Private - This function waits on the receives posted
315:    in the function VecStashScatterBegin_Private() and returns one message at
316:    a time to the calling function. If no messages are left, it indicates this
317:    by setting flg = 0, else it sets flg = 1.

319:    Input Parameter:
320:    stash - the stash

322:    Output Parameters:
323:    nvals - the number of entries in the current message.
324:    rows  - an array of row indices (or blocked indices) corresponding to the values
325:    vals  - the values
326:    flg   - 0 indicates no more messages are left, and the current call has no
327:            values associated with it.
328:            1 indicates that the current call successfully received a message, and the
329:            other output parameters nvals, rows, vals are set appropriately.
330: */
331: PetscErrorCode VecStashScatterGetMesg_Private(VecStash *stash, PetscMPIInt *nvals, PetscInt **rows, PetscScalar **vals, PetscInt *flg)
332: {
333:   PetscMPIInt i = 0; /* dummy value so MPI-Uni doesn't think it is not set */
334:   PetscInt   *flg_v;
335:   PetscInt    i1, i2, bs = stash->bs;
336:   MPI_Status  recv_status;
337:   PetscBool   match_found = PETSC_FALSE;

339:   PetscFunctionBegin;
340:   *flg = 0; /* When a message is discovered this is reset to 1 */
341:   /* Return if no more messages to process */
342:   if (stash->nprocessed == stash->nrecvs) PetscFunctionReturn(PETSC_SUCCESS);

344:   flg_v = stash->nprocs;
345:   /* If a matching pair of receives is found, process them, and return the data to
346:      the calling function. Until then, keep receiving messages */
347:   while (!match_found) {
348:     PetscCallMPI(MPI_Waitany(2 * stash->nrecvs, stash->recv_waits, &i, &recv_status));
349:     /* Now pack the received message into a structure which is usable by others */
350:     if (i % 2) {
351:       PetscCallMPI(MPI_Get_count(&recv_status, MPIU_INT, nvals));
352:       flg_v[2 * recv_status.MPI_SOURCE + 1] = i / 2;
353:     } else {
354:       PetscCallMPI(MPI_Get_count(&recv_status, MPIU_SCALAR, nvals));
355:       flg_v[2 * recv_status.MPI_SOURCE] = i / 2;
356:       *nvals                            = *nvals / bs;
357:     }

359:     /* Check if we have both the messages from this proc */
360:     i1 = flg_v[2 * recv_status.MPI_SOURCE];
361:     i2 = flg_v[2 * recv_status.MPI_SOURCE + 1];
362:     if (i1 != -1 && i2 != -1) {
363:       *rows = stash->rindices + i2 * stash->rmax;
364:       *vals = stash->rvalues + i1 * bs * stash->rmax;
365:       *flg  = 1;
366:       stash->nprocessed++;
367:       match_found = PETSC_TRUE;
368:     }
369:   }
370:   PetscFunctionReturn(PETSC_SUCCESS);
371: }
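
A sketch (not part of vecstash.c) of the receive side this routine is designed for: keep pulling messages until flg comes back 0, then finish with VecStashScatterEnd_Private(). It assumes ADD_VALUES semantics and bs == 1; the helper name, the 'owners' array (the same ownership ranges passed to VecStashScatterBegin_Private()), and 'xlocal' (the local part of the vector) are the caller's.

static PetscErrorCode DrainStash_Example(VecStash *stash, const PetscInt *owners, PetscMPIInt rank, PetscScalar *xlocal)
{
  PetscMPIInt  nvals;
  PetscInt    *rows, flg, i;
  PetscScalar *vals;

  PetscFunctionBegin;
  /* Illustrative sketch, not part of vecstash.c; assumes ADD_VALUES and bs == 1 */
  PetscCall(VecStashScatterGetMesg_Private(stash, &nvals, &rows, &vals, &flg));
  while (flg) {
    /* rows[] holds global indices; convert to local offsets on this rank and accumulate */
    for (i = 0; i < nvals; i++) xlocal[rows[i] - owners[rank]] += vals[i];
    PetscCall(VecStashScatterGetMesg_Private(stash, &nvals, &rows, &vals, &flg));
  }
  PetscCall(VecStashScatterEnd_Private(stash)); /* waits on the sends, frees the buffers, resets the stash */
  PetscFunctionReturn(PETSC_SUCCESS);
}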

373: /*
374:  * Sort the stash, removing duplicates (combining as appropriate).
375:  */
376: PetscErrorCode VecStashSortCompress_Private(VecStash *stash)
377: {
378:   PetscInt i, j, bs = stash->bs;

380:   PetscFunctionBegin;
381:   if (!stash->n) PetscFunctionReturn(PETSC_SUCCESS);
382:   if (bs == 1) {
383:     PetscCall(PetscSortIntWithScalarArray(stash->n, stash->idx, stash->array));
384:     for (i = 1, j = 0; i < stash->n; i++) {
385:       if (stash->idx[i] == stash->idx[j]) {
386:         switch (stash->insertmode) {
387:         case ADD_VALUES:
388:           stash->array[j] += stash->array[i];
389:           break;
390:         case INSERT_VALUES:
391:           stash->array[j] = stash->array[i];
392:           break;
393:         default:
394:           SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Insert mode not supported 0x%x", stash->insertmode);
395:         }
396:       } else {
397:         j++;
398:         stash->idx[j]   = stash->idx[i];
399:         stash->array[j] = stash->array[i];
400:       }
401:     }
402:     stash->n = j + 1;
403:   } else { /* block stash */
404:     PetscInt    *perm = NULL;
405:     PetscScalar *arr;
406:     PetscCall(PetscMalloc2(stash->n, &perm, stash->n * bs, &arr));
407:     for (i = 0; i < stash->n; i++) perm[i] = i;
408:     PetscCall(PetscSortIntWithArray(stash->n, stash->idx, perm));

410:     /* Out-of-place copy of stash->array into arr (in sorted order), combining duplicates */
411:     PetscCall(PetscMemcpy(arr, stash->array + perm[0] * bs, bs * sizeof(PetscScalar)));
412:     for (i = 1, j = 0; i < stash->n; i++) {
413:       PetscInt k;
414:       if (stash->idx[i] == stash->idx[j]) {
415:         switch (stash->insertmode) {
416:         case ADD_VALUES:
417:           for (k = 0; k < bs; k++) arr[j * bs + k] += stash->array[perm[i] * bs + k];
418:           break;
419:         case INSERT_VALUES:
420:           for (k = 0; k < bs; k++) arr[j * bs + k] = stash->array[perm[i] * bs + k];
421:           break;
422:         default:
423:           SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Insert mode not supported 0x%x", stash->insertmode);
424:         }
425:       } else {
426:         j++;
427:         stash->idx[j] = stash->idx[i];
428:         for (k = 0; k < bs; k++) arr[j * bs + k] = stash->array[perm[i] * bs + k];
429:       }
430:     }
431:     stash->n = j + 1;
432:     PetscCall(PetscMemcpy(stash->array, arr, stash->n * bs * sizeof(PetscScalar)));
433:     PetscCall(PetscFree2(perm, arr));
434:   }
435:   PetscFunctionReturn(PETSC_SUCCESS);
436: }
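
As a small worked example of the compression above with bs == 1 and ADD_VALUES: a stash holding idx = {5, 2, 5} with array = {1.0, 3.0, 4.0} is first sorted to idx = {2, 5, 5}, array = {3.0, 1.0, 4.0}, and then compressed to idx = {2, 5}, array = {3.0, 5.0} with stash->n = 2.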

438: PetscErrorCode VecStashGetOwnerList_Private(VecStash *stash, PetscLayout map, PetscMPIInt *nowners, PetscMPIInt **owners)
439: {
440:   PetscInt       i, bs = stash->bs;
441:   PetscMPIInt    r;
442:   PetscSegBuffer seg;

444:   PetscFunctionBegin;
445:   PetscCheck(bs == 1 || bs == map->bs, map->comm, PETSC_ERR_PLIB, "Stash block size %" PetscInt_FMT " does not match layout block size %" PetscInt_FMT, bs, map->bs);
446:   PetscCall(PetscSegBufferCreate(sizeof(PetscMPIInt), 50, &seg));
447:   *nowners = 0;
448:   for (i = 0, r = -1; i < stash->n; i++) {
449:     if (stash->idx[i] * bs >= map->range[r + 1]) {
450:       PetscMPIInt *rank;
451:       PetscCall(PetscSegBufferGet(seg, 1, &rank));
452:       PetscCall(PetscLayoutFindOwner(map, stash->idx[i] * bs, &r));
453:       *rank = r;
454:       (*nowners)++;
455:     }
456:   }
457:   PetscCall(PetscSegBufferExtractAlloc(seg, owners));
458:   PetscCall(PetscSegBufferDestroy(&seg));
459:   PetscFunctionReturn(PETSC_SUCCESS);
460: }
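
Finally, a sketch (not part of vecstash.c) of how the two routines above combine: VecStashGetOwnerList_Private() walks stash->idx assuming it is sorted in increasing order, so it is naturally preceded by VecStashSortCompress_Private(). The helper name is hypothetical; the caller owns the returned array.

static PetscErrorCode ListStashOwners_Example(VecStash *stash, PetscLayout map)
{
  PetscMPIInt nowners, *owners;

  PetscFunctionBegin;
  /* Illustrative sketch, not part of vecstash.c */
  PetscCall(VecStashSortCompress_Private(stash));                         /* sort indices, combine duplicates */
  PetscCall(VecStashGetOwnerList_Private(stash, map, &nowners, &owners)); /* one entry per destination rank */
  /* ... communicate with the listed ranks ... */
  PetscCall(PetscFree(owners));
  PetscFunctionReturn(PETSC_SUCCESS);
}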