Actual source code: matstash.c
1: #include <petsc/private/matimpl.h>
3: #define DEFAULT_STASH_SIZE 10000
5: static PetscErrorCode MatStashScatterBegin_Ref(Mat, MatStash *, PetscInt *);
6: PETSC_INTERN PetscErrorCode MatStashScatterGetMesg_Ref(MatStash *, PetscMPIInt *, PetscInt **, PetscInt **, PetscScalar **, PetscInt *);
7: PETSC_INTERN PetscErrorCode MatStashScatterEnd_Ref(MatStash *);
8: #if !defined(PETSC_HAVE_MPIUNI)
9: static PetscErrorCode MatStashScatterBegin_BTS(Mat, MatStash *, PetscInt *);
10: static PetscErrorCode MatStashScatterGetMesg_BTS(MatStash *, PetscMPIInt *, PetscInt **, PetscInt **, PetscScalar **, PetscInt *);
11: static PetscErrorCode MatStashScatterEnd_BTS(MatStash *);
12: #endif
14: /*
15: MatStashCreate_Private - Creates a stash, currently used for all the parallel
16: matrix implementations. The stash is where elements of a matrix destined
17: to be stored on other processors are kept until matrix assembly is done.
19: This is a simple-minded stash: it simply adds entries to the end of the stash.
21: Input Parameters:
22: comm - communicator, required for scatters.
23: bs - stash block size, used when stashing blocks of values
25: Output Parameter:
26: stash - the newly created stash
27: */
28: PetscErrorCode MatStashCreate_Private(MPI_Comm comm, PetscInt bs, MatStash *stash)
29: {
30: PetscInt max, *opt, nopt, i;
31: PetscBool flg;
33: PetscFunctionBegin;
34: /* Require 2 tags; get them using PetscCommGetNewTag() */
35: stash->comm = comm;
37: PetscCall(PetscCommGetNewTag(stash->comm, &stash->tag1));
38: PetscCall(PetscCommGetNewTag(stash->comm, &stash->tag2));
39: PetscCallMPI(MPI_Comm_size(stash->comm, &stash->size));
40: PetscCallMPI(MPI_Comm_rank(stash->comm, &stash->rank));
41: PetscCall(PetscMalloc1(2 * stash->size, &stash->flg_v));
42: for (i = 0; i < 2 * stash->size; i++) stash->flg_v[i] = -1;
44: nopt = stash->size;
45: PetscCall(PetscMalloc1(nopt, &opt));
46: PetscCall(PetscOptionsGetIntArray(NULL, NULL, "-matstash_initial_size", opt, &nopt, &flg));
47: if (flg) {
48: if (nopt == 1) max = opt[0];
49: else if (nopt == stash->size) max = opt[stash->rank];
50: else if (stash->rank < nopt) max = opt[stash->rank];
51: else max = 0; /* Use default */
52: stash->umax = max;
53: } else {
54: stash->umax = 0;
55: }
56: PetscCall(PetscFree(opt));
57: if (bs <= 0) bs = 1;
59: stash->bs = bs;
60: stash->nmax = 0;
61: stash->oldnmax = 0;
62: stash->n = 0;
63: stash->reallocs = -1;
64: stash->space_head = NULL;
65: stash->space = NULL;
67: stash->send_waits = NULL;
68: stash->recv_waits = NULL;
69: stash->send_status = NULL;
70: stash->nsends = 0;
71: stash->nrecvs = 0;
72: stash->svalues = NULL;
73: stash->rvalues = NULL;
74: stash->rindices = NULL;
75: stash->nprocessed = 0;
76: stash->reproduce = PETSC_FALSE;
77: stash->blocktype = MPI_DATATYPE_NULL;
79: PetscCall(PetscOptionsGetBool(NULL, NULL, "-matstash_reproduce", &stash->reproduce, NULL));
80: #if !defined(PETSC_HAVE_MPIUNI)
81: flg = PETSC_FALSE;
82: PetscCall(PetscOptionsGetBool(NULL, NULL, "-matstash_legacy", &flg, NULL));
83: if (!flg) {
84: stash->ScatterBegin = MatStashScatterBegin_BTS;
85: stash->ScatterGetMesg = MatStashScatterGetMesg_BTS;
86: stash->ScatterEnd = MatStashScatterEnd_BTS;
87: stash->ScatterDestroy = MatStashScatterDestroy_BTS;
88: } else {
89: #endif
90: stash->ScatterBegin = MatStashScatterBegin_Ref;
91: stash->ScatterGetMesg = MatStashScatterGetMesg_Ref;
92: stash->ScatterEnd = MatStashScatterEnd_Ref;
93: stash->ScatterDestroy = NULL;
94: #if !defined(PETSC_HAVE_MPIUNI)
95: }
96: #endif
97: PetscFunctionReturn(PETSC_SUCCESS);
98: }
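/*
  Rough sketch of how a parallel matrix implementation is expected to drive the stash during
  assembly (illustrative only; the exact call sites vary by matrix type):

    MatStashCreate_Private()                                  - at matrix creation
    MatStashValuesRow_Private()/MatStashValuesCol_Private()   - from MatSetValues() for rows owned by other ranks
    MatStashScatterBegin_Private()                            - from MatAssemblyBegin()
    MatStashScatterGetMesg_Private() called in a loop, then
    MatStashScatterEnd_Private()                              - from MatAssemblyEnd()
    MatStashDestroy_Private()                                 - when the matrix is destroyed
*/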
100: /*
101: MatStashDestroy_Private - Destroy the stash
102: */
103: PetscErrorCode MatStashDestroy_Private(MatStash *stash)
104: {
105: PetscFunctionBegin;
106: PetscCall(PetscMatStashSpaceDestroy(&stash->space_head));
107: if (stash->ScatterDestroy) PetscCall((*stash->ScatterDestroy)(stash));
108: stash->space = NULL;
109: PetscCall(PetscFree(stash->flg_v));
110: PetscFunctionReturn(PETSC_SUCCESS);
111: }
113: /*
114: MatStashScatterEnd_Private - This is called as the final stage of
115: scatter. The final stages of message passing are completed here, and
116: all the memory used for message passing is cleaned up. This
117: routine also resets the stash and deallocates the memory used
118: for the stash. It also keeps track of the current memory usage
119: so that the same value can be used the next time through.
120: */
121: PetscErrorCode MatStashScatterEnd_Private(MatStash *stash)
122: {
123: PetscFunctionBegin;
124: PetscCall((*stash->ScatterEnd)(stash));
125: PetscFunctionReturn(PETSC_SUCCESS);
126: }
128: PETSC_INTERN PetscErrorCode MatStashScatterEnd_Ref(MatStash *stash)
129: {
130: PetscMPIInt nsends = stash->nsends;
131: PetscInt bs2, oldnmax;
132: MPI_Status *send_status;
134: PetscFunctionBegin;
135: for (PetscMPIInt i = 0; i < 2 * stash->size; i++) stash->flg_v[i] = -1;
136: /* wait on sends */
137: if (nsends) {
138: PetscCall(PetscMalloc1(2 * nsends, &send_status));
139: PetscCallMPI(MPI_Waitall(2 * nsends, stash->send_waits, send_status));
140: PetscCall(PetscFree(send_status));
141: }
143: /* Now update oldnmax to be approximately 10% more than the maximum n used, so that
144: less space is wasted the next time this stash is used.
145: Update oldnmax only if it increases */
146: if (stash->n) {
147: bs2 = stash->bs * stash->bs;
148: oldnmax = ((int)(stash->n * 1.1) + 5) * bs2;
149: if (oldnmax > stash->oldnmax) stash->oldnmax = oldnmax;
150: }
152: stash->nmax = 0;
153: stash->n = 0;
154: stash->reallocs = -1;
155: stash->nprocessed = 0;
157: PetscCall(PetscMatStashSpaceDestroy(&stash->space_head));
159: stash->space = NULL;
161: PetscCall(PetscFree(stash->send_waits));
162: PetscCall(PetscFree(stash->recv_waits));
163: PetscCall(PetscFree2(stash->svalues, stash->sindices));
164: PetscCall(PetscFree(stash->rvalues[0]));
165: PetscCall(PetscFree(stash->rvalues));
166: PetscCall(PetscFree(stash->rindices[0]));
167: PetscCall(PetscFree(stash->rindices));
168: PetscFunctionReturn(PETSC_SUCCESS);
169: }
171: /*
172: MatStashGetInfo_Private - Gets the relevant statistics of the stash
174: Input Parameter:
175: stash - the stash
176: Output Parameters:
177: nstash - the number of values currently stored in the stash
178: reallocs - the number of additional mallocs incurred.
179: */
180: PetscErrorCode MatStashGetInfo_Private(MatStash *stash, PetscInt *nstash, PetscInt *reallocs)
181: {
182: PetscInt bs2 = stash->bs * stash->bs;
184: PetscFunctionBegin;
185: if (nstash) *nstash = stash->n * bs2;
186: if (reallocs) {
187: if (stash->reallocs < 0) *reallocs = 0;
188: else *reallocs = stash->reallocs;
189: }
190: PetscFunctionReturn(PETSC_SUCCESS);
191: }
193: /*
194: MatStashSetInitialSize_Private - Sets the initial size of the stash
196: Input Parameters:
197: stash - the stash
198: max - the value used as the maximum size of the stash;
199: this value is used when allocating memory.
200: */
201: PetscErrorCode MatStashSetInitialSize_Private(MatStash *stash, PetscInt max)
202: {
203: PetscFunctionBegin;
204: stash->umax = max;
205: PetscFunctionReturn(PETSC_SUCCESS);
206: }
208: /* MatStashExpand_Private - Expand the stash. This function is called
209: when the space in the stash is insufficient to hold the new values
210: being inserted into the stash.
212: Input Parameters:
213: stash - the stash
214: incr - the minimum increase requested
216: Note:
217: This routine doubles the currently used memory.
218: */
219: static PetscErrorCode MatStashExpand_Private(MatStash *stash, PetscInt incr)
220: {
221: PetscInt newnmax, bs2 = stash->bs * stash->bs;
222: PetscCount cnewnmax;
224: PetscFunctionBegin;
225: /* allocate a larger stash */
226: if (!stash->oldnmax && !stash->nmax) { /* new stash */
227: if (stash->umax) cnewnmax = stash->umax / bs2;
228: else cnewnmax = DEFAULT_STASH_SIZE / bs2;
229: } else if (!stash->nmax) { /* reusing stash */
230: if (stash->umax > stash->oldnmax) cnewnmax = stash->umax / bs2;
231: else cnewnmax = stash->oldnmax / bs2;
232: } else cnewnmax = stash->nmax * 2;
233: if (cnewnmax < (stash->nmax + incr)) cnewnmax += 2 * incr;
234: PetscCall(PetscIntCast(cnewnmax, &newnmax));
236: /* Get a MatStashSpace and attach it to stash */
237: PetscCall(PetscMatStashSpaceGet(bs2, newnmax, &stash->space));
238: if (!stash->space_head) { /* new stash or reusing stash->oldnmax */
239: stash->space_head = stash->space;
240: }
242: stash->reallocs++;
243: stash->nmax = newnmax;
244: PetscFunctionReturn(PETSC_SUCCESS);
245: }
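/*
  Sizing example (illustrative): with bs = 2 (bs2 = 4) and no user-provided -matstash_initial_size,
  the first expansion allocates DEFAULT_STASH_SIZE / bs2 = 10000 / 4 = 2500 blocks. Later expansions
  within the same assembly roughly double nmax. When the stash is reused, MatStashScatterEnd_*()
  has recorded oldnmax ~ (1.1*n + 5)*bs2, so the first expansion of the next assembly starts near
  the previously observed size instead of the default.
*/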
246: /*
247: MatStashValuesRow_Private - inserts values into the stash. This function
248: expects the values to be row-oriented. Multiple columns belonging to the same row
249: can be inserted with a single call to this function.
251: Input Parameters:
252: stash - the stash
253: row - the global row corresponding to the values
254: n - the number of elements inserted. All elements belong to the above row.
255: idxn - the global column indices corresponding to each of the values.
256: values - the values inserted; ignorezeroentries - if PETSC_TRUE, zero values are not stashed
257: */
258: PetscErrorCode MatStashValuesRow_Private(MatStash *stash, PetscInt row, PetscInt n, const PetscInt idxn[], const PetscScalar values[], PetscBool ignorezeroentries)
259: {
260: PetscInt i, k, cnt = 0;
261: PetscMatStashSpace space = stash->space;
263: PetscFunctionBegin;
264: /* Check and see if we have sufficient memory */
265: if (!space || space->local_remaining < n) PetscCall(MatStashExpand_Private(stash, n));
266: space = stash->space;
267: k = space->local_used;
268: for (i = 0; i < n; i++) {
269: if (ignorezeroentries && values && values[i] == 0.0) continue;
270: space->idx[k] = row;
271: space->idy[k] = idxn[i];
272: space->val[k] = values ? values[i] : 0.0;
273: k++;
274: cnt++;
275: }
276: stash->n += cnt;
277: space->local_used += cnt;
278: space->local_remaining -= cnt;
279: PetscFunctionReturn(PETSC_SUCCESS);
280: }
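/*
  Illustrative usage (hedged sketch; the guard and variable names are assumptions, not code from
  this file): a parallel matrix type calls this from its MatSetValues() kernel for rows it does
  not own, for example

    if (row < rstart || row >= rend) {  // row owned by another process
      PetscCall(MatStashValuesRow_Private(&mat->stash, row, n, cols, vals, ignorezeroentries));
    }

  The stashed entries are shipped to their owners later by MatStashScatterBegin_Private().
*/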
282: /*
283: MatStashValuesCol_Private - inserts values into the stash. This function
284: expects the values to be column-oriented. Multiple columns belonging to the same row
285: can be inserted with a single call to this function.
287: Input Parameters:
288: stash - the stash
289: row - the global row corresponding to the values
290: n - the number of elements inserted. All elements belong to the above row.
291: idxn - the global column indices corresponding to each of the values.
292: values - the values inserted; ignorezeroentries - if PETSC_TRUE, zero values are not stashed
293: stepval - consecutive values are separated by a distance of stepval;
294: this happens because the input is column-oriented.
295: */
296: PetscErrorCode MatStashValuesCol_Private(MatStash *stash, PetscInt row, PetscInt n, const PetscInt idxn[], const PetscScalar values[], PetscInt stepval, PetscBool ignorezeroentries)
297: {
298: PetscInt i, k, cnt = 0;
299: PetscMatStashSpace space = stash->space;
301: PetscFunctionBegin;
302: /* Check and see if we have sufficient memory */
303: if (!space || space->local_remaining < n) PetscCall(MatStashExpand_Private(stash, n));
304: space = stash->space;
305: k = space->local_used;
306: for (i = 0; i < n; i++) {
307: if (ignorezeroentries && values && values[i * stepval] == 0.0) continue;
308: space->idx[k] = row;
309: space->idy[k] = idxn[i];
310: space->val[k] = values ? values[i * stepval] : 0.0;
311: k++;
312: cnt++;
313: }
314: stash->n += cnt;
315: space->local_used += cnt;
316: space->local_remaining -= cnt;
317: PetscFunctionReturn(PETSC_SUCCESS);
318: }
320: /*
321: MatStashValuesRowBlocked_Private - inserts blocks of values into the stash.
322: This function expects the values to be row-oriented. Multiple columns belonging
323: to the same block-row can be inserted with a single call to this function.
324: This function extracts the sub-block of values based on the dimensions of
325: the original input block, and the row,col values corresponding to the blocks.
327: Input Parameters:
328: stash - the stash
329: row - the global block-row corresponding to the values
330: n - the number of elements inserted. All elements belong to the above row.
331: idxn - the global block-column indices corresponding to each of the blocks of
332: values. Each block is of size bs*bs.
333: values - the values inserted
334: rmax - the number of block-rows in the original block.
335: cmax - the number of block-columns in the original block.
336: idx - the index of the current block-row in the original block.
337: */
338: PetscErrorCode MatStashValuesRowBlocked_Private(MatStash *stash, PetscInt row, PetscInt n, const PetscInt idxn[], const PetscScalar values[], PetscInt rmax, PetscInt cmax, PetscInt idx)
339: {
340: PetscInt i, j, k, bs2, bs = stash->bs, l;
341: const PetscScalar *vals;
342: PetscScalar *array;
343: PetscMatStashSpace space = stash->space;
345: PetscFunctionBegin;
346: if (!space || space->local_remaining < n) PetscCall(MatStashExpand_Private(stash, n));
347: space = stash->space;
348: l = space->local_used;
349: bs2 = bs * bs;
350: for (i = 0; i < n; i++) {
351: space->idx[l] = row;
352: space->idy[l] = idxn[i];
353: /* Now copy over the block of values. Store the values column-oriented.
354: This enables inserting multiple blocks belonging to a row with a single
355: function call */
356: array = space->val + bs2 * l;
357: vals = values + idx * bs2 * n + bs * i;
358: for (j = 0; j < bs; j++) {
359: for (k = 0; k < bs; k++) array[k * bs] = values ? vals[k] : 0.0;
360: array++;
361: vals += cmax * bs;
362: }
363: l++;
364: }
365: stash->n += n;
366: space->local_used += n;
367: space->local_remaining -= n;
368: PetscFunctionReturn(PETSC_SUCCESS);
369: }
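/*
  Storage example: for bs = 2, a row-oriented input sub-block

     a b
     c d

  is copied into space->val in column-oriented order a, c, b, d. MatStashValuesColBlocked_Private()
  below stores its column-oriented input the same way, so all stashed blocks end up column-oriented.
*/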
371: /*
372: MatStashValuesColBlocked_Private - inserts blocks of values into the stash.
373: This function expects the values to be column-oriented. Multiple columns belonging
374: to the same block-row can be inserted with a single call to this function.
375: This function extracts the sub-block of values based on the dimensions of
376: the original input block, and the row,col values corresponding to the blocks.
378: Input Parameters:
379: stash - the stash
380: row - the global block-row corresponding to the values
381: n - the number of elements inserted. All elements belong to the above row.
382: idxn - the global block-column indices corresponding to each of the blocks of
383: values. Each block is of size bs*bs.
384: values - the values inserted
385: rmax - the number of block-rows in the original block.
386: cmax - the number of block-columns in the original block.
387: idx - the index of the current block-row in the original block.
388: */
389: PetscErrorCode MatStashValuesColBlocked_Private(MatStash *stash, PetscInt row, PetscInt n, const PetscInt idxn[], const PetscScalar values[], PetscInt rmax, PetscInt cmax, PetscInt idx)
390: {
391: PetscInt i, j, k, bs2, bs = stash->bs, l;
392: const PetscScalar *vals;
393: PetscScalar *array;
394: PetscMatStashSpace space = stash->space;
396: PetscFunctionBegin;
397: if (!space || space->local_remaining < n) PetscCall(MatStashExpand_Private(stash, n));
398: space = stash->space;
399: l = space->local_used;
400: bs2 = bs * bs;
401: for (i = 0; i < n; i++) {
402: space->idx[l] = row;
403: space->idy[l] = idxn[i];
404: /* Now copy over the block of values. Store the values column-oriented.
405: This enables inserting multiple blocks belonging to a row with a single
406: function call */
407: array = space->val + bs2 * l;
408: vals = values + idx * bs2 * n + bs * i;
409: for (j = 0; j < bs; j++) {
410: for (k = 0; k < bs; k++) array[k] = values ? vals[k] : 0.0;
411: array += bs;
412: vals += rmax * bs;
413: }
414: l++;
415: }
416: stash->n += n;
417: space->local_used += n;
418: space->local_remaining -= n;
419: PetscFunctionReturn(PETSC_SUCCESS);
420: }
421: /*
422: MatStashScatterBegin_Private - Initiates the transfer of values to the
423: correct owners. This function goes through the stash, checks the
424: owner of each stashed value, and sends the values off to the owner
425: processors.
427: Input Parameters:
428: stash - the stash
429: owners - an array of size 'no-of-procs' which gives the ownership range
430: for each node.
432: Note:
433: In the case of the blocked stash, the 'owners' array has the
434: ranges specified in blocked global indices, while for the regular stash they are in
435: ordinary (non-blocked) global indices.
436: */
437: PetscErrorCode MatStashScatterBegin_Private(Mat mat, MatStash *stash, PetscInt *owners)
438: {
439: PetscFunctionBegin;
440: PetscCall((*stash->ScatterBegin)(mat, stash, owners));
441: PetscFunctionReturn(PETSC_SUCCESS);
442: }
444: static PetscErrorCode MatStashScatterBegin_Ref(Mat mat, MatStash *stash, PetscInt *owners)
445: {
446: PetscInt *owner, *startv, *starti, bs2;
447: PetscInt size = stash->size;
448: PetscInt *sindices, **rindices, j, ii, idx, lastidx, l;
449: PetscScalar **rvalues, *svalues;
450: MPI_Comm comm = stash->comm;
451: MPI_Request *send_waits, *recv_waits, *recv_waits1, *recv_waits2;
452: PetscMPIInt *sizes, *nlengths, nreceives, nsends, tag1 = stash->tag1, tag2 = stash->tag2;
453: PetscInt *sp_idx, *sp_idy;
454: PetscScalar *sp_val;
455: PetscMatStashSpace space, space_next;
457: PetscFunctionBegin;
458: { /* make sure all processors are either in INSERTMODE or ADDMODE */
459: InsertMode addv;
460: PetscCallMPI(MPIU_Allreduce((PetscEnum *)&mat->insertmode, (PetscEnum *)&addv, 1, MPIU_ENUM, MPI_BOR, PetscObjectComm((PetscObject)mat)));
461: PetscCheck(addv != (ADD_VALUES | INSERT_VALUES), PetscObjectComm((PetscObject)mat), PETSC_ERR_ARG_WRONGSTATE, "Some processors inserted others added");
462: mat->insertmode = addv; /* in case this processor had no cache */
463: }
465: bs2 = stash->bs * stash->bs;
467: /* first count number of contributors to each processor */
468: PetscCall(PetscCalloc1(size, &nlengths));
469: PetscCall(PetscMalloc1(stash->n + 1, &owner));
471: ii = j = 0;
472: lastidx = -1;
473: space = stash->space_head;
474: while (space) {
475: space_next = space->next;
476: sp_idx = space->idx;
477: for (l = 0; l < space->local_used; l++) {
478: /* if indices are NOT locally sorted, need to start search at the beginning */
479: if (lastidx > (idx = sp_idx[l])) j = 0;
480: lastidx = idx;
481: for (; j < size; j++) {
482: if (idx >= owners[j] && idx < owners[j + 1]) {
483: nlengths[j]++;
484: owner[ii] = j;
485: break;
486: }
487: }
488: ii++;
489: }
490: space = space_next;
491: }
493: /* Now check what procs get messages - and compute nsends. */
494: PetscCall(PetscCalloc1(size, &sizes));
495: nsends = 0;
496: for (PetscMPIInt i = 0; i < size; i++) {
497: if (nlengths[i]) {
498: sizes[i] = 1;
499: nsends++;
500: }
501: }
503: {
504: PetscMPIInt *onodes, *olengths;
505: /* Determine the number of messages to expect, their lengths, and the from-ids */
506: PetscCall(PetscGatherNumberOfMessages(comm, sizes, nlengths, &nreceives));
507: PetscCall(PetscGatherMessageLengths(comm, nsends, nreceives, nlengths, &onodes, &olengths));
508: /* since row and col indices are sent together (clubbed), lengths are multiplied by 2 */
509: for (PetscMPIInt i = 0; i < nreceives; i++) olengths[i] *= 2;
510: PetscCall(PetscPostIrecvInt(comm, tag1, nreceives, onodes, olengths, &rindices, &recv_waits1));
511: /* values are of length bs2 per entry (and remove the earlier factor of 2) */
512: for (PetscMPIInt i = 0; i < nreceives; i++) PetscCall(PetscMPIIntCast(olengths[i] * bs2 / 2, &olengths[i]));
513: PetscCall(PetscPostIrecvScalar(comm, tag2, nreceives, onodes, olengths, &rvalues, &recv_waits2));
514: PetscCall(PetscFree(onodes));
515: PetscCall(PetscFree(olengths));
516: }
518: /* do sends:
519: 1) startv[i]/starti[i] give the starting indices in svalues/sindices for the data going to
520: the ith processor
521: */
522: PetscCall(PetscMalloc2(bs2 * stash->n, &svalues, 2 * (stash->n + 1), &sindices));
523: PetscCall(PetscMalloc1(2 * nsends, &send_waits));
524: PetscCall(PetscMalloc2(size, &startv, size, &starti));
525: /* use 2 sends per processor: the first with the indices (all_i and all_j), the second with the values (all_a) */
526: startv[0] = 0;
527: starti[0] = 0;
528: for (PetscMPIInt i = 1; i < size; i++) {
529: startv[i] = startv[i - 1] + nlengths[i - 1];
530: starti[i] = starti[i - 1] + 2 * nlengths[i - 1];
531: }
533: ii = 0;
534: space = stash->space_head;
535: while (space) {
536: space_next = space->next;
537: sp_idx = space->idx;
538: sp_idy = space->idy;
539: sp_val = space->val;
540: for (l = 0; l < space->local_used; l++) {
541: j = owner[ii];
542: if (bs2 == 1) {
543: svalues[startv[j]] = sp_val[l];
544: } else {
545: PetscInt k;
546: PetscScalar *buf1, *buf2;
547: buf1 = svalues + bs2 * startv[j];
548: buf2 = space->val + bs2 * l;
549: for (k = 0; k < bs2; k++) buf1[k] = buf2[k];
550: }
551: sindices[starti[j]] = sp_idx[l];
552: sindices[starti[j] + nlengths[j]] = sp_idy[l];
553: startv[j]++;
554: starti[j]++;
555: ii++;
556: }
557: space = space_next;
558: }
559: startv[0] = 0;
560: for (PetscMPIInt i = 1; i < size; i++) startv[i] = startv[i - 1] + nlengths[i - 1];
562: for (PetscMPIInt i = 0, count = 0; i < size; i++) {
563: if (sizes[i]) {
564: PetscCallMPI(MPIU_Isend(sindices + 2 * startv[i], 2 * nlengths[i], MPIU_INT, i, tag1, comm, send_waits + count++));
565: PetscCallMPI(MPIU_Isend(svalues + bs2 * startv[i], bs2 * nlengths[i], MPIU_SCALAR, i, tag2, comm, send_waits + count++));
566: }
567: }
568: #if defined(PETSC_USE_INFO)
569: PetscCall(PetscInfo(NULL, "No of messages: %d \n", nsends));
570: for (PetscMPIInt i = 0; i < size; i++) {
571: if (sizes[i]) PetscCall(PetscInfo(NULL, "Mesg_to: %d: size: %zu bytes\n", i, (size_t)(nlengths[i] * (bs2 * sizeof(PetscScalar) + 2 * sizeof(PetscInt)))));
572: }
573: #endif
574: PetscCall(PetscFree(nlengths));
575: PetscCall(PetscFree(owner));
576: PetscCall(PetscFree2(startv, starti));
577: PetscCall(PetscFree(sizes));
579: /* recv_waits need to be contiguous for MatStashScatterGetMesg_Private() */
580: PetscCall(PetscMalloc1(2 * nreceives, &recv_waits));
582: for (PetscMPIInt i = 0; i < nreceives; i++) {
583: recv_waits[2 * i] = recv_waits1[i];
584: recv_waits[2 * i + 1] = recv_waits2[i];
585: }
586: stash->recv_waits = recv_waits;
588: PetscCall(PetscFree(recv_waits1));
589: PetscCall(PetscFree(recv_waits2));
591: stash->svalues = svalues;
592: stash->sindices = sindices;
593: stash->rvalues = rvalues;
594: stash->rindices = rindices;
595: stash->send_waits = send_waits;
596: stash->nsends = nsends;
597: stash->nrecvs = nreceives;
598: stash->reproduce_count = 0;
599: PetscFunctionReturn(PETSC_SUCCESS);
600: }
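/*
  Summary of the reference (non-BTS) protocol set up above: for every destination rank j with
  nlengths[j] stashed entries, two messages are posted with MPIU_Isend():
    tag1: 2*nlengths[j] integers  - the row indices followed by the column indices
    tag2: bs2*nlengths[j] scalars - the (block) values
  The matching receives were posted with PetscPostIrecvInt()/PetscPostIrecvScalar(), and
  MatStashScatterGetMesg_Ref() pairs the two messages from each source rank via stash->flg_v.
*/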
602: /*
603: MatStashScatterGetMesg_Private - This function waits on the receives posted
604: in the function MatStashScatterBegin_Private() and returns one message at
605: a time to the calling function. If no messages are left, it indicates this
606: by setting flg = 0, else it sets flg = 1.
608: Input Parameter:
609: stash - the stash
611: Output Parameters:
612: nvals - the number of entries in the current message.
613: rows - an array of row indices (or blocked indices) corresponding to the values
614: cols - an array of column indices (or blocked indices) corresponding to the values
615: vals - the values
616: flg - 0 indicates no more messages are left, and the current call has no associated values.
617: 1 indicates that the current call successfully received a message, and the
618: other output parameters nvals,rows,cols,vals are set appropriately.
619: */
620: PetscErrorCode MatStashScatterGetMesg_Private(MatStash *stash, PetscMPIInt *nvals, PetscInt **rows, PetscInt **cols, PetscScalar **vals, PetscInt *flg)
621: {
622: PetscFunctionBegin;
623: PetscCall((*stash->ScatterGetMesg)(stash, nvals, rows, cols, vals, flg));
624: PetscFunctionReturn(PETSC_SUCCESS);
625: }
627: PETSC_INTERN PetscErrorCode MatStashScatterGetMesg_Ref(MatStash *stash, PetscMPIInt *nvals, PetscInt **rows, PetscInt **cols, PetscScalar **vals, PetscInt *flg)
628: {
629: PetscMPIInt i, *flg_v = stash->flg_v, i1, i2;
630: PetscInt bs2;
631: MPI_Status recv_status;
632: PetscBool match_found = PETSC_FALSE;
634: PetscFunctionBegin;
635: *flg = 0; /* When a message is discovered this is reset to 1 */
636: /* Return if no more messages to process */
637: if (stash->nprocessed == stash->nrecvs) PetscFunctionReturn(PETSC_SUCCESS);
639: bs2 = stash->bs * stash->bs;
640: /* If a matching pair of receives is found, process them, and return the data to
641: the calling function. Until then keep receiving messages */
642: while (!match_found) {
643: if (stash->reproduce) {
644: i = stash->reproduce_count++;
645: PetscCallMPI(MPI_Wait(stash->recv_waits + i, &recv_status));
646: } else {
647: PetscCallMPI(MPI_Waitany(2 * stash->nrecvs, stash->recv_waits, &i, &recv_status));
648: }
649: PetscCheck(recv_status.MPI_SOURCE >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Negative MPI source!");
651: /* Now pack the received message into a structure which is usable by others */
652: if (i % 2) {
653: PetscCallMPI(MPI_Get_count(&recv_status, MPIU_SCALAR, nvals));
654: flg_v[2 * recv_status.MPI_SOURCE] = i / 2;
655: *nvals = *nvals / bs2;
656: } else {
657: PetscCallMPI(MPI_Get_count(&recv_status, MPIU_INT, nvals));
658: flg_v[2 * recv_status.MPI_SOURCE + 1] = i / 2;
659: *nvals = *nvals / 2; /* This message has both row indices and col indices */
660: }
662: /* Check if we have both messages from this proc */
663: i1 = flg_v[2 * recv_status.MPI_SOURCE];
664: i2 = flg_v[2 * recv_status.MPI_SOURCE + 1];
665: if (i1 != -1 && i2 != -1) {
666: *rows = stash->rindices[i2];
667: *cols = *rows + *nvals;
668: *vals = stash->rvalues[i1];
669: *flg = 1;
670: stash->nprocessed++;
671: match_found = PETSC_TRUE;
672: }
673: }
674: PetscFunctionReturn(PETSC_SUCCESS);
675: }
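/*
  Bookkeeping used above: for a source rank s, flg_v[2*s] records the receive-buffer index of the
  values message (odd request index) and flg_v[2*s+1] that of the indices message (even request
  index); both start at -1. Once both are set, rindices[flg_v[2*s+1]] supplies rows/cols and
  rvalues[flg_v[2*s]] supplies the values, and the completed pair is returned to the caller.
*/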
677: #if !defined(PETSC_HAVE_MPIUNI)
678: typedef struct {
679: PetscInt row;
680: PetscInt col;
681: PetscScalar vals[1]; /* Actually an array of length bs2 */
682: } MatStashBlock;
684: static PetscErrorCode MatStashSortCompress_Private(MatStash *stash, InsertMode insertmode)
685: {
686: PetscMatStashSpace space;
687: PetscInt n = stash->n, bs = stash->bs, bs2 = bs * bs, cnt, *row, *col, *perm, rowstart, i;
688: PetscScalar **valptr;
690: PetscFunctionBegin;
691: PetscCall(PetscMalloc4(n, &row, n, &col, n, &valptr, n, &perm));
692: for (space = stash->space_head, cnt = 0; space; space = space->next) {
693: for (i = 0; i < space->local_used; i++) {
694: row[cnt] = space->idx[i];
695: col[cnt] = space->idy[i];
696: valptr[cnt] = &space->val[i * bs2];
697: perm[cnt] = cnt; /* Will tell us where to find valptr after sorting row[] and col[] */
698: cnt++;
699: }
700: }
701: PetscCheck(cnt == n, PETSC_COMM_SELF, PETSC_ERR_PLIB, "MatStash n %" PetscInt_FMT ", but counted %" PetscInt_FMT " entries", n, cnt);
702: PetscCall(PetscSortIntWithArrayPair(n, row, col, perm));
703: /* Scan through the rows, sorting each one, combining duplicates, and packing send buffers */
704: for (rowstart = 0, cnt = 0, i = 1; i <= n; i++) {
705: if (i == n || row[i] != row[rowstart]) { /* Sort the last row. */
706: PetscInt colstart;
707: PetscCall(PetscSortIntWithArray(i - rowstart, &col[rowstart], &perm[rowstart]));
708: for (colstart = rowstart; colstart < i;) { /* Compress multiple insertions to the same location */
709: PetscInt j, l;
710: MatStashBlock *block;
711: PetscCall(PetscSegBufferGet(stash->segsendblocks, 1, &block));
712: block->row = row[rowstart];
713: block->col = col[colstart];
714: PetscCall(PetscArraycpy(block->vals, valptr[perm[colstart]], bs2));
715: for (j = colstart + 1; j < i && col[j] == col[colstart]; j++) { /* Add any extra stashed blocks at the same (row,col) */
716: if (insertmode == ADD_VALUES) {
717: for (l = 0; l < bs2; l++) block->vals[l] += valptr[perm[j]][l];
718: } else {
719: PetscCall(PetscArraycpy(block->vals, valptr[perm[j]], bs2));
720: }
721: }
722: colstart = j;
723: }
724: rowstart = i;
725: }
726: }
727: PetscCall(PetscFree4(row, col, valptr, perm));
728: PetscFunctionReturn(PETSC_SUCCESS);
729: }
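/*
  Compression example: if the same (row, col) block was stashed twice, say with values A and B,
  the loop above emits a single send block containing A + B when insertmode is ADD_VALUES, and one
  of the duplicates (the last one after sorting, not necessarily the last one inserted) when
  insertmode is INSERT_VALUES.
*/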
731: static PetscErrorCode MatStashBlockTypeSetUp(MatStash *stash)
732: {
733: PetscFunctionBegin;
734: if (stash->blocktype == MPI_DATATYPE_NULL) {
735: PetscInt bs2 = PetscSqr(stash->bs);
736: PetscMPIInt blocklens[2];
737: MPI_Aint displs[2];
738: MPI_Datatype types[2], stype;
739: /*
740: DummyBlock is a type having standard layout, even when PetscScalar is C++ std::complex.
741: Since std::complex itself has standard layout, so does DummyBlock, recursively.
742: To be compatible with C++ std::complex, complex implementations on GPUs must also have standard layout,
743: though they can have different alignment, e.g., 16 bytes for double complex, instead of 8 bytes as in GCC libstdc++.
744: offsetof(type, member) only requires type to have standard layout. Ref. https://en.cppreference.com/w/cpp/types/offsetof.
746: We can test if std::complex has standard layout with the following code:
747: #include <iostream>
748: #include <type_traits>
749: #include <complex>
750: int main() {
751: std::cout << std::boolalpha;
752: std::cout << std::is_standard_layout<std::complex<double> >::value << '\n';
753: }
754: Output: true
755: */
756: struct DummyBlock {
757: PetscInt row, col;
758: PetscScalar vals;
759: };
761: stash->blocktype_size = offsetof(struct DummyBlock, vals) + bs2 * sizeof(PetscScalar);
762: if (stash->blocktype_size % sizeof(PetscInt)) { /* Implies that PetscInt is larger and does not satisfy alignment without padding */
763: stash->blocktype_size += sizeof(PetscInt) - stash->blocktype_size % sizeof(PetscInt);
764: }
765: PetscCall(PetscSegBufferCreate(stash->blocktype_size, 1, &stash->segsendblocks));
766: PetscCall(PetscSegBufferCreate(stash->blocktype_size, 1, &stash->segrecvblocks));
767: PetscCall(PetscSegBufferCreate(sizeof(MatStashFrame), 1, &stash->segrecvframe));
768: blocklens[0] = 2;
769: blocklens[1] = (PetscMPIInt)bs2;
770: displs[0] = offsetof(struct DummyBlock, row);
771: displs[1] = offsetof(struct DummyBlock, vals);
772: types[0] = MPIU_INT;
773: types[1] = MPIU_SCALAR;
774: PetscCallMPI(MPI_Type_create_struct(2, blocklens, displs, types, &stype));
775: PetscCallMPI(MPI_Type_commit(&stype));
776: PetscCallMPI(MPI_Type_create_resized(stype, 0, stash->blocktype_size, &stash->blocktype));
777: PetscCallMPI(MPI_Type_commit(&stash->blocktype));
778: PetscCallMPI(MPI_Type_free(&stype));
779: }
780: PetscFunctionReturn(PETSC_SUCCESS);
781: }
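/*
  Resulting layout of one send/receive block of blocktype_size bytes (illustrative):

     | PetscInt row | PetscInt col | PetscScalar vals[bs2] | padding to a PetscInt multiple |

  The MPI_Type_create_resized() call sets the datatype extent to blocktype_size so that blocks
  packed back-to-back in a PetscSegBuffer match the stride assumed by the MPI datatype.
*/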
783: /* Callback invoked after target rank has initiated receive of rendezvous message.
784: * Here we post the main sends.
785: */
786: static PetscErrorCode MatStashBTSSend_Private(MPI_Comm comm, const PetscMPIInt tag[], PetscMPIInt rankid, PetscMPIInt rank, void *sdata, MPI_Request req[], void *ctx)
787: {
788: MatStash *stash = (MatStash *)ctx;
789: MatStashHeader *hdr = (MatStashHeader *)sdata;
791: PetscFunctionBegin;
792: PetscCheck(rank == stash->sendranks[rankid], comm, PETSC_ERR_PLIB, "BTS Send rank %d does not match sendranks[%d] %d", rank, rankid, stash->sendranks[rankid]);
793: PetscCallMPI(MPIU_Isend(stash->sendframes[rankid].buffer, hdr->count, stash->blocktype, rank, tag[0], comm, &req[0]));
794: stash->sendframes[rankid].count = hdr->count;
795: stash->sendframes[rankid].pending = 1;
796: PetscFunctionReturn(PETSC_SUCCESS);
797: }
799: /*
800: Callback invoked by target after receiving rendezvous message.
801: Here we post the main recvs.
802: */
803: static PetscErrorCode MatStashBTSRecv_Private(MPI_Comm comm, const PetscMPIInt tag[], PetscMPIInt rank, void *rdata, MPI_Request req[], void *ctx)
804: {
805: MatStash *stash = (MatStash *)ctx;
806: MatStashHeader *hdr = (MatStashHeader *)rdata;
807: MatStashFrame *frame;
809: PetscFunctionBegin;
810: PetscCall(PetscSegBufferGet(stash->segrecvframe, 1, &frame));
811: PetscCall(PetscSegBufferGet(stash->segrecvblocks, hdr->count, &frame->buffer));
812: PetscCallMPI(MPIU_Irecv(frame->buffer, hdr->count, stash->blocktype, rank, tag[0], comm, &req[0]));
813: frame->count = hdr->count;
814: frame->pending = 1;
815: PetscFunctionReturn(PETSC_SUCCESS);
816: }
818: /*
819: * owners[] contains the ownership ranges; may be indexed by either blocks or scalars
820: */
821: static PetscErrorCode MatStashScatterBegin_BTS(Mat mat, MatStash *stash, PetscInt owners[])
822: {
823: PetscCount nblocks;
824: char *sendblocks;
826: PetscFunctionBegin;
827: if (PetscDefined(USE_DEBUG)) { /* make sure all processors are either in INSERTMODE or ADDMODE */
828: InsertMode addv;
829: PetscCallMPI(MPIU_Allreduce((PetscEnum *)&mat->insertmode, (PetscEnum *)&addv, 1, MPIU_ENUM, MPI_BOR, PetscObjectComm((PetscObject)mat)));
830: PetscCheck(addv != (ADD_VALUES | INSERT_VALUES), PetscObjectComm((PetscObject)mat), PETSC_ERR_ARG_WRONGSTATE, "Some processors inserted others added");
831: }
833: PetscCall(MatStashBlockTypeSetUp(stash));
834: PetscCall(MatStashSortCompress_Private(stash, mat->insertmode));
835: PetscCall(PetscSegBufferGetSize(stash->segsendblocks, &nblocks));
836: PetscCall(PetscSegBufferExtractInPlace(stash->segsendblocks, &sendblocks));
837: if (stash->first_assembly_done) { /* Set up sendhdrs and sendframes for each rank that we sent before */
838: PetscInt i;
839: PetscCount b;
840: for (i = 0, b = 0; i < stash->nsendranks; i++) {
841: stash->sendframes[i].buffer = &sendblocks[b * stash->blocktype_size];
842: /* sendhdr is never actually sent, but the count is used by MatStashBTSSend_Private */
843: stash->sendhdr[i].count = 0; /* Might remain empty (in which case we send a zero-sized message) if no values are communicated to that process */
844: for (; b < nblocks; b++) {
845: MatStashBlock *sendblock_b = (MatStashBlock *)&sendblocks[b * stash->blocktype_size];
846: PetscCheck(sendblock_b->row >= owners[stash->sendranks[i]], stash->comm, PETSC_ERR_ARG_WRONG, "MAT_SUBSET_OFF_PROC_ENTRIES set, but row %" PetscInt_FMT " owned by %d not communicated in initial assembly", sendblock_b->row, stash->sendranks[i]);
847: if (sendblock_b->row >= owners[stash->sendranks[i] + 1]) break;
848: stash->sendhdr[i].count++;
849: }
850: }
851: } else { /* Dynamically count and pack (first time) */
852: PetscInt sendno;
853: PetscCount i, rowstart;
855: /* Count number of send ranks and allocate for sends */
856: stash->nsendranks = 0;
857: for (rowstart = 0; rowstart < nblocks;) {
858: PetscInt owner;
859: MatStashBlock *sendblock_rowstart = (MatStashBlock *)&sendblocks[rowstart * stash->blocktype_size];
861: PetscCall(PetscFindInt(sendblock_rowstart->row, stash->size + 1, owners, &owner));
862: if (owner < 0) owner = -(owner + 2);
863: for (i = rowstart + 1; i < nblocks; i++) { /* Move forward through a run of blocks with the same owner */
864: MatStashBlock *sendblock_i = (MatStashBlock *)&sendblocks[i * stash->blocktype_size];
866: if (sendblock_i->row >= owners[owner + 1]) break;
867: }
868: stash->nsendranks++;
869: rowstart = i;
870: }
871: PetscCall(PetscMalloc3(stash->nsendranks, &stash->sendranks, stash->nsendranks, &stash->sendhdr, stash->nsendranks, &stash->sendframes));
873: /* Set up sendhdrs and sendframes */
874: sendno = 0;
875: for (rowstart = 0; rowstart < nblocks;) {
876: PetscInt iowner;
877: PetscMPIInt owner;
878: MatStashBlock *sendblock_rowstart = (MatStashBlock *)&sendblocks[rowstart * stash->blocktype_size];
880: PetscCall(PetscFindInt(sendblock_rowstart->row, stash->size + 1, owners, &iowner));
881: PetscCall(PetscMPIIntCast(iowner, &owner));
882: if (owner < 0) owner = -(owner + 2);
883: stash->sendranks[sendno] = owner;
884: for (i = rowstart + 1; i < nblocks; i++) { /* Move forward through a run of blocks with the same owner */
885: MatStashBlock *sendblock_i = (MatStashBlock *)&sendblocks[i * stash->blocktype_size];
887: if (sendblock_i->row >= owners[owner + 1]) break;
888: }
889: stash->sendframes[sendno].buffer = sendblock_rowstart;
890: stash->sendframes[sendno].pending = 0;
891: PetscCall(PetscIntCast(i - rowstart, &stash->sendhdr[sendno].count));
892: sendno++;
893: rowstart = i;
894: }
895: PetscCheck(sendno == stash->nsendranks, stash->comm, PETSC_ERR_PLIB, "BTS counted %d sendranks, but %" PetscInt_FMT " sends", stash->nsendranks, sendno);
896: }
898: /* Encode insertmode on the outgoing messages. If we want to support more than two options, we would need a new
899: * message or a dummy entry of some sort. */
900: if (mat->insertmode == INSERT_VALUES) {
901: for (PetscCount i = 0; i < nblocks; i++) {
902: MatStashBlock *sendblock_i = (MatStashBlock *)&sendblocks[i * stash->blocktype_size];
903: sendblock_i->row = -(sendblock_i->row + 1);
904: }
905: }
907: if (stash->first_assembly_done) {
908: PetscMPIInt i, tag;
910: PetscCall(PetscCommGetNewTag(stash->comm, &tag));
911: for (i = 0; i < stash->nrecvranks; i++) PetscCall(MatStashBTSRecv_Private(stash->comm, &tag, stash->recvranks[i], &stash->recvhdr[i], &stash->recvreqs[i], stash));
912: for (i = 0; i < stash->nsendranks; i++) PetscCall(MatStashBTSSend_Private(stash->comm, &tag, i, stash->sendranks[i], &stash->sendhdr[i], &stash->sendreqs[i], stash));
913: stash->use_status = PETSC_TRUE; /* Use count from message status. */
914: } else {
915: PetscCall(PetscCommBuildTwoSidedFReq(stash->comm, 1, MPIU_INT, stash->nsendranks, stash->sendranks, (PetscInt *)stash->sendhdr, &stash->nrecvranks, &stash->recvranks, (PetscInt *)&stash->recvhdr, 1, &stash->sendreqs, &stash->recvreqs, MatStashBTSSend_Private, MatStashBTSRecv_Private, stash));
916: PetscCall(PetscMalloc2(stash->nrecvranks, &stash->some_indices, stash->nrecvranks, &stash->some_statuses));
917: stash->use_status = PETSC_FALSE; /* Use count from header instead of from message. */
918: }
920: PetscCall(PetscSegBufferExtractInPlace(stash->segrecvframe, &stash->recvframes));
921: stash->recvframe_active = NULL;
922: stash->recvframe_i = 0;
923: stash->some_i = 0;
924: stash->some_count = 0;
925: stash->recvcount = 0;
926: stash->first_assembly_done = mat->assembly_subset; /* See the same logic in VecAssemblyBegin_MPI_BTS */
927: stash->insertmode = &mat->insertmode;
928: PetscFunctionReturn(PETSC_SUCCESS);
929: }
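/*
  InsertMode encoding used above: when the sender is in INSERT_VALUES mode, each outgoing block's
  row r is replaced by -(r + 1), which is always negative; ADD_VALUES leaves rows non-negative.
  MatStashScatterGetMesg_BTS() inspects the sign of the first received row to infer the sender's
  mode and decodes the row with row = -(row + 1) before returning it.
*/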
931: static PetscErrorCode MatStashScatterGetMesg_BTS(MatStash *stash, PetscMPIInt *n, PetscInt **row, PetscInt **col, PetscScalar **val, PetscInt *flg)
932: {
933: MatStashBlock *block;
935: PetscFunctionBegin;
936: *flg = 0;
937: while (!stash->recvframe_active || stash->recvframe_i == stash->recvframe_count) {
938: if (stash->some_i == stash->some_count) {
939: if (stash->recvcount == stash->nrecvranks) PetscFunctionReturn(PETSC_SUCCESS); /* Done */
940: PetscCallMPI(MPI_Waitsome(stash->nrecvranks, stash->recvreqs, &stash->some_count, stash->some_indices, stash->use_status ? stash->some_statuses : MPI_STATUSES_IGNORE));
941: stash->some_i = 0;
942: }
943: stash->recvframe_active = &stash->recvframes[stash->some_indices[stash->some_i]];
944: stash->recvframe_count = stash->recvframe_active->count; /* From header; maximum count */
945: if (stash->use_status) { /* Count what was actually sent */
946: PetscMPIInt ic;
948: PetscCallMPI(MPI_Get_count(&stash->some_statuses[stash->some_i], stash->blocktype, &ic));
949: stash->recvframe_count = ic;
950: }
951: if (stash->recvframe_count > 0) { /* Check for InsertMode consistency */
952: block = (MatStashBlock *)&((char *)stash->recvframe_active->buffer)[0];
953: if (PetscUnlikely(*stash->insertmode == NOT_SET_VALUES)) *stash->insertmode = block->row < 0 ? INSERT_VALUES : ADD_VALUES;
954: PetscCheck(*stash->insertmode != INSERT_VALUES || block->row < 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Assembling INSERT_VALUES, but rank %d requested ADD_VALUES", stash->recvranks[stash->some_indices[stash->some_i]]);
955: PetscCheck(*stash->insertmode != ADD_VALUES || block->row >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Assembling ADD_VALUES, but rank %d requested INSERT_VALUES", stash->recvranks[stash->some_indices[stash->some_i]]);
956: }
957: stash->some_i++;
958: stash->recvcount++;
959: stash->recvframe_i = 0;
960: }
961: *n = 1;
962: block = (MatStashBlock *)&((char *)stash->recvframe_active->buffer)[stash->recvframe_i * stash->blocktype_size];
963: if (block->row < 0) block->row = -(block->row + 1);
964: *row = &block->row;
965: *col = &block->col;
966: *val = block->vals;
967: stash->recvframe_i++;
968: *flg = 1;
969: PetscFunctionReturn(PETSC_SUCCESS);
970: }
972: static PetscErrorCode MatStashScatterEnd_BTS(MatStash *stash)
973: {
974: PetscFunctionBegin;
975: PetscCallMPI(MPI_Waitall(stash->nsendranks, stash->sendreqs, MPI_STATUSES_IGNORE));
976: if (stash->first_assembly_done) { /* Reuse the communication contexts, so consolidate and reset segrecvblocks */
977: PetscCall(PetscSegBufferExtractInPlace(stash->segrecvblocks, NULL));
978: } else { /* No reuse, so collect everything. */
979: PetscCall(MatStashScatterDestroy_BTS(stash));
980: }
982: /* Now update oldnmax to be approximately 10% more than the maximum n used, so that
983: less space is wasted the next time this stash is used.
984: Update oldnmax only if it increases */
985: if (stash->n) {
986: PetscInt bs2 = stash->bs * stash->bs;
987: PetscInt oldnmax = ((int)(stash->n * 1.1) + 5) * bs2;
988: if (oldnmax > stash->oldnmax) stash->oldnmax = oldnmax;
989: }
991: stash->nmax = 0;
992: stash->n = 0;
993: stash->reallocs = -1;
994: stash->nprocessed = 0;
996: PetscCall(PetscMatStashSpaceDestroy(&stash->space_head));
998: stash->space = NULL;
999: PetscFunctionReturn(PETSC_SUCCESS);
1000: }
1002: PetscErrorCode MatStashScatterDestroy_BTS(MatStash *stash)
1003: {
1004: PetscFunctionBegin;
1005: PetscCall(PetscSegBufferDestroy(&stash->segsendblocks));
1006: PetscCall(PetscSegBufferDestroy(&stash->segrecvframe));
1007: stash->recvframes = NULL;
1008: PetscCall(PetscSegBufferDestroy(&stash->segrecvblocks));
1009: if (stash->blocktype != MPI_DATATYPE_NULL) PetscCallMPI(MPI_Type_free(&stash->blocktype));
1010: stash->nsendranks = 0;
1011: stash->nrecvranks = 0;
1012: PetscCall(PetscFree3(stash->sendranks, stash->sendhdr, stash->sendframes));
1013: PetscCall(PetscFree(stash->sendreqs));
1014: PetscCall(PetscFree(stash->recvreqs));
1015: PetscCall(PetscFree(stash->recvranks));
1016: PetscCall(PetscFree(stash->recvhdr));
1017: PetscCall(PetscFree2(stash->some_indices, stash->some_statuses));
1018: PetscFunctionReturn(PETSC_SUCCESS);
1019: }
1020: #endif