Actual source code: vecstash.c
#include <petsc/private/vecimpl.h>

#define DEFAULT_STASH_SIZE 100

/*
  VecStashCreate_Private - Creates a stash, currently used for all the parallel
  vector implementations. The stash is where elements of a vector destined
  to be stored on other processors are kept until vector assembly is done.

  This is a simple-minded stash; it simply adds entries to the end of the stash.

  Input Parameters:
  comm - communicator, required for scatters.
  bs   - stash block size, used when stashing blocks of values

  Output Parameter:
. stash - the newly created stash
*/
PetscErrorCode VecStashCreate_Private(MPI_Comm comm, PetscInt bs, VecStash *stash)
{
  PetscInt  max, *opt, nopt;
  PetscBool flg;

  PetscFunctionBegin;
  /* Require 2 tags, get the second using PetscCommGetNewTag() */
  stash->comm = comm;
  PetscCall(PetscCommGetNewTag(stash->comm, &stash->tag1));
  PetscCall(PetscCommGetNewTag(stash->comm, &stash->tag2));
  PetscCallMPI(MPI_Comm_size(stash->comm, &stash->size));
  PetscCallMPI(MPI_Comm_rank(stash->comm, &stash->rank));

  nopt = stash->size;
  PetscCall(PetscMalloc1(nopt, &opt));
  PetscCall(PetscOptionsGetIntArray(NULL, NULL, "-vecstash_initial_size", opt, &nopt, &flg));
  if (flg) {
    if (nopt == 1) max = opt[0];
    else if (nopt == stash->size) max = opt[stash->rank];
    else if (stash->rank < nopt) max = opt[stash->rank];
    else max = 0; /* use default */
    stash->umax = max;
  } else {
    stash->umax = 0;
  }
  PetscCall(PetscFree(opt));

  if (bs <= 0) bs = 1;

  stash->bs       = bs;
  stash->nmax     = 0;
  stash->oldnmax  = 0;
  stash->n        = 0;
  stash->reallocs = -1;
  stash->idx      = NULL;
  stash->array    = NULL;

  stash->send_waits  = NULL;
  stash->recv_waits  = NULL;
  stash->send_status = NULL;
  stash->nsends      = 0;
  stash->nrecvs      = 0;
  stash->svalues     = NULL;
  stash->rvalues     = NULL;
  stash->rmax        = 0;
  stash->nprocs      = NULL;
  stash->nprocessed  = 0;
  stash->donotstash   = PETSC_FALSE;
  stash->ignorenegidx = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}
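
/*
  Illustrative sketch (not part of the original source): the typical life cycle of a stash
  as it would be driven during parallel vector assembly. The driver below is an assumption
  for exposition; in PETSc the equivalent logic lives in the MPI Vec implementation
  (VecSetValues() and VecAssemblyBegin()/VecAssemblyEnd()). 'comm' and 'ownership_ranges'
  (the layout range array of length size+1) are placeholders.

    VecStash     stash;
    PetscMPIInt  nvals;
    PetscInt    *rows, flg;
    PetscScalar *vals;

    PetscCall(VecStashCreate_Private(comm, 1, &stash));
    // ... VecSetValues() appends off-process (index, value) pairs to the stash,
    //     growing it with VecStashExpand_Private() as needed ...
    PetscCall(VecStashScatterBegin_Private(&stash, ownership_ranges));
    PetscCall(VecStashScatterGetMesg_Private(&stash, &nvals, &rows, &vals, &flg));
    while (flg) {
      // ... add or insert the received (rows[i], vals[i]) pairs into the local vector ...
      PetscCall(VecStashScatterGetMesg_Private(&stash, &nvals, &rows, &vals, &flg));
    }
    PetscCall(VecStashScatterEnd_Private(&stash));
    PetscCall(VecStashDestroy_Private(&stash));
*/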

/*
  VecStashDestroy_Private - Destroy the stash
*/
PetscErrorCode VecStashDestroy_Private(VecStash *stash)
{
  PetscFunctionBegin;
  PetscCall(PetscFree2(stash->array, stash->idx));
  PetscCall(PetscFree(stash->bowners));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  VecStashScatterEnd_Private - This is called as the final stage of the
  scatter. The final stages of message passing are done here, and
  all the memory used for message passing is cleaned up. This
  routine also resets the stash, and deallocates the memory used
  for the stash. It also keeps track of the current memory usage
  so that the same value can be used the next time through.
*/
PetscErrorCode VecStashScatterEnd_Private(VecStash *stash)
{
  PetscMPIInt nsends = stash->nsends;
  PetscInt    oldnmax;
  MPI_Status *send_status;

  PetscFunctionBegin;
  /* wait on sends */
  if (nsends) {
    PetscCall(PetscMalloc1(2 * nsends, &send_status));
    PetscCallMPI(MPI_Waitall(2 * nsends, stash->send_waits, send_status));
    PetscCall(PetscFree(send_status));
  }

  /* Now update oldnmax to be approximately 10% more than the maximum n used,
     so that wasted space is reduced the next time this stash is used.
     Only update oldnmax if it increases. */
  if (stash->n) {
    oldnmax = ((PetscInt)(stash->n * 1.1) + 5) * stash->bs;
    if (oldnmax > stash->oldnmax) stash->oldnmax = oldnmax;
  }
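  /* For example (illustrative): with bs = 2 and n = 100 stashed blocks this gives
     oldnmax = ((PetscInt)(100 * 1.1) + 5) * 2 = 230 scalar values, i.e. roughly 10%
     headroom retained as the allocation hint for the next assembly. */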

  stash->nmax       = 0;
  stash->n          = 0;
  stash->reallocs   = -1;
  stash->rmax       = 0;
  stash->nprocessed = 0;

  PetscCall(PetscFree2(stash->array, stash->idx));
  stash->array = NULL;
  stash->idx   = NULL;
  PetscCall(PetscFree(stash->send_waits));
  PetscCall(PetscFree(stash->recv_waits));
  PetscCall(PetscFree2(stash->svalues, stash->sindices));
  PetscCall(PetscFree2(stash->rvalues, stash->rindices));
  PetscCall(PetscFree(stash->nprocs));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  VecStashGetInfo_Private - Gets the relevant statistics of the stash

  Input Parameter:
  stash - the stash

  Output Parameters:
  nstash   - the size of the stash
  reallocs - the number of additional mallocs incurred
*/
PetscErrorCode VecStashGetInfo_Private(VecStash *stash, PetscInt *nstash, PetscInt *reallocs)
{
  PetscFunctionBegin;
  if (nstash) *nstash = stash->n * stash->bs;
  if (reallocs) {
    if (stash->reallocs < 0) *reallocs = 0;
    else *reallocs = stash->reallocs;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  VecStashSetInitialSize_Private - Sets the initial size of the stash

  Input Parameters:
  stash - the stash
  max   - the value that is used as the max size of the stash.
          This value is used while allocating memory. It specifies
          the number of values stored, even for the block stash.
*/
PetscErrorCode VecStashSetInitialSize_Private(VecStash *stash, PetscInt max)
{
  PetscFunctionBegin;
  stash->umax = max;
  PetscFunctionReturn(PETSC_SUCCESS);
}
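
/*
  Note: the same initial size can also be supplied at runtime through the
  -vecstash_initial_size option read in VecStashCreate_Private(), either as a single
  value used by every rank or as an array giving one value per rank.
*/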

/* VecStashExpand_Private - Expand the stash. This function is called
   when the space in the stash is not sufficient to add the new values
   being inserted into the stash.

   Input Parameters:
   stash - the stash
   incr  - the minimum increase requested

   Notes:
   This routine doubles the currently used memory.
*/
PetscErrorCode VecStashExpand_Private(VecStash *stash, PetscInt incr)
{
  PetscInt    *n_idx, newnmax, bs = stash->bs;
  PetscScalar *n_array;

  PetscFunctionBegin;
  /* allocate a larger stash. */
  if (!stash->oldnmax && !stash->nmax) { /* new stash */
    if (stash->umax) newnmax = stash->umax / bs;
    else newnmax = DEFAULT_STASH_SIZE / bs;
  } else if (!stash->nmax) { /* reusing stash */
    if (stash->umax > stash->oldnmax) newnmax = stash->umax / bs;
    else newnmax = stash->oldnmax / bs;
  } else newnmax = stash->nmax * 2;

  if (newnmax < (stash->nmax + incr)) newnmax += 2 * incr;
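
  /* Note on units: newnmax and stash->nmax count stashed entries (blocks of bs values each),
     while stash->umax and stash->oldnmax are kept in scalar values (see
     VecStashSetInitialSize_Private() and VecStashScatterEnd_Private()), hence the divisions
     by bs above. */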
  PetscCall(PetscMalloc2(bs * newnmax, &n_array, newnmax, &n_idx));
  PetscCall(PetscMemcpy(n_array, stash->array, bs * stash->nmax * sizeof(PetscScalar)));
  PetscCall(PetscMemcpy(n_idx, stash->idx, stash->nmax * sizeof(PetscInt)));
  PetscCall(PetscFree2(stash->array, stash->idx));

  stash->array = n_array;
  stash->idx   = n_idx;
  stash->nmax  = newnmax;
  stash->reallocs++;
  PetscFunctionReturn(PETSC_SUCCESS);
}
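
/*
  Illustrative sketch (not part of the original source): how a caller appends one off-process
  entry to a non-blocked stash, growing it on demand with VecStashExpand_Private(). In PETSc
  the real insertion logic lives in helpers in the private headers; the shape below is an
  assumption for exposition only.

    static inline PetscErrorCode VecStashValueSketch(VecStash *stash, PetscInt row, PetscScalar value)
    {
      PetscFunctionBegin;
      if (stash->n == stash->nmax) PetscCall(VecStashExpand_Private(stash, 1));
      stash->idx[stash->n]   = row;
      stash->array[stash->n] = value;
      stash->n++;
      PetscFunctionReturn(PETSC_SUCCESS);
    }
*/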

/*
  VecStashScatterBegin_Private - Initiates the transfer of values to the
  correct owners. This function goes through the stash, checks the
  owner of each stashed value, and sends the values off to the owner
  processors.

  Input Parameters:
  stash  - the stash
  owners - an array of size 'no-of-procs' which gives the ownership range
           for each node.

  Notes:
  In the case of the blocked stash the 'owners' array has the ranges
  specified in blocked global indices, while for the regular stash the
  ranges are in regular global indices.
*/
PetscErrorCode VecStashScatterBegin_Private(VecStash *stash, const PetscInt *owners)
{
  PetscMPIInt  size = stash->size, tag1 = stash->tag1, tag2 = stash->tag2, j, *owner, nsends, nreceives;
  PetscInt    *start, *nprocs, inreceives;
  PetscInt     nmax, *sindices, *rindices, idx, bs = stash->bs, lastidx;
  PetscScalar *rvalues, *svalues;
  MPI_Comm     comm = stash->comm;
  MPI_Request *send_waits, *recv_waits;

  PetscFunctionBegin;
  /* first count number of contributors to each processor */
  PetscCall(PetscCalloc1(2 * size, &nprocs));
  PetscCall(PetscMalloc1(stash->n, &owner));
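
  /* nprocs is laid out in pairs: nprocs[2*j] counts the entries destined for rank j, and
     nprocs[2*j + 1] is set to 1 if at least one message will be sent to rank j. */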

  j       = 0;
  lastidx = -1;
  for (PetscInt i = 0; i < stash->n; i++) {
    /* if indices are NOT locally sorted, need to start search at the beginning */
    if (lastidx > (idx = stash->idx[i])) j = 0;
    lastidx = idx;
    for (; j < size; j++) {
      if (idx >= owners[j] && idx < owners[j + 1]) {
        nprocs[2 * j]++;
        nprocs[2 * j + 1] = 1;
        owner[i]          = j;
        break;
      }
    }
  }
  nsends = 0;
  for (PetscMPIInt i = 0; i < size; i++) nsends += nprocs[2 * i + 1];

  /* inform other processors of number of messages and max length */
  PetscCall(PetscMaxSum(comm, nprocs, &nmax, &inreceives));
  PetscCall(PetscMPIIntCast(inreceives, &nreceives));

  /* post receives:
     since we don't know how long each individual message is we
     allocate the largest needed buffer for each receive. Potentially
     this is a lot of wasted space.
  */
  PetscCall(PetscMalloc2(nreceives * nmax * bs, &rvalues, nreceives * nmax, &rindices));
  PetscCall(PetscMalloc1(2 * nreceives, &recv_waits));
  for (PetscMPIInt i = 0, count = 0; i < nreceives; i++) {
    PetscCallMPI(MPIU_Irecv(rvalues + bs * nmax * i, bs * nmax, MPIU_SCALAR, MPI_ANY_SOURCE, tag1, comm, recv_waits + count++));
    PetscCallMPI(MPIU_Irecv(rindices + nmax * i, nmax, MPIU_INT, MPI_ANY_SOURCE, tag2, comm, recv_waits + count++));
  }
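
  /* Both receives are posted with MPI_ANY_SOURCE, so the value and index messages from a given
     rank may arrive in either order; they are paired up later in VecStashScatterGetMesg_Private()
     using the source rank recorded in the MPI_Status. */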

  /* do sends:
     1) start[i] gives the starting index in svalues for stuff going to
        the ith processor
  */
  PetscCall(PetscMalloc2(stash->n * bs, &svalues, stash->n, &sindices));
  PetscCall(PetscMalloc1(2 * nsends, &send_waits));
  PetscCall(PetscMalloc1(size, &start));
  /* use 2 sends: the first with all_v, the next with all_i */
  start[0] = 0;
  for (PetscMPIInt i = 1; i < size; i++) start[i] = start[i - 1] + nprocs[2 * i - 2];

  for (PetscInt i = 0; i < stash->n; i++) {
    j = owner[i];
    if (bs == 1) svalues[start[j]] = stash->array[i];
    else PetscCall(PetscMemcpy(svalues + bs * start[j], stash->array + bs * i, bs * sizeof(PetscScalar)));
    sindices[start[j]] = stash->idx[i];
    start[j]++;
  }
  start[0] = 0;
  for (PetscMPIInt i = 1; i < size; i++) start[i] = start[i - 1] + nprocs[2 * i - 2];

  for (PetscMPIInt i = 0, count = 0; i < size; i++) {
    if (nprocs[2 * i + 1]) {
      PetscCallMPI(MPIU_Isend(svalues + bs * start[i], bs * nprocs[2 * i], MPIU_SCALAR, i, tag1, comm, send_waits + count++));
      PetscCallMPI(MPIU_Isend(sindices + start[i], nprocs[2 * i], MPIU_INT, i, tag2, comm, send_waits + count++));
    }
  }
  PetscCall(PetscFree(owner));
  PetscCall(PetscFree(start));
  /* This memory is reused in VecStashScatterGetMesg_Private() for a different purpose */
  for (PetscMPIInt i = 0; i < 2 * size; i++) nprocs[i] = -1;

  stash->nprocs     = nprocs;
  stash->svalues    = svalues;
  stash->sindices   = sindices;
  stash->rvalues    = rvalues;
  stash->rindices   = rindices;
  stash->nsends     = nsends;
  stash->nrecvs     = nreceives;
  stash->send_waits = send_waits;
  stash->recv_waits = recv_waits;
  stash->rmax       = nmax;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  VecStashScatterGetMesg_Private - This function waits on the receives posted
  in the function VecStashScatterBegin_Private() and returns one message at
  a time to the calling function. If no messages are left, it indicates this
  by setting flg = 0, else it sets flg = 1.

  Input Parameter:
  stash - the stash

  Output Parameters:
  nvals - the number of entries in the current message.
  rows  - an array of row indices (or blocked indices) corresponding to the values
  vals  - the values
  flg   - 0 indicates no more messages are left, and the current call has no values associated.
          1 indicates that the current call successfully received a message, and the
          other output parameters nvals, rows, vals are set appropriately.
*/
PetscErrorCode VecStashScatterGetMesg_Private(VecStash *stash, PetscMPIInt *nvals, PetscInt **rows, PetscScalar **vals, PetscInt *flg)
{
  PetscMPIInt i = 0; /* dummy value so MPI-Uni doesn't think it is not set */
  PetscInt   *flg_v;
  PetscInt    i1, i2, bs = stash->bs;
  MPI_Status  recv_status;
  PetscBool   match_found = PETSC_FALSE;

  PetscFunctionBegin;
  *flg = 0; /* When a message is discovered this is reset to 1 */
  /* Return if no more messages to process */
  if (stash->nprocessed == stash->nrecvs) PetscFunctionReturn(PETSC_SUCCESS);

  flg_v = stash->nprocs;
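
  /* flg_v reuses the stash->nprocs array as scratch space: for each source rank, entry 2*rank
     records which receive-buffer block holds that rank's values and entry 2*rank + 1 which block
     holds its indices. Both entries start at -1 (set in VecStashScatterBegin_Private()), so a
     pair is complete only once both of its messages have arrived. */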
  /* If a matching pair of receives is found, process them, and return the data to
     the calling function. Until then keep receiving messages */
  while (!match_found) {
    PetscCallMPI(MPI_Waitany(2 * stash->nrecvs, stash->recv_waits, &i, &recv_status));
    /* Now pack the received message into a structure which is usable by others */
    if (i % 2) {
      PetscCallMPI(MPI_Get_count(&recv_status, MPIU_INT, nvals));
      flg_v[2 * recv_status.MPI_SOURCE + 1] = i / 2;
    } else {
      PetscCallMPI(MPI_Get_count(&recv_status, MPIU_SCALAR, nvals));
      flg_v[2 * recv_status.MPI_SOURCE] = i / 2;
      *nvals = *nvals / bs;
    }

    /* Check if we have both the messages from this proc */
    i1 = flg_v[2 * recv_status.MPI_SOURCE];
    i2 = flg_v[2 * recv_status.MPI_SOURCE + 1];
    if (i1 != -1 && i2 != -1) {
      *rows = stash->rindices + i2 * stash->rmax;
      *vals = stash->rvalues + i1 * bs * stash->rmax;
      *flg  = 1;
      stash->nprocessed++;
      match_found = PETSC_TRUE;
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
 * Sort the stash, removing duplicates (combining as appropriate).
 */
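/*
  For example (illustrative, bs = 1, ADD_VALUES): stashed entries (idx, value) of
  (7, 1.0), (3, 2.0), (7, 0.5) are sorted to idx order {3, 7, 7} and then compressed
  to the two entries (3, 2.0) and (7, 1.5), with stash->n reduced from 3 to 2.
*/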
PetscErrorCode VecStashSortCompress_Private(VecStash *stash)
{
  PetscInt i, j, bs = stash->bs;

  PetscFunctionBegin;
  if (!stash->n) PetscFunctionReturn(PETSC_SUCCESS);
  if (bs == 1) {
    PetscCall(PetscSortIntWithScalarArray(stash->n, stash->idx, stash->array));
    for (i = 1, j = 0; i < stash->n; i++) {
      if (stash->idx[i] == stash->idx[j]) {
        switch (stash->insertmode) {
        case ADD_VALUES:
          stash->array[j] += stash->array[i];
          break;
        case INSERT_VALUES:
          stash->array[j] = stash->array[i];
          break;
        default:
          SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Insert mode not supported 0x%x", stash->insertmode);
        }
      } else {
        j++;
        stash->idx[j]   = stash->idx[i];
        stash->array[j] = stash->array[i];
      }
    }
    stash->n = j + 1;
  } else { /* block stash */
    PetscInt    *perm = NULL;
    PetscScalar *arr;
    PetscCall(PetscMalloc2(stash->n, &perm, stash->n * bs, &arr));
    for (i = 0; i < stash->n; i++) perm[i] = i;
    PetscCall(PetscSortIntWithArray(stash->n, stash->idx, perm));

    /* Out-of-place copy of arr */
    PetscCall(PetscMemcpy(arr, stash->array + perm[0] * bs, bs * sizeof(PetscScalar)));
    for (i = 1, j = 0; i < stash->n; i++) {
      PetscInt k;
      if (stash->idx[i] == stash->idx[j]) {
        switch (stash->insertmode) {
        case ADD_VALUES:
          for (k = 0; k < bs; k++) arr[j * bs + k] += stash->array[perm[i] * bs + k];
          break;
        case INSERT_VALUES:
          for (k = 0; k < bs; k++) arr[j * bs + k] = stash->array[perm[i] * bs + k];
          break;
        default:
          SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Insert mode not supported 0x%x", stash->insertmode);
        }
      } else {
        j++;
        stash->idx[j] = stash->idx[i];
        for (k = 0; k < bs; k++) arr[j * bs + k] = stash->array[perm[i] * bs + k];
      }
    }
    stash->n = j + 1;
    PetscCall(PetscMemcpy(stash->array, arr, stash->n * bs * sizeof(PetscScalar)));
    PetscCall(PetscFree2(perm, arr));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
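
/*
  VecStashGetOwnerList_Private - Builds the list of ranks that own at least one of the
  currently stashed indices, according to the given layout. It appears to rely on
  stash->idx being sorted in increasing order (for example by VecStashSortCompress_Private()),
  since a new owner is looked up only when an index moves past the end of the current
  owner's range.

  Output Parameters:
  nowners - the number of distinct owning ranks
  owners  - newly allocated array containing those ranks
*/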
PetscErrorCode VecStashGetOwnerList_Private(VecStash *stash, PetscLayout map, PetscMPIInt *nowners, PetscMPIInt **owners)
{
  PetscInt       i, bs = stash->bs;
  PetscMPIInt    r;
  PetscSegBuffer seg;

  PetscFunctionBegin;
  PetscCheck(bs == 1 || bs == map->bs, map->comm, PETSC_ERR_PLIB, "Stash block size %" PetscInt_FMT " does not match layout block size %" PetscInt_FMT, bs, map->bs);
  PetscCall(PetscSegBufferCreate(sizeof(PetscMPIInt), 50, &seg));
  *nowners = 0;
  for (i = 0, r = -1; i < stash->n; i++) {
    if (stash->idx[i] * bs >= map->range[r + 1]) {
      PetscMPIInt *rank;
      PetscCall(PetscSegBufferGet(seg, 1, &rank));
      PetscCall(PetscLayoutFindOwner(map, stash->idx[i] * bs, &r));
      *rank = r;
      (*nowners)++;
    }
  }
  PetscCall(PetscSegBufferExtractAlloc(seg, owners));
  PetscCall(PetscSegBufferDestroy(&seg));
  PetscFunctionReturn(PETSC_SUCCESS);
}