Actual source code: ex183.c
static char help[] = "Example of extracting an array of MPI submatrices from a given MPI matrix.\n"
                     "This test can only be run in parallel.\n"
                     "\n";

#include <petscmat.h>
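
/*
    Views the row and column IS of one global subdomain. Every rank calls this for every global
    subdomain number gs; ss is the local index of that subdomain when this rank owns it and -1
    otherwise. PetscViewerGetSubViewer()/PetscViewerRestoreSubViewer() are called on every rank
    so the collective calls on the viewer remain matched across PETSC_COMM_WORLD.
*/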
PetscErrorCode MyISView(IS *rowis, IS *colis, PetscInt gs, PetscInt ss, PetscViewer viewer)
{
  PetscViewer subviewer = NULL;

  PetscFunctionBeginUser;
  PetscCheck(ss <= 1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "ss must be less than or equal to 1");
  PetscCall(PetscViewerASCIIPrintf(viewer, "Row IS %" PetscInt_FMT "\n", gs));
  PetscCall(PetscViewerGetSubViewer(viewer, ss > -1 ? PetscObjectComm((PetscObject)rowis[ss]) : PETSC_COMM_SELF, &subviewer));
  if (ss > -1) PetscCall(ISView(rowis[ss], subviewer));
  PetscCall(PetscViewerRestoreSubViewer(viewer, ss > -1 ? PetscObjectComm((PetscObject)rowis[ss]) : PETSC_COMM_SELF, &subviewer));
  PetscCall(PetscViewerASCIIPrintf(viewer, "Col IS %" PetscInt_FMT "\n", gs));
  PetscCall(PetscViewerGetSubViewer(viewer, ss > -1 ? PetscObjectComm((PetscObject)rowis[ss]) : PETSC_COMM_SELF, &subviewer));
  if (ss > -1) PetscCall(ISView(colis[ss], subviewer));
  PetscCall(PetscViewerRestoreSubViewer(viewer, ss > -1 ? PetscObjectComm((PetscObject)rowis[ss]) : PETSC_COMM_SELF, &subviewer));
  PetscFunctionReturn(PETSC_SUCCESS);
}

int main(int argc, char **args)
{
  Mat          A, *submats;
  MPI_Comm     subcomm;
  PetscMPIInt  rank, size, subrank, subsize, color;
  PetscInt     m, n, N, bs, rstart, rend, i, j, k, total_subdomains, hash, nsubdomains = 1;
  PetscInt     nis, *cols, gnsubdomains, gsubdomainnums[1], gsubdomainperm[1], s, gs;
  PetscInt    *rowindices, *colindices, idx, rep;
  PetscScalar *vals;
  IS           rowis[1], colis[1];
  PetscViewer  viewer;
  PetscBool    permute_indices, flg;

  PetscFunctionBeginUser;
  PetscCall(PetscInitialize(&argc, &args, NULL, help));
  PetscCallMPI(MPI_Comm_size(PETSC_COMM_WORLD, &size));
  PetscCallMPI(MPI_Comm_rank(PETSC_COMM_WORLD, &rank));

  PetscOptionsBegin(PETSC_COMM_WORLD, NULL, "ex183", "Mat");
  m = 5;
  PetscCall(PetscOptionsInt("-m", "Local matrix size", "MatSetSizes", m, &m, &flg));
  total_subdomains = size - 1;
  PetscCall(PetscOptionsRangeInt("-total_subdomains", "Number of submatrices n, where 0 < n <= comm size", "MatCreateSubMatricesMPI", total_subdomains, &total_subdomains, &flg, 1, size));
  permute_indices = PETSC_FALSE;
  PetscCall(PetscOptionsBool("-permute_indices", "Whether to permute indices before breaking them into subdomains", "ISCreateGeneral", permute_indices, &permute_indices, &flg));
  hash = 7;
  PetscCall(PetscOptionsInt("-hash", "Permutation factor, which has to be relatively prime to M = size*m (total matrix size)", "ISCreateGeneral", hash, &hash, &flg));
  rep = 2;
  PetscCall(PetscOptionsRangeInt("-rep", "Number of times to carry out submatrix extractions; currently only 1 & 2 are supported", NULL, rep, &rep, &flg, 1, 2));
  PetscOptionsEnd();

  viewer = PETSC_VIEWER_STDOUT_WORLD;
  /* Create logically sparse, but effectively dense matrix for easy verification of submatrix extraction correctness. */
  PetscCall(MatCreate(PETSC_COMM_WORLD, &A));
  PetscCall(MatSetSizes(A, m, m, PETSC_DECIDE, PETSC_DECIDE));
  PetscCall(MatSetFromOptions(A));
  PetscCall(MatSetUp(A));
  PetscCall(MatGetSize(A, NULL, &N));
  PetscCall(MatGetLocalSize(A, NULL, &n));
  PetscCall(MatGetBlockSize(A, &bs));
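  /* Preallocate for every supported matrix type; only the call that matches the actual type of A (selectable at runtime with -mat_type) takes effect, the others are no-ops. */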
  PetscCall(MatSeqAIJSetPreallocation(A, n, NULL));
  PetscCall(MatMPIAIJSetPreallocation(A, n, NULL, N - n, NULL));
  PetscCall(MatSeqBAIJSetPreallocation(A, bs, n / bs, NULL));
  PetscCall(MatMPIBAIJSetPreallocation(A, bs, n / bs, NULL, (N - n) / bs, NULL));
  PetscCall(MatSeqSBAIJSetPreallocation(A, bs, n / bs, NULL));
  PetscCall(MatMPISBAIJSetPreallocation(A, bs, n / bs, NULL, (N - n) / bs, NULL));

  PetscCall(PetscMalloc2(N, &cols, N, &vals));
  PetscCall(MatGetOwnershipRange(A, &rstart, &rend));
  for (j = 0; j < N; ++j) cols[j] = j;
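  /* Entry (i, j) is set to i*10000 + j, so every value encodes its global row and column and extracted submatrices can be checked by inspection. */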
  for (i = rstart; i < rend; i++) {
    for (j = 0; j < N; ++j) vals[j] = i * 10000 + j;
    PetscCall(MatSetValues(A, 1, &i, N, cols, vals, INSERT_VALUES));
  }
  PetscCall(PetscFree2(cols, vals));
  PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));

  PetscCall(PetscViewerASCIIPrintf(viewer, "Initial matrix:\n"));
  PetscCall(MatView(A, viewer));

  /*
    Create subcomms and ISs so that each rank participates in one IS.
    The IS either coalesces adjacent rank indices (contiguous),
    or selects indices by scrambling them using a hash.
  */
  k     = size / total_subdomains + (size % total_subdomains > 0); /* There are up to k ranks to a color */
  color = rank / k;
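  /* Ranks with the same color end up in the same subcommunicator; each subcomm owns one (row IS, col IS) pair and hence one subdomain. */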
  PetscCallMPI(MPI_Comm_split(PETSC_COMM_WORLD, color, rank, &subcomm));
  PetscCallMPI(MPI_Comm_size(subcomm, &subsize));
  PetscCallMPI(MPI_Comm_rank(subcomm, &subrank));
  PetscCall(MatGetOwnershipRange(A, &rstart, &rend));
  nis = 1;
  PetscCall(PetscMalloc2(rend - rstart, &rowindices, rend - rstart, &colindices));
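
  /* Each rank contributes its locally owned rows: row indices are the owned row numbers (optionally permuted by the hash) taken modulo N, and column indices are the same values shifted by m. */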
  for (j = rstart; j < rend; ++j) {
    if (permute_indices) {
      idx = (j * hash);
    } else {
      idx = j;
    }
    rowindices[j - rstart] = idx % N;
    colindices[j - rstart] = (idx + m) % N;
  }
  PetscCall(ISCreateGeneral(subcomm, rend - rstart, rowindices, PETSC_COPY_VALUES, &rowis[0]));
  PetscCall(ISCreateGeneral(subcomm, rend - rstart, colindices, PETSC_COPY_VALUES, &colis[0]));
  PetscCall(ISSort(rowis[0]));
  PetscCall(ISSort(colis[0]));
  PetscCall(PetscFree2(rowindices, colindices));
  /*
    Now view the ISs. To avoid deadlock when viewing a list of objects on different subcomms,
    we need to obtain the global numbers of our local objects and wait for the corresponding global
    number to be viewed.
  */
  PetscCall(PetscViewerASCIIPrintf(viewer, "Subdomains"));
  if (permute_indices) PetscCall(PetscViewerASCIIPrintf(viewer, " (hash=%" PetscInt_FMT ")", hash));
  PetscCall(PetscViewerASCIIPrintf(viewer, ":\n"));
  PetscCall(PetscViewerFlush(viewer));

  nsubdomains = 1;
  for (s = 0; s < nsubdomains; ++s) gsubdomainperm[s] = s;
  PetscCall(PetscObjectsListGetGlobalNumbering(PETSC_COMM_WORLD, 1, (PetscObject *)rowis, &gnsubdomains, gsubdomainnums));
  PetscCall(PetscSortIntWithPermutation(nsubdomains, gsubdomainnums, gsubdomainperm));
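  /* Loop over all global subdomain numbers in the same order on every rank; a rank views its own IS pair only when gs matches one of its global numbers (ss >= 0) and passes ss = -1 otherwise, so the collective viewer calls inside MyISView() stay matched. */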
  for (gs = 0, s = 0; gs < gnsubdomains; ++gs) {
    PetscInt ss;
    if (s < nsubdomains) {
      ss = gsubdomainperm[s];
      if (gs == gsubdomainnums[ss]) { /* Global subdomain gs being viewed is my subdomain with local number ss. */
        ++s;
      } else ss = -1;
    } else ss = -1;
    PetscCall(MyISView(rowis, colis, gs, ss, viewer));
  }
  PetscCall(PetscViewerFlush(viewer));
  PetscCall(ISSort(rowis[0]));
  PetscCall(ISSort(colis[0]));
  nsubdomains = 1;
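  /* Extract one parallel submatrix per IS pair; each submatrix lives on the subcommunicator of its row/column ISs. */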
  PetscCall(MatCreateSubMatricesMPI(A, nsubdomains, rowis, colis, MAT_INITIAL_MATRIX, &submats));
  /*
    Now view the matrices. To avoid deadlock when viewing a list of objects on different subcomms,
    we need to obtain the global numbers of our local objects and wait for the corresponding global
    number to be viewed. Also, all MPI processes need to call PetscViewerGetSubViewer() the same number of times.
  */
  PetscCall(PetscViewerASCIIPrintf(viewer, "Submatrices (repetition 1):\n"));
  for (s = 0; s < nsubdomains; ++s) gsubdomainperm[s] = s;
  PetscCall(PetscObjectsListGetGlobalNumbering(PETSC_COMM_WORLD, 1, (PetscObject *)submats, &gnsubdomains, gsubdomainnums));
  PetscCall(PetscSortIntWithPermutation(nsubdomains, gsubdomainnums, gsubdomainperm));
  for (gs = 0, s = 0; gs < gnsubdomains; ++gs) {
    PetscViewer subviewer = NULL;
    if (s < nsubdomains) {
      PetscInt ss;
      ss = gsubdomainperm[s];
      if (gs == gsubdomainnums[ss]) { /* Global subdomain gs being viewed is my subdomain with local number ss. */
        PetscCall(PetscViewerGetSubViewer(viewer, PetscObjectComm((PetscObject)submats[ss]), &subviewer));
        PetscCall(MatView(submats[ss], subviewer));
        PetscCall(PetscViewerRestoreSubViewer(viewer, PetscObjectComm((PetscObject)submats[ss]), &subviewer));
        ++s;
      } else {
        PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &subviewer));
        PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &subviewer));
      }
    } else {
      PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &subviewer));
      PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &subviewer));
    }
  }
  if (rep == 1) goto cleanup;
  nsubdomains = 1;
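  /* Repeat the extraction with MAT_REUSE_MATRIX to exercise reuse of the previously created submatrices. */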
  PetscCall(MatCreateSubMatricesMPI(A, nsubdomains, rowis, colis, MAT_REUSE_MATRIX, &submats));
  /*
    Now view the matrices. To avoid deadlock when viewing a list of objects on different subcomms,
    we need to obtain the global numbers of our local objects and wait for the corresponding global
    number to be viewed.
  */
  PetscCall(PetscViewerASCIIPrintf(viewer, "Submatrices (repetition 2):\n"));
  for (s = 0; s < nsubdomains; ++s) gsubdomainperm[s] = s;
  PetscCall(PetscObjectsListGetGlobalNumbering(PETSC_COMM_WORLD, 1, (PetscObject *)submats, &gnsubdomains, gsubdomainnums));
  PetscCall(PetscSortIntWithPermutation(nsubdomains, gsubdomainnums, gsubdomainperm));
  for (gs = 0, s = 0; gs < gnsubdomains; ++gs) {
    PetscViewer subviewer = NULL;
    if (s < nsubdomains) {
      PetscInt ss;
      ss = gsubdomainperm[s];
      if (gs == gsubdomainnums[ss]) { /* Global subdomain gs being viewed is my subdomain with local number ss. */
        PetscCall(PetscViewerGetSubViewer(viewer, PetscObjectComm((PetscObject)submats[ss]), &subviewer));
        PetscCall(MatView(submats[ss], subviewer));
        PetscCall(PetscViewerRestoreSubViewer(viewer, PetscObjectComm((PetscObject)submats[ss]), &subviewer));
        ++s;
      } else {
        PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &subviewer));
        PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &subviewer));
      }
    } else {
      PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &subviewer));
      PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &subviewer));
    }
  }
cleanup:
  for (k = 0; k < nsubdomains; ++k) PetscCall(MatDestroy(submats + k));
  PetscCall(PetscFree(submats));
  for (k = 0; k < nis; ++k) {
    PetscCall(ISDestroy(rowis + k));
    PetscCall(ISDestroy(colis + k));
  }
  PetscCall(MatDestroy(&A));
  PetscCallMPI(MPI_Comm_free(&subcomm));
  PetscCall(PetscFinalize());
  return 0;
}

/*TEST

   test:
      nsize: 2
      args: -total_subdomains 1
      output_file: output/ex183_2_1.out

   test:
      suffix: 2
      nsize: 3
      args: -total_subdomains 2
      output_file: output/ex183_3_2.out

   test:
      suffix: 3
      nsize: 4
      args: -total_subdomains 2
      output_file: output/ex183_4_2.out

   test:
      suffix: 4
      nsize: 6
      args: -total_subdomains 2
      output_file: output/ex183_6_2.out

TEST*/