Actual source code: ex33.c
static char help[] = "Test memory scalability of MatMatMult() for AIJ and DENSE matrices. \n\
                     Modified from the code contributed by Ian Lin <iancclin@umich.edu> \n\n";

/*
  Example:
    mpiexec -n <np> ./ex33 -mem_view -matmatmult_Bbn <Bbn>
*/
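
/*
  -mem_view makes this example print process memory usage after each stage. -matmatmult_Bbn is
  presumably read by the parallel AIJ*Dense MatMatMult() implementation as the number of dense
  columns processed per block (see the MatMatMult() documentation for its exact meaning).
*/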

#include <petsc.h>

PetscErrorCode Print_memory(PetscLogDouble mem)
{
  double max_mem, min_mem;

  PetscFunctionBeginUser;
  PetscCallMPI(MPI_Reduce(&mem, &max_mem, 1, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD));
  PetscCallMPI(MPI_Reduce(&mem, &min_mem, 1, MPI_DOUBLE, MPI_MIN, 0, MPI_COMM_WORLD));
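  /* The reduced values are only valid on rank 0; PetscPrintf() on MPI_COMM_WORLD below prints from rank 0 only */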
  max_mem = max_mem / 1024.0 / 1024.0;
  min_mem = min_mem / 1024.0 / 1024.0;
  PetscCall(PetscPrintf(MPI_COMM_WORLD, " max and min memory across all processors %.4f Mb, %.4f Mb.\n", (double)max_mem, (double)min_mem));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Illustrate how to use MPI derived data types; doing so can save a significant amount of memory.
  See MatMPIDenseScatter().
*/
PetscErrorCode TestMPIDerivedDataType(void)
{
  MPI_Datatype type1, type2, rtype1, rtype2;
  PetscInt     i, j;
  PetscScalar  buffer[24]; /* An array of 4 rows, 6 cols */
  MPI_Status   status;
  PetscMPIInt  rank, size, disp[2];

  PetscFunctionBeginUser;
  PetscCallMPI(MPI_Comm_size(MPI_COMM_WORLD, &size));
  PetscCheck(size >= 2, PETSC_COMM_SELF, PETSC_ERR_WRONG_MPI_SIZE, "Must use at least 2 processors");
  PetscCallMPI(MPI_Comm_rank(MPI_COMM_WORLD, &rank));

  if (rank == 0) {
    /* proc[0] sends 2 rows to proc[1] */
    for (i = 0; i < 24; i++) buffer[i] = (PetscScalar)i;

    disp[0] = 0;
    disp[1] = 2;
    PetscCallMPI(MPI_Type_create_indexed_block(2, 1, (const PetscMPIInt *)disp, MPIU_SCALAR, &type1));
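    /* type1 selects the 2 entries at rows disp[0]=0 and disp[1]=2 within a single column; resizing its
       extent to one full column below lets the send count of 6 advance column by column */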
    /* one column has 4 entries */
    PetscCallMPI(MPI_Type_create_resized(type1, 0, 4 * sizeof(PetscScalar), &type2));
    PetscCallMPI(MPI_Type_commit(&type2));
    PetscCallMPI(MPI_Send(buffer, 6, type2, 1, 123, MPI_COMM_WORLD));

  } else if (rank == 1) {
    /* proc[1] receives 2 rows from proc[0] and puts them into contiguous rows, starting at row 1 (disp[0]) */
    for (i = 0; i < 24; i++) buffer[i] = 0.0;

    disp[0] = 1;
    PetscCallMPI(MPI_Type_create_indexed_block(1, 2, (const PetscMPIInt *)disp, MPIU_SCALAR, &rtype1));
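    /* rtype1 is one contiguous block of 2 entries starting at row disp[0]=1 of a column; the resized
       extent of one full column again makes the receive count of 6 walk across the 6 columns */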
    PetscCallMPI(MPI_Type_create_resized(rtype1, 0, 4 * sizeof(PetscScalar), &rtype2));

    PetscCallMPI(MPI_Type_commit(&rtype2));
    PetscCallMPI(MPI_Recv(buffer, 6, rtype2, 0, 123, MPI_COMM_WORLD, &status));
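    /* With the values sent above, the 4 x 6 buffer printed below should contain (rows 0 and 3 untouched):
         0  0  0  0  0  0
         0  4  8 12 16 20
         2  6 10 14 18 22
         0  0  0  0  0  0 */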
    for (i = 0; i < 4; i++) {
      for (j = 0; j < 6; j++) PetscCall(PetscPrintf(MPI_COMM_SELF, " %g", (double)PetscRealPart(buffer[i + j * 4])));
      PetscCall(PetscPrintf(MPI_COMM_SELF, "\n"));
    }
  }

  if (rank == 0) {
    PetscCallMPI(MPI_Type_free(&type1));
    PetscCallMPI(MPI_Type_free(&type2));
  } else if (rank == 1) {
    PetscCallMPI(MPI_Type_free(&rtype1));
    PetscCallMPI(MPI_Type_free(&rtype2));
  }
  PetscCallMPI(MPI_Barrier(MPI_COMM_WORLD));
  PetscFunctionReturn(PETSC_SUCCESS);
}

int main(int argc, char **args)
{
  PetscInt       mA = 2700, nX = 80, nz = 40;
  /* PetscInt    mA = 6, nX = 5, nz = 2; // small test */
  PetscLogDouble mem;
  Mat            A, X, Y;
  PetscBool      flg = PETSC_FALSE;

  PetscFunctionBeginUser;
  PetscCall(PetscInitialize(&argc, &args, NULL, help));
  PetscCall(PetscOptionsGetBool(NULL, NULL, "-test_mpiderivedtype", &flg, NULL));
  if (flg) {
    PetscCall(TestMPIDerivedDataType());
    PetscCall(PetscFinalize());
    return 0;
  }

  PetscCall(PetscOptionsGetBool(NULL, NULL, "-mem_view", &flg, NULL));
  PetscCall(PetscMemoryGetCurrentUsage(&mem));
  if (flg) {
    PetscCall(PetscPrintf(MPI_COMM_WORLD, "Before start,"));
    PetscCall(Print_memory(mem));
  }
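
  /* A is a sparse mA x mA AIJ matrix, preallocated with nz nonzeros per row for both the diagonal and off-diagonal blocks */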
  PetscCall(MatCreateAIJ(PETSC_COMM_WORLD, PETSC_DECIDE, PETSC_DECIDE, mA, mA, nz, NULL, nz, NULL, &A));
  PetscCall(MatSetRandom(A, NULL));
  PetscCall(PetscMemoryGetCurrentUsage(&mem));
  if (flg) {
    PetscCall(PetscPrintf(MPI_COMM_WORLD, "After creating A,"));
    PetscCall(Print_memory(mem));
  }
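
  /* X is a dense mA x nX matrix; passing NULL for the data pointer lets PETSc allocate the storage */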
  PetscCall(MatCreateDense(PETSC_COMM_WORLD, PETSC_DECIDE, PETSC_DECIDE, mA, nX, NULL, &X));
  PetscCall(MatSetRandom(X, NULL));
  PetscCall(PetscMemoryGetCurrentUsage(&mem));
  if (flg) {
    PetscCall(PetscPrintf(MPI_COMM_WORLD, "After creating X,"));
    PetscCall(Print_memory(mem));
  }
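
  /* Y = A*X; MAT_INITIAL_MATRIX creates Y, and PETSC_DETERMINE leaves the fill estimate to PETSc */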
  PetscCall(MatMatMult(A, X, MAT_INITIAL_MATRIX, PETSC_DETERMINE, &Y));
  PetscCall(PetscMemoryGetCurrentUsage(&mem));
  if (flg) {
    PetscCall(PetscPrintf(MPI_COMM_WORLD, "After MatMatMult,"));
    PetscCall(Print_memory(mem));
  }

  /* Test reuse */
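  /* MAT_REUSE_MATRIX recomputes the numerical product into the existing Y, reusing the data structures from the first call */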
  PetscCall(MatMatMult(A, X, MAT_REUSE_MATRIX, PETSC_DETERMINE, &Y));
  PetscCall(PetscMemoryGetCurrentUsage(&mem));
  if (flg) {
    PetscCall(PetscPrintf(MPI_COMM_WORLD, "After reuse MatMatMult,"));
    PetscCall(Print_memory(mem));
  }

  /* Check accuracy */
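  /* MatMatMultEqual() applies both A*X and Y to 10 random vectors and compares the results */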
  PetscCall(MatMatMultEqual(A, X, Y, 10, &flg));
  PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_NOTSAMETYPE, "Error in MatMatMult()");

  PetscCall(MatDestroy(&A));
  PetscCall(MatDestroy(&X));
  PetscCall(MatDestroy(&Y));

  PetscCall(PetscFinalize());
  return 0;
}

/*TEST

   test:
     suffix: 1
     nsize: 4
     output_file: output/ex33.out

   test:
     suffix: 2
     nsize: 8
     output_file: output/ex33.out

   test:
     suffix: 3
     nsize: 2
     args: -test_mpiderivedtype
     output_file: output/ex33_3.out

TEST*/