Actual source code: ex33.c

static char help[] = "Test memory scalability of MatMatMult() for AIJ and DENSE matrices. \n\
Modified from the code contributed by Ian Lin <iancclin@umich.edu> \n\n";

/*
Example:
  mpiexec -n <np> ./ex33 -mem_view -matmatmult_Bbn <Bbn>
*/

#include <petsc.h>
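/* Print_memory() below reduces the per-rank memory value to rank 0 and reports the maximum and minimum (in megabytes) across all MPI ranks */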

PetscErrorCode Print_memory(PetscLogDouble mem)
{
  double max_mem, min_mem;

  PetscFunctionBeginUser;
  PetscCallMPI(MPI_Reduce(&mem, &max_mem, 1, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD));
  PetscCallMPI(MPI_Reduce(&mem, &min_mem, 1, MPI_DOUBLE, MPI_MIN, 0, MPI_COMM_WORLD));
  max_mem = max_mem / 1024.0 / 1024.0;
  min_mem = min_mem / 1024.0 / 1024.0;
  PetscCall(PetscPrintf(MPI_COMM_WORLD, " max and min memory across all processors %.4f Mb, %.4f Mb.\n", (double)max_mem, (double)min_mem));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
   Illustrate how to use MPI derived data types.
   Doing so can save memory significantly. See MatMPIDenseScatter()
*/
PetscErrorCode TestMPIDerivedDataType(void)
{
  MPI_Datatype type1, type2, rtype1, rtype2;
  PetscInt     i, j;
  PetscScalar  buffer[24]; /* An array of 4 rows, 6 cols, stored column-major */
  MPI_Status   status;
  PetscMPIInt  rank, size, disp[2];

  PetscFunctionBeginUser;
  PetscCallMPI(MPI_Comm_size(MPI_COMM_WORLD, &size));
  PetscCheck(size >= 2, PETSC_COMM_SELF, PETSC_ERR_WRONG_MPI_SIZE, "Must use at least 2 processors");
  PetscCallMPI(MPI_Comm_rank(MPI_COMM_WORLD, &rank));

  if (rank == 0) {
    /* proc[0] sends 2 rows to proc[1] */
    for (i = 0; i < 24; i++) buffer[i] = (PetscScalar)i;

    disp[0] = 0;
    disp[1] = 2;
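    /* type1 selects 2 blocks of 1 scalar each, at element offsets disp[0] = 0 and disp[1] = 2, i.e. rows 0 and 2 of a 4-entry column;
       type2 below resizes its extent to one full column (4 scalars) so consecutive copies step from column to column */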
    PetscCallMPI(MPI_Type_create_indexed_block(2, 1, (const PetscMPIInt *)disp, MPIU_SCALAR, &type1));
    /* one column has 4 entries */
    PetscCallMPI(MPI_Type_create_resized(type1, 0, 4 * sizeof(PetscScalar), &type2));
    PetscCallMPI(MPI_Type_commit(&type2));
    PetscCallMPI(MPI_Send(buffer, 6, type2, 1, 123, MPI_COMM_WORLD));

  } else if (rank == 1) {
    /* proc[1] receives 2 rows from proc[0] and puts them into contiguous rows, starting at row 1 (disp[0]) */
    for (i = 0; i < 24; i++) buffer[i] = 0.0;

    disp[0] = 1;
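    /* rtype1 is a single block of 2 contiguous scalars starting at element offset disp[0] = 1 (rows 1 and 2 of a column);
       resizing the extent to one full column makes each of the 6 received copies land in rows 1 and 2 of successive columns */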
    PetscCallMPI(MPI_Type_create_indexed_block(1, 2, (const PetscMPIInt *)disp, MPIU_SCALAR, &rtype1));
    PetscCallMPI(MPI_Type_create_resized(rtype1, 0, 4 * sizeof(PetscScalar), &rtype2));

    PetscCallMPI(MPI_Type_commit(&rtype2));
    PetscCallMPI(MPI_Recv(buffer, 6, rtype2, 0, 123, MPI_COMM_WORLD, &status));
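    /* print the 4 x 6 buffer (column-major) to show where the received rows were placed */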
    for (i = 0; i < 4; i++) {
      for (j = 0; j < 6; j++) PetscCall(PetscPrintf(MPI_COMM_SELF, "  %g", (double)PetscRealPart(buffer[i + j * 4])));
      PetscCall(PetscPrintf(MPI_COMM_SELF, "\n"));
    }
  }

  if (rank == 0) {
    PetscCallMPI(MPI_Type_free(&type1));
    PetscCallMPI(MPI_Type_free(&type2));
  } else if (rank == 1) {
    PetscCallMPI(MPI_Type_free(&rtype1));
    PetscCallMPI(MPI_Type_free(&rtype2));
  }
  PetscCallMPI(MPI_Barrier(MPI_COMM_WORLD));
  PetscFunctionReturn(PETSC_SUCCESS);
}

int main(int argc, char **args)
{
  PetscInt mA = 2700, nX = 80, nz = 40;
  /* PetscInt        mA=6,nX=5,nz=2; //small test */
  PetscLogDouble mem;
  Mat            A, X, Y;
  PetscBool      flg = PETSC_FALSE;

  PetscFunctionBeginUser;
  PetscCall(PetscInitialize(&argc, &args, NULL, help));
  PetscCall(PetscOptionsGetBool(NULL, NULL, "-test_mpiderivedtype", &flg, NULL));
  if (flg) {
    PetscCall(TestMPIDerivedDataType());
    PetscCall(PetscFinalize());
    return 0;
  }
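
  /* Default path: record memory usage before and after each stage of creating the sparse matrix A, the dense matrix X, and forming the product Y = A*X */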

  PetscCall(PetscOptionsGetBool(NULL, NULL, "-mem_view", &flg, NULL));
  PetscCall(PetscMemoryGetCurrentUsage(&mem));
  if (flg) {
    PetscCall(PetscPrintf(MPI_COMM_WORLD, "Before start,"));
    PetscCall(Print_memory(mem));
  }
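
  /* Create a distributed sparse AIJ matrix A (mA x mA), preallocating nz nonzeros per row for both the diagonal and off-diagonal blocks, and fill it with random entries */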

  PetscCall(MatCreateAIJ(PETSC_COMM_WORLD, PETSC_DECIDE, PETSC_DECIDE, mA, mA, nz, NULL, nz, NULL, &A));
  PetscCall(MatSetRandom(A, NULL));
  PetscCall(PetscMemoryGetCurrentUsage(&mem));
  if (flg) {
    PetscCall(PetscPrintf(MPI_COMM_WORLD, "After creating A,"));
    PetscCall(Print_memory(mem));
  }
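
  /* Create a dense matrix X (mA x nX); passing NULL for the data pointer lets PETSc allocate the storage */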

  PetscCall(MatCreateDense(PETSC_COMM_WORLD, PETSC_DECIDE, PETSC_DECIDE, mA, nX, NULL, &X));
  PetscCall(MatSetRandom(X, NULL));
  PetscCall(PetscMemoryGetCurrentUsage(&mem));
  if (flg) {
    PetscCall(PetscPrintf(MPI_COMM_WORLD, "After creating X,"));
    PetscCall(Print_memory(mem));
  }
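
  /* Compute Y = A*X; MAT_INITIAL_MATRIX allocates Y. The -matmatmult_Bbn option shown in the example above is meant to
     limit how many columns of X are processed at a time, bounding the extra memory used by the product */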

  PetscCall(MatMatMult(A, X, MAT_INITIAL_MATRIX, PETSC_DETERMINE, &Y));
  PetscCall(PetscMemoryGetCurrentUsage(&mem));
  if (flg) {
    PetscCall(PetscPrintf(MPI_COMM_WORLD, "After MatMatMult,"));
    PetscCall(Print_memory(mem));
  }

  /* Test reuse: MAT_REUSE_MATRIX overwrites the existing Y instead of allocating a new product matrix */
  PetscCall(MatMatMult(A, X, MAT_REUSE_MATRIX, PETSC_DETERMINE, &Y));
  PetscCall(PetscMemoryGetCurrentUsage(&mem));
  if (flg) {
    PetscCall(PetscPrintf(MPI_COMM_WORLD, "After reuse MatMatMult,"));
    PetscCall(Print_memory(mem));
  }

  /* Check accuracy: compare Y against A*X applied to 10 random vectors */
  PetscCall(MatMatMultEqual(A, X, Y, 10, &flg));
  PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_NOTSAMETYPE, "Error in MatMatMult()");

  PetscCall(MatDestroy(&A));
  PetscCall(MatDestroy(&X));
  PetscCall(MatDestroy(&Y));

  PetscCall(PetscFinalize());
  return 0;
}

/*TEST

   test:
      suffix: 1
      nsize: 4
      output_file: output/ex33.out

   test:
      suffix: 2
      nsize: 8
      output_file: output/ex33.out

   test:
      suffix: 3
      nsize: 2
      args: -test_mpiderivedtype
      output_file: output/ex33_3.out

TEST*/