Actual source code: ex236.c

  1: static char help[] = "Test CPU/GPU memory leaks, MatMult and MatMultTransposeAdd during successive matrix assemblies\n\n";

  3: #include <petscmat.h>

  5: int main(int argc, char **argv)
  6: {
  7:   PetscMPIInt rank, size;
  8:   Mat         A;
  9:   PetscInt    i, j, k, n = 3, vstart, rstart, rend, margin;
 10:   Vec         x, y;

 12:   PetscFunctionBeginUser;
 13:   PetscCall(PetscInitialize(&argc, &argv, NULL, help));
 14:   PetscCallMPI(MPI_Comm_rank(PETSC_COMM_WORLD, &rank));
 15:   PetscCallMPI(MPI_Comm_size(PETSC_COMM_WORLD, &size));

 17:   PetscCall(MatCreate(PETSC_COMM_WORLD, &A));
 18:   PetscCall(MatSetSizes(A, n, n, PETSC_DECIDE, PETSC_DECIDE));
 19:   PetscCall(MatSetFromOptions(A));

 21:   PetscCall(MatMPIAIJSetPreallocation(A, n, NULL, 0, NULL));
 22:   PetscCall(MatSetOption(A, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE));
 23:   PetscCall(MatGetOwnershipRange(A, &rstart, &rend));
 24:   PetscCall(MatCreateVecs(A, &x, &y));
 25:   PetscCall(VecSet(x, 1.0));

 27:   /*
 28:     Matrix A only has nonzeros in the diagonal block, which is of size 3x3.
 29:     We do three successive assemblies on A. The first two have the same non-zero
 30:     pattern but different values, and the third breaks the non-zero pattern. The
 31:     first two assemblies have enough zero-rows that triggers compressed-row storage
 32:     in MATAIJ and MATAIJCUSPARSE.

 34:     These settings are used to test memory management and correctness in MatMult
 35:     and MatMultTransposeAdd.
 36:   */

 38:   for (k = 0; k < 3; k++) { /* Three assemblies */
 39:     vstart = (size * k + rank) * n * n + 1;
 40:     margin = (k == 2) ? 0 : 2; /* Create two zero-rows in the first two assemblies */
 41:     for (i = rstart; i < rend - margin; i++) {
 42:       for (j = rstart; j < rend; j++) {
 43:         PetscCall(MatSetValue(A, i, j, (PetscScalar)vstart, INSERT_VALUES));
 44:         vstart++;
 45:       }
 46:     }
 47:     PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
 48:     PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
 49:     PetscCall(MatMult(A, x, y));
 50:     PetscCall(MatMultTransposeAdd(A, x, y, y)); /* y[i] = sum of row i and column i of A */
 51:     PetscCall(VecView(y, PETSC_VIEWER_STDOUT_WORLD));
 52:   }

 54:   PetscCall(MatDestroy(&A));
 55:   PetscCall(VecDestroy(&x));
 56:   PetscCall(VecDestroy(&y));
 57:   PetscCall(PetscFinalize());

 59:   /* Uncomment this line if you want to use "cuda-memcheck --leaf-check full" to check this program */
 60:   /*cudaDeviceReset();*/
 61:   return 0;
 62: }

 64: /*TEST

 66:    testset:
 67:      nsize: 2
 68:      output_file: output/ex236_1.out
 69:      filter: grep -v type

 71:      test:
 72:        args: -mat_type aij

 74:      test:
 75:        requires: cuda
 76:        suffix: cuda
 77:        args: -mat_type aijcusparse
 78: TEST*/