Actual source code: ex32.c

  1: static char help[] = "Tests MATSEQDENSECUDA\n\n";

  3: #include <petscmat.h>

  5: int main(int argc, char **argv)
  6: {
  7:   Mat       A, AC, B;
  8:   PetscInt  m = 10, n = 10;
  9:   PetscReal r, tol    = 10 * PETSC_SMALL;

 11:   PetscFunctionBeginUser;
 12:   PetscCall(PetscInitialize(&argc, &argv, NULL, help));
 13:   PetscCall(PetscOptionsGetInt(NULL, NULL, "-m", &m, NULL));
 14:   PetscCall(PetscOptionsGetInt(NULL, NULL, "-n", &n, NULL));
 15:   PetscCall(MatCreate(PETSC_COMM_SELF, &A));
 16:   PetscCall(MatSetSizes(A, PETSC_DECIDE, PETSC_DECIDE, m, n));
 17:   PetscCall(MatSetType(A, MATSEQDENSE));
 18:   PetscCall(MatSetFromOptions(A));
 19:   PetscCall(MatSeqDenseSetPreallocation(A, NULL));
 20:   PetscCall(MatSetRandom(A, NULL));
 21: #if 0
 22:   PetscInt       i,j;
 23:   PetscScalar    val;
 24:   for (i=0; i<m; i++) {
 25:     for (j=0; j<n; j++) {
 26:       val = (PetscScalar)(i+j);
 27:       PetscCall(MatSetValues(A,1,&i,1,&j,&val,INSERT_VALUES));
 28:     }
 29:   }
 30:   PetscCall(MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY));
 31:   PetscCall(MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY));
 32: #endif

 34:   /* Create a CUDA version of A */
 35: #if defined(PETSC_HAVE_CUDA)
 36:   PetscCall(MatConvert(A, MATSEQDENSECUDA, MAT_INITIAL_MATRIX, &AC));
 37: #else
 38:   PetscCall(MatDuplicate(A, MAT_COPY_VALUES, &AC));
 39: #endif
 40:   PetscCall(MatDuplicate(AC, MAT_COPY_VALUES, &B));

 42:   /* full CUDA AXPY */
 43:   PetscCall(MatAXPY(B, -1.0, AC, SAME_NONZERO_PATTERN));
 44:   PetscCall(MatNorm(B, NORM_INFINITY, &r));
 45:   PetscCheck(r == 0.0, PetscObjectComm((PetscObject)B), PETSC_ERR_PLIB, "Error MatDuplicate + MatCopy + MatAXPY %g", (double)r);

 47:   /* test Copy */
 48:   PetscCall(MatCopy(AC, B, SAME_NONZERO_PATTERN));

 50:   /* call MatAXPY_Basic since B is CUDA, A is CPU,  */
 51:   PetscCall(MatAXPY(B, -1.0, A, SAME_NONZERO_PATTERN));
 52:   PetscCall(MatNorm(B, NORM_INFINITY, &r));
 53:   PetscCheck(r == 0.0, PetscObjectComm((PetscObject)B), PETSC_ERR_PLIB, "Error MatDuplicate + MatCopy + MatAXPY_Basic %g", (double)r);

 55:   if (m == n) {
 56:     Mat B1, B2;

 58:     PetscCall(MatCopy(AC, B, SAME_NONZERO_PATTERN));
 59:     /* full CUDA PtAP */
 60:     PetscCall(MatPtAP(B, AC, MAT_INITIAL_MATRIX, PETSC_DETERMINE, &B1));

 62:     /* CPU PtAP since A is on the CPU only */
 63:     PetscCall(MatPtAP(B, A, MAT_INITIAL_MATRIX, PETSC_DETERMINE, &B2));

 65:     PetscCall(MatAXPY(B2, -1.0, B1, SAME_NONZERO_PATTERN));
 66:     PetscCall(MatNorm(B2, NORM_INFINITY, &r));
 67:     PetscCheck(r <= tol, PetscObjectComm((PetscObject)B), PETSC_ERR_PLIB, "Error MatPtAP %g", (double)r);

 69:     /* test reuse */
 70:     PetscCall(MatPtAP(B, AC, MAT_REUSE_MATRIX, PETSC_DETERMINE, &B1));
 71:     PetscCall(MatPtAP(B, A, MAT_REUSE_MATRIX, PETSC_DETERMINE, &B2));
 72:     PetscCall(MatAXPY(B2, -1.0, B1, SAME_NONZERO_PATTERN));
 73:     PetscCall(MatNorm(B2, NORM_INFINITY, &r));
 74:     PetscCheck(r <= tol, PetscObjectComm((PetscObject)B), PETSC_ERR_PLIB, "Error MatPtAP %g", (double)r);

 76:     PetscCall(MatDestroy(&B1));
 77:     PetscCall(MatDestroy(&B2));
 78:   }

 80:   PetscCall(MatDestroy(&B));
 81:   PetscCall(MatDestroy(&AC));
 82:   PetscCall(MatDestroy(&A));
 83:   PetscCall(PetscFinalize());
 84:   return 0;
 85: }

 87: /*TEST

 89:    build:
 90:      requires: cuda

 92:    test:
 93:      output_file: output/ex32_1.out
 94:      args: -m {{3 5 12}} -n {{3 5 12}}
 95:      suffix: seqdensecuda

 97: TEST*/