Actual source code: ex32.c
1: static char help[] = "Tests MATSEQDENSECUDA\n\n";
3: #include <petscmat.h>
5: int main(int argc, char **argv)
6: {
7: Mat A, AC, B;
8: PetscInt m = 10, n = 10;
9: PetscReal r, tol = 10 * PETSC_SMALL;
11: PetscFunctionBeginUser;
12: PetscCall(PetscInitialize(&argc, &argv, NULL, help));
13: PetscCall(PetscOptionsGetInt(NULL, NULL, "-m", &m, NULL));
14: PetscCall(PetscOptionsGetInt(NULL, NULL, "-n", &n, NULL));
15: PetscCall(MatCreate(PETSC_COMM_SELF, &A));
16: PetscCall(MatSetSizes(A, PETSC_DECIDE, PETSC_DECIDE, m, n));
17: PetscCall(MatSetType(A, MATSEQDENSE));
18: PetscCall(MatSetFromOptions(A));
19: PetscCall(MatSeqDenseSetPreallocation(A, NULL));
20: PetscCall(MatSetRandom(A, NULL));
21: #if 0
22: PetscInt i,j;
23: PetscScalar val;
24: for (i=0; i<m; i++) {
25: for (j=0; j<n; j++) {
26: val = (PetscScalar)(i+j);
27: PetscCall(MatSetValues(A,1,&i,1,&j,&val,INSERT_VALUES));
28: }
29: }
30: PetscCall(MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY));
31: PetscCall(MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY));
32: #endif
34: /* Create a CUDA version of A */
35: #if defined(PETSC_HAVE_CUDA)
36: PetscCall(MatConvert(A, MATSEQDENSECUDA, MAT_INITIAL_MATRIX, &AC));
37: #else
38: PetscCall(MatDuplicate(A, MAT_COPY_VALUES, &AC));
39: #endif
40: PetscCall(MatDuplicate(AC, MAT_COPY_VALUES, &B));
42: /* full CUDA AXPY */
43: PetscCall(MatAXPY(B, -1.0, AC, SAME_NONZERO_PATTERN));
44: PetscCall(MatNorm(B, NORM_INFINITY, &r));
45: PetscCheck(r == 0.0, PetscObjectComm((PetscObject)B), PETSC_ERR_PLIB, "Error MatDuplicate + MatCopy + MatAXPY %g", (double)r);
47: /* test Copy */
48: PetscCall(MatCopy(AC, B, SAME_NONZERO_PATTERN));
50: /* call MatAXPY_Basic since B is CUDA, A is CPU, */
51: PetscCall(MatAXPY(B, -1.0, A, SAME_NONZERO_PATTERN));
52: PetscCall(MatNorm(B, NORM_INFINITY, &r));
53: PetscCheck(r == 0.0, PetscObjectComm((PetscObject)B), PETSC_ERR_PLIB, "Error MatDuplicate + MatCopy + MatAXPY_Basic %g", (double)r);
55: if (m == n) {
56: Mat B1, B2;
58: PetscCall(MatCopy(AC, B, SAME_NONZERO_PATTERN));
59: /* full CUDA PtAP */
60: PetscCall(MatPtAP(B, AC, MAT_INITIAL_MATRIX, PETSC_DETERMINE, &B1));
62: /* CPU PtAP since A is on the CPU only */
63: PetscCall(MatPtAP(B, A, MAT_INITIAL_MATRIX, PETSC_DETERMINE, &B2));
65: PetscCall(MatAXPY(B2, -1.0, B1, SAME_NONZERO_PATTERN));
66: PetscCall(MatNorm(B2, NORM_INFINITY, &r));
67: PetscCheck(r <= tol, PetscObjectComm((PetscObject)B), PETSC_ERR_PLIB, "Error MatPtAP %g", (double)r);
69: /* test reuse */
70: PetscCall(MatPtAP(B, AC, MAT_REUSE_MATRIX, PETSC_DETERMINE, &B1));
71: PetscCall(MatPtAP(B, A, MAT_REUSE_MATRIX, PETSC_DETERMINE, &B2));
72: PetscCall(MatAXPY(B2, -1.0, B1, SAME_NONZERO_PATTERN));
73: PetscCall(MatNorm(B2, NORM_INFINITY, &r));
74: PetscCheck(r <= tol, PetscObjectComm((PetscObject)B), PETSC_ERR_PLIB, "Error MatPtAP %g", (double)r);
76: PetscCall(MatDestroy(&B1));
77: PetscCall(MatDestroy(&B2));
78: }
80: PetscCall(MatDestroy(&B));
81: PetscCall(MatDestroy(&AC));
82: PetscCall(MatDestroy(&A));
83: PetscCall(PetscFinalize());
84: return 0;
85: }
87: /*TEST
89: build:
90: requires: cuda
92: test:
93: output_file: output/ex32_1.out
94: args: -m {{3 5 12}} -n {{3 5 12}}
95: suffix: seqdensecuda
97: TEST*/