Actual source code: ex13.c
static char help[] = "Benchmark Poisson Problem in 2d and 3d with finite elements.\n\
We solve the Poisson problem in a rectangular domain\n\
using a parallel unstructured mesh (DMPLEX) to discretize it.\n\n\n";
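
/*
  Model problem: -Delta u = f on a box domain, with essential (Dirichlet) boundary
  conditions taken from the exact solution on the mesh boundary (the "marker" label).
  Two manufactured solutions are used:
    - weak form (default): u = sum_d sin(2 pi x_d), with the Laplacian integrated by
      parts (pointwise functions f0_trig_u/f1_u and Jacobian g3_uu below)
    - strong form (-strong): u = x^2 + y^2, using second derivatives of the field
      directly in the residual (run with -dm_ds_jet_degree 2, as in the "strong" test)
*/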
#include <petscdmplex.h>
#include <petscsnes.h>
#include <petscds.h>
#include <petscconvest.h>
#if defined(PETSC_HAVE_AMGX)
  #include <amgx_c.h>
#endif

typedef struct {
  PetscInt  nit;    /* Number of benchmark iterations */
  PetscBool strong; /* Do not integrate the Laplacian by parts */
} AppCtx;

static PetscErrorCode trig_u(PetscInt dim, PetscReal time, const PetscReal x[], PetscInt Nc, PetscScalar *u, void *ctx)
{
  PetscInt d;

  *u = 0.0;
  for (d = 0; d < dim; ++d) *u += PetscSinReal(2.0 * PETSC_PI * x[d]);
  return PETSC_SUCCESS;
}
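
/*
  Pointwise functions for the weak form: f0 carries the forcing term (minus the
  manufactured right-hand side f), f1 is the flux grad(u), and g3 is the Jacobian
  term d(f1)/d(grad u), here the identity tensor.
*/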
static void f0_trig_u(PetscInt dim, PetscInt Nf, PetscInt NfAux, const PetscInt uOff[], const PetscInt uOff_x[], const PetscScalar u[], const PetscScalar u_t[], const PetscScalar u_x[], const PetscInt aOff[], const PetscInt aOff_x[], const PetscScalar a[], const PetscScalar a_t[], const PetscScalar a_x[], PetscReal t, const PetscReal x[], PetscInt numConstants, const PetscScalar constants[], PetscScalar f0[])
{
  PetscInt d;

  for (d = 0; d < dim; ++d) f0[0] += -4.0 * PetscSqr(PETSC_PI) * PetscSinReal(2.0 * PETSC_PI * x[d]);
}

static void f1_u(PetscInt dim, PetscInt Nf, PetscInt NfAux, const PetscInt uOff[], const PetscInt uOff_x[], const PetscScalar u[], const PetscScalar u_t[], const PetscScalar u_x[], const PetscInt aOff[], const PetscInt aOff_x[], const PetscScalar a[], const PetscScalar a_t[], const PetscScalar a_x[], PetscReal t, const PetscReal x[], PetscInt numConstants, const PetscScalar constants[], PetscScalar f1[])
{
  PetscInt d;

  for (d = 0; d < dim; ++d) f1[d] = u_x[d];
}

static void g3_uu(PetscInt dim, PetscInt Nf, PetscInt NfAux, const PetscInt uOff[], const PetscInt uOff_x[], const PetscScalar u[], const PetscScalar u_t[], const PetscScalar u_x[], const PetscInt aOff[], const PetscInt aOff_x[], const PetscScalar a[], const PetscScalar a_t[], const PetscScalar a_x[], PetscReal t, PetscReal u_tShift, const PetscReal x[], PetscInt numConstants, const PetscScalar constants[], PetscScalar g3[])
{
  PetscInt d;

  for (d = 0; d < dim; ++d) g3[d * dim + d] = 1.0;
}

static PetscErrorCode quadratic_u(PetscInt dim, PetscReal time, const PetscReal x[], PetscInt Nc, PetscScalar *u, void *ctx)
{
  *u = PetscSqr(x[0]) + PetscSqr(x[1]);
  return PETSC_SUCCESS;
}
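
/*
  Strong-form residual: with -dm_ds_jet_degree 2, u_x[] carries the 2-jet of the
  field (dim gradient entries followed by the dim*dim Hessian), so
  u_x[dim + d*dim + d] is the second derivative in direction d. The residual
  -Delta u + 4 vanishes for the exact solution u = x^2 + y^2.
*/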
static void f0_strong_u(PetscInt dim, PetscInt Nf, PetscInt NfAux, const PetscInt uOff[], const PetscInt uOff_x[], const PetscScalar u[], const PetscScalar u_t[], const PetscScalar u_x[], const PetscInt aOff[], const PetscInt aOff_x[], const PetscScalar a[], const PetscScalar a_t[], const PetscScalar a_x[], PetscReal t, const PetscReal x[], PetscInt numConstants, const PetscScalar constants[], PetscScalar f0[])
{
  PetscInt d;

  for (d = 0; d < dim; ++d) f0[0] -= u_x[dim + d * dim + d];
  f0[0] += 4.0;
}

static PetscErrorCode ProcessOptions(MPI_Comm comm, AppCtx *options)
{
  PetscFunctionBeginUser;
  options->nit    = 10;
  options->strong = PETSC_FALSE;
  PetscOptionsBegin(comm, "", "Poisson Problem Options", "DMPLEX");
  PetscCall(PetscOptionsInt("-benchmark_it", "Solve the benchmark problem this many times", "ex13.c", options->nit, &options->nit, NULL));
  PetscCall(PetscOptionsBool("-strong", "Do not integrate the Laplacian by parts", "ex13.c", options->strong, &options->strong, NULL));
  PetscOptionsEnd();
  PetscFunctionReturn(PETSC_SUCCESS);
}
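
/*
  Build the mesh entirely from command-line options (-dm_plex_dim, -dm_plex_box_faces,
  -dm_refine, ...), then perturb every local coordinate by a relative amount of order
  sqrt(machine epsilon) so the element geometry is general rather than exactly
  axis-aligned.
*/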
static PetscErrorCode CreateMesh(MPI_Comm comm, AppCtx *user, DM *dm)
{
  PetscFunctionBeginUser;
  PetscCall(DMCreate(comm, dm));
  PetscCall(DMSetType(*dm, DMPLEX));
  PetscCall(DMSetFromOptions(*dm));
  PetscCall(DMSetApplicationContext(*dm, user));
  PetscCall(DMViewFromOptions(*dm, NULL, "-dm_view"));
  { // perturb to get general coordinates
    Vec          coordinates;
    PetscScalar *coords;
    PetscInt     nloc, v;
    PetscRandom  rnd;
    PetscReal    del;

    PetscCall(PetscRandomCreate(PETSC_COMM_SELF, &rnd));
    PetscCall(PetscRandomSetInterval(rnd, -PETSC_SQRT_MACHINE_EPSILON, PETSC_SQRT_MACHINE_EPSILON));
    PetscCall(PetscRandomSetFromOptions(rnd));
    PetscCall(DMGetCoordinatesLocal(*dm, &coordinates));
    PetscCall(VecGetArray(coordinates, &coords));
    PetscCall(VecGetLocalSize(coordinates, &nloc));
    for (v = 0; v < nloc; ++v) {
      PetscCall(PetscRandomGetValueReal(rnd, &del));
      coords[v] += del * coords[v];
    }
    PetscCall(VecRestoreArray(coordinates, &coords));
    PetscCall(PetscRandomDestroy(&rnd));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
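
/*
  Attach the physics: the residual (and, for the weak form, Jacobian) pointwise
  functions, the exact solution used by -dmsnes_check and error computation, and an
  essential boundary condition on the "marker" label (value 1) taken from that exact
  solution.
*/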
static PetscErrorCode SetupPrimalProblem(DM dm, AppCtx *user)
{
  PetscDS        ds;
  DMLabel        label;
  const PetscInt id = 1;

  PetscFunctionBeginUser;
  PetscCall(DMGetDS(dm, &ds));
  PetscCall(DMGetLabel(dm, "marker", &label));
  if (user->strong) {
    PetscCall(PetscDSSetResidual(ds, 0, f0_strong_u, NULL));
    PetscCall(PetscDSSetExactSolution(ds, 0, quadratic_u, user));
    PetscCall(DMAddBoundary(dm, DM_BC_ESSENTIAL, "wall", label, 1, &id, 0, 0, NULL, (void (*)(void))quadratic_u, NULL, user, NULL));
  } else {
    PetscCall(PetscDSSetResidual(ds, 0, f0_trig_u, f1_u));
    PetscCall(PetscDSSetJacobian(ds, 0, 0, NULL, NULL, NULL, g3_uu));
    PetscCall(PetscDSSetExactSolution(ds, 0, trig_u, user));
    PetscCall(DMAddBoundary(dm, DM_BC_ESSENTIAL, "wall", label, 1, &id, 0, 0, NULL, (void (*)(void))trig_u, NULL, user, NULL));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
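
/*
  Create a PetscFE for the single scalar field from the options prefix "<name>_"
  (e.g. -potential_petscspace_degree 2), attach it to the DM, build the PetscDS,
  call the problem-specific setup, and copy the discretization to any coarse DMs so
  that a multigrid hierarchy sees the same field layout.
*/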
static PetscErrorCode SetupDiscretization(DM dm, const char name[], PetscErrorCode (*setup)(DM, AppCtx *), AppCtx *user)
{
  DM             cdm = dm;
  PetscFE        fe;
  DMPolytopeType ct;
  PetscBool      simplex;
  PetscInt       dim, cStart;
  char           prefix[PETSC_MAX_PATH_LEN];

  PetscFunctionBeginUser;
  PetscCall(DMGetDimension(dm, &dim));
  PetscCall(DMPlexGetHeightStratum(dm, 0, &cStart, NULL));
  PetscCall(DMPlexGetCellType(dm, cStart, &ct));
  simplex = DMPolytopeTypeGetNumVertices(ct) == DMPolytopeTypeGetDim(ct) + 1 ? PETSC_TRUE : PETSC_FALSE; /* a cell is a simplex iff it has dim+1 vertices */
  /* Create finite element */
  PetscCall(PetscSNPrintf(prefix, PETSC_MAX_PATH_LEN, "%s_", name));
  PetscCall(PetscFECreateDefault(PETSC_COMM_SELF, dim, 1, simplex, name ? prefix : NULL, -1, &fe));
  PetscCall(PetscObjectSetName((PetscObject)fe, name));
  /* Set discretization and boundary conditions for each mesh */
  PetscCall(DMSetField(dm, 0, NULL, (PetscObject)fe));
  PetscCall(DMCreateDS(dm));
  PetscCall((*setup)(dm, user));
  while (cdm) {
    PetscCall(DMCopyDisc(dm, cdm));
    /* TODO: Check whether the boundary of coarse meshes is marked */
    PetscCall(DMGetCoarseDM(cdm, &cdm));
  }
  PetscCall(PetscFEDestroy(&fe));
  PetscFunctionReturn(PETSC_SUCCESS);
}
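
/*
  Solve the system once (timed, including SNESSetUp), then optionally re-solve it
  -benchmark_it times inside a separate "Solve only" log stage so that -log_view
  reports the repeated solves independently of setup.
*/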
int main(int argc, char **argv)
{
  DM             dm;   /* Problem specification */
  SNES           snes; /* Nonlinear solver */
  Vec            u;    /* Solutions */
  AppCtx         user; /* User-defined work context */
  PetscLogDouble time;
  Mat            Amat;

  PetscFunctionBeginUser;
  PetscCall(PetscInitialize(&argc, &argv, NULL, help));
  PetscCall(ProcessOptions(PETSC_COMM_WORLD, &user));
  /* system */
  PetscCall(SNESCreate(PETSC_COMM_WORLD, &snes));
  PetscCall(CreateMesh(PETSC_COMM_WORLD, &user, &dm));
  PetscCall(SNESSetDM(snes, dm));
  PetscCall(SetupDiscretization(dm, "potential", SetupPrimalProblem, &user));
  PetscCall(DMCreateGlobalVector(dm, &u));
  {
    PetscInt N;

    PetscCall(VecGetSize(u, &N));
    PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Number equations N = %" PetscInt_FMT "\n", N));
  }
  PetscCall(SNESSetFromOptions(snes));
  PetscCall(PetscObjectSetName((PetscObject)u, "potential"));
  PetscCall(DMPlexSetSNESLocalFEM(dm, PETSC_FALSE, &user));
  PetscCall(DMSNESCheckFromOptions(snes, u));
  PetscCall(PetscTime(&time));
  PetscCall(SNESSetUp(snes));
#if defined(PETSC_HAVE_AMGX)
  KSP                   ksp;
  PC                    pc;
  PetscBool             flg;
  AMGX_resources_handle rsc;

  PetscCall(SNESGetKSP(snes, &ksp));
  PetscCall(KSPGetPC(ksp, &pc));
  PetscCall(PetscObjectTypeCompare((PetscObject)pc, PCAMGX, &flg));
  if (flg) {
    PetscCall(PCAmgXGetResources(pc, (void *)&rsc));
    /* do ... with resource */
  }
#endif
  PetscCall(SNESGetJacobian(snes, &Amat, NULL, NULL, NULL));
  /* The discrete Laplacian with essential boundary conditions is symmetric positive definite; let the solvers exploit that */
  PetscCall(MatSetOption(Amat, MAT_SPD, PETSC_TRUE));
  PetscCall(MatSetOption(Amat, MAT_SPD_ETERNAL, PETSC_TRUE));
  PetscCall(SNESSolve(snes, NULL, u));
  PetscCall(PetscTimeSubtract(&time));
  /* Benchmark system */
  if (user.nit) {
    Vec           b;
    PetscInt      i;
    PetscLogStage kspstage;

    PetscCall(PetscLogStageRegister("Solve only", &kspstage));
    PetscCall(PetscLogStagePush(kspstage));
    PetscCall(SNESGetSolution(snes, &u));
    PetscCall(SNESGetFunction(snes, &b, NULL, NULL));
    for (i = 0; i < user.nit; i++) {
      PetscCall(VecZeroEntries(u));
      PetscCall(SNESSolve(snes, NULL, u));
    }
    PetscCall(PetscLogStagePop());
  }
  PetscCall(SNESGetSolution(snes, &u));
  PetscCall(VecViewFromOptions(u, NULL, "-potential_view"));
  /* Cleanup */
  PetscCall(VecDestroy(&u));
  PetscCall(SNESDestroy(&snes));
  PetscCall(DMDestroy(&dm));
  PetscCall(PetscFinalize());
  return 0;
}
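
/*
  A typical serial run of the weak-form benchmark (a sketch; any of the argument sets
  in the TEST block below can also be used verbatim) might look like:

    ./ex13 -dm_plex_simplex 0 -dm_plex_box_faces 8,8 -potential_petscspace_degree 2 \
           -snes_type ksponly -ksp_type cg -pc_type gamg -benchmark_it 2 -log_view
*/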
/*TEST

  test:
    suffix: strong
    requires: triangle
    args: -dm_plex_dim 2 -dm_refine 1 -benchmark_it 0 -dmsnes_check -potential_petscspace_degree 2 -dm_ds_jet_degree 2 -strong -pc_type jacobi

  testset:
    nsize: 4
    output_file: output/ex13_comparison.out
    args: -dm_plex_dim 3 -benchmark_it 2 -dm_plex_simplex 0 -dm_plex_box_faces 2,2,1 -dm_refine 2 -petscpartitioner_simple_node_grid 1,1,1 -petscpartitioner_simple_process_grid 2,2,1 -potential_petscspace_degree 2 -petscpartitioner_type simple -snes_type ksponly -dm_view -ksp_type cg -ksp_rtol 1e-12 -snes_lag_jacobian -2 -dm_plex_box_upper 2,2,1 -dm_plex_box_lower 0,0,0 -pc_type gamg -pc_gamg_process_eq_limit 200 -pc_gamg_coarse_eq_limit 1000 -pc_gamg_esteig_ksp_type cg -mg_levels_ksp_chebyshev_esteig 0,0.2,0,1.05 -pc_gamg_reuse_interpolation true -pc_gamg_aggressive_square_graph true -pc_gamg_threshold 0.04 -pc_gamg_threshold_scale .25 -pc_gamg_aggressive_coarsening 2 -pc_gamg_mis_k_minimum_degree_ordering true -ksp_monitor -ksp_norm_type unpreconditioned
    test:
      suffix: comparison
    test:
      suffix: cuda
      requires: cuda
      args: -dm_mat_type aijcusparse -dm_vec_type cuda
    test:
      suffix: kokkos
      requires: kokkos_kernels
      args: -dm_mat_type aijkokkos -dm_vec_type kokkos
    test:
      suffix: kokkos_sycl
      requires: sycl kokkos_kernels
      args: -dm_mat_type aijkokkos -dm_vec_type kokkos
    test:
      suffix: aijmkl_comp
      requires: mkl_sparse
      args: -dm_mat_type aijmkl

  testset:
    requires: cuda amgx
    filter: grep -v Built | grep -v "AMGX version" | grep -v "CUDA Runtime"
    output_file: output/ex13_amgx.out
    args: -dm_plex_dim 2 -dm_plex_box_faces 2,2 -dm_refine 2 -petscpartitioner_type simple -potential_petscspace_degree 2 -dm_plex_simplex 0 -ksp_monitor \
          -snes_type ksponly -dm_view -ksp_type cg -ksp_norm_type unpreconditioned -ksp_converged_reason -snes_rtol 1.e-4 -pc_type amgx -benchmark_it 1 -pc_amgx_verbose false
    nsize: 4
    test:
      suffix: amgx
      args: -dm_mat_type aijcusparse -dm_vec_type cuda
    test:
      suffix: amgx_cpu
      args: -dm_mat_type aij

TEST*/