Actual source code: ex13.c

  1: static char help[] = "Benchmark Poisson Problem in 2d and 3d with finite elements.\n\
  2: We solve the Poisson problem in a rectangular domain\n\
  3: using a parallel unstructured mesh (DMPLEX) to discretize it.\n\n\n";

  5: #include <petscdmplex.h>
  6: #include <petscsnes.h>
  7: #include <petscds.h>
  8: #include <petscconvest.h>
  9: #if defined(PETSC_HAVE_AMGX)
 10:   #include <amgx_c.h>
 11: #endif

 13: typedef struct {
 14:   PetscInt  nit;    /* Number of benchmark iterations */
 15:   PetscBool strong; /* Do not integrate the Laplacian by parts */
 16: } AppCtx;
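
/* Manufactured solution for the weak (integrated-by-parts) form:
     u(x) = sum_d sin(2 pi x_d)  =>  -Laplacian(u) = 4 pi^2 sum_d sin(2 pi x_d) = f.
   PETSc assembles the residual F(u) = integral(v f0 + grad(v) . f1); with f1 = grad(u) and f0 = -f
   (see f0_trig_u and f1_u below), F(u) = 0 is the weak form of -Laplacian(u) = f. */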

 18: static PetscErrorCode trig_u(PetscInt dim, PetscReal time, const PetscReal x[], PetscInt Nc, PetscScalar *u, void *ctx)
 19: {
 20:   PetscInt d;
 21:   *u = 0.0;
 22:   for (d = 0; d < dim; ++d) *u += PetscSinReal(2.0 * PETSC_PI * x[d]);
 23:   return PETSC_SUCCESS;
 24: }

 26: static void f0_trig_u(PetscInt dim, PetscInt Nf, PetscInt NfAux, const PetscInt uOff[], const PetscInt uOff_x[], const PetscScalar u[], const PetscScalar u_t[], const PetscScalar u_x[], const PetscInt aOff[], const PetscInt aOff_x[], const PetscScalar a[], const PetscScalar a_t[], const PetscScalar a_x[], PetscReal t, const PetscReal x[], PetscInt numConstants, const PetscScalar constants[], PetscScalar f0[])
 27: {
 28:   PetscInt d;
 29:   for (d = 0; d < dim; ++d) f0[0] += -4.0 * PetscSqr(PETSC_PI) * PetscSinReal(2.0 * PETSC_PI * x[d]);
 30: }

 32: static void f1_u(PetscInt dim, PetscInt Nf, PetscInt NfAux, const PetscInt uOff[], const PetscInt uOff_x[], const PetscScalar u[], const PetscScalar u_t[], const PetscScalar u_x[], const PetscInt aOff[], const PetscInt aOff_x[], const PetscScalar a[], const PetscScalar a_t[], const PetscScalar a_x[], PetscReal t, const PetscReal x[], PetscInt numConstants, const PetscScalar constants[], PetscScalar f1[])
 33: {
 34:   PetscInt d;
 35:   for (d = 0; d < dim; ++d) f1[d] = u_x[d];
 36: }
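
/* Jacobian kernel paired with f1: g3 is a dim x dim tensor contracted with the trial and test gradients;
   setting it to the identity gives integral(grad(phi_j) . grad(phi_i)), the Laplacian stiffness matrix. */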

 38: static void g3_uu(PetscInt dim, PetscInt Nf, PetscInt NfAux, const PetscInt uOff[], const PetscInt uOff_x[], const PetscScalar u[], const PetscScalar u_t[], const PetscScalar u_x[], const PetscInt aOff[], const PetscInt aOff_x[], const PetscScalar a[], const PetscScalar a_t[], const PetscScalar a_x[], PetscReal t, PetscReal u_tShift, const PetscReal x[], PetscInt numConstants, const PetscScalar constants[], PetscScalar g3[])
 39: {
 40:   PetscInt d;
 41:   for (d = 0; d < dim; ++d) g3[d * dim + d] = 1.0;
 42: }

 44: static PetscErrorCode quadratic_u(PetscInt dim, PetscReal time, const PetscReal x[], PetscInt Nc, PetscScalar *u, void *ctx)
 45: {
 46:   *u = PetscSqr(x[0]) + PetscSqr(x[1]);
 47:   return PETSC_SUCCESS;
 48: }
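
/* Strong-form residual, used with -strong and -dm_ds_jet_degree 2 so that second derivatives are
   available: u_x stores the gradient in its first dim entries followed by the Hessian, so
   u_x[dim + d*dim + d] is d^2 u / dx_d^2. With the exact solution u = x^2 + y^2 (Laplacian = 4),
   the residual -Laplacian(u) + 4 vanishes. */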

 50: static void f0_strong_u(PetscInt dim, PetscInt Nf, PetscInt NfAux, const PetscInt uOff[], const PetscInt uOff_x[], const PetscScalar u[], const PetscScalar u_t[], const PetscScalar u_x[], const PetscInt aOff[], const PetscInt aOff_x[], const PetscScalar a[], const PetscScalar a_t[], const PetscScalar a_x[], PetscReal t, const PetscReal x[], PetscInt numConstants, const PetscScalar constants[], PetscScalar f0[])
 51: {
 52:   PetscInt d;
 53:   for (d = 0; d < dim; ++d) f0[0] -= u_x[dim + d * dim + d];
 54:   f0[0] += 4.0;
 55: }
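
/* Runtime options: -benchmark_it <n> repeats the solve n times for timing (0 skips the benchmark loop,
   as in the "strong" test below), and -strong uses the strong-form residual instead of integrating the
   Laplacian by parts */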

 57: static PetscErrorCode ProcessOptions(MPI_Comm comm, AppCtx *options)
 58: {
 59:   PetscFunctionBeginUser;
 60:   options->nit    = 10;
 61:   options->strong = PETSC_FALSE;
 62:   PetscOptionsBegin(comm, "", "Poisson Problem Options", "DMPLEX");
 63:   PetscCall(PetscOptionsInt("-benchmark_it", "Solve the benchmark problem this many times", "ex13.c", options->nit, &options->nit, NULL));
 64:   PetscCall(PetscOptionsBool("-strong", "Do not integrate the Laplacian by parts", "ex13.c", options->strong, &options->strong, NULL));
 65:   PetscOptionsEnd();
 66:   PetscFunctionReturn(PETSC_SUCCESS);
 67: }
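
/* Build the mesh from the -dm_plex_* options, then perturb it: each local coordinate is scaled by
   (1 + delta) with delta drawn uniformly from +-sqrt(machine epsilon), so that the cell geometry is not
   perfectly regular */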

 69: static PetscErrorCode CreateMesh(MPI_Comm comm, AppCtx *user, DM *dm)
 70: {
 71:   PetscFunctionBeginUser;
 72:   PetscCall(DMCreate(comm, dm));
 73:   PetscCall(DMSetType(*dm, DMPLEX));
 74:   PetscCall(DMSetFromOptions(*dm));
 75:   PetscCall(DMSetApplicationContext(*dm, user));
 76:   PetscCall(DMViewFromOptions(*dm, NULL, "-dm_view"));
 77:   { // perturb to get general coordinates
 78:     Vec          coordinates;
 79:     PetscScalar *coords;
 80:     PetscInt     nloc, v;
 81:     PetscRandom  rnd;
 82:     PetscReal    del;
 83:     PetscCall(PetscRandomCreate(PETSC_COMM_SELF, &rnd));
 84:     PetscCall(PetscRandomSetInterval(rnd, -PETSC_SQRT_MACHINE_EPSILON, PETSC_SQRT_MACHINE_EPSILON));
 85:     PetscCall(PetscRandomSetFromOptions(rnd));
 86:     PetscCall(DMGetCoordinatesLocal(*dm, &coordinates));
 87:     PetscCall(VecGetArray(coordinates, &coords));
 88:     PetscCall(VecGetLocalSize(coordinates, &nloc));
 89:     for (v = 0; v < nloc; ++v) {
 90:       PetscCall(PetscRandomGetValueReal(rnd, &del));
 91:       coords[v] += del * coords[v];
 92:     }
 93:     PetscCall(VecRestoreArray(coordinates, &coords));
 94:     PetscCall(PetscRandomDestroy(&rnd));
 95:   }
 96:   PetscFunctionReturn(PETSC_SUCCESS);
 97: }
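
/* Attach the pointwise physics to the PetscDS: the residual kernels (and, for the weak form, the
   Jacobian kernel), the exact solution used by -dmsnes_check, and a Dirichlet condition on the "marker"
   boundary label given by that exact solution */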

 99: static PetscErrorCode SetupPrimalProblem(DM dm, AppCtx *user)
100: {
101:   PetscDS        ds;
102:   DMLabel        label;
103:   const PetscInt id = 1;

105:   PetscFunctionBeginUser;
106:   PetscCall(DMGetDS(dm, &ds));
107:   PetscCall(DMGetLabel(dm, "marker", &label));
108:   if (user->strong) {
109:     PetscCall(PetscDSSetResidual(ds, 0, f0_strong_u, NULL));
110:     PetscCall(PetscDSSetExactSolution(ds, 0, quadratic_u, user));
111:     PetscCall(DMAddBoundary(dm, DM_BC_ESSENTIAL, "wall", label, 1, &id, 0, 0, NULL, (void (*)(void))quadratic_u, NULL, user, NULL));
112:   } else {
113:     PetscCall(PetscDSSetResidual(ds, 0, f0_trig_u, f1_u));
114:     PetscCall(PetscDSSetJacobian(ds, 0, 0, NULL, NULL, NULL, g3_uu));
115:     PetscCall(PetscDSSetExactSolution(ds, 0, trig_u, user));
116:     PetscCall(DMAddBoundary(dm, DM_BC_ESSENTIAL, "wall", label, 1, &id, 0, 0, NULL, (void (*)(void))trig_u, NULL, user, NULL));
117:   }
118:   PetscFunctionReturn(PETSC_SUCCESS);
119: }
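
/* Create a PetscFE for the single scalar field from options using the prefix "<name>_"
   (e.g. -potential_petscspace_degree sets the polynomial degree), attach it as field 0, build the
   PetscDS, and copy the discretization to every coarse DM so multigrid hierarchies see the same problem */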

121: static PetscErrorCode SetupDiscretization(DM dm, const char name[], PetscErrorCode (*setup)(DM, AppCtx *), AppCtx *user)
122: {
123:   DM             cdm = dm;
124:   PetscFE        fe;
125:   DMPolytopeType ct;
126:   PetscBool      simplex;
127:   PetscInt       dim, cStart;
128:   char           prefix[PETSC_MAX_PATH_LEN];

130:   PetscFunctionBeginUser;
131:   PetscCall(DMGetDimension(dm, &dim));
132:   PetscCall(DMPlexGetHeightStratum(dm, 0, &cStart, NULL));
133:   PetscCall(DMPlexGetCellType(dm, cStart, &ct));
 134:   simplex = DMPolytopeTypeGetNumVertices(ct) == DMPolytopeTypeGetDim(ct) + 1 ? PETSC_TRUE : PETSC_FALSE; // a simplex has dim+1 vertices; PETSC_FALSE for the tensor-cell meshes used in the benchmark tests
135:   /* Create finite element */
136:   PetscCall(PetscSNPrintf(prefix, PETSC_MAX_PATH_LEN, "%s_", name));
137:   PetscCall(PetscFECreateDefault(PETSC_COMM_SELF, dim, 1, simplex, name ? prefix : NULL, -1, &fe));
138:   PetscCall(PetscObjectSetName((PetscObject)fe, name));
139:   /* Set discretization and boundary conditions for each mesh */
140:   PetscCall(DMSetField(dm, 0, NULL, (PetscObject)fe));
141:   PetscCall(DMCreateDS(dm));
142:   PetscCall((*setup)(dm, user));
143:   while (cdm) {
144:     PetscCall(DMCopyDisc(dm, cdm));
145:     /* TODO: Check whether the boundary of coarse meshes is marked */
146:     PetscCall(DMGetCoarseDM(cdm, &cdm));
147:   }
148:   PetscCall(PetscFEDestroy(&fe));
149:   PetscFunctionReturn(PETSC_SUCCESS);
150: }
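
/* Driver: process options, create the SNES and DMPLEX mesh, set up the "potential" discretization,
   solve the problem once (timed with PetscTime), optionally repeat the solve -benchmark_it times in a
   separate log stage, then view the solution with -potential_view */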

152: int main(int argc, char **argv)
153: {
154:   DM             dm;   /* Problem specification */
155:   SNES           snes; /* Nonlinear solver */
 156:   Vec            u;    /* Solution */
157:   AppCtx         user; /* User-defined work context */
158:   PetscLogDouble time;
159:   Mat            Amat;

161:   PetscFunctionBeginUser;
162:   PetscCall(PetscInitialize(&argc, &argv, NULL, help));
163:   PetscCall(ProcessOptions(PETSC_COMM_WORLD, &user));
 164:   /* Set up the system: solver, mesh, and discretization */
165:   PetscCall(SNESCreate(PETSC_COMM_WORLD, &snes));
166:   PetscCall(CreateMesh(PETSC_COMM_WORLD, &user, &dm));
167:   PetscCall(SNESSetDM(snes, dm));
168:   PetscCall(SetupDiscretization(dm, "potential", SetupPrimalProblem, &user));
169:   PetscCall(DMCreateGlobalVector(dm, &u));
170:   {
171:     PetscInt N;
172:     PetscCall(VecGetSize(u, &N));
173:     PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Number equations N = %" PetscInt_FMT "\n", N));
174:   }
175:   PetscCall(SNESSetFromOptions(snes));
176:   PetscCall(PetscObjectSetName((PetscObject)u, "potential"));
177:   PetscCall(DMPlexSetSNESLocalFEM(dm, PETSC_FALSE, &user));
178:   PetscCall(DMSNESCheckFromOptions(snes, u));
179:   PetscCall(PetscTime(&time));
180:   PetscCall(SNESSetUp(snes));
181: #if defined(PETSC_HAVE_AMGX)
182:   KSP                   ksp;
183:   PC                    pc;
184:   PetscBool             flg;
185:   AMGX_resources_handle rsc;
186:   PetscCall(SNESGetKSP(snes, &ksp));
187:   PetscCall(KSPGetPC(ksp, &pc));
188:   PetscCall(PetscObjectTypeCompare((PetscObject)pc, PCAMGX, &flg));
189:   if (flg) {
190:     PetscCall(PCAmgXGetResources(pc, (void *)&rsc));
191:     /* do ... with resource */
192:   }
193: #endif
194:   PetscCall(SNESGetJacobian(snes, &Amat, NULL, NULL, NULL));
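  /* Declare the operator symmetric positive definite; MAT_SPD_ETERNAL asserts that the property survives
     later changes to the matrix values, so SPD-specific solver defaults remain valid */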
195:   PetscCall(MatSetOption(Amat, MAT_SPD, PETSC_TRUE));
196:   PetscCall(MatSetOption(Amat, MAT_SPD_ETERNAL, PETSC_TRUE));
197:   PetscCall(SNESSolve(snes, NULL, u));
198:   PetscCall(PetscTimeSubtract(&time));
199:   /* Benchmark system */
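  /* Repeat the solve nit times inside a dedicated "Solve only" log stage so -log_view reports the solve
     cost separately from setup; the solution is zeroed before each repetition so every solve does the
     same work */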
200:   if (user.nit) {
201:     Vec           b;
202:     PetscInt      i;
203:     PetscLogStage kspstage;
204:     PetscCall(PetscLogStageRegister("Solve only", &kspstage));
205:     PetscCall(PetscLogStagePush(kspstage));
206:     PetscCall(SNESGetSolution(snes, &u));
207:     PetscCall(SNESGetFunction(snes, &b, NULL, NULL));
208:     for (i = 0; i < user.nit; i++) {
209:       PetscCall(VecZeroEntries(u));
210:       PetscCall(SNESSolve(snes, NULL, u));
211:     }
212:     PetscCall(PetscLogStagePop());
213:   }
214:   PetscCall(SNESGetSolution(snes, &u));
215:   PetscCall(VecViewFromOptions(u, NULL, "-potential_view"));
216:   /* Cleanup */
217:   PetscCall(VecDestroy(&u));
218:   PetscCall(SNESDestroy(&snes));
219:   PetscCall(DMDestroy(&dm));
220:   PetscCall(PetscFinalize());
221:   return 0;
222: }

224: /*TEST

226:   test:
227:     suffix: strong
228:     requires: triangle
229:     args: -dm_plex_dim 2 -dm_refine 1 -benchmark_it 0 -dmsnes_check -potential_petscspace_degree 2 -dm_ds_jet_degree 2 -strong -pc_type jacobi

231:   testset:
232:     nsize: 4
233:     output_file: output/ex13_comparison.out
 234:     args: -dm_plex_dim 3 -benchmark_it 2 -dm_plex_simplex 0 -dm_plex_box_faces 2,2,1 -dm_refine 2 \
            -petscpartitioner_simple_node_grid 1,1,1 -petscpartitioner_simple_process_grid 2,2,1 -potential_petscspace_degree 2 \
            -petscpartitioner_type simple -snes_type ksponly -dm_view -ksp_type cg -ksp_rtol 1e-12 -snes_lag_jacobian -2 \
            -dm_plex_box_upper 2,2,1 -dm_plex_box_lower 0,0,0 -pc_type gamg -pc_gamg_process_eq_limit 200 -pc_gamg_coarse_eq_limit 1000 \
            -pc_gamg_esteig_ksp_type cg -mg_levels_ksp_chebyshev_esteig 0,0.2,0,1.05 -pc_gamg_reuse_interpolation true \
            -pc_gamg_aggressive_square_graph true -pc_gamg_threshold 0.04 -pc_gamg_threshold_scale .25 -pc_gamg_aggressive_coarsening 2 \
            -pc_gamg_mis_k_minimum_degree_ordering true -ksp_monitor -ksp_norm_type unpreconditioned
235:     test:
236:       suffix: comparison
237:     test:
238:       suffix: cuda
239:       requires: cuda
240:       args: -dm_mat_type aijcusparse -dm_vec_type cuda
241:     test:
242:       suffix: kokkos
243:       requires: kokkos_kernels
244:       args: -dm_mat_type aijkokkos -dm_vec_type kokkos
245:     test:
246:       suffix: kokkos_sycl
247:       requires: sycl kokkos_kernels
248:       args: -dm_mat_type aijkokkos -dm_vec_type kokkos
249:     test:
250:       suffix: aijmkl_comp
251:       requires: mkl_sparse
252:       args: -dm_mat_type aijmkl

254:   testset:
255:     requires: cuda amgx
256:     filter: grep -v Built | grep -v "AMGX version" | grep -v "CUDA Runtime"
257:     output_file: output/ex13_amgx.out
258:     args: -dm_plex_dim 2 -dm_plex_box_faces 2,2 -dm_refine 2 -petscpartitioner_type simple -potential_petscspace_degree 2 -dm_plex_simplex 0 -ksp_monitor \
259:           -snes_type ksponly -dm_view -ksp_type cg -ksp_norm_type unpreconditioned -ksp_converged_reason -snes_rtol 1.e-4 -pc_type amgx -benchmark_it 1 -pc_amgx_verbose false
260:     nsize: 4
261:     test:
262:       suffix: amgx
263:       args: -dm_mat_type aijcusparse -dm_vec_type cuda
264:     test:
265:       suffix: amgx_cpu
266:       args: -dm_mat_type aij

268: TEST*/
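
/* Example invocation (a sketch, not part of the test harness): assuming the usual PETSc tutorial
   makefile and an MPI launcher named mpiexec, the "comparison" configuration above can be run as

     make ex13
     mpiexec -n 4 ./ex13 -dm_plex_dim 3 -dm_plex_simplex 0 -dm_plex_box_faces 2,2,1 -dm_refine 2 \
       -petscpartitioner_type simple -potential_petscspace_degree 2 -snes_type ksponly \
       -ksp_type cg -ksp_rtol 1e-12 -ksp_monitor -pc_type gamg -benchmark_it 2 -dm_view

   with the remaining GAMG tuning options from the testset added as desired; the GPU variants only add
   the -dm_mat_type and -dm_vec_type choices. */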