Actual source code: bench_spmv.c

  1: static char help[] = "Driver for benchmarking SpMV.";

  3: #include <petscmat.h>
  4: #include "cJSON.h"
  5: #include "mmloader.h"

  7: char *read_file(const char *filename)
  8: {
  9:   FILE  *file       = NULL;
 10:   long   length     = 0;
 11:   char  *content    = NULL;
 12:   size_t read_chars = 0;

 14:   /* open in read binary mode */
 15:   file = fopen(filename, "rb");
 16:   if (file) {
 17:     /* get the length */
 18:     fseek(file, 0, SEEK_END);
 19:     length = ftell(file);
 20:     fseek(file, 0, SEEK_SET);
 21:     /* allocate content buffer */
 22:     content = (char *)malloc((size_t)length + sizeof(""));
 23:     /* read the file into memory */
 24:     read_chars          = fread(content, sizeof(char), (size_t)length, file);
 25:     content[read_chars] = '\0';
 26:     fclose(file);
 27:   }
 28:   return content;
 29: }

 31: void write_file(const char *filename, const char *content)
 32: {
 33:   FILE *file = NULL;
 34:   file       = fopen(filename, "w");
 35:   if (file) { fputs(content, file); }
 36:   fclose(file);
 37: }

 39: int ParseJSON(const char *const inputjsonfile, char ***outputfilenames, char ***outputgroupnames, char ***outputmatnames, int *nmat)
 40: {
 41:   char        *content     = read_file(inputjsonfile);
 42:   cJSON       *matrix_json = NULL;
 43:   const cJSON *problem = NULL, *elem = NULL;
 44:   const cJSON *item = NULL;
 45:   char       **filenames, **groupnames, **matnames;
 46:   int          i, n;
 47:   if (!content) return 0;
 48:   matrix_json = cJSON_Parse(content);
 49:   if (!matrix_json) return 0;
 50:   n          = cJSON_GetArraySize(matrix_json);
 51:   *nmat      = n;
 52:   filenames  = (char **)malloc(sizeof(char *) * n);
 53:   groupnames = (char **)malloc(sizeof(char *) * n);
 54:   matnames   = (char **)malloc(sizeof(char *) * n);
 55:   for (i = 0; i < n; i++) {
 56:     elem         = cJSON_GetArrayItem(matrix_json, i);
 57:     item         = cJSON_GetObjectItemCaseSensitive(elem, "filename");
 58:     filenames[i] = (char *)malloc(sizeof(char) * (strlen(item->valuestring) + 1));
 59:     strcpy(filenames[i], item->valuestring);
 60:     problem       = cJSON_GetObjectItemCaseSensitive(elem, "problem");
 61:     item          = cJSON_GetObjectItemCaseSensitive(problem, "group");
 62:     groupnames[i] = (char *)malloc(sizeof(char) * strlen(item->valuestring) + 1);
 63:     strcpy(groupnames[i], item->valuestring);
 64:     item        = cJSON_GetObjectItemCaseSensitive(problem, "name");
 65:     matnames[i] = (char *)malloc(sizeof(char) * strlen(item->valuestring) + 1);
 66:     strcpy(matnames[i], item->valuestring);
 67:   }
 68:   cJSON_Delete(matrix_json);
 69:   free(content);
 70:   *outputfilenames  = filenames;
 71:   *outputgroupnames = groupnames;
 72:   *outputmatnames   = matnames;
 73:   return 0;
 74: }

 76: int UpdateJSON(const char *const inputjsonfile, PetscReal *spmv_times, PetscReal starting_spmv_time, const char *const matformat, PetscBool use_gpu, PetscInt repetitions)
 77: {
 78:   char  *content     = read_file(inputjsonfile);
 79:   cJSON *matrix_json = NULL;
 80:   cJSON *elem        = NULL;
 81:   int    i, n;
 82:   if (!content) return 0;
 83:   matrix_json = cJSON_Parse(content);
 84:   if (!matrix_json) return 0;
 85:   n = cJSON_GetArraySize(matrix_json);
 86:   for (i = 0; i < n; i++) {
 87:     cJSON *spmv   = NULL;
 88:     cJSON *format = NULL;
 89:     elem          = cJSON_GetArrayItem(matrix_json, i);
 90:     spmv          = cJSON_GetObjectItem(elem, "spmv");
 91:     if (spmv) {
 92:       format = cJSON_GetObjectItem(spmv, matformat);
 93:       if (format) {
 94:         cJSON_SetNumberValue(cJSON_GetObjectItem(format, "time"), (spmv_times[i] - ((i == 0) ? starting_spmv_time : spmv_times[i - 1])) / repetitions);
 95:         cJSON_SetIntValue(cJSON_GetObjectItem(format, "repetitions"), repetitions);
 96:       } else {
 97:         format = cJSON_CreateObject();
 98:         cJSON_AddItemToObject(spmv, matformat, format);
 99:         cJSON_AddNumberToObject(format, "time", (spmv_times[i] - ((i == 0) ? starting_spmv_time : spmv_times[i - 1])) / repetitions);
100:         cJSON_AddNumberToObject(format, "repetitions", repetitions);
101:       }
102:     } else {
103:       spmv = cJSON_CreateObject();
104:       cJSON_AddItemToObject(elem, "spmv", spmv);
105:       format = cJSON_CreateObject();
106:       cJSON_AddItemToObject(spmv, matformat, format);
107:       cJSON_AddNumberToObject(format, "time", (spmv_times[i] - ((i == 0) ? starting_spmv_time : spmv_times[i - 1])) / repetitions);
108:       cJSON_AddNumberToObject(format, "repetitions", repetitions);
109:     }
110:   }
111:   free(content);
112:   content = cJSON_Print(matrix_json);
113:   write_file(inputjsonfile, content);
114:   cJSON_Delete(matrix_json);
115:   free(content);
116:   return 0;
117: }

/*
  For GPU formats, we keep two copies of the matrix on the CPU and one copy on the GPU.
  The extra CPU copy allows us to destroy the GPU matrix and recreate it efficiently
  in each repetition. As a result, each MatMult call is fresh: we capture the
  first-time overhead (e.g. of cuSPARSE SpMV) and avoid cache effects between
  consecutive calls.
*/
126: PetscErrorCode TimedSpMV(Mat A, Vec b, PetscReal *time, const char *petscmatformat, PetscBool use_gpu, PetscInt repetitions)
127: {
128:   Mat            A2 = NULL;
129:   PetscInt       i;
130:   Vec            u;
131:   PetscLogDouble vstart = 0, vend = 0;
132:   PetscBool      isaijcusparse, isaijhipsparse, isaijkokkos, issellcuda, issellhip;

134:   PetscFunctionBeginUser;
135:   PetscCall(PetscStrcmp(petscmatformat, MATAIJCUSPARSE, &isaijcusparse));
136:   PetscCall(PetscStrcmp(petscmatformat, MATAIJHIPSPARSE, &isaijhipsparse));
137:   PetscCall(PetscStrcmp(petscmatformat, MATAIJKOKKOS, &isaijkokkos));
138:   PetscCall(PetscStrcmp(petscmatformat, MATSELLCUDA, &issellcuda));
139:   PetscCall(PetscStrcmp(petscmatformat, MATSELLHIP, &issellhip));
140:   if (isaijcusparse || issellcuda) PetscCall(VecSetType(b, VECCUDA));
141:   if (isaijkokkos) PetscCall(VecSetType(b, VECKOKKOS));
142:   if (isaijhipsparse || issellhip) PetscCall(VecSetType(b, VECHIP));
143:   PetscCall(VecDuplicate(b, &u));
144:   if (time) *time = 0.0;
145:   for (i = 0; i < repetitions; i++) {
146:     if (use_gpu) {
147:       PetscCall(MatDestroy(&A2));
148:       PetscCall(MatDuplicate(A, MAT_COPY_VALUES, &A2));
149:       PetscCall(MatSetType(A2, petscmatformat));
150:       PetscCall(MatSetFromOptions(A2)); // This allows to change parameters such as slice height in SpMV kernels for SELL
151:     } else A2 = A;
152:     /* Timing MatMult */
153:     if (time) PetscCall(PetscTime(&vstart));

155:     PetscCall(MatMult(A2, b, u));

157:     if (time) {
158:       PetscCall(PetscTime(&vend));
159:       *time += (PetscReal)(vend - vstart);
160:     }
161:   }
162:   PetscCall(VecDestroy(&u));
163:   if (repetitions > 0 && use_gpu) PetscCall(MatDestroy(&A2));
164:   PetscFunctionReturn(PETSC_SUCCESS);
165: }

167: PetscErrorCode WarmUpDevice(Mat A, Vec b, const char *petscmatformat)
168: {
169:   Mat           A2 = NULL;
170:   PetscLogEvent event;
171:   Vec           u;
172:   PetscBool     isaijcusparse, isaijhipsparse, isaijkokkos, issellcuda, issellhip;

174:   PetscFunctionBeginUser;
175:   PetscCall(PetscStrcmp(petscmatformat, MATAIJCUSPARSE, &isaijcusparse));
176:   PetscCall(PetscStrcmp(petscmatformat, MATAIJHIPSPARSE, &isaijhipsparse));
177:   PetscCall(PetscStrcmp(petscmatformat, MATAIJKOKKOS, &isaijkokkos));
178:   PetscCall(PetscStrcmp(petscmatformat, MATSELLCUDA, &issellcuda));
179:   PetscCall(PetscStrcmp(petscmatformat, MATSELLHIP, &issellhip));
180:   if (!isaijcusparse && !isaijkokkos && !isaijhipsparse && !issellcuda && !issellhip) PetscFunctionReturn(PETSC_SUCCESS);
181:   if (isaijcusparse || issellcuda) PetscCall(VecSetType(b, VECCUDA));
182:   if (isaijkokkos) PetscCall(VecSetType(b, VECKOKKOS));
183:   if (isaijhipsparse || issellhip) PetscCall(VecSetType(b, VECHIP));
184:   PetscCall(VecDuplicate(b, &u));
185:   PetscCall(MatDuplicate(A, MAT_COPY_VALUES, &A2));
186:   PetscCall(MatSetType(A2, petscmatformat));
187:   PetscCall(PetscLogEventGetId("MatMult", &event));
188:   PetscCall(PetscLogEventDeactivatePush(event));
189:   PetscCall(MatMult(A2, b, u));
190:   PetscCall(PetscLogEventDeactivatePop(event));
191:   PetscCall(VecDestroy(&u));
192:   PetscCall(MatDestroy(&A2));
193:   PetscFunctionReturn(PETSC_SUCCESS);
194: }

196: PetscErrorCode PetscLogSpMVTime(PetscReal *gputime, PetscReal *cputime, PetscReal *gpuflops, const char *petscmatformat)
197: {
198:   PetscLogEvent      event;
199:   PetscEventPerfInfo eventInfo;
200:   // PetscReal          gpuflopRate;

202:   // if (matformat) {
203:   //   PetscCall(PetscLogEventGetId("MatCUDACopyTo", &event));
204:   // } else {
205:   //  PetscCall(PetscLogEventGetId("MatCUSPARSCopyTo", &event));
206:   // }
207:   // PetscCall(PetscLogEventGetPerfInfo(PETSC_DETERMINE, event, &eventInfo));
208:   // PetscCall(PetscPrintf(PETSC_COMM_WORLD, "%.4e ", eventInfo.time));

210:   PetscFunctionBeginUser;
211:   PetscCall(PetscLogEventGetId("MatMult", &event));
212:   PetscCall(PetscLogEventGetPerfInfo(PETSC_DETERMINE, event, &eventInfo));
213:   // gpuflopRate = eventInfo.GpuFlops/eventInfo.GpuTime;
214:   // PetscCall(PetscPrintf(PETSC_COMM_WORLD, "%.2f %.4e %.4e\n", gpuflopRate/1.e6, eventInfo.GpuTime, eventInfo.time));
215:   if (cputime) *cputime = eventInfo.time;
216: #if defined(PETSC_HAVE_DEVICE)
217:   if (gputime) *gputime = eventInfo.GpuTime;
218:   if (gpuflops) *gpuflops = eventInfo.GpuFlops / 1.e6;
219: #endif
220:   PetscFunctionReturn(PETSC_SUCCESS);
221: }

223: PetscErrorCode MapToPetscMatType(const char *matformat, PetscBool use_gpu, char **petscmatformat)
224: {
225:   PetscBool iscsr, issell, iscsrkokkos;

227:   PetscFunctionBeginUser;
228:   PetscCall(PetscStrcmp(matformat, "csr", &iscsr));
229:   if (iscsr) {
230:     if (use_gpu) {
231: #if defined(PETSC_HAVE_CUDA)
232:       PetscCall(PetscStrallocpy(MATAIJCUSPARSE, petscmatformat));
233: #endif
234: #if defined(PETSC_HAVE_HIP)
235:       PetscCall(PetscStrallocpy(MATAIJHIPSPARSE, petscmatformat));
236: #endif
237:     } else PetscCall(PetscStrallocpy(MATAIJ, petscmatformat));
238:   } else {
239:     PetscCall(PetscStrcmp(matformat, "sell", &issell));
240:     if (issell) {
241:       if (use_gpu) {
242: #if defined(PETSC_HAVE_CUDA)
243:         PetscCall(PetscStrallocpy(MATSELLCUDA, petscmatformat));
244: #endif
245: #if defined(PETSC_HAVE_HIP)
246:         PetscCall(PetscStrallocpy(MATSELLHIP, petscmatformat));
247: #endif
248:       } else PetscCall(PetscStrallocpy(MATSELL, petscmatformat));
249:     } else {
250:       PetscCall(PetscStrcmp(matformat, "csrkokkos", &iscsrkokkos));
251:       if (iscsrkokkos) PetscCall(PetscStrallocpy(MATAIJKOKKOS, petscmatformat));
252:     }
253:   }
254:   PetscFunctionReturn(PETSC_SUCCESS);
255: }

257: int main(int argc, char **args)
258: {
259:   PetscInt    nmat = 1, nformats = 5, i, j, repetitions = 1;
260:   Mat         A;
261:   Vec         b;
262:   char        jfilename[PETSC_MAX_PATH_LEN];
263:   char        filename[PETSC_MAX_PATH_LEN], bfilename[PETSC_MAX_PATH_LEN];
264:   char        groupname[PETSC_MAX_PATH_LEN], matname[PETSC_MAX_PATH_LEN];
265:   char       *matformats[5];
266:   char      **filenames = NULL, **groupnames = NULL, **matnames = NULL;
267:   char        ordering[256] = MATORDERINGRCM;
268:   PetscBool   bflg, flg1, flg2, flg3, use_gpu = PETSC_FALSE, permute = PETSC_FALSE;
269:   IS          rowperm = NULL, colperm = NULL;
270:   PetscViewer fd;
271:   PetscReal   starting_spmv_time = 0, *spmv_times;

273:   PetscCall(PetscOptionsInsertString(NULL, "-log_view_gpu_time -log_view :/dev/null"));
274:   PetscCall(PetscInitialize(&argc, &args, NULL, help));
275:   PetscCall(PetscOptionsGetStringArray(NULL, NULL, "-formats", matformats, &nformats, &flg1));
276:   if (!flg1) {
277:     nformats = 1;
278:     PetscCall(PetscStrallocpy("csr", &matformats[0]));
279:   }
280:   PetscCall(PetscOptionsGetBool(NULL, NULL, "-use_gpu", &use_gpu, NULL));
281:   PetscCall(PetscOptionsGetInt(NULL, NULL, "-repetitions", &repetitions, NULL));
282:   /* Read matrix and RHS */
283:   PetscCall(PetscOptionsGetString(NULL, NULL, "-groupname", groupname, PETSC_MAX_PATH_LEN, NULL));
284:   PetscCall(PetscOptionsGetString(NULL, NULL, "-matname", matname, PETSC_MAX_PATH_LEN, NULL));
285:   PetscCall(PetscOptionsGetString(NULL, NULL, "-ABIN", filename, PETSC_MAX_PATH_LEN, &flg1));
286:   PetscCall(PetscOptionsGetString(NULL, NULL, "-AMTX", filename, PETSC_MAX_PATH_LEN, &flg2));
287:   PetscCall(PetscOptionsGetString(NULL, NULL, "-AJSON", jfilename, PETSC_MAX_PATH_LEN, &flg3));
288:   PetscOptionsBegin(PETSC_COMM_WORLD, NULL, "Extra options", "");
289:   PetscCall(PetscOptionsFList("-permute", "Permute matrix and vector to solving in new ordering", "", MatOrderingList, ordering, ordering, sizeof(ordering), &permute));
290:   PetscOptionsEnd();
291: #if !defined(PETSC_HAVE_DEVICE)
292:   PetscCheck(!use_gpu, PETSC_COMM_WORLD, PETSC_ERR_USER_INPUT, "To use the option -use_gpu 1, PETSc must be configured with GPU support");
293: #endif
294:   PetscCheck(flg1 || flg2 || flg3, PETSC_COMM_WORLD, PETSC_ERR_USER_INPUT, "Must indicate an input file with the -ABIN or -AMTX or -AJSON depending on the file format");
295:   if (flg3) {
296:     ParseJSON(jfilename, &filenames, &groupnames, &matnames, &nmat);
297:     PetscCall(PetscCalloc1(nmat, &spmv_times));
298:   } else if (flg2) {
299:     PetscCall(MatCreateFromMTX(&A, filename, PETSC_TRUE));
300:   } else if (flg1) {
301:     PetscCall(PetscViewerBinaryOpen(PETSC_COMM_WORLD, filename, FILE_MODE_READ, &fd));
302:     PetscCall(MatCreate(PETSC_COMM_WORLD, &A));
303:     PetscCall(MatSetType(A, MATAIJ));
304:     PetscCall(MatSetFromOptions(A));
305:     PetscCall(MatLoad(A, fd));
306:     PetscCall(PetscViewerDestroy(&fd));
307:   }
308:   if (permute) {
309:     Mat Aperm;
310:     PetscCall(MatGetOrdering(A, ordering, &rowperm, &colperm));
311:     PetscCall(MatPermute(A, rowperm, colperm, &Aperm));
312:     PetscCall(MatDestroy(&A));
313:     A = Aperm; /* Replace original operator with permuted version */
314:   }
315:   /* Let the vec object trigger the first CUDA call, which takes a relatively long time to init CUDA */
316:   PetscCall(PetscOptionsGetString(NULL, NULL, "-b", bfilename, PETSC_MAX_PATH_LEN, &bflg));
317:   if (bflg) {
318:     PetscViewer fb;
319:     PetscCall(VecCreate(PETSC_COMM_WORLD, &b));
320:     PetscCall(VecSetFromOptions(b));
321:     PetscCall(PetscViewerBinaryOpen(PETSC_COMM_WORLD, bfilename, FILE_MODE_READ, &fb));
322:     PetscCall(VecLoad(b, fb));
323:     PetscCall(PetscViewerDestroy(&fb));
324:   }

326:   for (j = 0; j < nformats; j++) {
327:     char *petscmatformat = NULL;
328:     PetscCall(MapToPetscMatType(matformats[j], use_gpu, &petscmatformat));
329:     PetscCheck(petscmatformat, PETSC_COMM_WORLD, PETSC_ERR_USER_INPUT, "Invalid mat format %s, supported options include csr and sell.", matformats[j]);
330:     if (flg3) { // mat names specified in a JSON file
331:       for (i = 0; i < nmat; i++) {
332:         PetscCall(MatCreateFromMTX(&A, filenames[i], PETSC_TRUE));
333:         if (!bflg) {
334:           PetscCall(MatCreateVecs(A, &b, NULL));
335:           PetscCall(VecSet(b, 1.0));
336:         }
337:         if (use_gpu) PetscCall(WarmUpDevice(A, b, petscmatformat));
338:         PetscCall(TimedSpMV(A, b, NULL, petscmatformat, use_gpu, repetitions));
339:         if (use_gpu) PetscCall(PetscLogSpMVTime(&spmv_times[i], NULL, NULL, petscmatformat));
340:         else PetscCall(PetscLogSpMVTime(NULL, &spmv_times[i], NULL, petscmatformat));
341:         PetscCall(MatDestroy(&A));
342:         if (!bflg) PetscCall(VecDestroy(&b));
343:       }
344:       UpdateJSON(jfilename, spmv_times, starting_spmv_time, matformats[j], use_gpu, repetitions);
345:       starting_spmv_time = spmv_times[nmat - 1];
346:     } else {
347:       PetscReal spmv_time;
348:       if (!bflg) {
349:         PetscCall(MatCreateVecs(A, &b, NULL));
350:         PetscCall(VecSet(b, 1.0));
351:       }
352:       if (use_gpu) PetscCall(WarmUpDevice(A, b, petscmatformat));
353:       PetscCall(TimedSpMV(A, b, &spmv_time, petscmatformat, use_gpu, repetitions));
354:       if (!bflg) PetscCall(VecDestroy(&b));
355:     }
356:     PetscCall(PetscFree(petscmatformat));
357:   }
358:   if (flg3) {
359:     for (i = 0; i < nmat; i++) {
360:       free(filenames[i]);
361:       free(groupnames[i]);
362:       free(matnames[i]);
363:     }
364:     free(filenames);
365:     free(groupnames);
366:     free(matnames);
367:     PetscCall(PetscFree(spmv_times));
368:   }
369:   for (j = 0; j < nformats; j++) PetscCall(PetscFree(matformats[j]));
370:   if (flg1 || flg2) PetscCall(MatDestroy(&A));
371:   if (bflg) PetscCall(VecDestroy(&b));
372:   PetscCall(ISDestroy(&rowperm));
373:   PetscCall(ISDestroy(&colperm));
374:   PetscCall(PetscFinalize());
375:   return 0;
376: }
377: /*TEST

379:    build:
380:       requires: !complex double !windows_compilers !defined(PETSC_USE_64BIT_INDICES)
381:       depends: mmloader.c mmio.c cJSON.c

383:    test:
384:       suffix: 1
385:       args: -AMTX ${wPETSC_DIR}/share/petsc/datafiles/matrices/amesos2_test_mat0.mtx

387:    test:
388:       suffix: 2
389:       args:-AMTX ${wPETSC_DIR}/share/petsc/datafiles/matrices/amesos2_test_mat0.mtx -use_gpu
390:       output_file: output/bench_spmv_1.out
391:       requires: cuda

393:    test:
394:       suffix: 3
395:       args:-AMTX ${wPETSC_DIR}/share/petsc/datafiles/matrices/amesos2_test_mat0.mtx -use_gpu
396:       output_file: output/bench_spmv_1.out
397:       requires: hip

399: TEST*/