Actual source code: bench_spmv.c
/* Help string handed to PetscInitialize() in main(). */
static char help[] = "Driver for benchmarking SpMV.";
3: #include <petscmat.h>
4: #include "cJSON.h"
5: #include "mmloader.h"
/*
  read_file - Load a whole file into a newly allocated, NUL-terminated buffer.

  Input:
    filename - path of the file; it is opened in binary read mode

  Returns the buffer, which the caller must release with free(), or NULL when the
  name is NULL, the file cannot be opened, its length cannot be determined, or the
  buffer cannot be allocated.
*/
char *read_file(const char *filename)
{
  FILE  *file;
  long   length;
  char  *content = NULL;
  size_t read_chars;

  if (!filename) return NULL;
  file = fopen(filename, "rb");
  if (!file) return NULL;

  /* The length is obtained by seeking to the end; ftell() returns -1 on failure */
  if (fseek(file, 0, SEEK_END) != 0) goto done;
  length = ftell(file);
  if (length < 0) goto done;
  if (fseek(file, 0, SEEK_SET) != 0) goto done;

  /* One extra byte for the terminating NUL */
  content = (char *)malloc((size_t)length + 1);
  if (!content) goto done;

  /* A short read is tolerated: the string ends after the bytes actually read */
  read_chars          = fread(content, sizeof(char), (size_t)length, file);
  content[read_chars] = '\0';

done:
  fclose(file);
  return content;
}
/*
  write_file - Replace the contents of a file with a NUL-terminated string.

  Input:
    filename - path of the file; it is opened in text write mode, truncating it
    content  - string to write

  Nothing is written when either argument is NULL. If the file cannot be opened or
  written, a message is printed to stderr; the stream is closed only when it was
  actually opened.
*/
void write_file(const char *filename, const char *content)
{
  FILE *file;

  if (!filename || !content) return;
  file = fopen(filename, "w");
  if (!file) {
    fprintf(stderr, "write_file: cannot open %s for writing\n", filename);
    return;
  }
  if (fputs(content, file) == EOF) fprintf(stderr, "write_file: error while writing %s\n", filename);
  /* fclose() flushes buffered data, so a failure here also means lost output */
  if (fclose(file) != 0) fprintf(stderr, "write_file: error while closing %s\n", filename);
}
39: int ParseJSON(const char *const inputjsonfile, char ***outputfilenames, char ***outputgroupnames, char ***outputmatnames, int *nmat)
40: {
41: char *content = read_file(inputjsonfile);
42: cJSON *matrix_json = NULL;
43: const cJSON *problem = NULL, *elem = NULL;
44: const cJSON *item = NULL;
45: char **filenames, **groupnames, **matnames;
46: int i, n;
47: if (!content) return 0;
48: matrix_json = cJSON_Parse(content);
49: if (!matrix_json) return 0;
50: n = cJSON_GetArraySize(matrix_json);
51: *nmat = n;
52: filenames = (char **)malloc(sizeof(char *) * n);
53: groupnames = (char **)malloc(sizeof(char *) * n);
54: matnames = (char **)malloc(sizeof(char *) * n);
55: for (i = 0; i < n; i++) {
56: elem = cJSON_GetArrayItem(matrix_json, i);
57: item = cJSON_GetObjectItemCaseSensitive(elem, "filename");
58: filenames[i] = (char *)malloc(sizeof(char) * (strlen(item->valuestring) + 1));
59: strcpy(filenames[i], item->valuestring);
60: problem = cJSON_GetObjectItemCaseSensitive(elem, "problem");
61: item = cJSON_GetObjectItemCaseSensitive(problem, "group");
62: groupnames[i] = (char *)malloc(sizeof(char) * strlen(item->valuestring) + 1);
63: strcpy(groupnames[i], item->valuestring);
64: item = cJSON_GetObjectItemCaseSensitive(problem, "name");
65: matnames[i] = (char *)malloc(sizeof(char) * strlen(item->valuestring) + 1);
66: strcpy(matnames[i], item->valuestring);
67: }
68: cJSON_Delete(matrix_json);
69: free(content);
70: *outputfilenames = filenames;
71: *outputgroupnames = groupnames;
72: *outputmatnames = matnames;
73: return 0;
74: }
76: int UpdateJSON(const char *const inputjsonfile, PetscReal *spmv_times, PetscReal starting_spmv_time, const char *const matformat, PetscBool use_gpu, PetscInt repetitions)
77: {
78: char *content = read_file(inputjsonfile);
79: cJSON *matrix_json = NULL;
80: cJSON *elem = NULL;
81: int i, n;
82: if (!content) return 0;
83: matrix_json = cJSON_Parse(content);
84: if (!matrix_json) return 0;
85: n = cJSON_GetArraySize(matrix_json);
86: for (i = 0; i < n; i++) {
87: cJSON *spmv = NULL;
88: cJSON *format = NULL;
89: elem = cJSON_GetArrayItem(matrix_json, i);
90: spmv = cJSON_GetObjectItem(elem, "spmv");
91: if (spmv) {
92: format = cJSON_GetObjectItem(spmv, matformat);
93: if (format) {
94: cJSON_SetNumberValue(cJSON_GetObjectItem(format, "time"), (spmv_times[i] - ((i == 0) ? starting_spmv_time : spmv_times[i - 1])) / repetitions);
95: cJSON_SetIntValue(cJSON_GetObjectItem(format, "repetitions"), repetitions);
96: } else {
97: format = cJSON_CreateObject();
98: cJSON_AddItemToObject(spmv, matformat, format);
99: cJSON_AddNumberToObject(format, "time", (spmv_times[i] - ((i == 0) ? starting_spmv_time : spmv_times[i - 1])) / repetitions);
100: cJSON_AddNumberToObject(format, "repetitions", repetitions);
101: }
102: } else {
103: spmv = cJSON_CreateObject();
104: cJSON_AddItemToObject(elem, "spmv", spmv);
105: format = cJSON_CreateObject();
106: cJSON_AddItemToObject(spmv, matformat, format);
107: cJSON_AddNumberToObject(format, "time", (spmv_times[i] - ((i == 0) ? starting_spmv_time : spmv_times[i - 1])) / repetitions);
108: cJSON_AddNumberToObject(format, "repetitions", repetitions);
109: }
110: }
111: free(content);
112: content = cJSON_Print(matrix_json);
113: write_file(inputjsonfile, content);
114: cJSON_Delete(matrix_json);
115: free(content);
116: return 0;
117: }
119: /*
120: For GPU formats, we keep two copies of the matrix on CPU and one copy on GPU.
121: The extra CPU copy allows us to destroy the GPU matrix and recreate it efficiently
122: in each repetition. As a result, each MatMult call is fresh, and we can capture
123: the first-time overhead (e.g. of CuSparse SpMV), and avoids the cache effect
124: during consecutive calls.
125: */
126: PetscErrorCode TimedSpMV(Mat A, Vec b, PetscReal *time, const char *petscmatformat, PetscBool use_gpu, PetscInt repetitions)
127: {
128: Mat A2 = NULL;
129: PetscInt i;
130: Vec u;
131: PetscLogDouble vstart = 0, vend = 0;
132: PetscBool isaijcusparse, isaijhipsparse, isaijkokkos, issellcuda, issellhip;
134: PetscFunctionBeginUser;
135: PetscCall(PetscStrcmp(petscmatformat, MATAIJCUSPARSE, &isaijcusparse));
136: PetscCall(PetscStrcmp(petscmatformat, MATAIJHIPSPARSE, &isaijhipsparse));
137: PetscCall(PetscStrcmp(petscmatformat, MATAIJKOKKOS, &isaijkokkos));
138: PetscCall(PetscStrcmp(petscmatformat, MATSELLCUDA, &issellcuda));
139: PetscCall(PetscStrcmp(petscmatformat, MATSELLHIP, &issellhip));
140: if (isaijcusparse || issellcuda) PetscCall(VecSetType(b, VECCUDA));
141: if (isaijkokkos) PetscCall(VecSetType(b, VECKOKKOS));
142: if (isaijhipsparse || issellhip) PetscCall(VecSetType(b, VECHIP));
143: PetscCall(VecDuplicate(b, &u));
144: if (time) *time = 0.0;
145: for (i = 0; i < repetitions; i++) {
146: if (use_gpu) {
147: PetscCall(MatDestroy(&A2));
148: PetscCall(MatDuplicate(A, MAT_COPY_VALUES, &A2));
149: PetscCall(MatSetType(A2, petscmatformat));
150: PetscCall(MatSetFromOptions(A2)); // This allows to change parameters such as slice height in SpMV kernels for SELL
151: } else A2 = A;
152: /* Timing MatMult */
153: if (time) PetscCall(PetscTime(&vstart));
155: PetscCall(MatMult(A2, b, u));
157: if (time) {
158: PetscCall(PetscTime(&vend));
159: *time += (PetscReal)(vend - vstart);
160: }
161: }
162: PetscCall(VecDestroy(&u));
163: if (repetitions > 0 && use_gpu) PetscCall(MatDestroy(&A2));
164: PetscFunctionReturn(PETSC_SUCCESS);
165: }
167: PetscErrorCode WarmUpDevice(Mat A, Vec b, const char *petscmatformat)
168: {
169: Mat A2 = NULL;
170: PetscLogEvent event;
171: Vec u;
172: PetscBool isaijcusparse, isaijhipsparse, isaijkokkos, issellcuda, issellhip;
174: PetscFunctionBeginUser;
175: PetscCall(PetscStrcmp(petscmatformat, MATAIJCUSPARSE, &isaijcusparse));
176: PetscCall(PetscStrcmp(petscmatformat, MATAIJHIPSPARSE, &isaijhipsparse));
177: PetscCall(PetscStrcmp(petscmatformat, MATAIJKOKKOS, &isaijkokkos));
178: PetscCall(PetscStrcmp(petscmatformat, MATSELLCUDA, &issellcuda));
179: PetscCall(PetscStrcmp(petscmatformat, MATSELLHIP, &issellhip));
180: if (!isaijcusparse && !isaijkokkos && !isaijhipsparse && !issellcuda && !issellhip) PetscFunctionReturn(PETSC_SUCCESS);
181: if (isaijcusparse || issellcuda) PetscCall(VecSetType(b, VECCUDA));
182: if (isaijkokkos) PetscCall(VecSetType(b, VECKOKKOS));
183: if (isaijhipsparse || issellhip) PetscCall(VecSetType(b, VECHIP));
184: PetscCall(VecDuplicate(b, &u));
185: PetscCall(MatDuplicate(A, MAT_COPY_VALUES, &A2));
186: PetscCall(MatSetType(A2, petscmatformat));
187: PetscCall(PetscLogEventGetId("MatMult", &event));
188: PetscCall(PetscLogEventDeactivatePush(event));
189: PetscCall(MatMult(A2, b, u));
190: PetscCall(PetscLogEventDeactivatePop(event));
191: PetscCall(VecDestroy(&u));
192: PetscCall(MatDestroy(&A2));
193: PetscFunctionReturn(PETSC_SUCCESS);
194: }
196: PetscErrorCode PetscLogSpMVTime(PetscReal *gputime, PetscReal *cputime, PetscReal *gpuflops, const char *petscmatformat)
197: {
198: PetscLogEvent event;
199: PetscEventPerfInfo eventInfo;
200: // PetscReal gpuflopRate;
202: // if (matformat) {
203: // PetscCall(PetscLogEventGetId("MatCUDACopyTo", &event));
204: // } else {
205: // PetscCall(PetscLogEventGetId("MatCUSPARSCopyTo", &event));
206: // }
207: // PetscCall(PetscLogEventGetPerfInfo(PETSC_DETERMINE, event, &eventInfo));
208: // PetscCall(PetscPrintf(PETSC_COMM_WORLD, "%.4e ", eventInfo.time));
210: PetscFunctionBeginUser;
211: PetscCall(PetscLogEventGetId("MatMult", &event));
212: PetscCall(PetscLogEventGetPerfInfo(PETSC_DETERMINE, event, &eventInfo));
213: // gpuflopRate = eventInfo.GpuFlops/eventInfo.GpuTime;
214: // PetscCall(PetscPrintf(PETSC_COMM_WORLD, "%.2f %.4e %.4e\n", gpuflopRate/1.e6, eventInfo.GpuTime, eventInfo.time));
215: if (cputime) *cputime = eventInfo.time;
216: #if defined(PETSC_HAVE_DEVICE)
217: if (gputime) *gputime = eventInfo.GpuTime;
218: if (gpuflops) *gpuflops = eventInfo.GpuFlops / 1.e6;
219: #endif
220: PetscFunctionReturn(PETSC_SUCCESS);
221: }
223: PetscErrorCode MapToPetscMatType(const char *matformat, PetscBool use_gpu, char **petscmatformat)
224: {
225: PetscBool iscsr, issell, iscsrkokkos;
227: PetscFunctionBeginUser;
228: PetscCall(PetscStrcmp(matformat, "csr", &iscsr));
229: if (iscsr) {
230: if (use_gpu) {
231: #if defined(PETSC_HAVE_CUDA)
232: PetscCall(PetscStrallocpy(MATAIJCUSPARSE, petscmatformat));
233: #endif
234: #if defined(PETSC_HAVE_HIP)
235: PetscCall(PetscStrallocpy(MATAIJHIPSPARSE, petscmatformat));
236: #endif
237: } else PetscCall(PetscStrallocpy(MATAIJ, petscmatformat));
238: } else {
239: PetscCall(PetscStrcmp(matformat, "sell", &issell));
240: if (issell) {
241: if (use_gpu) {
242: #if defined(PETSC_HAVE_CUDA)
243: PetscCall(PetscStrallocpy(MATSELLCUDA, petscmatformat));
244: #endif
245: #if defined(PETSC_HAVE_HIP)
246: PetscCall(PetscStrallocpy(MATSELLHIP, petscmatformat));
247: #endif
248: } else PetscCall(PetscStrallocpy(MATSELL, petscmatformat));
249: } else {
250: PetscCall(PetscStrcmp(matformat, "csrkokkos", &iscsrkokkos));
251: if (iscsrkokkos) PetscCall(PetscStrallocpy(MATAIJKOKKOS, petscmatformat));
252: }
253: }
254: PetscFunctionReturn(PETSC_SUCCESS);
255: }
257: int main(int argc, char **args)
258: {
259: PetscInt nmat = 1, nformats = 5, i, j, repetitions = 1;
260: Mat A;
261: Vec b;
262: char jfilename[PETSC_MAX_PATH_LEN];
263: char filename[PETSC_MAX_PATH_LEN], bfilename[PETSC_MAX_PATH_LEN];
264: char groupname[PETSC_MAX_PATH_LEN], matname[PETSC_MAX_PATH_LEN];
265: char *matformats[5];
266: char **filenames = NULL, **groupnames = NULL, **matnames = NULL;
267: char ordering[256] = MATORDERINGRCM;
268: PetscBool bflg, flg1, flg2, flg3, use_gpu = PETSC_FALSE, permute = PETSC_FALSE;
269: IS rowperm = NULL, colperm = NULL;
270: PetscViewer fd;
271: PetscReal starting_spmv_time = 0, *spmv_times;
273: PetscCall(PetscOptionsInsertString(NULL, "-log_view_gpu_time -log_view :/dev/null"));
274: PetscCall(PetscInitialize(&argc, &args, NULL, help));
275: PetscCall(PetscOptionsGetStringArray(NULL, NULL, "-formats", matformats, &nformats, &flg1));
276: if (!flg1) {
277: nformats = 1;
278: PetscCall(PetscStrallocpy("csr", &matformats[0]));
279: }
280: PetscCall(PetscOptionsGetBool(NULL, NULL, "-use_gpu", &use_gpu, NULL));
281: PetscCall(PetscOptionsGetInt(NULL, NULL, "-repetitions", &repetitions, NULL));
282: /* Read matrix and RHS */
283: PetscCall(PetscOptionsGetString(NULL, NULL, "-groupname", groupname, PETSC_MAX_PATH_LEN, NULL));
284: PetscCall(PetscOptionsGetString(NULL, NULL, "-matname", matname, PETSC_MAX_PATH_LEN, NULL));
285: PetscCall(PetscOptionsGetString(NULL, NULL, "-ABIN", filename, PETSC_MAX_PATH_LEN, &flg1));
286: PetscCall(PetscOptionsGetString(NULL, NULL, "-AMTX", filename, PETSC_MAX_PATH_LEN, &flg2));
287: PetscCall(PetscOptionsGetString(NULL, NULL, "-AJSON", jfilename, PETSC_MAX_PATH_LEN, &flg3));
288: PetscOptionsBegin(PETSC_COMM_WORLD, NULL, "Extra options", "");
289: PetscCall(PetscOptionsFList("-permute", "Permute matrix and vector to solving in new ordering", "", MatOrderingList, ordering, ordering, sizeof(ordering), &permute));
290: PetscOptionsEnd();
291: #if !defined(PETSC_HAVE_DEVICE)
292: PetscCheck(!use_gpu, PETSC_COMM_WORLD, PETSC_ERR_USER_INPUT, "To use the option -use_gpu 1, PETSc must be configured with GPU support");
293: #endif
294: PetscCheck(flg1 || flg2 || flg3, PETSC_COMM_WORLD, PETSC_ERR_USER_INPUT, "Must indicate an input file with the -ABIN or -AMTX or -AJSON depending on the file format");
295: if (flg3) {
296: ParseJSON(jfilename, &filenames, &groupnames, &matnames, &nmat);
297: PetscCall(PetscCalloc1(nmat, &spmv_times));
298: } else if (flg2) {
299: PetscCall(MatCreateFromMTX(&A, filename, PETSC_TRUE));
300: } else if (flg1) {
301: PetscCall(PetscViewerBinaryOpen(PETSC_COMM_WORLD, filename, FILE_MODE_READ, &fd));
302: PetscCall(MatCreate(PETSC_COMM_WORLD, &A));
303: PetscCall(MatSetType(A, MATAIJ));
304: PetscCall(MatSetFromOptions(A));
305: PetscCall(MatLoad(A, fd));
306: PetscCall(PetscViewerDestroy(&fd));
307: }
308: if (permute) {
309: Mat Aperm;
310: PetscCall(MatGetOrdering(A, ordering, &rowperm, &colperm));
311: PetscCall(MatPermute(A, rowperm, colperm, &Aperm));
312: PetscCall(MatDestroy(&A));
313: A = Aperm; /* Replace original operator with permuted version */
314: }
315: /* Let the vec object trigger the first CUDA call, which takes a relatively long time to init CUDA */
316: PetscCall(PetscOptionsGetString(NULL, NULL, "-b", bfilename, PETSC_MAX_PATH_LEN, &bflg));
317: if (bflg) {
318: PetscViewer fb;
319: PetscCall(VecCreate(PETSC_COMM_WORLD, &b));
320: PetscCall(VecSetFromOptions(b));
321: PetscCall(PetscViewerBinaryOpen(PETSC_COMM_WORLD, bfilename, FILE_MODE_READ, &fb));
322: PetscCall(VecLoad(b, fb));
323: PetscCall(PetscViewerDestroy(&fb));
324: }
326: for (j = 0; j < nformats; j++) {
327: char *petscmatformat = NULL;
328: PetscCall(MapToPetscMatType(matformats[j], use_gpu, &petscmatformat));
329: PetscCheck(petscmatformat, PETSC_COMM_WORLD, PETSC_ERR_USER_INPUT, "Invalid mat format %s, supported options include csr and sell.", matformats[j]);
330: if (flg3) { // mat names specified in a JSON file
331: for (i = 0; i < nmat; i++) {
332: PetscCall(MatCreateFromMTX(&A, filenames[i], PETSC_TRUE));
333: if (!bflg) {
334: PetscCall(MatCreateVecs(A, &b, NULL));
335: PetscCall(VecSet(b, 1.0));
336: }
337: if (use_gpu) PetscCall(WarmUpDevice(A, b, petscmatformat));
338: PetscCall(TimedSpMV(A, b, NULL, petscmatformat, use_gpu, repetitions));
339: if (use_gpu) PetscCall(PetscLogSpMVTime(&spmv_times[i], NULL, NULL, petscmatformat));
340: else PetscCall(PetscLogSpMVTime(NULL, &spmv_times[i], NULL, petscmatformat));
341: PetscCall(MatDestroy(&A));
342: if (!bflg) PetscCall(VecDestroy(&b));
343: }
344: UpdateJSON(jfilename, spmv_times, starting_spmv_time, matformats[j], use_gpu, repetitions);
345: starting_spmv_time = spmv_times[nmat - 1];
346: } else {
347: PetscReal spmv_time;
348: if (!bflg) {
349: PetscCall(MatCreateVecs(A, &b, NULL));
350: PetscCall(VecSet(b, 1.0));
351: }
352: if (use_gpu) PetscCall(WarmUpDevice(A, b, petscmatformat));
353: PetscCall(TimedSpMV(A, b, &spmv_time, petscmatformat, use_gpu, repetitions));
354: if (!bflg) PetscCall(VecDestroy(&b));
355: }
356: PetscCall(PetscFree(petscmatformat));
357: }
358: if (flg3) {
359: for (i = 0; i < nmat; i++) {
360: free(filenames[i]);
361: free(groupnames[i]);
362: free(matnames[i]);
363: }
364: free(filenames);
365: free(groupnames);
366: free(matnames);
367: PetscCall(PetscFree(spmv_times));
368: }
369: for (j = 0; j < nformats; j++) PetscCall(PetscFree(matformats[j]));
370: if (flg1 || flg2) PetscCall(MatDestroy(&A));
371: if (bflg) PetscCall(VecDestroy(&b));
372: PetscCall(ISDestroy(&rowperm));
373: PetscCall(ISDestroy(&colperm));
374: PetscCall(PetscFinalize());
375: return 0;
376: }
377: /*TEST
379: build:
380: requires: !complex double !windows_compilers !defined(PETSC_USE_64BIT_INDICES)
381: depends: mmloader.c mmio.c cJSON.c
383: test:
384: suffix: 1
385: args: -AMTX ${wPETSC_DIR}/share/petsc/datafiles/matrices/amesos2_test_mat0.mtx
387: test:
388: suffix: 2
389: args:-AMTX ${wPETSC_DIR}/share/petsc/datafiles/matrices/amesos2_test_mat0.mtx -use_gpu
390: output_file: output/bench_spmv_1.out
391: requires: cuda
393: test:
394: suffix: 3
395: args:-AMTX ${wPETSC_DIR}/share/petsc/datafiles/matrices/amesos2_test_mat0.mtx -use_gpu
396: output_file: output/bench_spmv_1.out
397: requires: hip
399: TEST*/