Actual source code: ex7.c
// Help text handed to PetscInitialize() in main().
static const char help[] = "Tests PetscDeviceAllocate().\n\n";
#include "petscdevicetestcommon.h"
// PetscPrintf() wrapper that prefixes every message with "[DEBUG OUTPUT] ". The prefix is attached
// by string-literal concatenation, so the first variadic argument must be a string literal. The
// "no_info" and "no_cxx" entries in the TEST block at the bottom of this file filter lines
// carrying this prefix out of the compared output.
#define DebugPrintf(comm, ...) PetscPrintf((comm), "[DEBUG OUTPUT] " __VA_ARGS__)
7: static PetscErrorCode IncrementSize(PetscRandom rand, PetscInt *value)
8: {
9: PetscReal rval;
11: PetscFunctionBegin;
12: // set the interval such that *value += rval never goes below 0 or above 500
13: PetscCall(PetscRandomSetInterval(rand, -(*value), 500 - (*value)));
14: PetscCall(PetscRandomGetValueReal(rand, &rval));
15: *value += (PetscInt)rval;
16: PetscCall(DebugPrintf(PetscObjectComm((PetscObject)rand), "n: %" PetscInt_FMT "\n", *value));
17: PetscFunctionReturn(PETSC_SUCCESS);
18: }
20: static PetscErrorCode TestAllocate(PetscDeviceContext dctx, PetscRandom rand, PetscMemType mtype)
21: {
22: PetscScalar *ptr, *tmp_ptr;
23: PetscInt n = 10;
25: PetscFunctionBegin;
26: if (PetscMemTypeDevice(mtype)) {
27: PetscDeviceType dtype;
29: PetscCall(PetscDeviceContextGetDeviceType(dctx, &dtype));
30: // host device context cannot handle this
31: if (dtype == PETSC_DEVICE_HOST) PetscFunctionReturn(PETSC_SUCCESS);
32: }
33: // test basic allocation, deallocation
34: PetscCall(IncrementSize(rand, &n));
35: PetscCall(PetscDeviceMalloc(dctx, mtype, n, &ptr));
36: PetscCheck(ptr, PETSC_COMM_SELF, PETSC_ERR_POINTER, "PetscDeviceMalloc() return NULL pointer for %s allocation size %" PetscInt_FMT, PetscMemTypeToString(mtype), n);
37: // this ensures the host pointer is at least valid
38: if (PetscMemTypeHost(mtype)) {
39: for (PetscInt i = 0; i < n; ++i) ptr[i] = (PetscScalar)i;
40: }
41: PetscCall(PetscDeviceFree(dctx, ptr));
43: // test alignment of various types
44: {
45: char *char_ptr;
46: short *short_ptr;
47: int *int_ptr;
48: double *double_ptr;
49: long int *long_int_ptr;
51: PetscCall(PetscDeviceMalloc(dctx, mtype, 1, &char_ptr));
52: PetscCall(PetscDeviceMalloc(dctx, mtype, 1, &short_ptr));
53: PetscCall(PetscDeviceMalloc(dctx, mtype, 1, &int_ptr));
54: PetscCall(PetscDeviceMalloc(dctx, mtype, 1, &double_ptr));
55: PetscCall(PetscDeviceMalloc(dctx, mtype, 1, &long_int_ptr));
57: // if an error occurs here, it means the alignment system is broken!
58: PetscCall(PetscDeviceFree(dctx, char_ptr));
59: PetscCall(PetscDeviceFree(dctx, short_ptr));
60: PetscCall(PetscDeviceFree(dctx, int_ptr));
61: PetscCall(PetscDeviceFree(dctx, double_ptr));
62: PetscCall(PetscDeviceFree(dctx, long_int_ptr));
63: }
65: // test that calloc() produces cleared memory
66: PetscCall(IncrementSize(rand, &n));
67: PetscCall(PetscDeviceCalloc(dctx, mtype, n, &ptr));
68: PetscCheck(ptr, PETSC_COMM_SELF, PETSC_ERR_POINTER, "PetscDeviceCalloc() returned NULL pointer for %s allocation size %" PetscInt_FMT, PetscMemTypeToString(mtype), n);
69: if (PetscMemTypeHost(mtype)) {
70: tmp_ptr = ptr;
71: } else {
72: PetscCall(PetscDeviceMalloc(dctx, PETSC_MEMTYPE_HOST, n, &tmp_ptr));
73: PetscCall(PetscDeviceArrayCopy(dctx, tmp_ptr, ptr, n));
74: }
75: PetscCall(PetscDeviceContextSynchronize(dctx));
76: for (PetscInt i = 0; i < n; ++i) PetscCheck(tmp_ptr[i] == (PetscScalar)0.0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PetscDeviceCalloc() returned memory that was not cleared, ptr[%" PetscInt_FMT "] %g != 0", i, (double)PetscAbsScalar(tmp_ptr[i]));
77: if (tmp_ptr == ptr) {
78: tmp_ptr = NULL;
79: } else {
80: PetscCall(PetscDeviceFree(dctx, tmp_ptr));
81: }
82: PetscCall(PetscDeviceFree(dctx, ptr));
84: // test that devicearrayzero produces cleared memory
85: PetscCall(IncrementSize(rand, &n));
86: PetscCall(PetscDeviceMalloc(dctx, mtype, n, &ptr));
87: PetscCall(PetscDeviceArrayZero(dctx, ptr, n));
88: PetscCall(PetscMalloc1(n, &tmp_ptr));
89: PetscCall(PetscDeviceRegisterMemory(tmp_ptr, PETSC_MEMTYPE_HOST, n * sizeof(*tmp_ptr)));
90: for (PetscInt i = 0; i < n; ++i) tmp_ptr[i] = (PetscScalar)i;
91: PetscCall(PetscDeviceArrayCopy(dctx, tmp_ptr, ptr, n));
92: PetscCall(PetscDeviceContextSynchronize(dctx));
93: for (PetscInt i = 0; i < n; ++i) PetscCheck(tmp_ptr[i] == (PetscScalar)0.0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PetscDeviceArrayZero() did not clear memory, ptr[%" PetscInt_FMT "] %g != 0", i, (double)PetscAbsScalar(tmp_ptr[i]));
94: PetscCall(PetscDeviceFree(dctx, tmp_ptr));
95: PetscCall(PetscDeviceFree(dctx, ptr));
96: PetscFunctionReturn(PETSC_SUCCESS);
97: }
99: static PetscErrorCode TestAsyncCoherence(PetscDeviceContext dctx, PetscRandom rand)
100: {
101: const PetscInt nsub = 2;
102: const PetscInt n = 1024;
103: PetscScalar *ptr, *tmp_ptr;
104: PetscDeviceType dtype;
105: PetscDeviceContext *sub;
107: PetscFunctionBegin;
108: PetscCall(PetscDeviceContextGetDeviceType(dctx, &dtype));
109: // ensure the streams are nonblocking
110: PetscCall(PetscDeviceContextForkWithStreamType(dctx, PETSC_STREAM_NONBLOCKING, nsub, &sub));
111: // do a warmup to ensure each context acquires any necessary data structures
112: for (PetscInt i = 0; i < nsub; ++i) {
113: PetscCall(PetscDeviceMalloc(sub[i], PETSC_MEMTYPE_HOST, n, &ptr));
114: PetscCall(PetscDeviceFree(sub[i], ptr));
115: if (dtype != PETSC_DEVICE_HOST) {
116: PetscCall(PetscDeviceMalloc(sub[i], PETSC_MEMTYPE_DEVICE, n, &ptr));
117: PetscCall(PetscDeviceFree(sub[i], ptr));
118: }
119: }
121: // allocate on one
122: PetscCall(PetscDeviceMalloc(sub[0], PETSC_MEMTYPE_HOST, n, &ptr));
123: // free on the other
124: PetscCall(PetscDeviceFree(sub[1], ptr));
126: // allocate on one
127: PetscCall(PetscDeviceMalloc(sub[0], PETSC_MEMTYPE_HOST, n, &ptr));
128: // zero on the other
129: PetscCall(PetscDeviceArrayZero(sub[1], ptr, n));
130: PetscCall(PetscDeviceContextSynchronize(sub[1]));
131: for (PetscInt i = 0; i < n; ++i) {
132: for (PetscInt i = 0; i < n; ++i) PetscCheck(ptr[i] == (PetscScalar)0.0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PetscDeviceArrayZero() was not properly serialized, ptr[%" PetscInt_FMT "] %g != 0", i, (double)PetscAbsScalar(ptr[i]));
133: }
134: PetscCall(PetscDeviceFree(sub[1], ptr));
136: // test the transfers are serialized
137: if (dtype != PETSC_DEVICE_HOST) {
138: PetscCall(PetscDeviceCalloc(dctx, PETSC_MEMTYPE_DEVICE, n, &ptr));
139: PetscCall(PetscDeviceMalloc(dctx, PETSC_MEMTYPE_HOST, n, &tmp_ptr));
140: PetscCall(PetscDeviceArrayCopy(sub[0], tmp_ptr, ptr, n));
141: PetscCall(PetscDeviceContextSynchronize(sub[0]));
142: for (PetscInt i = 0; i < n; ++i) {
143: for (PetscInt i = 0; i < n; ++i) PetscCheck(tmp_ptr[i] == (PetscScalar)0.0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PetscDeviceArrayCopt() was not properly serialized, ptr[%" PetscInt_FMT "] %g != 0", i, (double)PetscAbsScalar(tmp_ptr[i]));
144: }
145: PetscCall(PetscDeviceFree(sub[1], ptr));
146: }
148: PetscCall(PetscDeviceContextJoin(dctx, nsub, PETSC_DEVICE_CONTEXT_JOIN_DESTROY, &sub));
149: PetscFunctionReturn(PETSC_SUCCESS);
150: }
152: int main(int argc, char *argv[])
153: {
154: PetscDeviceContext dctx;
155: PetscRandom rand;
157: PetscFunctionBeginUser;
158: PetscCall(PetscInitialize(&argc, &argv, NULL, help));
160: // A vile hack. The -info output is used to test correctness in this test which prints --
161: // among other things -- the PetscObjectId of the PetscDevicContext and the allocated memory.
162: //
163: // Due to device and host creating slightly different number of objects on startup there will
164: // be a mismatch in the ID's. So for the tests involving the host we sit here creating
165: // PetscContainers (and incrementing the global PetscObjectId counter) until it reaches some
166: // arbitrarily high number to ensure that our first PetscDeviceContext has the same ID across
167: // systems.
168: {
169: PetscObjectId prev_id = 0;
171: do {
172: PetscContainer c;
173: PetscObjectId id;
175: PetscCall(PetscContainerCreate(PETSC_COMM_WORLD, &c));
176: PetscCall(PetscObjectGetId((PetscObject)c, &id));
177: // sanity check, in case PetscContainer ever stops being a PetscObject
178: PetscCheck(id > prev_id, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PetscObjectIds are not increasing for successively created PetscContainers! current: %" PetscInt64_FMT ", previous: %" PetscInt64_FMT, id, prev_id);
179: prev_id = id;
180: PetscCall(PetscContainerDestroy(&c));
181: } while (prev_id < 50);
182: }
183: PetscCall(PetscDeviceContextGetCurrentContext(&dctx));
185: PetscCall(PetscRandomCreate(PETSC_COMM_WORLD, &rand));
186: // this seed just so happens to keep the allocation size increasing
187: PetscCall(PetscRandomSetSeed(rand, 123));
188: PetscCall(PetscRandomSeed(rand));
189: PetscCall(PetscRandomSetFromOptions(rand));
191: PetscCall(TestAllocate(dctx, rand, PETSC_MEMTYPE_HOST));
192: PetscCall(TestAllocate(dctx, rand, PETSC_MEMTYPE_DEVICE));
193: PetscCall(TestAsyncCoherence(dctx, rand));
195: PetscCall(PetscRandomDestroy(&rand));
196: PetscCall(PetscPrintf(PETSC_COMM_WORLD, "EXIT_SUCCESS\n"));
197: PetscCall(PetscFinalize());
198: return 0;
199: }
201: /*TEST
203: testset:
204: requires: defined(PETSC_USE_INFO) defined(PETSC_USE_DEBUG) cxx
205: args: -info :device
206: suffix: with_info
207: test:
208: requires: !device
209: suffix: host_no_device
210: test:
211: requires: device
212: args: -default_device_type host
213: filter: sed -e 's/host/IMPL/g' -e 's/cuda/IMPL/g' -e 's/hip/IMPL/g' -e 's/sycl/IMPL/g'
214: suffix: host_with_device
215: test:
216: requires: cuda
217: args: -default_device_type cuda
218: suffix: cuda
219: test:
220: requires: hip
221: args: -default_device_type hip
222: suffix: hip
223: test:
224: requires: sycl
225: args: -default_device_type sycl
226: suffix: sycl
228: testset:
229: output_file: ./output/ExitSuccess.out
230: requires: !defined(PETSC_USE_DEBUG)
231: filter: grep -v "\[DEBUG OUTPUT\]"
232: suffix: no_info
233: test:
234: requires: !device
235: suffix: host_no_device
236: test:
237: requires: device
238: args: -default_device_type host
239: suffix: host_with_device
240: test:
241: requires: cuda
242: args: -default_device_type cuda
243: suffix: cuda
244: test:
245: requires: hip
246: args: -default_device_type hip
247: suffix: hip
248: test:
249: requires: sycl
250: args: -default_device_type sycl
251: suffix: sycl
253: test:
254: requires: !cxx
255: output_file: ./output/ExitSuccess.out
256: filter: grep -v "\[DEBUG OUTPUT\]"
257: suffix: no_cxx
259: TEST*/