Actual source code: math2opusutils.cu
1: #include <petsc/private/matimpl.h>
2: #include <petsc/private/vecimpl.h>
3: #include <petscsf.h>
4: #if defined(PETSC_HAVE_CUDA)
5: #include <thrust/for_each.h>
6: #include <thrust/device_vector.h>
7: #include <thrust/execution_policy.h>
8: #endif
10: PETSC_INTERN PetscErrorCode MatDenseGetH2OpusStridedSF(Mat A, PetscSF h2sf, PetscSF *osf)
11: {
12: PetscSF asf;
14: PetscFunctionBegin;
17: PetscAssertPointer(osf, 3);
18: PetscCall(PetscObjectQuery((PetscObject)A, "_math2opus_stridedsf", (PetscObject *)&asf));
19: if (!asf) {
20: PetscInt lda;
22: PetscCall(MatDenseGetLDA(A, &lda));
23: PetscCall(PetscSFCreateStridedSF(h2sf, A->cmap->N, lda, PETSC_DECIDE, &asf));
24: PetscCall(PetscObjectCompose((PetscObject)A, "_math2opus_stridedsf", (PetscObject)asf));
25: PetscCall(PetscObjectDereference((PetscObject)asf));
26: }
27: *osf = asf;
28: PetscFunctionReturn(PETSC_SUCCESS);
29: }
31: #if defined(PETSC_HAVE_CUDA)
32: struct SignVector_Functor {
33: const PetscScalar *v;
34: PetscScalar *s;
35: SignVector_Functor(const PetscScalar *_v, PetscScalar *_s) : v(_v), s(_s) { }
37: __host__ __device__ void operator()(PetscInt i) { s[i] = (v[i] < 0 ? -1 : 1); }
38: };
39: #endif
41: PETSC_INTERN PetscErrorCode VecSign(Vec v, Vec s)
42: {
43: const PetscScalar *av;
44: PetscScalar *as;
45: PetscInt i, n;
46: #if defined(PETSC_HAVE_CUDA)
47: PetscBool viscuda, siscuda;
48: #endif
50: PetscFunctionBegin;
53: PetscCall(VecGetLocalSize(s, &n));
54: PetscCall(VecGetLocalSize(v, &i));
55: PetscCheck(i == n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Invalid local sizes %" PetscInt_FMT " != %" PetscInt_FMT, i, n);
56: #if defined(PETSC_HAVE_CUDA)
57: PetscCall(PetscObjectTypeCompareAny((PetscObject)v, &viscuda, VECSEQCUDA, VECMPICUDA, ""));
58: PetscCall(PetscObjectTypeCompareAny((PetscObject)s, &siscuda, VECSEQCUDA, VECMPICUDA, ""));
59: viscuda = (PetscBool)(viscuda && !v->boundtocpu);
60: siscuda = (PetscBool)(siscuda && !s->boundtocpu);
61: if (viscuda && siscuda) {
62: PetscCall(VecCUDAGetArrayRead(v, &av));
63: PetscCall(VecCUDAGetArrayWrite(s, &as));
64: SignVector_Functor sign_vector(av, as);
65: thrust::for_each(thrust::device, thrust::counting_iterator<PetscInt>(0), thrust::counting_iterator<PetscInt>(n), sign_vector);
66: PetscCall(VecCUDARestoreArrayWrite(s, &as));
67: PetscCall(VecCUDARestoreArrayRead(v, &av));
68: } else
69: #endif
70: {
71: PetscCall(VecGetArrayRead(v, &av));
72: PetscCall(VecGetArrayWrite(s, &as));
73: for (i = 0; i < n; i++) as[i] = PetscAbsScalar(av[i]) < 0 ? -1. : 1.;
74: PetscCall(VecRestoreArrayWrite(s, &as));
75: PetscCall(VecRestoreArrayRead(v, &av));
76: }
77: PetscFunctionReturn(PETSC_SUCCESS);
78: }
80: #if defined(PETSC_HAVE_CUDA)
81: struct StandardBasis_Functor {
82: PetscScalar *v;
83: PetscInt j;
85: StandardBasis_Functor(PetscScalar *_v, PetscInt _j) : v(_v), j(_j) { }
86: __host__ __device__ void operator()(PetscInt i) { v[i] = (i == j ? 1 : 0); }
87: };
88: #endif
90: PETSC_INTERN PetscErrorCode VecSetDelta(Vec x, PetscInt i)
91: {
92: #if defined(PETSC_HAVE_CUDA)
93: PetscBool iscuda;
94: #endif
95: PetscInt st, en;
97: PetscFunctionBegin;
98: PetscCall(VecGetOwnershipRange(x, &st, &en));
99: #if defined(PETSC_HAVE_CUDA)
100: PetscCall(PetscObjectTypeCompareAny((PetscObject)x, &iscuda, VECSEQCUDA, VECMPICUDA, ""));
101: iscuda = (PetscBool)(iscuda && !x->boundtocpu);
102: if (iscuda) {
103: PetscScalar *ax;
104: PetscCall(VecCUDAGetArrayWrite(x, &ax));
105: StandardBasis_Functor delta(ax, i - st);
106: thrust::for_each(thrust::device, thrust::counting_iterator<PetscInt>(0), thrust::counting_iterator<PetscInt>(en - st), delta);
107: PetscCall(VecCUDARestoreArrayWrite(x, &ax));
108: } else
109: #endif
110: {
111: PetscCall(VecSet(x, 0.));
112: if (st <= i && i < en) PetscCall(VecSetValue(x, i, 1.0, INSERT_VALUES));
113: PetscCall(VecAssemblyBegin(x));
114: PetscCall(VecAssemblyEnd(x));
115: }
116: PetscFunctionReturn(PETSC_SUCCESS);
117: }
119: /* these are approximate norms */
120: /* NORM_2: Estimating the matrix p-norm Nicholas J. Higham
121: NORM_1/NORM_INFINITY: A block algorithm for matrix 1-norm estimation, with an application to 1-norm pseudospectra Higham, Nicholas J. and Tisseur, Francoise */
122: PETSC_INTERN PetscErrorCode MatApproximateNorm_Private(Mat A, NormType normtype, PetscInt normsamples, PetscReal *n)
123: {
124: Vec x, y, w, z;
125: PetscReal normz, adot;
126: PetscScalar dot;
127: PetscInt i, j, N, jold = -1;
128: PetscBool boundtocpu = PETSC_TRUE;
130: PetscFunctionBegin;
131: #if defined(PETSC_HAVE_DEVICE)
132: boundtocpu = A->boundtocpu;
133: #endif
134: switch (normtype) {
135: case NORM_INFINITY:
136: case NORM_1:
137: if (normsamples < 0) normsamples = 10; /* pure guess */
138: if (normtype == NORM_INFINITY) {
139: Mat B;
140: PetscCall(MatCreateTranspose(A, &B));
141: A = B;
142: } else {
143: PetscCall(PetscObjectReference((PetscObject)A));
144: }
145: PetscCall(MatCreateVecs(A, &x, &y));
146: PetscCall(MatCreateVecs(A, &z, &w));
147: PetscCall(VecBindToCPU(x, boundtocpu));
148: PetscCall(VecBindToCPU(y, boundtocpu));
149: PetscCall(VecBindToCPU(z, boundtocpu));
150: PetscCall(VecBindToCPU(w, boundtocpu));
151: PetscCall(VecGetSize(x, &N));
152: PetscCall(VecSet(x, 1. / N));
153: *n = 0.0;
154: for (i = 0; i < normsamples; i++) {
155: PetscCall(MatMult(A, x, y));
156: PetscCall(VecSign(y, w));
157: PetscCall(MatMultTranspose(A, w, z));
158: PetscCall(VecNorm(z, NORM_INFINITY, &normz));
159: PetscCall(VecDot(x, z, &dot));
160: adot = PetscAbsScalar(dot);
161: PetscCall(PetscInfo(A, "%s norm it %" PetscInt_FMT " -> (%g %g)\n", NormTypes[normtype], i, (double)normz, (double)adot));
162: if (normz <= adot && i > 0) {
163: PetscCall(VecNorm(y, NORM_1, n));
164: break;
165: }
166: PetscCall(VecMax(z, &j, &normz));
167: if (j == jold) {
168: PetscCall(VecNorm(y, NORM_1, n));
169: PetscCall(PetscInfo(A, "%s norm it %" PetscInt_FMT " -> breakdown (j==jold)\n", NormTypes[normtype], i));
170: break;
171: }
172: jold = j;
173: PetscCall(VecSetDelta(x, j));
174: }
175: PetscCall(MatDestroy(&A));
176: PetscCall(VecDestroy(&x));
177: PetscCall(VecDestroy(&w));
178: PetscCall(VecDestroy(&y));
179: PetscCall(VecDestroy(&z));
180: break;
181: case NORM_2:
182: if (normsamples < 0) normsamples = 20; /* pure guess */
183: PetscCall(MatCreateVecs(A, &x, &y));
184: PetscCall(MatCreateVecs(A, &z, NULL));
185: PetscCall(VecBindToCPU(x, boundtocpu));
186: PetscCall(VecBindToCPU(y, boundtocpu));
187: PetscCall(VecBindToCPU(z, boundtocpu));
188: PetscCall(VecSetRandom(x, NULL));
189: PetscCall(VecNormalize(x, NULL));
190: *n = 0.0;
191: for (i = 0; i < normsamples; i++) {
192: PetscCall(MatMult(A, x, y));
193: PetscCall(VecNormalize(y, n));
194: PetscCall(MatMultTranspose(A, y, z));
195: PetscCall(VecNorm(z, NORM_2, &normz));
196: PetscCall(VecDot(x, z, &dot));
197: adot = PetscAbsScalar(dot);
198: PetscCall(PetscInfo(A, "%s norm it %" PetscInt_FMT " -> %g (%g %g)\n", NormTypes[normtype], i, (double)*n, (double)normz, (double)adot));
199: if (normz <= adot) break;
200: if (i < normsamples - 1) {
201: Vec t;
203: PetscCall(VecNormalize(z, NULL));
204: t = x;
205: x = z;
206: z = t;
207: }
208: }
209: PetscCall(VecDestroy(&x));
210: PetscCall(VecDestroy(&y));
211: PetscCall(VecDestroy(&z));
212: break;
213: default:
214: SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "%s norm not supported", NormTypes[normtype]);
215: }
216: PetscCall(PetscInfo(A, "%s norm %g computed in %" PetscInt_FMT " iterations\n", NormTypes[normtype], (double)*n, i));
217: PetscFunctionReturn(PETSC_SUCCESS);
218: }