Actual source code: math2opusutils.cu

  1: #include <petsc/private/matimpl.h>
  2: #include <petsc/private/vecimpl.h>
  3: #include <petscsf.h>
  4: #if defined(PETSC_HAVE_CUDA)
  5:   #include <thrust/for_each.h>
  6:   #include <thrust/device_vector.h>
  7:   #include <thrust/execution_policy.h>
  8: #endif

 10: PETSC_INTERN PetscErrorCode MatDenseGetH2OpusStridedSF(Mat A, PetscSF h2sf, PetscSF *osf)
 11: {
 12:   PetscSF asf;

 14:   PetscFunctionBegin;
 17:   PetscAssertPointer(osf, 3);
 18:   PetscCall(PetscObjectQuery((PetscObject)A, "_math2opus_stridedsf", (PetscObject *)&asf));
 19:   if (!asf) {
 20:     PetscInt lda;

 22:     PetscCall(MatDenseGetLDA(A, &lda));
 23:     PetscCall(PetscSFCreateStridedSF(h2sf, A->cmap->N, lda, PETSC_DECIDE, &asf));
 24:     PetscCall(PetscObjectCompose((PetscObject)A, "_math2opus_stridedsf", (PetscObject)asf));
 25:     PetscCall(PetscObjectDereference((PetscObject)asf));
 26:   }
 27:   *osf = asf;
 28:   PetscFunctionReturn(PETSC_SUCCESS);
 29: }

 31: #if defined(PETSC_HAVE_CUDA)
 32: struct SignVector_Functor {
 33:   const PetscScalar *v;
 34:   PetscScalar       *s;
 35:   SignVector_Functor(const PetscScalar *_v, PetscScalar *_s) : v(_v), s(_s) { }

 37:   __host__ __device__ void operator()(PetscInt i) { s[i] = (v[i] < 0 ? -1 : 1); }
 38: };
 39: #endif

 41: PETSC_INTERN PetscErrorCode VecSign(Vec v, Vec s)
 42: {
 43:   const PetscScalar *av;
 44:   PetscScalar       *as;
 45:   PetscInt           i, n;
 46: #if defined(PETSC_HAVE_CUDA)
 47:   PetscBool viscuda, siscuda;
 48: #endif

 50:   PetscFunctionBegin;
 53:   PetscCall(VecGetLocalSize(s, &n));
 54:   PetscCall(VecGetLocalSize(v, &i));
 55:   PetscCheck(i == n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Invalid local sizes %" PetscInt_FMT " != %" PetscInt_FMT, i, n);
 56: #if defined(PETSC_HAVE_CUDA)
 57:   PetscCall(PetscObjectTypeCompareAny((PetscObject)v, &viscuda, VECSEQCUDA, VECMPICUDA, ""));
 58:   PetscCall(PetscObjectTypeCompareAny((PetscObject)s, &siscuda, VECSEQCUDA, VECMPICUDA, ""));
 59:   viscuda = (PetscBool)(viscuda && !v->boundtocpu);
 60:   siscuda = (PetscBool)(siscuda && !s->boundtocpu);
 61:   if (viscuda && siscuda) {
 62:     PetscCall(VecCUDAGetArrayRead(v, &av));
 63:     PetscCall(VecCUDAGetArrayWrite(s, &as));
 64:     SignVector_Functor sign_vector(av, as);
 65:     thrust::for_each(thrust::device, thrust::counting_iterator<PetscInt>(0), thrust::counting_iterator<PetscInt>(n), sign_vector);
 66:     PetscCall(VecCUDARestoreArrayWrite(s, &as));
 67:     PetscCall(VecCUDARestoreArrayRead(v, &av));
 68:   } else
 69: #endif
 70:   {
 71:     PetscCall(VecGetArrayRead(v, &av));
 72:     PetscCall(VecGetArrayWrite(s, &as));
 73:     for (i = 0; i < n; i++) as[i] = PetscAbsScalar(av[i]) < 0 ? -1. : 1.;
 74:     PetscCall(VecRestoreArrayWrite(s, &as));
 75:     PetscCall(VecRestoreArrayRead(v, &av));
 76:   }
 77:   PetscFunctionReturn(PETSC_SUCCESS);
 78: }

 80: #if defined(PETSC_HAVE_CUDA)
 81: struct StandardBasis_Functor {
 82:   PetscScalar *v;
 83:   PetscInt     j;

 85:   StandardBasis_Functor(PetscScalar *_v, PetscInt _j) : v(_v), j(_j) { }
 86:   __host__ __device__ void operator()(PetscInt i) { v[i] = (i == j ? 1 : 0); }
 87: };
 88: #endif

 90: PETSC_INTERN PetscErrorCode VecSetDelta(Vec x, PetscInt i)
 91: {
 92: #if defined(PETSC_HAVE_CUDA)
 93:   PetscBool iscuda;
 94: #endif
 95:   PetscInt st, en;

 97:   PetscFunctionBegin;
 98:   PetscCall(VecGetOwnershipRange(x, &st, &en));
 99: #if defined(PETSC_HAVE_CUDA)
100:   PetscCall(PetscObjectTypeCompareAny((PetscObject)x, &iscuda, VECSEQCUDA, VECMPICUDA, ""));
101:   iscuda = (PetscBool)(iscuda && !x->boundtocpu);
102:   if (iscuda) {
103:     PetscScalar *ax;
104:     PetscCall(VecCUDAGetArrayWrite(x, &ax));
105:     StandardBasis_Functor delta(ax, i - st);
106:     thrust::for_each(thrust::device, thrust::counting_iterator<PetscInt>(0), thrust::counting_iterator<PetscInt>(en - st), delta);
107:     PetscCall(VecCUDARestoreArrayWrite(x, &ax));
108:   } else
109: #endif
110:   {
111:     PetscCall(VecSet(x, 0.));
112:     if (st <= i && i < en) PetscCall(VecSetValue(x, i, 1.0, INSERT_VALUES));
113:     PetscCall(VecAssemblyBegin(x));
114:     PetscCall(VecAssemblyEnd(x));
115:   }
116:   PetscFunctionReturn(PETSC_SUCCESS);
117: }

119: /* these are approximate norms */
120: /* NORM_2: Estimating the matrix p-norm Nicholas J. Higham
121:    NORM_1/NORM_INFINITY: A block algorithm for matrix 1-norm estimation, with an application to 1-norm pseudospectra Higham, Nicholas J. and Tisseur, Francoise */
122: PETSC_INTERN PetscErrorCode MatApproximateNorm_Private(Mat A, NormType normtype, PetscInt normsamples, PetscReal *n)
123: {
124:   Vec         x, y, w, z;
125:   PetscReal   normz, adot;
126:   PetscScalar dot;
127:   PetscInt    i, j, N, jold = -1;
128:   PetscBool   boundtocpu = PETSC_TRUE;

130:   PetscFunctionBegin;
131: #if defined(PETSC_HAVE_DEVICE)
132:   boundtocpu = A->boundtocpu;
133: #endif
134:   switch (normtype) {
135:   case NORM_INFINITY:
136:   case NORM_1:
137:     if (normsamples < 0) normsamples = 10; /* pure guess */
138:     if (normtype == NORM_INFINITY) {
139:       Mat B;
140:       PetscCall(MatCreateTranspose(A, &B));
141:       A = B;
142:     } else {
143:       PetscCall(PetscObjectReference((PetscObject)A));
144:     }
145:     PetscCall(MatCreateVecs(A, &x, &y));
146:     PetscCall(MatCreateVecs(A, &z, &w));
147:     PetscCall(VecBindToCPU(x, boundtocpu));
148:     PetscCall(VecBindToCPU(y, boundtocpu));
149:     PetscCall(VecBindToCPU(z, boundtocpu));
150:     PetscCall(VecBindToCPU(w, boundtocpu));
151:     PetscCall(VecGetSize(x, &N));
152:     PetscCall(VecSet(x, 1. / N));
153:     *n = 0.0;
154:     for (i = 0; i < normsamples; i++) {
155:       PetscCall(MatMult(A, x, y));
156:       PetscCall(VecSign(y, w));
157:       PetscCall(MatMultTranspose(A, w, z));
158:       PetscCall(VecNorm(z, NORM_INFINITY, &normz));
159:       PetscCall(VecDot(x, z, &dot));
160:       adot = PetscAbsScalar(dot);
161:       PetscCall(PetscInfo(A, "%s norm it %" PetscInt_FMT " -> (%g %g)\n", NormTypes[normtype], i, (double)normz, (double)adot));
162:       if (normz <= adot && i > 0) {
163:         PetscCall(VecNorm(y, NORM_1, n));
164:         break;
165:       }
166:       PetscCall(VecMax(z, &j, &normz));
167:       if (j == jold) {
168:         PetscCall(VecNorm(y, NORM_1, n));
169:         PetscCall(PetscInfo(A, "%s norm it %" PetscInt_FMT " -> breakdown (j==jold)\n", NormTypes[normtype], i));
170:         break;
171:       }
172:       jold = j;
173:       PetscCall(VecSetDelta(x, j));
174:     }
175:     PetscCall(MatDestroy(&A));
176:     PetscCall(VecDestroy(&x));
177:     PetscCall(VecDestroy(&w));
178:     PetscCall(VecDestroy(&y));
179:     PetscCall(VecDestroy(&z));
180:     break;
181:   case NORM_2:
182:     if (normsamples < 0) normsamples = 20; /* pure guess */
183:     PetscCall(MatCreateVecs(A, &x, &y));
184:     PetscCall(MatCreateVecs(A, &z, NULL));
185:     PetscCall(VecBindToCPU(x, boundtocpu));
186:     PetscCall(VecBindToCPU(y, boundtocpu));
187:     PetscCall(VecBindToCPU(z, boundtocpu));
188:     PetscCall(VecSetRandom(x, NULL));
189:     PetscCall(VecNormalize(x, NULL));
190:     *n = 0.0;
191:     for (i = 0; i < normsamples; i++) {
192:       PetscCall(MatMult(A, x, y));
193:       PetscCall(VecNormalize(y, n));
194:       PetscCall(MatMultTranspose(A, y, z));
195:       PetscCall(VecNorm(z, NORM_2, &normz));
196:       PetscCall(VecDot(x, z, &dot));
197:       adot = PetscAbsScalar(dot);
198:       PetscCall(PetscInfo(A, "%s norm it %" PetscInt_FMT " -> %g (%g %g)\n", NormTypes[normtype], i, (double)*n, (double)normz, (double)adot));
199:       if (normz <= adot) break;
200:       if (i < normsamples - 1) {
201:         Vec t;

203:         PetscCall(VecNormalize(z, NULL));
204:         t = x;
205:         x = z;
206:         z = t;
207:       }
208:     }
209:     PetscCall(VecDestroy(&x));
210:     PetscCall(VecDestroy(&y));
211:     PetscCall(VecDestroy(&z));
212:     break;
213:   default:
214:     SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "%s norm not supported", NormTypes[normtype]);
215:   }
216:   PetscCall(PetscInfo(A, "%s norm %g computed in %" PetscInt_FMT " iterations\n", NormTypes[normtype], (double)*n, i));
217:   PetscFunctionReturn(PETSC_SUCCESS);
218: }