Actual source code: baijsolvnat14.c

  1: #include <../src/mat/impls/baij/seq/baij.h>
  2: #include <petsc/private/kernels/blockinvert.h>

  4: /* Block operations are done by accessing one column at a time */
  5: /* Default MatSolve for block size 14 */

  7: PetscErrorCode MatSolve_SeqBAIJ_14_NaturalOrdering(Mat A, Vec bb, Vec xx)
  8: {
  9:   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ *)A->data;
 10:   const PetscInt     n = a->mbs, *ai = a->i, *aj = a->j, *adiag = a->diag, *vi, bs = A->rmap->bs, bs2 = a->bs2;
 11:   PetscInt           i, k, nz, idx, idt, m;
 12:   const MatScalar   *aa = a->a, *v;
 13:   PetscScalar        s[14];
 14:   PetscScalar       *x, xv;
 15:   const PetscScalar *b;

 17:   PetscFunctionBegin;
 18:   PetscCall(VecGetArrayRead(bb, &b));
 19:   PetscCall(VecGetArray(xx, &x));

 21:   /* forward solve the lower triangular */
 22:   for (i = 0; i < n; i++) {
 23:     v           = aa + bs2 * ai[i];
 24:     vi          = aj + ai[i];
 25:     nz          = ai[i + 1] - ai[i];
 26:     idt         = bs * i;
 27:     x[idt]      = b[idt];
 28:     x[1 + idt]  = b[1 + idt];
 29:     x[2 + idt]  = b[2 + idt];
 30:     x[3 + idt]  = b[3 + idt];
 31:     x[4 + idt]  = b[4 + idt];
 32:     x[5 + idt]  = b[5 + idt];
 33:     x[6 + idt]  = b[6 + idt];
 34:     x[7 + idt]  = b[7 + idt];
 35:     x[8 + idt]  = b[8 + idt];
 36:     x[9 + idt]  = b[9 + idt];
 37:     x[10 + idt] = b[10 + idt];
 38:     x[11 + idt] = b[11 + idt];
 39:     x[12 + idt] = b[12 + idt];
 40:     x[13 + idt] = b[13 + idt];
 41:     for (m = 0; m < nz; m++) {
 42:       idx = bs * vi[m];
 43:       for (k = 0; k < bs; k++) {
 44:         xv = x[k + idx];
 45:         x[idt] -= v[0] * xv;
 46:         x[1 + idt] -= v[1] * xv;
 47:         x[2 + idt] -= v[2] * xv;
 48:         x[3 + idt] -= v[3] * xv;
 49:         x[4 + idt] -= v[4] * xv;
 50:         x[5 + idt] -= v[5] * xv;
 51:         x[6 + idt] -= v[6] * xv;
 52:         x[7 + idt] -= v[7] * xv;
 53:         x[8 + idt] -= v[8] * xv;
 54:         x[9 + idt] -= v[9] * xv;
 55:         x[10 + idt] -= v[10] * xv;
 56:         x[11 + idt] -= v[11] * xv;
 57:         x[12 + idt] -= v[12] * xv;
 58:         x[13 + idt] -= v[13] * xv;
 59:         v += 14;
 60:       }
 61:     }
 62:   }
 63:   /* backward solve the upper triangular */
 64:   for (i = n - 1; i >= 0; i--) {
 65:     v     = aa + bs2 * (adiag[i + 1] + 1);
 66:     vi    = aj + adiag[i + 1] + 1;
 67:     nz    = adiag[i] - adiag[i + 1] - 1;
 68:     idt   = bs * i;
 69:     s[0]  = x[idt];
 70:     s[1]  = x[1 + idt];
 71:     s[2]  = x[2 + idt];
 72:     s[3]  = x[3 + idt];
 73:     s[4]  = x[4 + idt];
 74:     s[5]  = x[5 + idt];
 75:     s[6]  = x[6 + idt];
 76:     s[7]  = x[7 + idt];
 77:     s[8]  = x[8 + idt];
 78:     s[9]  = x[9 + idt];
 79:     s[10] = x[10 + idt];
 80:     s[11] = x[11 + idt];
 81:     s[12] = x[12 + idt];
 82:     s[13] = x[13 + idt];

 84:     for (m = 0; m < nz; m++) {
 85:       idx = bs * vi[m];
 86:       for (k = 0; k < bs; k++) {
 87:         xv = x[k + idx];
 88:         s[0] -= v[0] * xv;
 89:         s[1] -= v[1] * xv;
 90:         s[2] -= v[2] * xv;
 91:         s[3] -= v[3] * xv;
 92:         s[4] -= v[4] * xv;
 93:         s[5] -= v[5] * xv;
 94:         s[6] -= v[6] * xv;
 95:         s[7] -= v[7] * xv;
 96:         s[8] -= v[8] * xv;
 97:         s[9] -= v[9] * xv;
 98:         s[10] -= v[10] * xv;
 99:         s[11] -= v[11] * xv;
100:         s[12] -= v[12] * xv;
101:         s[13] -= v[13] * xv;
102:         v += 14;
103:       }
104:     }
105:     PetscCall(PetscArrayzero(x + idt, bs));
106:     for (k = 0; k < bs; k++) {
107:       x[idt] += v[0] * s[k];
108:       x[1 + idt] += v[1] * s[k];
109:       x[2 + idt] += v[2] * s[k];
110:       x[3 + idt] += v[3] * s[k];
111:       x[4 + idt] += v[4] * s[k];
112:       x[5 + idt] += v[5] * s[k];
113:       x[6 + idt] += v[6] * s[k];
114:       x[7 + idt] += v[7] * s[k];
115:       x[8 + idt] += v[8] * s[k];
116:       x[9 + idt] += v[9] * s[k];
117:       x[10 + idt] += v[10] * s[k];
118:       x[11 + idt] += v[11] * s[k];
119:       x[12 + idt] += v[12] * s[k];
120:       x[13 + idt] += v[13] * s[k];
121:       v += 14;
122:     }
123:   }
124:   PetscCall(VecRestoreArrayRead(bb, &b));
125:   PetscCall(VecRestoreArray(xx, &x));
126:   PetscCall(PetscLogFlops(2.0 * bs2 * (a->nz) - bs * A->cmap->n));
127:   PetscFunctionReturn(PETSC_SUCCESS);
128: }

130: /* Block operations are done by accessing one column at a time */
131: /* Default MatSolve for block size 13 */

133: PetscErrorCode MatSolve_SeqBAIJ_13_NaturalOrdering(Mat A, Vec bb, Vec xx)
134: {
135:   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ *)A->data;
136:   const PetscInt     n = a->mbs, *ai = a->i, *aj = a->j, *adiag = a->diag, *vi, bs = A->rmap->bs, bs2 = a->bs2;
137:   PetscInt           i, k, nz, idx, idt, m;
138:   const MatScalar   *aa = a->a, *v;
139:   PetscScalar        s[13];
140:   PetscScalar       *x, xv;
141:   const PetscScalar *b;

143:   PetscFunctionBegin;
144:   PetscCall(VecGetArrayRead(bb, &b));
145:   PetscCall(VecGetArray(xx, &x));

147:   /* forward solve the lower triangular */
148:   for (i = 0; i < n; i++) {
149:     v           = aa + bs2 * ai[i];
150:     vi          = aj + ai[i];
151:     nz          = ai[i + 1] - ai[i];
152:     idt         = bs * i;
153:     x[idt]      = b[idt];
154:     x[1 + idt]  = b[1 + idt];
155:     x[2 + idt]  = b[2 + idt];
156:     x[3 + idt]  = b[3 + idt];
157:     x[4 + idt]  = b[4 + idt];
158:     x[5 + idt]  = b[5 + idt];
159:     x[6 + idt]  = b[6 + idt];
160:     x[7 + idt]  = b[7 + idt];
161:     x[8 + idt]  = b[8 + idt];
162:     x[9 + idt]  = b[9 + idt];
163:     x[10 + idt] = b[10 + idt];
164:     x[11 + idt] = b[11 + idt];
165:     x[12 + idt] = b[12 + idt];
166:     for (m = 0; m < nz; m++) {
167:       idx = bs * vi[m];
168:       for (k = 0; k < bs; k++) {
169:         xv = x[k + idx];
170:         x[idt] -= v[0] * xv;
171:         x[1 + idt] -= v[1] * xv;
172:         x[2 + idt] -= v[2] * xv;
173:         x[3 + idt] -= v[3] * xv;
174:         x[4 + idt] -= v[4] * xv;
175:         x[5 + idt] -= v[5] * xv;
176:         x[6 + idt] -= v[6] * xv;
177:         x[7 + idt] -= v[7] * xv;
178:         x[8 + idt] -= v[8] * xv;
179:         x[9 + idt] -= v[9] * xv;
180:         x[10 + idt] -= v[10] * xv;
181:         x[11 + idt] -= v[11] * xv;
182:         x[12 + idt] -= v[12] * xv;
183:         v += 13;
184:       }
185:     }
186:   }
187:   /* backward solve the upper triangular */
188:   for (i = n - 1; i >= 0; i--) {
189:     v     = aa + bs2 * (adiag[i + 1] + 1);
190:     vi    = aj + adiag[i + 1] + 1;
191:     nz    = adiag[i] - adiag[i + 1] - 1;
192:     idt   = bs * i;
193:     s[0]  = x[idt];
194:     s[1]  = x[1 + idt];
195:     s[2]  = x[2 + idt];
196:     s[3]  = x[3 + idt];
197:     s[4]  = x[4 + idt];
198:     s[5]  = x[5 + idt];
199:     s[6]  = x[6 + idt];
200:     s[7]  = x[7 + idt];
201:     s[8]  = x[8 + idt];
202:     s[9]  = x[9 + idt];
203:     s[10] = x[10 + idt];
204:     s[11] = x[11 + idt];
205:     s[12] = x[12 + idt];

207:     for (m = 0; m < nz; m++) {
208:       idx = bs * vi[m];
209:       for (k = 0; k < bs; k++) {
210:         xv = x[k + idx];
211:         s[0] -= v[0] * xv;
212:         s[1] -= v[1] * xv;
213:         s[2] -= v[2] * xv;
214:         s[3] -= v[3] * xv;
215:         s[4] -= v[4] * xv;
216:         s[5] -= v[5] * xv;
217:         s[6] -= v[6] * xv;
218:         s[7] -= v[7] * xv;
219:         s[8] -= v[8] * xv;
220:         s[9] -= v[9] * xv;
221:         s[10] -= v[10] * xv;
222:         s[11] -= v[11] * xv;
223:         s[12] -= v[12] * xv;
224:         v += 13;
225:       }
226:     }
227:     PetscCall(PetscArrayzero(x + idt, bs));
228:     for (k = 0; k < bs; k++) {
229:       x[idt] += v[0] * s[k];
230:       x[1 + idt] += v[1] * s[k];
231:       x[2 + idt] += v[2] * s[k];
232:       x[3 + idt] += v[3] * s[k];
233:       x[4 + idt] += v[4] * s[k];
234:       x[5 + idt] += v[5] * s[k];
235:       x[6 + idt] += v[6] * s[k];
236:       x[7 + idt] += v[7] * s[k];
237:       x[8 + idt] += v[8] * s[k];
238:       x[9 + idt] += v[9] * s[k];
239:       x[10 + idt] += v[10] * s[k];
240:       x[11 + idt] += v[11] * s[k];
241:       x[12 + idt] += v[12] * s[k];
242:       v += 13;
243:     }
244:   }
245:   PetscCall(VecRestoreArrayRead(bb, &b));
246:   PetscCall(VecRestoreArray(xx, &x));
247:   PetscCall(PetscLogFlops(2.0 * bs2 * (a->nz) - bs * A->cmap->n));
248:   PetscFunctionReturn(PETSC_SUCCESS);
249: }

251: /* Block operations are done by accessing one column at a time */
252: /* Default MatSolve for block size 12 */

254: PetscErrorCode MatSolve_SeqBAIJ_12_NaturalOrdering(Mat A, Vec bb, Vec xx)
255: {
256:   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ *)A->data;
257:   const PetscInt     n = a->mbs, *ai = a->i, *aj = a->j, *adiag = a->diag, *vi, bs = A->rmap->bs, bs2 = a->bs2;
258:   PetscInt           i, k, nz, idx, idt, m;
259:   const MatScalar   *aa = a->a, *v;
260:   PetscScalar        s[12];
261:   PetscScalar       *x, xv;
262:   const PetscScalar *b;

264:   PetscFunctionBegin;
265:   PetscCall(VecGetArrayRead(bb, &b));
266:   PetscCall(VecGetArray(xx, &x));

268:   /* forward solve the lower triangular */
269:   for (i = 0; i < n; i++) {
270:     v           = aa + bs2 * ai[i];
271:     vi          = aj + ai[i];
272:     nz          = ai[i + 1] - ai[i];
273:     idt         = bs * i;
274:     x[idt]      = b[idt];
275:     x[1 + idt]  = b[1 + idt];
276:     x[2 + idt]  = b[2 + idt];
277:     x[3 + idt]  = b[3 + idt];
278:     x[4 + idt]  = b[4 + idt];
279:     x[5 + idt]  = b[5 + idt];
280:     x[6 + idt]  = b[6 + idt];
281:     x[7 + idt]  = b[7 + idt];
282:     x[8 + idt]  = b[8 + idt];
283:     x[9 + idt]  = b[9 + idt];
284:     x[10 + idt] = b[10 + idt];
285:     x[11 + idt] = b[11 + idt];
286:     for (m = 0; m < nz; m++) {
287:       idx = bs * vi[m];
288:       for (k = 0; k < bs; k++) {
289:         xv = x[k + idx];
290:         x[idt] -= v[0] * xv;
291:         x[1 + idt] -= v[1] * xv;
292:         x[2 + idt] -= v[2] * xv;
293:         x[3 + idt] -= v[3] * xv;
294:         x[4 + idt] -= v[4] * xv;
295:         x[5 + idt] -= v[5] * xv;
296:         x[6 + idt] -= v[6] * xv;
297:         x[7 + idt] -= v[7] * xv;
298:         x[8 + idt] -= v[8] * xv;
299:         x[9 + idt] -= v[9] * xv;
300:         x[10 + idt] -= v[10] * xv;
301:         x[11 + idt] -= v[11] * xv;
302:         v += 12;
303:       }
304:     }
305:   }
306:   /* backward solve the upper triangular */
307:   for (i = n - 1; i >= 0; i--) {
308:     v     = aa + bs2 * (adiag[i + 1] + 1);
309:     vi    = aj + adiag[i + 1] + 1;
310:     nz    = adiag[i] - adiag[i + 1] - 1;
311:     idt   = bs * i;
312:     s[0]  = x[idt];
313:     s[1]  = x[1 + idt];
314:     s[2]  = x[2 + idt];
315:     s[3]  = x[3 + idt];
316:     s[4]  = x[4 + idt];
317:     s[5]  = x[5 + idt];
318:     s[6]  = x[6 + idt];
319:     s[7]  = x[7 + idt];
320:     s[8]  = x[8 + idt];
321:     s[9]  = x[9 + idt];
322:     s[10] = x[10 + idt];
323:     s[11] = x[11 + idt];

325:     for (m = 0; m < nz; m++) {
326:       idx = bs * vi[m];
327:       for (k = 0; k < bs; k++) {
328:         xv = x[k + idx];
329:         s[0] -= v[0] * xv;
330:         s[1] -= v[1] * xv;
331:         s[2] -= v[2] * xv;
332:         s[3] -= v[3] * xv;
333:         s[4] -= v[4] * xv;
334:         s[5] -= v[5] * xv;
335:         s[6] -= v[6] * xv;
336:         s[7] -= v[7] * xv;
337:         s[8] -= v[8] * xv;
338:         s[9] -= v[9] * xv;
339:         s[10] -= v[10] * xv;
340:         s[11] -= v[11] * xv;
341:         v += 12;
342:       }
343:     }
344:     PetscCall(PetscArrayzero(x + idt, bs));
345:     for (k = 0; k < bs; k++) {
346:       x[idt] += v[0] * s[k];
347:       x[1 + idt] += v[1] * s[k];
348:       x[2 + idt] += v[2] * s[k];
349:       x[3 + idt] += v[3] * s[k];
350:       x[4 + idt] += v[4] * s[k];
351:       x[5 + idt] += v[5] * s[k];
352:       x[6 + idt] += v[6] * s[k];
353:       x[7 + idt] += v[7] * s[k];
354:       x[8 + idt] += v[8] * s[k];
355:       x[9 + idt] += v[9] * s[k];
356:       x[10 + idt] += v[10] * s[k];
357:       x[11 + idt] += v[11] * s[k];
358:       v += 12;
359:     }
360:   }
361:   PetscCall(VecRestoreArrayRead(bb, &b));
362:   PetscCall(VecRestoreArray(xx, &x));
363:   PetscCall(PetscLogFlops(2.0 * bs2 * (a->nz) - bs * A->cmap->n));
364:   PetscFunctionReturn(PETSC_SUCCESS);
365: }