Actual source code: pbjacobi.c


  2: /*
  3:    Include files needed for the PBJacobi preconditioner:
  4:      pcimpl.h - private include file intended for use by all preconditioners
  5: */

  7: #include <petsc/private/pcimpl.h>

  9: /*
 10:    Private context (data structure) for the PBJacobi preconditioner.
 11: */
 12: typedef struct {
 13:   const MatScalar *diag;
 14:   PetscInt        bs,mbs;
 15: } PC_PBJacobi;

 17: static PetscErrorCode PCApply_PBJacobi_1(PC pc,Vec x,Vec y)
 18: {
 19:   PC_PBJacobi       *jac = (PC_PBJacobi*)pc->data;
 20:   PetscInt          i,m = jac->mbs;
 21:   const MatScalar   *diag = jac->diag;
 22:   const PetscScalar *xx;
 23:   PetscScalar       *yy;

 25:   VecGetArrayRead(x,&xx);
 26:   VecGetArray(y,&yy);
 27:   for (i=0; i<m; i++) yy[i] = diag[i]*xx[i];
 28:   VecRestoreArrayRead(x,&xx);
 29:   VecRestoreArray(y,&yy);
 30:   PetscLogFlops(m);
 31:   return 0;
 32: }

 34: static PetscErrorCode PCApply_PBJacobi_2(PC pc,Vec x,Vec y)
 35: {
 36:   PC_PBJacobi     *jac = (PC_PBJacobi*)pc->data;
 37:   PetscInt        i,m = jac->mbs;
 38:   const MatScalar *diag = jac->diag;
 39:   PetscScalar     x0,x1,*yy;
 40:   const PetscScalar *xx;

 42:   VecGetArrayRead(x,&xx);
 43:   VecGetArray(y,&yy);
 44:   for (i=0; i<m; i++) {
 45:     x0        = xx[2*i]; x1 = xx[2*i+1];
 46:     yy[2*i]   = diag[0]*x0 + diag[2]*x1;
 47:     yy[2*i+1] = diag[1]*x0 + diag[3]*x1;
 48:     diag     += 4;
 49:   }
 50:   VecRestoreArrayRead(x,&xx);
 51:   VecRestoreArray(y,&yy);
 52:   PetscLogFlops(6.0*m);
 53:   return 0;
 54: }
 55: static PetscErrorCode PCApply_PBJacobi_3(PC pc,Vec x,Vec y)
 56: {
 57:   PC_PBJacobi     *jac = (PC_PBJacobi*)pc->data;
 58:   PetscInt        i,m = jac->mbs;
 59:   const MatScalar *diag = jac->diag;
 60:   PetscScalar     x0,x1,x2,*yy;
 61:   const PetscScalar *xx;

 63:   VecGetArrayRead(x,&xx);
 64:   VecGetArray(y,&yy);
 65:   for (i=0; i<m; i++) {
 66:     x0 = xx[3*i]; x1 = xx[3*i+1]; x2 = xx[3*i+2];

 68:     yy[3*i]   = diag[0]*x0 + diag[3]*x1 + diag[6]*x2;
 69:     yy[3*i+1] = diag[1]*x0 + diag[4]*x1 + diag[7]*x2;
 70:     yy[3*i+2] = diag[2]*x0 + diag[5]*x1 + diag[8]*x2;
 71:     diag     += 9;
 72:   }
 73:   VecRestoreArrayRead(x,&xx);
 74:   VecRestoreArray(y,&yy);
 75:   PetscLogFlops(15.0*m);
 76:   return 0;
 77: }
 78: static PetscErrorCode PCApply_PBJacobi_4(PC pc,Vec x,Vec y)
 79: {
 80:   PC_PBJacobi     *jac = (PC_PBJacobi*)pc->data;
 81:   PetscInt        i,m = jac->mbs;
 82:   const MatScalar *diag = jac->diag;
 83:   PetscScalar     x0,x1,x2,x3,*yy;
 84:   const PetscScalar *xx;

 86:   VecGetArrayRead(x,&xx);
 87:   VecGetArray(y,&yy);
 88:   for (i=0; i<m; i++) {
 89:     x0 = xx[4*i]; x1 = xx[4*i+1]; x2 = xx[4*i+2]; x3 = xx[4*i+3];

 91:     yy[4*i]   = diag[0]*x0 + diag[4]*x1 + diag[8]*x2  + diag[12]*x3;
 92:     yy[4*i+1] = diag[1]*x0 + diag[5]*x1 + diag[9]*x2  + diag[13]*x3;
 93:     yy[4*i+2] = diag[2]*x0 + diag[6]*x1 + diag[10]*x2 + diag[14]*x3;
 94:     yy[4*i+3] = diag[3]*x0 + diag[7]*x1 + diag[11]*x2 + diag[15]*x3;
 95:     diag     += 16;
 96:   }
 97:   VecRestoreArrayRead(x,&xx);
 98:   VecRestoreArray(y,&yy);
 99:   PetscLogFlops(28.0*m);
100:   return 0;
101: }
102: static PetscErrorCode PCApply_PBJacobi_5(PC pc,Vec x,Vec y)
103: {
104:   PC_PBJacobi     *jac = (PC_PBJacobi*)pc->data;
105:   PetscInt        i,m = jac->mbs;
106:   const MatScalar *diag = jac->diag;
107:   PetscScalar     x0,x1,x2,x3,x4,*yy;
108:   const PetscScalar *xx;

110:   VecGetArrayRead(x,&xx);
111:   VecGetArray(y,&yy);
112:   for (i=0; i<m; i++) {
113:     x0 = xx[5*i]; x1 = xx[5*i+1]; x2 = xx[5*i+2]; x3 = xx[5*i+3]; x4 = xx[5*i+4];

115:     yy[5*i]   = diag[0]*x0 + diag[5]*x1 + diag[10]*x2  + diag[15]*x3 + diag[20]*x4;
116:     yy[5*i+1] = diag[1]*x0 + diag[6]*x1 + diag[11]*x2  + diag[16]*x3 + diag[21]*x4;
117:     yy[5*i+2] = diag[2]*x0 + diag[7]*x1 + diag[12]*x2 + diag[17]*x3 + diag[22]*x4;
118:     yy[5*i+3] = diag[3]*x0 + diag[8]*x1 + diag[13]*x2 + diag[18]*x3 + diag[23]*x4;
119:     yy[5*i+4] = diag[4]*x0 + diag[9]*x1 + diag[14]*x2 + diag[19]*x3 + diag[24]*x4;
120:     diag     += 25;
121:   }
122:   VecRestoreArrayRead(x,&xx);
123:   VecRestoreArray(y,&yy);
124:   PetscLogFlops(45.0*m);
125:   return 0;
126: }
127: static PetscErrorCode PCApply_PBJacobi_6(PC pc,Vec x,Vec y)
128: {
129:   PC_PBJacobi     *jac = (PC_PBJacobi*)pc->data;
130:   PetscInt        i,m = jac->mbs;
131:   const MatScalar *diag = jac->diag;
132:   PetscScalar     x0,x1,x2,x3,x4,x5,*yy;
133:   const PetscScalar *xx;

135:   VecGetArrayRead(x,&xx);
136:   VecGetArray(y,&yy);
137:   for (i=0; i<m; i++) {
138:     x0 = xx[6*i]; x1 = xx[6*i+1]; x2 = xx[6*i+2]; x3 = xx[6*i+3]; x4 = xx[6*i+4]; x5 = xx[6*i+5];

140:     yy[6*i]   = diag[0]*x0 + diag[6]*x1  + diag[12]*x2  + diag[18]*x3 + diag[24]*x4 + diag[30]*x5;
141:     yy[6*i+1] = diag[1]*x0 + diag[7]*x1  + diag[13]*x2  + diag[19]*x3 + diag[25]*x4 + diag[31]*x5;
142:     yy[6*i+2] = diag[2]*x0 + diag[8]*x1  + diag[14]*x2  + diag[20]*x3 + diag[26]*x4 + diag[32]*x5;
143:     yy[6*i+3] = diag[3]*x0 + diag[9]*x1  + diag[15]*x2  + diag[21]*x3 + diag[27]*x4 + diag[33]*x5;
144:     yy[6*i+4] = diag[4]*x0 + diag[10]*x1 + diag[16]*x2  + diag[22]*x3 + diag[28]*x4 + diag[34]*x5;
145:     yy[6*i+5] = diag[5]*x0 + diag[11]*x1 + diag[17]*x2  + diag[23]*x3 + diag[29]*x4 + diag[35]*x5;
146:     diag     += 36;
147:   }
148:   VecRestoreArrayRead(x,&xx);
149:   VecRestoreArray(y,&yy);
150:   PetscLogFlops(66.0*m);
151:   return 0;
152: }
153: static PetscErrorCode PCApply_PBJacobi_7(PC pc,Vec x,Vec y)
154: {
155:   PC_PBJacobi     *jac = (PC_PBJacobi*)pc->data;
156:   PetscInt        i,m = jac->mbs;
157:   const MatScalar *diag = jac->diag;
158:   PetscScalar     x0,x1,x2,x3,x4,x5,x6,*yy;
159:   const PetscScalar *xx;

161:   VecGetArrayRead(x,&xx);
162:   VecGetArray(y,&yy);
163:   for (i=0; i<m; i++) {
164:     x0 = xx[7*i]; x1 = xx[7*i+1]; x2 = xx[7*i+2]; x3 = xx[7*i+3]; x4 = xx[7*i+4]; x5 = xx[7*i+5]; x6 = xx[7*i+6];

166:     yy[7*i]   = diag[0]*x0 + diag[7]*x1  + diag[14]*x2  + diag[21]*x3 + diag[28]*x4 + diag[35]*x5 + diag[42]*x6;
167:     yy[7*i+1] = diag[1]*x0 + diag[8]*x1  + diag[15]*x2  + diag[22]*x3 + diag[29]*x4 + diag[36]*x5 + diag[43]*x6;
168:     yy[7*i+2] = diag[2]*x0 + diag[9]*x1  + diag[16]*x2  + diag[23]*x3 + diag[30]*x4 + diag[37]*x5 + diag[44]*x6;
169:     yy[7*i+3] = diag[3]*x0 + diag[10]*x1 + diag[17]*x2  + diag[24]*x3 + diag[31]*x4 + diag[38]*x5 + diag[45]*x6;
170:     yy[7*i+4] = diag[4]*x0 + diag[11]*x1 + diag[18]*x2  + diag[25]*x3 + diag[32]*x4 + diag[39]*x5 + diag[46]*x6;
171:     yy[7*i+5] = diag[5]*x0 + diag[12]*x1 + diag[19]*x2  + diag[26]*x3 + diag[33]*x4 + diag[40]*x5 + diag[47]*x6;
172:     yy[7*i+6] = diag[6]*x0 + diag[13]*x1 + diag[20]*x2  + diag[27]*x3 + diag[34]*x4 + diag[41]*x5 + diag[48]*x6;
173:     diag     += 49;
174:   }
175:   VecRestoreArrayRead(x,&xx);
176:   VecRestoreArray(y,&yy);
177:   PetscLogFlops(91.0*m); /* 2*bs2 - bs */
178:   return 0;
179: }
180: static PetscErrorCode PCApply_PBJacobi_N(PC pc,Vec x,Vec y)
181: {
182:   PC_PBJacobi       *jac = (PC_PBJacobi*)pc->data;
183:   PetscInt          i,ib,jb;
184:   const PetscInt    m = jac->mbs;
185:   const PetscInt    bs = jac->bs;
186:   const MatScalar   *diag = jac->diag;
187:   PetscScalar       *yy;
188:   const PetscScalar *xx;

190:   VecGetArrayRead(x,&xx);
191:   VecGetArray(y,&yy);
192:   for (i=0; i<m; i++) {
193:     for (ib=0; ib<bs; ib++) {
194:       PetscScalar rowsum = 0;
195:       for (jb=0; jb<bs; jb++) {
196:         rowsum += diag[ib+jb*bs] * xx[bs*i+jb];
197:       }
198:       yy[bs*i+ib] = rowsum;
199:     }
200:     diag += bs*bs;
201:   }
202:   VecRestoreArrayRead(x,&xx);
203:   VecRestoreArray(y,&yy);
204:   PetscLogFlops((2.0*bs*bs-bs)*m); /* 2*bs2 - bs */
205:   return 0;
206: }

208: static PetscErrorCode PCApplyTranspose_PBJacobi_N(PC pc,Vec x,Vec y)
209: {
210:   PC_PBJacobi       *jac = (PC_PBJacobi*)pc->data;
211:   PetscInt          i,j,k,m = jac->mbs,bs=jac->bs;
212:   const MatScalar   *diag = jac->diag;
213:   const PetscScalar *xx;
214:   PetscScalar       *yy;

216:   VecGetArrayRead(x,&xx);
217:   VecGetArray(y,&yy);
218:   for (i=0; i<m; i++) {
219:     for (j=0; j<bs; j++) yy[i*bs+j] = 0.;
220:     for (j=0; j<bs; j++) {
221:       for (k=0; k<bs; k++) {
222:         yy[i*bs+k] += diag[k*bs+j]*xx[i*bs+j];
223:       }
224:     }
225:     diag += bs*bs;
226:   }
227:   VecRestoreArrayRead(x,&xx);
228:   VecRestoreArray(y,&yy);
229:   PetscLogFlops(m*bs*(2*bs-1));
230:   return 0;
231: }

233: /* -------------------------------------------------------------------------- */
234: static PetscErrorCode PCSetUp_PBJacobi(PC pc)
235: {
236:   PC_PBJacobi    *jac = (PC_PBJacobi*)pc->data;
237:   Mat            A = pc->pmat;
238:   MatFactorError err;
239:   PetscInt       nlocal;

241:   MatInvertBlockDiagonal(A,&jac->diag);
242:   MatFactorGetError(A,&err);
243:   if (err) pc->failedreason = (PCFailedReason)err;

245:   MatGetBlockSize(A,&jac->bs);
246:   MatGetLocalSize(A,&nlocal,NULL);
247:   jac->mbs = nlocal/jac->bs;
248:   switch (jac->bs) {
249:   case 1:
250:     pc->ops->apply = PCApply_PBJacobi_1;
251:     break;
252:   case 2:
253:     pc->ops->apply = PCApply_PBJacobi_2;
254:     break;
255:   case 3:
256:     pc->ops->apply = PCApply_PBJacobi_3;
257:     break;
258:   case 4:
259:     pc->ops->apply = PCApply_PBJacobi_4;
260:     break;
261:   case 5:
262:     pc->ops->apply = PCApply_PBJacobi_5;
263:     break;
264:   case 6:
265:     pc->ops->apply = PCApply_PBJacobi_6;
266:     break;
267:   case 7:
268:     pc->ops->apply = PCApply_PBJacobi_7;
269:     break;
270:   default:
271:     pc->ops->apply = PCApply_PBJacobi_N;
272:     break;
273:   }
274:   pc->ops->applytranspose = PCApplyTranspose_PBJacobi_N;
275:   return 0;
276: }
277: /* -------------------------------------------------------------------------- */
278: static PetscErrorCode PCDestroy_PBJacobi(PC pc)
279: {
280:   /*
281:       Free the private data structure that was hanging off the PC
282:   */
283:   PetscFree(pc->data);
284:   return 0;
285: }

287: static PetscErrorCode PCView_PBJacobi(PC pc,PetscViewer viewer)
288: {
289:   PC_PBJacobi    *jac = (PC_PBJacobi*)pc->data;
290:   PetscBool      iascii;

292:   PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
293:   if (iascii) {
294:     PetscViewerASCIIPrintf(viewer,"  point-block size %D\n",jac->bs);
295:   }
296:   return 0;
297: }

299: /* -------------------------------------------------------------------------- */
300: /*MC
301:      PCPBJACOBI - Point block Jacobi preconditioner

303:    Notes:
304:      See PCJACOBI for point Jacobi preconditioning, PCVPBJACOBI for variable size point block Jacobi and PCBJACOBI for large blocks

306:      This works for AIJ and BAIJ matrices and uses the blocksize provided to the matrix

308:      Uses dense LU factorization with partial pivoting to invert the blocks; if a zero pivot
309:      is detected a PETSc error is generated.

311:    Developer Notes:
312:      This should support the PCSetErrorIfFailure() flag set to PETSC_TRUE to allow
313:      the factorization to continue even after a zero pivot is found resulting in a Nan and hence
314:      terminating KSP with a KSP_DIVERGED_NANORIF allowing
315:      a nonlinear solver/ODE integrator to recover without stopping the program as currently happens.

317:      Perhaps should provide an option that allows generation of a valid preconditioner
318:      even if a block is singular as the PCJACOBI does.

320:    Level: beginner

322: .seealso:  PCCreate(), PCSetType(), PCType (for list of available types), PC, PCJACOBI, PCVPBJACOBI, PCBJACOBI

324: M*/

326: PETSC_EXTERN PetscErrorCode PCCreate_PBJacobi(PC pc)
327: {
328:   PC_PBJacobi    *jac;

330:   /*
331:      Creates the private data structure for this preconditioner and
332:      attach it to the PC object.
333:   */
334:   PetscNewLog(pc,&jac);
335:   pc->data = (void*)jac;

337:   /*
338:      Initialize the pointers to vectors to ZERO; these will be used to store
339:      diagonal entries of the matrix for fast preconditioner application.
340:   */
341:   jac->diag = NULL;

343:   /*
344:       Set the pointers for the functions that are provided above.
345:       Now when the user-level routines (such as PCApply(), PCDestroy(), etc.)
346:       are called, they will automatically call these functions.  Note we
347:       choose not to provide a couple of these functions since they are
348:       not needed.
349:   */
350:   pc->ops->apply               = NULL; /*set depending on the block size */
351:   pc->ops->applytranspose      = NULL;
352:   pc->ops->setup               = PCSetUp_PBJacobi;
353:   pc->ops->destroy             = PCDestroy_PBJacobi;
354:   pc->ops->setfromoptions      = NULL;
355:   pc->ops->view                = PCView_PBJacobi;
356:   pc->ops->applyrichardson     = NULL;
357:   pc->ops->applysymmetricleft  = NULL;
358:   pc->ops->applysymmetricright = NULL;
359:   return 0;
360: }