Actual source code: pch2opus.c

  1: #include <petsc/private/pcimpl.h>
  2: #include <petsc/private/matimpl.h>
  3: #include <h2opusconf.h>

  5: /* Use GPU only if H2OPUS is configured for GPU */
  6: #if defined(PETSC_HAVE_CUDA) && defined(H2OPUS_USE_GPU)
  7: #define PETSC_H2OPUS_USE_GPU
  8: #endif

 10: typedef struct {
 11:   Mat         A;
 12:   Mat         M;
 13:   PetscScalar s0;

 15:   /* sampler for Newton-Schultz */
 16:   Mat      S;
 17:   PetscInt hyperorder;
 18:   Vec      wns[4];
 19:   Mat      wnsmat[4];

 21:   /* convergence testing */
 22:   Mat       T;
 23:   Vec       w;
 24:   PetscBool testMA;

 26:   /* Support for PCSetCoordinates */
 27:   PetscInt  sdim;
 28:   PetscInt  nlocc;
 29:   PetscReal *coords;

 31:   /* Newton-Schultz customization */
 32:   PetscInt  maxits;
 33:   PetscReal rtol,atol;
 34:   PetscBool monitor;
 35:   PetscBool useapproximatenorms;
 36:   NormType  normtype;

 38:   /* Used when pmat != MATH2OPUS */
 39:   PetscReal eta;
 40:   PetscInt  leafsize;
 41:   PetscInt  max_rank;
 42:   PetscInt  bs;
 43:   PetscReal mrtol;

 45:   PetscBool boundtocpu;
 46: } PC_H2OPUS;

 48: PETSC_EXTERN PetscErrorCode MatNorm_H2OPUS(Mat,NormType,PetscReal*);

 50: static PetscErrorCode PCReset_H2OPUS(PC pc)
 51: {
 52:   PC_H2OPUS *pch2opus = (PC_H2OPUS*)pc->data;

 54:   pch2opus->sdim  = 0;
 55:   pch2opus->nlocc = 0;
 56:   PetscFree(pch2opus->coords);
 57:   MatDestroy(&pch2opus->A);
 58:   MatDestroy(&pch2opus->M);
 59:   MatDestroy(&pch2opus->T);
 60:   VecDestroy(&pch2opus->w);
 61:   MatDestroy(&pch2opus->S);
 62:   VecDestroy(&pch2opus->wns[0]);
 63:   VecDestroy(&pch2opus->wns[1]);
 64:   VecDestroy(&pch2opus->wns[2]);
 65:   VecDestroy(&pch2opus->wns[3]);
 66:   MatDestroy(&pch2opus->wnsmat[0]);
 67:   MatDestroy(&pch2opus->wnsmat[1]);
 68:   MatDestroy(&pch2opus->wnsmat[2]);
 69:   MatDestroy(&pch2opus->wnsmat[3]);
 70:   return 0;
 71: }

 73: static PetscErrorCode PCSetCoordinates_H2OPUS(PC pc, PetscInt sdim, PetscInt nlocc, PetscReal *coords)
 74: {
 75:   PC_H2OPUS *pch2opus = (PC_H2OPUS*)pc->data;
 76:   PetscBool  reset    = PETSC_TRUE;

 78:   if (pch2opus->sdim && sdim == pch2opus->sdim && nlocc == pch2opus->nlocc) {
 79:     PetscArraycmp(pch2opus->coords,coords,sdim*nlocc,&reset);
 80:     reset = (PetscBool)!reset;
 81:   }
 82:   MPIU_Allreduce(MPI_IN_PLACE,&reset,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));
 83:   if (reset) {
 84:     PCReset_H2OPUS(pc);
 85:     PetscMalloc1(sdim*nlocc,&pch2opus->coords);
 86:     PetscArraycpy(pch2opus->coords,coords,sdim*nlocc);
 87:     pch2opus->sdim  = sdim;
 88:     pch2opus->nlocc = nlocc;
 89:   }
 90:   return 0;
 91: }

 93: static PetscErrorCode PCDestroy_H2OPUS(PC pc)
 94: {
 95:   PCReset_H2OPUS(pc);
 96:   PetscObjectComposeFunction((PetscObject)pc,"PCSetCoordinates_C",NULL);
 97:   PetscFree(pc->data);
 98:   return 0;
 99: }

101: static PetscErrorCode PCSetFromOptions_H2OPUS(PetscOptionItems *PetscOptionsObject,PC pc)
102: {
103:   PC_H2OPUS *pch2opus = (PC_H2OPUS*)pc->data;

105:   PetscOptionsHead(PetscOptionsObject,"H2OPUS options");
106:   PetscOptionsInt("-pc_h2opus_maxits","Maximum number of iterations for Newton-Schultz",NULL,pch2opus->maxits,&pch2opus->maxits,NULL);
107:   PetscOptionsBool("-pc_h2opus_monitor","Monitor Newton-Schultz convergence",NULL,pch2opus->monitor,&pch2opus->monitor,NULL);
108:   PetscOptionsReal("-pc_h2opus_atol","Absolute tolerance",NULL,pch2opus->atol,&pch2opus->atol,NULL);
109:   PetscOptionsReal("-pc_h2opus_rtol","Relative tolerance",NULL,pch2opus->rtol,&pch2opus->rtol,NULL);
110:   PetscOptionsEnum("-pc_h2opus_norm_type","Norm type for convergence monitoring",NULL,NormTypes,(PetscEnum)pch2opus->normtype,(PetscEnum*)&pch2opus->normtype,NULL);
111:   PetscOptionsInt("-pc_h2opus_hyperorder","Hyper power order of sampling",NULL,pch2opus->hyperorder,&pch2opus->hyperorder,NULL);
112:   PetscOptionsInt("-pc_h2opus_leafsize","Leaf size when constructed from kernel",NULL,pch2opus->leafsize,&pch2opus->leafsize,NULL);
113:   PetscOptionsReal("-pc_h2opus_eta","Admissibility condition tolerance",NULL,pch2opus->eta,&pch2opus->eta,NULL);
114:   PetscOptionsInt("-pc_h2opus_maxrank","Maximum rank when constructed from matvecs",NULL,pch2opus->max_rank,&pch2opus->max_rank,NULL);
115:   PetscOptionsInt("-pc_h2opus_samples","Number of samples to be taken concurrently when constructing from matvecs",NULL,pch2opus->bs,&pch2opus->bs,NULL);
116:   PetscOptionsReal("-pc_h2opus_mrtol","Relative tolerance for construction from sampling",NULL,pch2opus->mrtol,&pch2opus->mrtol,NULL);
117:   PetscOptionsTail();
118:   return 0;
119: }

121: typedef struct {
122:   Mat A;
123:   Mat M;
124:   Vec w;
125: } AAtCtx;

127: static PetscErrorCode MatMult_AAt(Mat A, Vec x, Vec y)
128: {
129:   AAtCtx *aat;

131:   MatShellGetContext(A,(void*)&aat);
132:   /* MatMultTranspose(aat->M,x,aat->w); */
133:   MatMultTranspose(aat->A,x,aat->w);
134:   MatMult(aat->A,aat->w,y);
135:   return 0;
136: }

138: static PetscErrorCode PCH2OpusSetUpInit(PC pc)
139: {
140:   PC_H2OPUS *pch2opus = (PC_H2OPUS*)pc->data;
141:   Mat        A        = pc->useAmat ? pc->mat : pc->pmat, AAt;
142:   PetscInt   M,m;
143:   VecType    vtype;
144:   PetscReal  n;
145:   AAtCtx     aat;

147:   aat.A = A;
148:   aat.M = pch2opus->M; /* unused so far */
149:   MatCreateVecs(A,NULL,&aat.w);
150:   MatGetSize(A,&M,NULL);
151:   MatGetLocalSize(A,&m,NULL);
152:   MatCreateShell(PetscObjectComm((PetscObject)A),m,m,M,M,&aat,&AAt);
153:   MatBindToCPU(AAt,pch2opus->boundtocpu);
154:   MatShellSetOperation(AAt,MATOP_MULT,(void (*)(void))MatMult_AAt);
155:   MatShellSetOperation(AAt,MATOP_MULT_TRANSPOSE,(void (*)(void))MatMult_AAt);
156:   MatShellSetOperation(AAt,MATOP_NORM,(void (*)(void))MatNorm_H2OPUS);
157:   MatGetVecType(A,&vtype);
158:   MatShellSetVecType(AAt,vtype);
159:   MatNorm(AAt,NORM_1,&n);
160:   pch2opus->s0 = 1./n;
161:   VecDestroy(&aat.w);
162:   MatDestroy(&AAt);
163:   return 0;
164: }

166: static PetscErrorCode PCApplyKernel_H2OPUS(PC pc, Vec x, Vec y, PetscBool t)
167: {
168:   PC_H2OPUS *pch2opus = (PC_H2OPUS*)pc->data;

170:   if (t) MatMultTranspose(pch2opus->M,x,y);
171:   else  MatMult(pch2opus->M,x,y);
172:   return 0;
173: }

175: static PetscErrorCode PCApplyMatKernel_H2OPUS(PC pc, Mat X, Mat Y, PetscBool t)
176: {
177:   PC_H2OPUS *pch2opus = (PC_H2OPUS*)pc->data;

179:   if (t) MatTransposeMatMult(pch2opus->M,X,MAT_REUSE_MATRIX,PETSC_DEFAULT,&Y);
180:   else   MatMatMult(pch2opus->M,X,MAT_REUSE_MATRIX,PETSC_DEFAULT,&Y);
181:   return 0;
182: }

184: static PetscErrorCode PCApplyMat_H2OPUS(PC pc, Mat X, Mat Y)
185: {
186:   PCApplyMatKernel_H2OPUS(pc,X,Y,PETSC_FALSE);
187:   return 0;
188: }

190: static PetscErrorCode PCApplyTransposeMat_H2OPUS(PC pc, Mat X, Mat Y)
191: {
192:   PCApplyMatKernel_H2OPUS(pc,X,Y,PETSC_TRUE);
193:   return 0;
194: }

196: static PetscErrorCode PCApply_H2OPUS(PC pc, Vec x, Vec y)
197: {
198:   PCApplyKernel_H2OPUS(pc,x,y,PETSC_FALSE);
199:   return 0;
200: }

202: static PetscErrorCode PCApplyTranspose_H2OPUS(PC pc, Vec x, Vec y)
203: {
204:   PCApplyKernel_H2OPUS(pc,x,y,PETSC_TRUE);
205:   return 0;
206: }

208: /* used to test the norm of (M^-1 A - I) */
209: static PetscErrorCode MatMultKernel_MAmI(Mat M, Vec x, Vec y, PetscBool t)
210: {
211:   PC         pc;
212:   Mat        A;
213:   PC_H2OPUS *pch2opus;
214:   PetscBool  sideleft = PETSC_TRUE;

216:   MatShellGetContext(M,(void*)&pc);
217:   pch2opus = (PC_H2OPUS*)pc->data;
218:   if (!pch2opus->w) MatCreateVecs(pch2opus->M,&pch2opus->w,NULL);
219:   A = pch2opus->A;
220:   VecBindToCPU(pch2opus->w,pch2opus->boundtocpu);
221:   if (t) {
222:     if (sideleft) {
223:       PCApplyTranspose_H2OPUS(pc,x,pch2opus->w);
224:       MatMultTranspose(A,pch2opus->w,y);
225:     } else {
226:       MatMultTranspose(A,x,pch2opus->w);
227:       PCApplyTranspose_H2OPUS(pc,pch2opus->w,y);
228:     }
229:   } else {
230:     if (sideleft) {
231:       MatMult(A,x,pch2opus->w);
232:       PCApply_H2OPUS(pc,pch2opus->w,y);
233:     } else {
234:       PCApply_H2OPUS(pc,x,pch2opus->w);
235:       MatMult(A,pch2opus->w,y);
236:     }
237:   }
238:   if (!pch2opus->testMA) VecAXPY(y,-1.0,x);
239:   return 0;
240: }

242: static PetscErrorCode MatMult_MAmI(Mat A, Vec x, Vec y)
243: {
244:   MatMultKernel_MAmI(A,x,y,PETSC_FALSE);
245:   return 0;
246: }

248: static PetscErrorCode MatMultTranspose_MAmI(Mat A, Vec x, Vec y)
249: {
250:   MatMultKernel_MAmI(A,x,y,PETSC_TRUE);
251:   return 0;
252: }

254: /* HyperPower kernel:
255: Y = R = x
256: for i = 1 . . . l - 1 do
257:   R = (I - A * Xk) * R
258:   Y = Y + R
259: Y = Xk * Y
260: */
261: static PetscErrorCode MatMultKernel_Hyper(Mat M, Vec x, Vec y, PetscBool t)
262: {
263:   PC         pc;
264:   Mat        A;
265:   PC_H2OPUS *pch2opus;

267:   MatShellGetContext(M,(void*)&pc);
268:   pch2opus = (PC_H2OPUS*)pc->data;
269:   A = pch2opus->A;
270:   MatCreateVecs(pch2opus->M,pch2opus->wns[0] ? NULL : &pch2opus->wns[0],pch2opus->wns[1] ? NULL : &pch2opus->wns[1]);
271:   MatCreateVecs(pch2opus->M,pch2opus->wns[2] ? NULL : &pch2opus->wns[2],pch2opus->wns[3] ? NULL : &pch2opus->wns[3]);
272:   VecBindToCPU(pch2opus->wns[0],pch2opus->boundtocpu);
273:   VecBindToCPU(pch2opus->wns[1],pch2opus->boundtocpu);
274:   VecBindToCPU(pch2opus->wns[2],pch2opus->boundtocpu);
275:   VecBindToCPU(pch2opus->wns[3],pch2opus->boundtocpu);
276:   VecCopy(x,pch2opus->wns[0]);
277:   VecCopy(x,pch2opus->wns[3]);
278:   if (t) {
279:     for (PetscInt i=0;i<pch2opus->hyperorder-1;i++) {
280:       MatMultTranspose(A,pch2opus->wns[0],pch2opus->wns[1]);
281:       PCApplyTranspose_H2OPUS(pc,pch2opus->wns[1],pch2opus->wns[2]);
282:       VecAXPY(pch2opus->wns[0],-1.,pch2opus->wns[2]);
283:       VecAXPY(pch2opus->wns[3], 1.,pch2opus->wns[0]);
284:     }
285:     PCApplyTranspose_H2OPUS(pc,pch2opus->wns[3],y);
286:   } else {
287:     for (PetscInt i=0;i<pch2opus->hyperorder-1;i++) {
288:       PCApply_H2OPUS(pc,pch2opus->wns[0],pch2opus->wns[1]);
289:       MatMult(A,pch2opus->wns[1],pch2opus->wns[2]);
290:       VecAXPY(pch2opus->wns[0],-1.,pch2opus->wns[2]);
291:       VecAXPY(pch2opus->wns[3], 1.,pch2opus->wns[0]);
292:     }
293:     PCApply_H2OPUS(pc,pch2opus->wns[3],y);
294:   }
295:   return 0;
296: }

298: static PetscErrorCode MatMult_Hyper(Mat M, Vec x, Vec y)
299: {
300:   MatMultKernel_Hyper(M,x,y,PETSC_FALSE);
301:   return 0;
302: }

304: static PetscErrorCode MatMultTranspose_Hyper(Mat M, Vec x, Vec y)
305: {
306:   MatMultKernel_Hyper(M,x,y,PETSC_TRUE);
307:   return 0;
308: }

310: /* Hyper power kernel, MatMat version */
311: static PetscErrorCode MatMatMultKernel_Hyper(Mat M, Mat X, Mat Y, PetscBool t)
312: {
313:   PC         pc;
314:   Mat        A;
315:   PC_H2OPUS *pch2opus;
316:   PetscInt   i;

318:   MatShellGetContext(M,(void*)&pc);
319:   pch2opus = (PC_H2OPUS*)pc->data;
320:   A = pch2opus->A;
321:   if (pch2opus->wnsmat[0] && pch2opus->wnsmat[0]->cmap->N != X->cmap->N) {
322:     MatDestroy(&pch2opus->wnsmat[0]);
323:     MatDestroy(&pch2opus->wnsmat[1]);
324:   }
325:   if (!pch2opus->wnsmat[0]) {
326:     MatDuplicate(X,MAT_SHARE_NONZERO_PATTERN,&pch2opus->wnsmat[0]);
327:     MatDuplicate(Y,MAT_SHARE_NONZERO_PATTERN,&pch2opus->wnsmat[1]);
328:   }
329:   if (pch2opus->wnsmat[2] && pch2opus->wnsmat[2]->cmap->N != X->cmap->N) {
330:     MatDestroy(&pch2opus->wnsmat[2]);
331:     MatDestroy(&pch2opus->wnsmat[3]);
332:   }
333:   if (!pch2opus->wnsmat[2]) {
334:     MatDuplicate(X,MAT_SHARE_NONZERO_PATTERN,&pch2opus->wnsmat[2]);
335:     MatDuplicate(Y,MAT_SHARE_NONZERO_PATTERN,&pch2opus->wnsmat[3]);
336:   }
337:   MatCopy(X,pch2opus->wnsmat[0],SAME_NONZERO_PATTERN);
338:   MatCopy(X,pch2opus->wnsmat[3],SAME_NONZERO_PATTERN);
339:   if (t) {
340:     for (i=0;i<pch2opus->hyperorder-1;i++) {
341:       MatTransposeMatMult(A,pch2opus->wnsmat[0],MAT_REUSE_MATRIX,PETSC_DEFAULT,&pch2opus->wnsmat[1]);
342:       PCApplyTransposeMat_H2OPUS(pc,pch2opus->wnsmat[1],pch2opus->wnsmat[2]);
343:       MatAXPY(pch2opus->wnsmat[0],-1.,pch2opus->wnsmat[2],SAME_NONZERO_PATTERN);
344:       MatAXPY(pch2opus->wnsmat[3],1.,pch2opus->wnsmat[0],SAME_NONZERO_PATTERN);
345:     }
346:     PCApplyTransposeMat_H2OPUS(pc,pch2opus->wnsmat[3],Y);
347:   } else {
348:     for (i=0;i<pch2opus->hyperorder-1;i++) {
349:       PCApplyMat_H2OPUS(pc,pch2opus->wnsmat[0],pch2opus->wnsmat[1]);
350:       MatMatMult(A,pch2opus->wnsmat[1],MAT_REUSE_MATRIX,PETSC_DEFAULT,&pch2opus->wnsmat[2]);
351:       MatAXPY(pch2opus->wnsmat[0],-1.,pch2opus->wnsmat[2],SAME_NONZERO_PATTERN);
352:       MatAXPY(pch2opus->wnsmat[3],1.,pch2opus->wnsmat[0],SAME_NONZERO_PATTERN);
353:     }
354:     PCApplyMat_H2OPUS(pc,pch2opus->wnsmat[3],Y);
355:   }
356:   return 0;
357: }

359: static PetscErrorCode MatMatMultNumeric_Hyper(Mat M, Mat X, Mat Y,void *ctx)
360: {
361:   MatMatMultKernel_Hyper(M,X,Y,PETSC_FALSE);
362:   return 0;
363: }

365: /* Basic Newton-Schultz sampler: (2 * I - M * A)*M */
366: static PetscErrorCode MatMultKernel_NS(Mat M, Vec x, Vec y, PetscBool t)
367: {
368:   PC         pc;
369:   Mat        A;
370:   PC_H2OPUS *pch2opus;

372:   MatShellGetContext(M,(void*)&pc);
373:   pch2opus = (PC_H2OPUS*)pc->data;
374:   A = pch2opus->A;
375:   MatCreateVecs(pch2opus->M,pch2opus->wns[0] ? NULL : &pch2opus->wns[0],pch2opus->wns[1] ? NULL : &pch2opus->wns[1]);
376:   VecBindToCPU(pch2opus->wns[0],pch2opus->boundtocpu);
377:   VecBindToCPU(pch2opus->wns[1],pch2opus->boundtocpu);
378:   if (t) {
379:     PCApplyTranspose_H2OPUS(pc,x,y);
380:     MatMultTranspose(A,y,pch2opus->wns[1]);
381:     PCApplyTranspose_H2OPUS(pc,pch2opus->wns[1],pch2opus->wns[0]);
382:     VecAXPBY(y,-1.,2.,pch2opus->wns[0]);
383:   } else {
384:     PCApply_H2OPUS(pc,x,y);
385:     MatMult(A,y,pch2opus->wns[0]);
386:     PCApply_H2OPUS(pc,pch2opus->wns[0],pch2opus->wns[1]);
387:     VecAXPBY(y,-1.,2.,pch2opus->wns[1]);
388:   }
389:   return 0;
390: }

392: static PetscErrorCode MatMult_NS(Mat M, Vec x, Vec y)
393: {
394:   MatMultKernel_NS(M,x,y,PETSC_FALSE);
395:   return 0;
396: }

398: static PetscErrorCode MatMultTranspose_NS(Mat M, Vec x, Vec y)
399: {
400:   MatMultKernel_NS(M,x,y,PETSC_TRUE);
401:   return 0;
402: }

404: /* Basic Newton-Schultz sampler: (2 * I - M * A)*M, MatMat version */
405: static PetscErrorCode MatMatMultKernel_NS(Mat M, Mat X, Mat Y, PetscBool t)
406: {
407:   PC         pc;
408:   Mat        A;
409:   PC_H2OPUS *pch2opus;

411:   MatShellGetContext(M,(void*)&pc);
412:   pch2opus = (PC_H2OPUS*)pc->data;
413:   A = pch2opus->A;
414:   if (pch2opus->wnsmat[0] && pch2opus->wnsmat[0]->cmap->N != X->cmap->N) {
415:     MatDestroy(&pch2opus->wnsmat[0]);
416:     MatDestroy(&pch2opus->wnsmat[1]);
417:   }
418:   if (!pch2opus->wnsmat[0]) {
419:     MatDuplicate(X,MAT_SHARE_NONZERO_PATTERN,&pch2opus->wnsmat[0]);
420:     MatDuplicate(Y,MAT_SHARE_NONZERO_PATTERN,&pch2opus->wnsmat[1]);
421:   }
422:   if (t) {
423:     PCApplyTransposeMat_H2OPUS(pc,X,Y);
424:     MatTransposeMatMult(A,Y,MAT_REUSE_MATRIX,PETSC_DEFAULT,&pch2opus->wnsmat[1]);
425:     PCApplyTransposeMat_H2OPUS(pc,pch2opus->wnsmat[1],pch2opus->wnsmat[0]);
426:     MatScale(Y,2.);
427:     MatAXPY(Y,-1.,pch2opus->wnsmat[0],SAME_NONZERO_PATTERN);
428:   } else {
429:     PCApplyMat_H2OPUS(pc,X,Y);
430:     MatMatMult(A,Y,MAT_REUSE_MATRIX,PETSC_DEFAULT,&pch2opus->wnsmat[0]);
431:     PCApplyMat_H2OPUS(pc,pch2opus->wnsmat[0],pch2opus->wnsmat[1]);
432:     MatScale(Y,2.);
433:     MatAXPY(Y,-1.,pch2opus->wnsmat[1],SAME_NONZERO_PATTERN);
434:   }
435:   return 0;
436: }

438: static PetscErrorCode MatMatMultNumeric_NS(Mat M, Mat X, Mat Y, void *ctx)
439: {
440:   MatMatMultKernel_NS(M,X,Y,PETSC_FALSE);
441:   return 0;
442: }

444: static PetscErrorCode PCH2OpusSetUpSampler_Private(PC pc)
445: {
446:   PC_H2OPUS *pch2opus = (PC_H2OPUS*)pc->data;
447:   Mat        A        = pc->useAmat ? pc->mat : pc->pmat;

449:   if (!pch2opus->S) {
450:     PetscInt M,N,m,n;

452:     MatGetSize(A,&M,&N);
453:     MatGetLocalSize(A,&m,&n);
454:     MatCreateShell(PetscObjectComm((PetscObject)A),m,n,M,N,pc,&pch2opus->S);
455:     MatSetBlockSizesFromMats(pch2opus->S,A,A);
456: #if defined(PETSC_H2OPUS_USE_GPU)
457:     MatShellSetVecType(pch2opus->S,VECCUDA);
458: #endif
459:   }
460:   if (pch2opus->hyperorder >= 2) {
461:     MatShellSetOperation(pch2opus->S,MATOP_MULT,(void (*)(void))MatMult_Hyper);
462:     MatShellSetOperation(pch2opus->S,MATOP_MULT_TRANSPOSE,(void (*)(void))MatMultTranspose_Hyper);
463:     MatShellSetMatProductOperation(pch2opus->S,MATPRODUCT_AB,NULL,MatMatMultNumeric_Hyper,NULL,MATDENSE,MATDENSE);
464:     MatShellSetMatProductOperation(pch2opus->S,MATPRODUCT_AB,NULL,MatMatMultNumeric_Hyper,NULL,MATDENSECUDA,MATDENSECUDA);
465:   } else {
466:     MatShellSetOperation(pch2opus->S,MATOP_MULT,(void (*)(void))MatMult_NS);
467:     MatShellSetOperation(pch2opus->S,MATOP_MULT_TRANSPOSE,(void (*)(void))MatMultTranspose_NS);
468:     MatShellSetMatProductOperation(pch2opus->S,MATPRODUCT_AB,NULL,MatMatMultNumeric_NS,NULL,MATDENSE,MATDENSE);
469:     MatShellSetMatProductOperation(pch2opus->S,MATPRODUCT_AB,NULL,MatMatMultNumeric_NS,NULL,MATDENSECUDA,MATDENSECUDA);
470:   }
471:   MatPropagateSymmetryOptions(A,pch2opus->S);
472:   MatBindToCPU(pch2opus->S,pch2opus->boundtocpu);
473:   /* XXX */
474:   MatSetOption(pch2opus->S,MAT_SYMMETRIC,PETSC_TRUE);
475:   return 0;
476: }

478: static PetscErrorCode PCSetUp_H2OPUS(PC pc)
479: {
480:   PC_H2OPUS *pch2opus  = (PC_H2OPUS*)pc->data;
481:   Mat        A         = pc->useAmat ? pc->mat : pc->pmat;
482:   NormType   norm      = pch2opus->normtype;
483:   PetscReal  initerr   = 0.0,err;
484:   PetscReal  initerrMA = 0.0,errMA;
485:   PetscBool  ish2opus;

487:   if (!pch2opus->T) {
488:     PetscInt    M,N,m,n;
489:     const char *prefix;

491:     PCGetOptionsPrefix(pc,&prefix);
492:     MatGetSize(A,&M,&N);
493:     MatGetLocalSize(A,&m,&n);
494:     MatCreateShell(PetscObjectComm((PetscObject)pc->pmat),m,n,M,N,pc,&pch2opus->T);
495:     MatSetBlockSizesFromMats(pch2opus->T,A,A);
496:     MatShellSetOperation(pch2opus->T,MATOP_MULT,(void (*)(void))MatMult_MAmI);
497:     MatShellSetOperation(pch2opus->T,MATOP_MULT_TRANSPOSE,(void (*)(void))MatMultTranspose_MAmI);
498:     MatShellSetOperation(pch2opus->T,MATOP_NORM,(void (*)(void))MatNorm_H2OPUS);
499: #if defined(PETSC_H2OPUS_USE_GPU)
500:     MatShellSetVecType(pch2opus->T,VECCUDA);
501: #endif
502:     PetscLogObjectParent((PetscObject)pc,(PetscObject)pch2opus->T);
503:     MatSetOptionsPrefix(pch2opus->T,prefix);
504:     MatAppendOptionsPrefix(pch2opus->T,"pc_h2opus_est_");
505:   }
506:   MatDestroy(&pch2opus->A);
507:   PetscObjectTypeCompare((PetscObject)A,MATH2OPUS,&ish2opus);
508:   if (ish2opus) {
509:     PetscObjectReference((PetscObject)A);
510:     pch2opus->A = A;
511:   } else {
512:     const char *prefix;
513:     MatCreateH2OpusFromMat(A,pch2opus->sdim,pch2opus->coords,PETSC_FALSE,pch2opus->eta,pch2opus->leafsize,pch2opus->max_rank,pch2opus->bs,pch2opus->mrtol,&pch2opus->A);
514:     /* XXX */
515:     MatSetOption(pch2opus->A,MAT_SYMMETRIC,PETSC_TRUE);
516:     PCGetOptionsPrefix(pc,&prefix);
517:     MatSetOptionsPrefix(pch2opus->A,prefix);
518:     MatAppendOptionsPrefix(pch2opus->A,"pc_h2opus_init_");
519:     MatSetFromOptions(pch2opus->A);
520:     MatAssemblyBegin(pch2opus->A,MAT_FINAL_ASSEMBLY);
521:     MatAssemblyEnd(pch2opus->A,MAT_FINAL_ASSEMBLY);
522:     /* XXX */
523:     MatSetOption(pch2opus->A,MAT_SYMMETRIC,PETSC_TRUE);
524:   }
525: #if defined(PETSC_H2OPUS_USE_GPU)
526:   pch2opus->boundtocpu = pch2opus->A->boundtocpu;
527: #endif
528:   MatBindToCPU(pch2opus->T,pch2opus->boundtocpu);
529:   if (pch2opus->M) { /* see if we can reuse M as initial guess */
530:     PetscReal reuse;

532:     MatBindToCPU(pch2opus->M,pch2opus->boundtocpu);
533:     MatNorm(pch2opus->T,norm,&reuse);
534:     if (reuse >= 1.0) MatDestroy(&pch2opus->M);
535:   }
536:   if (!pch2opus->M) {
537:     const char *prefix;
538:     MatDuplicate(pch2opus->A,MAT_COPY_VALUES,&pch2opus->M);
539:     PCGetOptionsPrefix(pc,&prefix);
540:     MatSetOptionsPrefix(pch2opus->M,prefix);
541:     MatAppendOptionsPrefix(pch2opus->M,"pc_h2opus_inv_");
542:     MatSetFromOptions(pch2opus->M);
543:     PCH2OpusSetUpInit(pc);
544:     MatScale(pch2opus->M,pch2opus->s0);
545:   }
546:   /* A and M have the same h2 matrix structure, save on reordering routines */
547:   MatH2OpusSetNativeMult(pch2opus->A,PETSC_TRUE);
548:   MatH2OpusSetNativeMult(pch2opus->M,PETSC_TRUE);
549:   if (norm == NORM_1 || norm == NORM_2 || norm == NORM_INFINITY) {
550:     MatNorm(pch2opus->T,norm,&initerr);
551:     pch2opus->testMA = PETSC_TRUE;
552:     MatNorm(pch2opus->T,norm,&initerrMA);
553:     pch2opus->testMA = PETSC_FALSE;
554:   }
555:   if (PetscIsInfOrNanReal(initerr)) pc->failedreason = PC_SETUP_ERROR;
556:   err   = initerr;
557:   errMA = initerrMA;
558:   if (pch2opus->monitor) {
559:     PetscPrintf(PetscObjectComm((PetscObject)pc),"%D: ||M*A - I|| NORM%s abs %g rel %g\n",0,NormTypes[norm],(double)err,(double)(err/initerr));
560:     PetscPrintf(PetscObjectComm((PetscObject)pc),"%D: ||M*A||     NORM%s abs %g rel %g\n",0,NormTypes[norm],(double)errMA,(double)(errMA/initerrMA));
561:   }
562:   if (initerr > pch2opus->atol && !pc->failedreason) {
563:     PetscInt i;

565:     PCH2OpusSetUpSampler_Private(pc);
566:     for (i = 0; i < pch2opus->maxits; i++) {
567:       Mat         M;
568:       const char* prefix;

570:       MatDuplicate(pch2opus->M,MAT_SHARE_NONZERO_PATTERN,&M);
571:       MatGetOptionsPrefix(pch2opus->M,&prefix);
572:       MatSetOptionsPrefix(M,prefix);
573:       MatH2OpusSetSamplingMat(M,pch2opus->S,PETSC_DECIDE,PETSC_DECIDE);
574:       MatSetFromOptions(M);
575:       MatH2OpusSetNativeMult(M,PETSC_TRUE);
576:       MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);
577:       MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);

579:       MatDestroy(&pch2opus->M);
580:       pch2opus->M = M;
581:       if (norm == NORM_1 || norm == NORM_2 || norm == NORM_INFINITY) {
582:         MatNorm(pch2opus->T,norm,&err);
583:         pch2opus->testMA = PETSC_TRUE;
584:         MatNorm(pch2opus->T,norm,&errMA);
585:         pch2opus->testMA = PETSC_FALSE;
586:       }
587:       if (pch2opus->monitor) {
588:         PetscPrintf(PetscObjectComm((PetscObject)pc),"%D: ||M*A - I|| NORM%s abs %g rel %g\n",i+1,NormTypes[norm],(double)err,(double)(err/initerr));
589:         PetscPrintf(PetscObjectComm((PetscObject)pc),"%D: ||M*A||     NORM%s abs %g rel %g\n",i+1,NormTypes[norm],(double)errMA,(double)(errMA/initerrMA));
590:       }
591:       if (PetscIsInfOrNanReal(err)) pc->failedreason = PC_SETUP_ERROR;
592:       if (err < pch2opus->atol || err < pch2opus->rtol*initerr || pc->failedreason) break;
593:     }
594:   }
595:   /* cleanup setup workspace */
596:   MatH2OpusSetNativeMult(pch2opus->A,PETSC_FALSE);
597:   MatH2OpusSetNativeMult(pch2opus->M,PETSC_FALSE);
598:   VecDestroy(&pch2opus->wns[0]);
599:   VecDestroy(&pch2opus->wns[1]);
600:   VecDestroy(&pch2opus->wns[2]);
601:   VecDestroy(&pch2opus->wns[3]);
602:   MatDestroy(&pch2opus->wnsmat[0]);
603:   MatDestroy(&pch2opus->wnsmat[1]);
604:   MatDestroy(&pch2opus->wnsmat[2]);
605:   MatDestroy(&pch2opus->wnsmat[3]);
606:   return 0;
607: }

609: static PetscErrorCode PCView_H2OPUS(PC pc, PetscViewer viewer)
610: {
611:   PC_H2OPUS *pch2opus = (PC_H2OPUS*)pc->data;
612:   PetscBool  isascii;

614:   PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&isascii);
615:   if (isascii) {
616:     if (pch2opus->A && pch2opus->A != pc->mat && pch2opus->A != pc->pmat) {
617:       PetscViewerASCIIPrintf(viewer,"Initial approximation matrix\n");
618:       PetscViewerASCIIPushTab(viewer);
619:       PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_INFO_DETAIL);
620:       MatView(pch2opus->A,viewer);
621:       PetscViewerPopFormat(viewer);
622:       PetscViewerASCIIPopTab(viewer);
623:     }
624:     if (pch2opus->M) {
625:       PetscViewerASCIIPrintf(viewer,"Inner matrix constructed\n");
626:       PetscViewerASCIIPushTab(viewer);
627:       PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_INFO_DETAIL);
628:       MatView(pch2opus->M,viewer);
629:       PetscViewerPopFormat(viewer);
630:       PetscViewerASCIIPopTab(viewer);
631:     }
632:     PetscViewerASCIIPrintf(viewer,"Initial scaling: %g\n",pch2opus->s0);
633:   }
634:   return 0;
635: }

637: PETSC_EXTERN PetscErrorCode PCCreate_H2OPUS(PC pc)
638: {
639:   PC_H2OPUS *pch2opus;

641:   PetscNewLog(pc,&pch2opus);
642:   pc->data = (void*)pch2opus;

644:   pch2opus->atol       = 1.e-2;
645:   pch2opus->rtol       = 1.e-6;
646:   pch2opus->maxits     = 50;
647:   pch2opus->hyperorder = 1; /* defaults to basic NewtonSchultz */
648:   pch2opus->normtype   = NORM_2;

650:   /* these are needed when we are sampling the pmat */
651:   pch2opus->eta        = PETSC_DECIDE;
652:   pch2opus->leafsize   = PETSC_DECIDE;
653:   pch2opus->max_rank   = PETSC_DECIDE;
654:   pch2opus->bs         = PETSC_DECIDE;
655:   pch2opus->mrtol      = PETSC_DECIDE;
656: #if defined(PETSC_H2OPUS_USE_GPU)
657:   pch2opus->boundtocpu = PETSC_FALSE;
658: #else
659:   pch2opus->boundtocpu = PETSC_TRUE;
660: #endif
661:   pc->ops->destroy        = PCDestroy_H2OPUS;
662:   pc->ops->setup          = PCSetUp_H2OPUS;
663:   pc->ops->apply          = PCApply_H2OPUS;
664:   pc->ops->matapply       = PCApplyMat_H2OPUS;
665:   pc->ops->applytranspose = PCApplyTranspose_H2OPUS;
666:   pc->ops->reset          = PCReset_H2OPUS;
667:   pc->ops->setfromoptions = PCSetFromOptions_H2OPUS;
668:   pc->ops->view           = PCView_H2OPUS;

670:   PetscObjectComposeFunction((PetscObject)pc,"PCSetCoordinates_C",PCSetCoordinates_H2OPUS);
671:   return 0;
672: }