Actual source code: pipecgrr.c
 
   petsc-3.10.3 2018-12-18
   
  2:  #include <petsc/private/kspimpl.h>
  4: /*
  5:      KSPSetUp_PIPECGRR - Sets up the workspace needed by the PIPECGRR method.
  7:       This is called once, usually automatically by KSPSolve() or KSPSetUp()
  8:      but can be called directly by KSPSetUp()
  9: */
 10: static PetscErrorCode KSPSetUp_PIPECGRR(KSP ksp)
 11: {
 15:   /* get work vectors needed by PIPECGRR */
 16:   KSPSetWorkVecs(ksp,9);
 17:   return(0);
 18: }
 20: /*
 21:  KSPSolve_PIPECGRR - This routine actually applies the pipelined conjugate gradient method with automated residual replacement
 23:  Input Parameter:
 24:  .     ksp - the Krylov space object that was set to use conjugate gradient, by, for
 25:              example, KSPCreate(MPI_Comm,KSP *ksp); KSPSetType(ksp,KSPCG);
 26: */
 27: static PetscErrorCode  KSPSolve_PIPECGRR(KSP ksp)
 28: {
 30:   PetscInt       i = 0,replace = 0,totreplaces = 0,nsize;
 31:   PetscScalar    alpha = 0.0,beta = 0.0,gamma = 0.0,gammaold = 0.0,delta = 0.0,alphap = 0.0,betap = 0.0;
 32:   PetscReal      dp = 0.0,nsi = 0.0,sqn = 0.0,Anorm = 0.0,rnp = 0.0,pnp = 0.0,snp = 0.0,unp = 0.0,wnp = 0.0,xnp = 0.0,qnp = 0.0,znp = 0.0,mnz = 5.0,tol = PETSC_SQRT_MACHINE_EPSILON,eps = PETSC_MACHINE_EPSILON;
 33:   PetscReal      ds = 0.0,dz = 0.0,dx = 0.0,dpp = 0.0,dq = 0.0,dm = 0.0,du = 0.0,dw = 0.0,db = 0.0,errr = 0.0,errrprev = 0.0,errs = 0.0,errw = 0.0,errz = 0.0,errncr = 0.0,errncs = 0.0,errncw = 0.0,errncz = 0.0;
 34:   Vec            X,B,Z,P,W,Q,U,M,N,R,S;
 35:   Mat            Amat,Pmat;
 36:   PetscBool      diagonalscale;
 39:   PCGetDiagonalScale(ksp->pc,&diagonalscale);
 40:   if (diagonalscale) SETERRQ1(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"Krylov method %s does not support diagonal scaling",((PetscObject)ksp)->type_name);
 42:   X = ksp->vec_sol;
 43:   B = ksp->vec_rhs;
 44:   M = ksp->work[0];
 45:   Z = ksp->work[1];
 46:   P = ksp->work[2];
 47:   N = ksp->work[3];
 48:   W = ksp->work[4];
 49:   Q = ksp->work[5];
 50:   U = ksp->work[6];
 51:   R = ksp->work[7];
 52:   S = ksp->work[8];
 54:   PCGetOperators(ksp->pc,&Amat,&Pmat);
 56:   ksp->its = 0;
 57:   if (!ksp->guess_zero) {
 58:     KSP_MatMult(ksp,Amat,X,R);  /*  r <- b - Ax  */
 59:     VecAYPX(R,-1.0,B);
 60:   } else {
 61:     VecCopy(B,R);               /*  r <- b (x is 0)  */
 62:   }
 64:   KSP_PCApply(ksp,R,U);         /*  u <- Br  */
 66:   switch (ksp->normtype) {
 67:   case KSP_NORM_PRECONDITIONED:
 68:     VecNormBegin(U,NORM_2,&dp); /*  dp <- u'*u = e'*A'*B'*B*A'*e'  */
 69:     VecNormBegin(B,NORM_2,&db);
 70:     PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)U));
 71:     KSP_MatMult(ksp,Amat,U,W);  /*  w <- Au  */
 72:     VecNormEnd(U,NORM_2,&dp);
 73:     VecNormEnd(B,NORM_2,&db);
 74:     break;
 75:   case KSP_NORM_UNPRECONDITIONED:
 76:     VecNormBegin(R,NORM_2,&dp); /*  dp <- r'*r = e'*A'*A*e  */
 77:     VecNormBegin(B,NORM_2,&db);
 78:     PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)R));
 79:     KSP_MatMult(ksp,Amat,U,W);  /*  w <- Au  */
 80:     VecNormEnd(R,NORM_2,&dp);
 81:     VecNormEnd(B,NORM_2,&db);
 82:     break;
 83:   case KSP_NORM_NATURAL:
 84:     VecDotBegin(R,U,&gamma);    /*  gamma <- u'*r  */
 85:     VecNormBegin(B,NORM_2,&db);
 86:     PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)R));
 87:     KSP_MatMult(ksp,Amat,U,W);  /*  w <- Au  */
 88:     VecDotEnd(R,U,&gamma);
 89:     VecNormEnd(B,NORM_2,&db);
 90:     KSPCheckDot(ksp,gamma);
 91:     dp = PetscSqrtReal(PetscAbsScalar(gamma));       /*  dp <- r'*u = r'*B*r = e'*A'*B*A*e  */
 92:     break;
 93:   case KSP_NORM_NONE:
 94:     KSP_MatMult(ksp,Amat,U,W);
 95:     dp   = 0.0;
 96:     break;
 97:   default: SETERRQ1(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"%s",KSPNormTypes[ksp->normtype]);
 98:   }
 99:   KSPLogResidualHistory(ksp,dp);
100:   KSPMonitor(ksp,0,dp);
101:   ksp->rnorm = dp;
102:   (*ksp->converged)(ksp,0,dp,&ksp->reason,ksp->cnvP); /*  test for convergence  */
103:   if (ksp->reason) return(0);
105:   MatNorm(Amat,NORM_INFINITY,&Anorm);
106:   VecGetSize(B,&nsize);
107:   nsi = (PetscReal) nsize;
108:   sqn = PetscSqrtReal(nsi);
109: 
110:   do {
111:     if (i > 1) {
112:       pnp = dpp;
113:       snp = ds;
114:       qnp = dq;
115:       znp = dz;
116:     }
117:     if (i > 0) {
118:       rnp = dp;
119:       unp = du;
120:       wnp = dw;
121:       xnp = dx;
122:       alphap = alpha;
123:       betap = beta;
124:     }
126:     if (i > 0 && ksp->normtype == KSP_NORM_UNPRECONDITIONED) {
127:       VecNormBegin(R,NORM_2,&dp);
128:     } else if (i > 0 && ksp->normtype == KSP_NORM_PRECONDITIONED) {
129:       VecNormBegin(U,NORM_2,&dp);
130:     }
131:     if (!(i == 0 && ksp->normtype == KSP_NORM_NATURAL)) {
132:       VecDotBegin(R,U,&gamma);
133:     }
134:     VecDotBegin(W,U,&delta);
135: 
136:     if (i > 0) {
137:       VecNormBegin(S,NORM_2,&ds);
138:       VecNormBegin(Z,NORM_2,&dz);
139:       VecNormBegin(P,NORM_2,&dpp);
140:       VecNormBegin(Q,NORM_2,&dq);
141:       VecNormBegin(M,NORM_2,&dm);
142:     }
143:     VecNormBegin(X,NORM_2,&dx);
144:     VecNormBegin(U,NORM_2,&du);
145:     VecNormBegin(W,NORM_2,&dw);
147:     PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)R));
148:     KSP_PCApply(ksp,W,M);           /*   m <- Bw       */
149:     KSP_MatMult(ksp,Amat,M,N);      /*   n <- Am       */
151:     if (i > 0 && ksp->normtype == KSP_NORM_UNPRECONDITIONED) {
152:       VecNormEnd(R,NORM_2,&dp);
153:     } else if (i > 0 && ksp->normtype == KSP_NORM_PRECONDITIONED) {
154:       VecNormEnd(U,NORM_2,&dp);
155:     }
156:     if (!(i == 0 && ksp->normtype == KSP_NORM_NATURAL)) {
157:       VecDotEnd(R,U,&gamma);
158:     }
159:     VecDotEnd(W,U,&delta);
160: 
161:     if (i > 0) {
162:       VecNormEnd(S,NORM_2,&ds);
163:       VecNormEnd(Z,NORM_2,&dz);
164:       VecNormEnd(P,NORM_2,&dpp);
165:       VecNormEnd(Q,NORM_2,&dq);
166:       VecNormEnd(M,NORM_2,&dm);
167:     }
168:     VecNormEnd(X,NORM_2,&dx);
169:     VecNormEnd(U,NORM_2,&du);
170:     VecNormEnd(W,NORM_2,&dw);
172:     if (i > 0) {
173:       if (ksp->normtype == KSP_NORM_NATURAL) dp = PetscSqrtReal(PetscAbsScalar(gamma));
174:       else if (ksp->normtype == KSP_NORM_NONE) dp = 0.0;
176:       ksp->rnorm = dp;
177:       KSPLogResidualHistory(ksp,dp);
178:       KSPMonitor(ksp,i,dp);
179:       (*ksp->converged)(ksp,i,dp,&ksp->reason,ksp->cnvP);
180:       if (ksp->reason) break;
181:     }
183:     if (i == 0) {
184:       alpha = gamma / delta;
185:       VecCopy(N,Z);          /*  z <- n  */
186:       VecCopy(M,Q);          /*  q <- m  */
187:       VecCopy(U,P);          /*  p <- u  */
188:       VecCopy(W,S);          /*  s <- w  */
189:     } else {
190:       beta = gamma / gammaold;
191:       alpha = gamma / (delta - beta / alpha * gamma);
192:       VecAYPX(Z,beta,N);     /*  z <- n + beta * z  */
193:       VecAYPX(Q,beta,M);     /*  q <- m + beta * q  */
194:       VecAYPX(P,beta,U);     /*  p <- u + beta * p  */
195:       VecAYPX(S,beta,W);     /*  s <- w + beta * s  */
196:     }
197:     VecAXPY(X, alpha,P);     /*  x <- x + alpha * p  */
198:     VecAXPY(U,-alpha,Q);     /*  u <- u - alpha * q  */
199:     VecAXPY(W,-alpha,Z);     /*  w <- w - alpha * z  */
200:     VecAXPY(R,-alpha,S);     /*  r <- r - alpha * s  */
201:     gammaold = gamma;
203:     if (i > 0) {
204:       errncr = PetscSqrtReal(Anorm*xnp+2.0*Anorm*PetscAbsScalar(alphap)*dpp+rnp+2.0*PetscAbsScalar(alphap)*ds)*eps;
205:       errncw = PetscSqrtReal(Anorm*unp+2.0*Anorm*PetscAbsScalar(alphap)*dq+wnp+2.0*PetscAbsScalar(alphap)*dz)*eps;
206:     }
207:     if (i > 1) {
208:       errncs = PetscSqrtReal(Anorm*unp+2.0*Anorm*PetscAbsScalar(betap)*pnp+wnp+2.0*PetscAbsScalar(betap)*snp)*eps;
209:       errncz = PetscSqrtReal((mnz*sqn+2)*Anorm*dm+2.0*Anorm*PetscAbsScalar(betap)*qnp+2.0*PetscAbsScalar(betap)*znp)*eps;
210:     }
212:     if (i > 0) {
213:       if (i == 1) {
214:         errr = PetscSqrtReal((mnz*sqn+1)*Anorm*xnp+db)*eps+PetscSqrtReal(PetscAbsScalar(alphap)*mnz*sqn*Anorm*dpp)*eps+errncr;
215:         errs = PetscSqrtReal(mnz*sqn*Anorm*dpp)*eps;
216:         errw = PetscSqrtReal(mnz*sqn*Anorm*unp)*eps+PetscSqrtReal(PetscAbsScalar(alphap)*mnz*sqn*Anorm*dq)*eps+errncw;
217:         errz = PetscSqrtReal(mnz*sqn*Anorm*dq)*eps;
218:       } else if (replace == 1) {
219:         errrprev = errr;
220:         errr = PetscSqrtReal((mnz*sqn+1)*Anorm*dx+db)*eps;
221:         errs = PetscSqrtReal(mnz*sqn*Anorm*dpp)*eps;
222:         errw = PetscSqrtReal(mnz*sqn*Anorm*du)*eps;
223:         errz = PetscSqrtReal(mnz*sqn*Anorm*dq)*eps;
224:         replace = 0;
225:       } else {
226:         errrprev = errr;
227:         errr = errr+PetscAbsScalar(alphap)*PetscAbsScalar(betap)*errs+PetscAbsScalar(alphap)*errw+errncr+PetscAbsScalar(alphap)*errncs;
228:         errs = errw+PetscAbsScalar(betap)*errs+errncs;
229:         errw = errw+PetscAbsScalar(alphap)*PetscAbsScalar(betap)*errz+errncw+PetscAbsScalar(alphap)*errncz;
230:         errz = PetscAbsScalar(betap)*errz+errncz;
231:       }
232:       if (i > 1 && errrprev <= (tol * rnp) && errr > (tol * dp)) {
233:         KSP_MatMult(ksp,Amat,X,R);        /*  r <- Ax - b  */
234:         VecAYPX(R,-1.0,B);
235:         KSP_PCApply(ksp,R,U);             /*  u <- Br  */
236:         KSP_MatMult(ksp,Amat,U,W);        /*  w <- Au  */
237:         KSP_MatMult(ksp,Amat,P,S);        /*  s <- Ap  */
238:         KSP_PCApply(ksp,S,Q);             /*  q <- Bs  */
239:         KSP_MatMult(ksp,Amat,Q,Z);        /*  z <- Aq  */
240:         replace = 1;
241:         totreplaces++;
242:       }
243:     }
245:     i++;
246:     ksp->its = i;
248:   } while (i<ksp->max_it);
249:   if (i >= ksp->max_it) ksp->reason = KSP_DIVERGED_ITS;
250:   return(0);
251: }
254: /*MC
255:    KSPPIPECGRR - Pipelined conjugate gradient method with automated residual replacements.
257:    This method has only a single non-blocking reduction per iteration, compared to 2 blocking for standard CG.  The
258:    non-blocking reduction is overlapped by the matrix-vector product and preconditioner application.
260:    KSPPIPECGRR improves the robustness of KSPPIPECG by adding an automated residual replacement strategy. 
261:    True residual and other auxiliary variables are computed explicitly in a number of dynamically determined 
262:    iterations to counteract the accumulation of rounding errors and thus attain a higher maximal final accuracy.
264:    See also KSPPIPECG, which is identical to KSPPIPECGRR without residual replacements.
265:    See also KSPPIPECR, where the reduction is only overlapped with the matrix-vector product.
267:    Level: intermediate
269:    Notes:
270:    MPI configuration may be necessary for reductions to make asynchronous progress, which is important for 
271:    performance of pipelined methods. See the FAQ on the PETSc website for details.
273:    Contributed by:
274:    Siegfried Cools, Universiteit Antwerpen, Dept. Mathematics & Computer Science,
275:    European FP7 Project on EXascale Algorithms and Advanced Computational Techniques (EXA2CT) / Research Foundation Flanders (FWO)
277:    Reference:
278:    S. Cools, E.F. Yetkin, E. Agullo, L. Giraud, W. Vanroose, "Analyzing the effect of local rounding error
279:    propagation on the maximal attainable accuracy of the pipelined Conjugate Gradients method",
280:    SIAM Journal on Matrix Analysis and Applications (SIMAX), 39(1):426–450, 2018.
282: .seealso: KSPCreate(), KSPSetType(), KSPPIPECR, KSPGROPPCG, KSPPIPECG, KSPPGMRES, KSPCG, KSPPIPEBCGS, KSPCGUseSingleReduction()
283: M*/
284: PETSC_EXTERN PetscErrorCode KSPCreate_PIPECGRR(KSP ksp)
285: {
289:   KSPSetSupportedNorm(ksp,KSP_NORM_UNPRECONDITIONED,PC_LEFT,2);
290:   KSPSetSupportedNorm(ksp,KSP_NORM_PRECONDITIONED,PC_LEFT,2);
291:   KSPSetSupportedNorm(ksp,KSP_NORM_NATURAL,PC_LEFT,2);
292:   KSPSetSupportedNorm(ksp,KSP_NORM_NONE,PC_LEFT,1);
294:   ksp->ops->setup          = KSPSetUp_PIPECGRR;
295:   ksp->ops->solve          = KSPSolve_PIPECGRR;
296:   ksp->ops->destroy        = KSPDestroyDefault;
297:   ksp->ops->view           = 0;
298:   ksp->ops->setfromoptions = 0;
299:   ksp->ops->buildsolution  = KSPBuildSolutionDefault;
300:   ksp->ops->buildresidual  = KSPBuildResidualDefault;
301:   return(0);
302: }