Actual source code: ex51.c
  2: static char help[] = "This example solves a linear system in parallel with KSP.  The matrix\n\
  3: uses arbitrary order polynomials for finite elements on the unit square.  To test the parallel\n\
  4: matrix assembly, the matrix is intentionally laid out across processors\n\
  5: differently from the way it is assembled.  Input arguments are:\n\
  6:   -m <size> -p <order> : mesh size and polynomial order\n\n";
  8: /* Contributed by Travis Austin <austin@txcorp.com>, 2010.
  9:    based on src/ksp/ksp/tutorials/ex3.c
 10:  */
 12: #include <petscksp.h>
 14: /* Declare user-defined routines */
 15: static PetscReal      src(PetscReal,PetscReal);
 16: static PetscReal      ubdy(PetscReal,PetscReal);
 17: static PetscReal      polyBasisFunc(PetscInt,PetscInt,PetscReal*,PetscReal);
 18: static PetscReal      derivPolyBasisFunc(PetscInt,PetscInt,PetscReal*,PetscReal);
 19: static PetscErrorCode Form1DElementMass(PetscReal,PetscInt,PetscReal*,PetscReal*,PetscScalar*);
 20: static PetscErrorCode Form1DElementStiffness(PetscReal,PetscInt,PetscReal*,PetscReal*,PetscScalar*);
 21: static PetscErrorCode Form2DElementMass(PetscInt,PetscScalar*,PetscScalar*);
 22: static PetscErrorCode Form2DElementStiffness(PetscInt,PetscScalar*,PetscScalar*,PetscScalar*);
 23: static PetscErrorCode FormNodalRhs(PetscInt,PetscReal,PetscReal,PetscReal,PetscReal*,PetscScalar*);
 24: static PetscErrorCode FormNodalSoln(PetscInt,PetscReal,PetscReal,PetscReal,PetscReal*,PetscScalar*);
 25: static void leggaulob(PetscReal, PetscReal, PetscReal [], PetscReal [], int);
 26: static void qAndLEvaluation(int, PetscReal, PetscReal*, PetscReal*, PetscReal*);
 28: int main(int argc,char **args)
 29: {
 30:   PetscInt       p = 2, m = 5;
 31:   PetscInt       num1Dnodes, num2Dnodes;
 32:   PetscScalar    *Ke1D,*Ke2D,*Me1D,*Me2D;
 33:   PetscScalar    *r,*ue,val;
 34:   Vec            u,ustar,b,q;
 35:   Mat            A,Mass;
 36:   KSP            ksp;
 37:   PetscInt       M,N;
 38:   PetscMPIInt    rank,size;
 39:   PetscReal      x,y,h,norm;
 40:   PetscInt       *idx,indx,count,*rows,i,j,k,start,end,its;
 41:   PetscReal      *rowsx,*rowsy;
 42:   PetscReal      *gllNode, *gllWgts;
 45:   PetscInitialize(&argc,&args,(char*)0,help);if (ierr) return ierr;
 46:   PetscOptionsBegin(PETSC_COMM_WORLD,NULL,"Options for p-FEM","");
 47:   PetscOptionsInt("-m","Number of elements in each direction","None",m,&m,NULL);
 48:   PetscOptionsInt("-p","Order of each element (tensor product basis)","None",p,&p,NULL);
 49:   PetscOptionsEnd();
 50:   if (p <=0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Option -p value should be greater than zero");
 51:   N    = (p*m+1)*(p*m+1); /* dimension of matrix */
 52:   M    = m*m; /* number of elements */
 53:   h    = 1.0/m; /* mesh width */
 54:   MPI_Comm_rank(PETSC_COMM_WORLD,&rank);
 55:   MPI_Comm_size(PETSC_COMM_WORLD,&size);
 57:   /* Create stiffness matrix */
 58:   MatCreate(PETSC_COMM_WORLD,&A);
 59:   MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,N,N);
 60:   MatSetFromOptions(A);
 61:   MatSetUp(A);
 63:   /* Create matrix  */
 64:   MatCreate(PETSC_COMM_WORLD,&Mass);
 65:   MatSetSizes(Mass,PETSC_DECIDE,PETSC_DECIDE,N,N);
 66:   MatSetFromOptions(Mass);
 67:   MatSetUp(Mass);
 68:   start = rank*(M/size) + ((M%size) < rank ? (M%size) : rank);
 69:   end   = start + M/size + ((M%size) > rank);
 71:   /* Allocate element stiffness matrices */
 72:   num1Dnodes = (p+1);
 73:   num2Dnodes = num1Dnodes*num1Dnodes;
 75:   PetscMalloc1(num1Dnodes*num1Dnodes,&Me1D);
 76:   PetscMalloc1(num1Dnodes*num1Dnodes,&Ke1D);
 77:   PetscMalloc1(num2Dnodes*num2Dnodes,&Me2D);
 78:   PetscMalloc1(num2Dnodes*num2Dnodes,&Ke2D);
 79:   PetscMalloc1(num2Dnodes,&idx);
 80:   PetscMalloc1(num2Dnodes,&r);
 81:   PetscMalloc1(num2Dnodes,&ue);
 83:   /* Allocate quadrature and create stiffness matrices */
 84:   PetscMalloc1(p+1,&gllNode);
 85:   PetscMalloc1(p+1,&gllWgts);
 86:   leggaulob(0.0,1.0,gllNode,gllWgts,p); /* Get GLL nodes and weights */
 87:   Form1DElementMass(h,p,gllNode,gllWgts,Me1D);
 88:   Form1DElementStiffness(h,p,gllNode,gllWgts,Ke1D);
 89:   Form2DElementMass(p,Me1D,Me2D);
 90:   Form2DElementStiffness(p,Ke1D,Me1D,Ke2D);
 92:   /* Assemble matrix */
 93:   for (i=start; i<end; i++) {
 94:      indx = 0;
 95:      for (k=0;k<(p+1);++k) {
 96:        for (j=0;j<(p+1);++j) {
 97:          idx[indx++] = p*(p*m+1)*(i/m) + p*(i % m) + k*(p*m+1) + j;
 98:        }
 99:      }
100:      MatSetValues(A,num2Dnodes,idx,num2Dnodes,idx,Ke2D,ADD_VALUES);
101:      MatSetValues(Mass,num2Dnodes,idx,num2Dnodes,idx,Me2D,ADD_VALUES);
102:   }
103:   MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
104:   MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
105:   MatAssemblyBegin(Mass,MAT_FINAL_ASSEMBLY);
106:   MatAssemblyEnd(Mass,MAT_FINAL_ASSEMBLY);
108:   PetscFree(Me1D);
109:   PetscFree(Ke1D);
110:   PetscFree(Me2D);
111:   PetscFree(Ke2D);
113:   /* Create right-hand-side and solution vectors */
114:   VecCreate(PETSC_COMM_WORLD,&u);
115:   VecSetSizes(u,PETSC_DECIDE,N);
116:   VecSetFromOptions(u);
117:   PetscObjectSetName((PetscObject)u,"Approx. Solution");
118:   VecDuplicate(u,&b);
119:   PetscObjectSetName((PetscObject)b,"Right hand side");
120:   VecDuplicate(u,&q);
121:   PetscObjectSetName((PetscObject)q,"Right hand side 2");
122:   VecDuplicate(b,&ustar);
123:   VecSet(u,0.0);
124:   VecSet(b,0.0);
125:   VecSet(q,0.0);
127:   /* Assemble nodal right-hand-side and soln vector  */
128:   for (i=start; i<end; i++) {
129:     x    = h*(i % m);
130:     y    = h*(i/m);
131:     indx = 0;
132:     for (k=0;k<(p+1);++k) {
133:       for (j=0;j<(p+1);++j) {
134:         idx[indx++] = p*(p*m+1)*(i/m) + p*(i % m) + k*(p*m+1) + j;
135:       }
136:     }
137:     FormNodalRhs(p,x,y,h,gllNode,r);
138:     FormNodalSoln(p,x,y,h,gllNode,ue);
139:     VecSetValues(q,num2Dnodes,idx,r,INSERT_VALUES);
140:     VecSetValues(ustar,num2Dnodes,idx,ue,INSERT_VALUES);
141:   }
142:   VecAssemblyBegin(q);
143:   VecAssemblyEnd(q);
144:   VecAssemblyBegin(ustar);
145:   VecAssemblyEnd(ustar);
147:   PetscFree(idx);
148:   PetscFree(r);
149:   PetscFree(ue);
151:   /* Get FE right-hand side vector */
152:   MatMult(Mass,q,b);
154:   /* Modify matrix and right-hand-side for Dirichlet boundary conditions */
155:   PetscMalloc1(4*p*m,&rows);
156:   PetscMalloc1(4*p*m,&rowsx);
157:   PetscMalloc1(4*p*m,&rowsy);
158:   for (i=0; i<p*m+1; i++) {
159:     rows[i]          = i; /* bottom */
160:     rowsx[i]         = (i/p)*h+gllNode[i%p]*h;
161:     rowsy[i]         = 0.0;
162:     rows[3*p*m-1+i]  = (p*m)*(p*m+1) + i; /* top */
163:     rowsx[3*p*m-1+i] = (i/p)*h+gllNode[i%p]*h;
164:     rowsy[3*p*m-1+i] = 1.0;
165:   }
166:   count = p*m+1; /* left side */
167:   indx  = 1;
168:   for (i=p*m+1; i<(p*m)*(p*m+1); i+= (p*m+1)) {
169:     rows[count]    = i;
170:     rowsx[count]   = 0.0;
171:     rowsy[count++] = (indx/p)*h+gllNode[indx%p]*h;
172:     indx++;
173:   }
174:   count = 2*p*m; /* right side */
175:   indx  = 1;
176:   for (i=2*p*m+1; i<(p*m)*(p*m+1); i+= (p*m+1)) {
177:     rows[count]    = i;
178:     rowsx[count]   = 1.0;
179:     rowsy[count++] = (indx/p)*h+gllNode[indx%p]*h;
180:     indx++;
181:   }
182:   for (i=0; i<4*p*m; i++) {
183:     x    = rowsx[i];
184:     y    = rowsy[i];
185:     val  = ubdy(x,y);
186:     VecSetValues(b,1,&rows[i],&val,INSERT_VALUES);
187:     VecSetValues(u,1,&rows[i],&val,INSERT_VALUES);
188:   }
189:   MatZeroRows(A,4*p*m,rows,1.0,0,0);
190:   PetscFree(rows);
191:   PetscFree(rowsx);
192:   PetscFree(rowsy);
194:   VecAssemblyBegin(u);
195:   VecAssemblyEnd(u);
196:   VecAssemblyBegin(b);
197:   VecAssemblyEnd(b);
199:   /* Solve linear system */
200:   KSPCreate(PETSC_COMM_WORLD,&ksp);
201:   KSPSetOperators(ksp,A,A);
202:   KSPSetInitialGuessNonzero(ksp,PETSC_TRUE);
203:   KSPSetFromOptions(ksp);
204:   KSPSolve(ksp,b,u);
206:   /* Check error */
207:   VecAXPY(u,-1.0,ustar);
208:   VecNorm(u,NORM_2,&norm);
209:   KSPGetIterationNumber(ksp,&its);
210:   PetscPrintf(PETSC_COMM_WORLD,"Norm of error %g Iterations %D\n",(double)(norm*h),its);
212:   PetscFree(gllNode);
213:   PetscFree(gllWgts);
215:   KSPDestroy(&ksp);
216:   VecDestroy(&u);
217:   VecDestroy(&b);
218:   VecDestroy(&q);
219:   VecDestroy(&ustar);
220:   MatDestroy(&A);
221:   MatDestroy(&Mass);
223:   PetscFinalize();
224:   return ierr;
225: }
227: /* --------------------------------------------------------------------- */
229: /* 1d element stiffness mass matrix  */
230: static PetscErrorCode Form1DElementMass(PetscReal H,PetscInt P,PetscReal *gqn,PetscReal *gqw,PetscScalar *Me1D)
231: {
232:   PetscInt i,j,k;
233:   PetscInt indx;
236:   for (j=0; j<(P+1); ++j) {
237:     for (i=0; i<(P+1); ++i) {
238:       indx       = j*(P+1)+i;
239:       Me1D[indx] = 0.0;
240:       for (k=0; k<(P+1);++k) {
241:         Me1D[indx] += H*gqw[k]*polyBasisFunc(P,i,gqn,gqn[k])*polyBasisFunc(P,j,gqn,gqn[k]);
242:       }
243:     }
244:   }
245:   return(0);
246: }
248: /* --------------------------------------------------------------------- */
250: /* 1d element stiffness matrix for derivative */
251: static PetscErrorCode Form1DElementStiffness(PetscReal H,PetscInt P,PetscReal *gqn,PetscReal *gqw,PetscScalar *Ke1D)
252: {
253:   PetscInt i,j,k;
254:   PetscInt indx;
257:   for (j=0;j<(P+1);++j) {
258:     for (i=0;i<(P+1);++i) {
259:       indx = j*(P+1)+i;
260:       Ke1D[indx] = 0.0;
261:       for (k=0; k<(P+1);++k) {
262:         Ke1D[indx] += (1./H)*gqw[k]*derivPolyBasisFunc(P,i,gqn,gqn[k])*derivPolyBasisFunc(P,j,gqn,gqn[k]);
263:       }
264:     }
265:   }
266:   return(0);
267: }
269: /* --------------------------------------------------------------------- */
271:    /* element mass matrix */
272: static PetscErrorCode Form2DElementMass(PetscInt P,PetscScalar *Me1D,PetscScalar *Me2D)
273: {
274:   PetscInt i1,j1,i2,j2;
275:   PetscInt indx1,indx2,indx3;
278:   for (j2=0;j2<(P+1);++j2) {
279:     for (i2=0; i2<(P+1);++i2) {
280:       for (j1=0;j1<(P+1);++j1) {
281:         for (i1=0;i1<(P+1);++i1) {
282:           indx1 = j1*(P+1)+i1;
283:           indx2 = j2*(P+1)+i2;
284:           indx3 = (j2*(P+1)+j1)*(P+1)*(P+1)+(i2*(P+1)+i1);
285:           Me2D[indx3] = Me1D[indx1]*Me1D[indx2];
286:         }
287:       }
288:     }
289:   }
290:   return(0);
291: }
293: /* --------------------------------------------------------------------- */
295: /* element stiffness for Laplacian */
296: static PetscErrorCode Form2DElementStiffness(PetscInt P,PetscScalar *Ke1D,PetscScalar *Me1D,PetscScalar *Ke2D)
297: {
298:   PetscInt i1,j1,i2,j2;
299:   PetscInt indx1,indx2,indx3;
302:   for (j2=0;j2<(P+1);++j2) {
303:     for (i2=0; i2<(P+1);++i2) {
304:       for (j1=0;j1<(P+1);++j1) {
305:         for (i1=0;i1<(P+1);++i1) {
306:           indx1 = j1*(P+1)+i1;
307:           indx2 = j2*(P+1)+i2;
308:           indx3 = (j2*(P+1)+j1)*(P+1)*(P+1)+(i2*(P+1)+i1);
309:           Ke2D[indx3] = Ke1D[indx1]*Me1D[indx2] + Me1D[indx1]*Ke1D[indx2];
310:         }
311:       }
312:     }
313:   }
314:   return(0);
315: }
317: /* --------------------------------------------------------------------- */
319: static PetscErrorCode FormNodalRhs(PetscInt P,PetscReal x,PetscReal y,PetscReal H,PetscReal* nds,PetscScalar *r)
320: {
321:   PetscInt i,j,indx;
324:   indx=0;
325:   for (j=0;j<(P+1);++j) {
326:     for (i=0;i<(P+1);++i) {
327:       r[indx] = src(x+H*nds[i],y+H*nds[j]);
328:       indx++;
329:     }
330:   }
331:   return(0);
332: }
334: /* --------------------------------------------------------------------- */
336: static PetscErrorCode FormNodalSoln(PetscInt P,PetscReal x,PetscReal y,PetscReal H,PetscReal* nds,PetscScalar *u)
337: {
338:   PetscInt i,j,indx;
341:   indx=0;
342:   for (j=0;j<(P+1);++j) {
343:     for (i=0;i<(P+1);++i) {
344:       u[indx] = ubdy(x+H*nds[i],y+H*nds[j]);
345:       indx++;
346:     }
347:   }
348:   return(0);
349: }
351: /* --------------------------------------------------------------------- */
353: static PetscReal polyBasisFunc(PetscInt order, PetscInt basis, PetscReal *xLocVal, PetscReal xval)
354: {
355:   PetscReal denominator = 1.;
356:   PetscReal numerator   = 1.;
357:   PetscInt  i           =0;
359:   for (i=0; i<(order+1); i++) {
360:     if (i!=basis) {
361:       numerator   *= (xval-xLocVal[i]);
362:       denominator *= (xLocVal[basis]-xLocVal[i]);
363:     }
364:   }
365:   return (numerator/denominator);
366: }
368: /* --------------------------------------------------------------------- */
370: static PetscReal derivPolyBasisFunc(PetscInt order, PetscInt basis, PetscReal *xLocVal, PetscReal xval)
371: {
372:   PetscReal denominator;
373:   PetscReal numerator;
374:   PetscReal numtmp;
375:   PetscInt  i=0,j=0;
377:   denominator=1.;
378:   for (i=0; i<(order+1); i++) {
379:     if (i!=basis) denominator *= (xLocVal[basis]-xLocVal[i]);
380:   }
381:   numerator = 0.;
382:   for (j=0;j<(order+1);++j) {
383:     if (j != basis) {
384:       numtmp = 1.;
385:       for (i=0;i<(order+1);++i) {
386:         if (i!=basis && j!=i) numtmp *= (xval-xLocVal[i]);
387:       }
388:       numerator += numtmp;
389:     }
390:   }
392:   return (numerator/denominator);
393: }
395: /* --------------------------------------------------------------------- */
397: static PetscReal ubdy(PetscReal x,PetscReal y)
398: {
399:   return x*x*y*y;
400: }
402: static PetscReal src(PetscReal x,PetscReal y)
403: {
404:   return -2.*y*y - 2.*x*x;
405: }
406: /* --------------------------------------------------------------------- */
408: static void leggaulob(PetscReal x1, PetscReal x2, PetscReal x[], PetscReal w[], int n)
409: /*******************************************************************************
410: Given the lower and upper limits of integration x1 and x2, and given n, this
411: routine returns arrays x[0..n-1] and w[0..n-1] of length n, containing the abscissas
412: and weights of the Gauss-Lobatto-Legendre n-point quadrature formula.
413: *******************************************************************************/
414: {
415:   PetscInt    j,m;
416:   PetscReal z1,z,xm,xl,q,qp,Ln,scale;
417:   if (n==1) {
418:     x[0] = x1;   /* Scale the root to the desired interval, */
419:     x[1] = x2;   /* and put in its symmetric counterpart.   */
420:     w[0] = 1.;   /* Compute the weight */
421:     w[1] = 1.;   /* and its symmetric counterpart. */
422:   } else {
423:     x[0] = x1;   /* Scale the root to the desired interval, */
424:     x[n] = x2;   /* and put in its symmetric counterpart.   */
425:     w[0] = 2./(n*(n+1));   /* Compute the weight */
426:     w[n] = 2./(n*(n+1));   /* and its symmetric counterpart. */
427:     m    = (n+1)/2; /* The roots are symmetric, so we only find half of them. */
428:     xm   = 0.5*(x2+x1);
429:     xl   = 0.5*(x2-x1);
430:     for (j=1; j<=(m-1); j++) { /* Loop over the desired roots. */
431:       z=-1.0*PetscCosReal((PETSC_PI*(j+0.25)/(n))-(3.0/(8.0*n*PETSC_PI))*(1.0/(j+0.25)));
432:       /* Starting with the above approximation to the ith root, we enter */
433:       /* the main loop of refinement by Newton's method.                 */
434:       do {
435:         qAndLEvaluation(n,z,&q,&qp,&Ln);
436:         z1 = z;
437:         z  = z1-q/qp; /* Newton's method. */
438:       } while (PetscAbsReal(z-z1) > 3.0e-11);
439:       qAndLEvaluation(n,z,&q,&qp,&Ln);
440:       x[j]   = xm+xl*z;      /* Scale the root to the desired interval, */
441:       x[n-j] = xm-xl*z;      /* and put in its symmetric counterpart.   */
442:       w[j]   = 2.0/(n*(n+1)*Ln*Ln);  /* Compute the weight */
443:       w[n-j] = w[j];                 /* and its symmetric counterpart. */
444:     }
445:   }
446:   if (n%2==0) {
447:     qAndLEvaluation(n,0.0,&q,&qp,&Ln);
448:     x[n/2]=(x2-x1)/2.0;
449:     w[n/2]=2.0/(n*(n+1)*Ln*Ln);
450:   }
451:   /* scale the weights according to mapping from [-1,1] to [0,1] */
452:   scale = (x2-x1)/2.0;
453:   for (j=0; j<=n; ++j) w[j] = w[j]*scale;
454: }
457: /******************************************************************************/
458: static void qAndLEvaluation(int n, PetscReal x, PetscReal *q, PetscReal *qp, PetscReal *Ln)
459: /*******************************************************************************
460: Compute the polynomial qn(x) = L_{N+1}(x) - L_{n-1}(x) and its derivative in
461: addition to L_N(x) as these are needed for the GLL points.  See the book titled
462: "Implementing Spectral Methods for Partial Differential Equations: Algorithms
463: for Scientists and Engineers" by David A. Kopriva.
464: *******************************************************************************/
465: {
466:   PetscInt k;
468:   PetscReal Lnp;
469:   PetscReal Lnp1, Lnp1p;
470:   PetscReal Lnm1, Lnm1p;
471:   PetscReal Lnm2, Lnm2p;
473:   Lnm1  = 1.0;
474:   *Ln   = x;
475:   Lnm1p = 0.0;
476:   Lnp   = 1.0;
478:   for (k=2; k<=n; ++k) {
479:     Lnm2  = Lnm1;
480:     Lnm1  = *Ln;
481:     Lnm2p = Lnm1p;
482:     Lnm1p = Lnp;
483:     *Ln   = (2.*k-1.)/(1.0*k)*x*Lnm1 - (k-1.)/(1.0*k)*Lnm2;
484:     Lnp   = Lnm2p + (2.0*k-1)*Lnm1;
485:   }
486:   k     = n+1;
487:   Lnp1  = (2.*k-1.)/(1.0*k)*x*(*Ln) - (k-1.)/(1.0*k)*Lnm1;
488:   Lnp1p = Lnm1p + (2.0*k-1)*(*Ln);
489:   *q    = Lnp1 - Lnm1;
490:   *qp   = Lnp1p - Lnm1p;
491: }
495: /*TEST
497:    test:
498:       nsize: 2
499:       args: -ksp_monitor_short
501: TEST*/