Actual source code: ex66.c
  1: static char help[] = "Tests MATHARA\n\n";
  3: #include <petscmat.h>
  4: #include <petscsf.h>
  6: static PetscScalar GenEntry_Symm(PetscInt sdim, PetscReal x[], PetscReal y[], void *ctx)
  7: {
  8:     PetscInt  d;
  9:     PetscReal clength = sdim == 3 ? 0.2 : 0.1;
 10:     PetscReal dist, diff = 0.0;
 12:     for (d = 0; d < sdim; d++) { diff += (x[d] - y[d]) * (x[d] - y[d]); }
 13:     dist = PetscSqrtReal(diff);
 14:     return PetscExpReal(-dist / clength);
 15: }
 17: static PetscScalar GenEntry_Unsymm(PetscInt sdim, PetscReal x[], PetscReal y[], void *ctx)
 18: {
 19:     PetscInt  d;
 20:     PetscReal clength = sdim == 3 ? 0.2 : 0.1;
 21:     PetscReal dist, diff = 0.0, nx = 0.0, ny = 0.0;
 23:     for (d = 0; d < sdim; d++) { nx += x[d]*x[d]; }
 24:     for (d = 0; d < sdim; d++) { ny += y[d]*y[d]; }
 25:     for (d = 0; d < sdim; d++) { diff += (x[d] - y[d]) * (x[d] - y[d]); }
 26:     dist = PetscSqrtReal(diff);
 27:     return nx > ny ? PetscExpReal(-dist / clength) : PetscExpReal(-dist / clength) + 1.;
 28: }
 30: int main(int argc,char **argv)
 31: {
 32:   Mat            A,B,C,D;
 33:   Vec            v,x,y,Ax,Ay,Bx,By;
 34:   PetscRandom    r;
 35:   PetscLayout    map;
 36:   PetscScalar    *Adata = NULL, *Cdata = NULL, scale = 1.0;
 37:   PetscReal      *coords,nA,nD,nB,err,nX,norms[3];
 38:   PetscInt       N, n = 64, dim = 1, i, j, nrhs = 11, lda = 0, ldc = 0, nt, ntrials = 2;
 39:   PetscMPIInt    size,rank;
 40:   PetscBool      testlayout = PETSC_FALSE,flg,symm = PETSC_FALSE, Asymm = PETSC_TRUE, kernel = PETSC_TRUE;
 41:   PetscBool      checkexpl = PETSC_FALSE, agpu = PETSC_FALSE, bgpu = PETSC_FALSE, cgpu = PETSC_FALSE;
 42:   PetscBool      testtrans, testnorm, randommat = PETSC_TRUE;
 43:   void           (*approxnormfunc)(void);
 44:   void           (*Anormfunc)(void);
 47:   PETSC_MPI_THREAD_REQUIRED = MPI_THREAD_MULTIPLE;
 48:   PetscInitialize(&argc,&argv,(char*) 0,help);if (ierr) return ierr;
 49:   PetscOptionsGetInt(NULL,NULL,"-n",&n,NULL);
 50:   PetscOptionsGetInt(NULL,NULL,"-nrhs",&nrhs,NULL);
 51:   PetscOptionsGetInt(NULL,NULL,"-dim",&dim,NULL);
 52:   PetscOptionsGetInt(NULL,NULL,"-lda",&lda,NULL);
 53:   PetscOptionsGetInt(NULL,NULL,"-ldc",&ldc,NULL);
 54:   PetscOptionsGetInt(NULL,NULL,"-matmattrials",&ntrials,NULL);
 55:   PetscOptionsGetBool(NULL,NULL,"-randommat",&randommat,NULL);
 56:   PetscOptionsGetBool(NULL,NULL,"-testlayout",&testlayout,NULL);
 57:   PetscOptionsGetBool(NULL,NULL,"-Asymm",&Asymm,NULL);
 58:   PetscOptionsGetBool(NULL,NULL,"-symm",&symm,NULL);
 59:   PetscOptionsGetBool(NULL,NULL,"-kernel",&kernel,NULL);
 60:   PetscOptionsGetBool(NULL,NULL,"-checkexpl",&checkexpl,NULL);
 61:   PetscOptionsGetBool(NULL,NULL,"-agpu",&agpu,NULL);
 62:   PetscOptionsGetBool(NULL,NULL,"-bgpu",&bgpu,NULL);
 63:   PetscOptionsGetBool(NULL,NULL,"-cgpu",&cgpu,NULL);
 64:   PetscOptionsGetScalar(NULL,NULL,"-scale",&scale,NULL);
 65:   if (!Asymm) symm = PETSC_FALSE;
 67:   MPI_Comm_size(PETSC_COMM_WORLD,&size);
 68:   /* MatMultTranspose for nonsymmetric matrices not implemented */
 69:   testtrans = (PetscBool)(size == 1 || symm);
 70:   testnorm = (PetscBool)(size == 1 || symm);
 72:   MPI_Comm_rank(PETSC_COMM_WORLD,&rank);
 73:   PetscLayoutCreate(PETSC_COMM_WORLD,&map);
 74:   if (testlayout) {
 75:     if (rank%2) n = PetscMax(2*n-5*rank,0);
 76:     else n = 2*n+rank;
 77:   }
 78:   PetscLayoutSetLocalSize(map,n);
 79:   PetscLayoutSetUp(map);
 80:   PetscLayoutGetSize(map,&N);
 81:   PetscLayoutDestroy(&map);
 83:   PetscRandomCreate(PETSC_COMM_WORLD,&r);
 84:   PetscRandomSetFromOptions(r);
 85:   if (lda) {
 86:     PetscMalloc1(N*(n+lda),&Adata);
 87:   }
 88:   MatCreateDense(PETSC_COMM_WORLD,n,n,N,N,Adata,&A);
 89:   MatDenseSetLDA(A,n+lda);
 90:   PetscMalloc1(n*dim,&coords);
 91:   for (j = 0; j < n; j++) {
 92:     for (i = 0; i < dim; i++) {
 93:       PetscScalar a;
 95:       PetscRandomGetValue(r,&a);
 96:       coords[j*dim + i] = PetscRealPart(a);
 97:     }
 98:   }
 99:   if (kernel || !randommat) {
100:     MatHaraKernel k = Asymm ? GenEntry_Symm : GenEntry_Unsymm;
101:     PetscInt      ist,ien;
102:     PetscReal     *gcoords;
104:     if (size > 1) { /* replicated coords so that we can populate the dense matrix */
105:       PetscSF      sf;
106:       MPI_Datatype dtype;
108:       MPI_Type_contiguous(dim,MPIU_REAL,&dtype);
109:       MPI_Type_commit(&dtype);
111:       PetscSFCreate(PETSC_COMM_WORLD,&sf);
112:       MatGetLayouts(A,&map,NULL);
113:       PetscSFSetGraphWithPattern(sf,map,PETSCSF_PATTERN_ALLGATHER);
114:       PetscMalloc1(dim*N,&gcoords);
115:       PetscSFBcastBegin(sf,dtype,coords,gcoords,MPI_REPLACE);
116:       PetscSFBcastEnd(sf,dtype,coords,gcoords,MPI_REPLACE);
117:       PetscSFDestroy(&sf);
118:       MPI_Type_free(&dtype);
119:     } else gcoords = (PetscReal*)coords;
121:     MatGetOwnershipRange(A,&ist,&ien);
122:     for (i = ist; i < ien; i++) {
123:       for (j = 0; j < N; j++) {
124:         MatSetValue(A,i,j,(*k)(dim,gcoords + i*dim,gcoords + j*dim,NULL),INSERT_VALUES);
125:       }
126:     }
127:     MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
128:     MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
129:     if (kernel) {
130:       MatCreateHaraFromKernel(PETSC_COMM_WORLD,n,n,N,N,dim,coords,k,NULL,PETSC_DECIDE,PETSC_DECIDE,PETSC_DECIDE,&B);
131:     } else {
132:       MatCreateHaraFromMat(A,dim,coords,PETSC_DECIDE,PETSC_DECIDE,PETSC_DECIDE,PETSC_DECIDE,PETSC_DECIDE,&B);
133:     }
134:     if (gcoords != coords) { PetscFree(gcoords); }
135:   } else {
136:     MatSetRandom(A,r);
137:     if (Asymm) {
138:       MatTranspose(A,MAT_INITIAL_MATRIX,&B);
139:       MatAXPY(A,1.0,B,SAME_NONZERO_PATTERN);
140:       MatDestroy(&B);
141:       MatSetOption(A,MAT_SYMMETRIC,PETSC_TRUE);
142:     }
143:     MatCreateHaraFromMat(A,dim,coords,PETSC_DECIDE,PETSC_DECIDE,PETSC_DECIDE,PETSC_DECIDE,PETSC_DECIDE,&B);
144:   }
145:   if (agpu) {
146:     MatConvert(A,MATDENSECUDA,MAT_INPLACE_MATRIX,&A);
147:   }
148:   MatViewFromOptions(A,NULL,"-A_view");
150:   MatSetOption(B,MAT_SYMMETRIC,symm);
151:   PetscFree(coords);
153:   /* assemble the H-matrix */
154:   MatBindToCPU(B,(PetscBool)!bgpu);
155:   MatSetFromOptions(B);
156:   MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
157:   MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
158:   MatViewFromOptions(B,NULL,"-B_view");
160:   /* Test MatScale */
161:   MatScale(A,scale);
162:   MatScale(B,scale);
164:   /* Test MatMult */
165:   MatCreateVecs(A,&Ax,&Ay);
166:   MatCreateVecs(B,&Bx,&By);
167:   VecSetRandom(Ax,r);
168:   VecCopy(Ax,Bx);
169:   MatMult(A,Ax,Ay);
170:   MatMult(B,Bx,By);
171:   VecViewFromOptions(Ay,NULL,"-mult_vec_view");
172:   VecViewFromOptions(By,NULL,"-mult_vec_view");
173:   VecNorm(Ay,NORM_INFINITY,&nX);
174:   VecAXPY(Ay,-1.0,By);
175:   VecViewFromOptions(Ay,NULL,"-mult_vec_view");
176:   VecNorm(Ay,NORM_INFINITY,&err);
177:   PetscPrintf(PETSC_COMM_WORLD,"MatMult err %g\n",err/nX);
178:   VecScale(By,-1.0);
179:   MatMultAdd(B,Bx,By,By);
180:   VecNorm(By,NORM_INFINITY,&err);
181:   VecViewFromOptions(By,NULL,"-mult_vec_view");
182:   if (err > PETSC_SMALL) {
183:     PetscPrintf(PETSC_COMM_WORLD,"MatMultAdd err %g\n",err);
184:   }
186:   /* Test MatNorm */
187:   MatNorm(A,NORM_INFINITY,&norms[0]);
188:   MatNorm(A,NORM_1,&norms[1]);
189:   norms[2] = -1.; /* NORM_2 not supported */
190:   PetscPrintf(PETSC_COMM_WORLD,"A Matrix norms:        infty=%g, norm_1=%g, norm_2=%g\n",(double)norms[0],(double)norms[1],(double)norms[2]);
191:   MatGetOperation(A,MATOP_NORM,&Anormfunc);
192:   MatGetOperation(B,MATOP_NORM,&approxnormfunc);
193:   MatSetOperation(A,MATOP_NORM,approxnormfunc);
194:   MatNorm(A,NORM_INFINITY,&norms[0]);
195:   MatNorm(A,NORM_1,&norms[1]);
196:   MatNorm(A,NORM_2,&norms[2]);
197:   PetscPrintf(PETSC_COMM_WORLD,"A Approx Matrix norms: infty=%g, norm_1=%g, norm_2=%g\n",(double)norms[0],(double)norms[1],(double)norms[2]);
198:   if (testnorm) {
199:     MatNorm(B,NORM_INFINITY,&norms[0]);
200:     MatNorm(B,NORM_1,&norms[1]);
201:     MatNorm(B,NORM_2,&norms[2]);
202:   } else {
203:     norms[0] = -1.;
204:     norms[1] = -1.;
205:     norms[2] = -1.;
206:   }
207:   PetscPrintf(PETSC_COMM_WORLD,"B Approx Matrix norms: infty=%g, norm_1=%g, norm_2=%g\n",(double)norms[0],(double)norms[1],(double)norms[2]);
208:   MatSetOperation(A,MATOP_NORM,Anormfunc);
210:   /* Test MatDuplicate */
211:   MatDuplicate(B,MAT_COPY_VALUES,&D);
212:   MatMultEqual(B,D,10,&flg);
213:   if (!flg) {
214:     PetscPrintf(PETSC_COMM_WORLD,"MatMult error after MatDuplicate\n");
215:   }
216:   if (testtrans) {
217:     MatMultTransposeEqual(B,D,10,&flg);
218:     if (!flg) {
219:       PetscPrintf(PETSC_COMM_WORLD,"MatMultTranpose error after MatDuplicate\n");
220:     }
221:   }
222:   MatDestroy(&D);
224:   if (testtrans) { /* MatMultTranspose for nonsymmetric matrices not implemented */
225:     VecSetRandom(Ay,r);
226:     VecCopy(Ay,By);
227:     MatMultTranspose(A,Ay,Ax);
228:     MatMultTranspose(B,By,Bx);
229:     VecViewFromOptions(Ax,NULL,"-multtrans_vec_view");
230:     VecViewFromOptions(Bx,NULL,"-multtrans_vec_view");
231:     VecNorm(Ax,NORM_INFINITY,&nX);
232:     VecAXPY(Ax,-1.0,Bx);
233:     VecViewFromOptions(Ax,NULL,"-multtrans_vec_view");
234:     VecNorm(Ax,NORM_INFINITY,&err);
235:     PetscPrintf(PETSC_COMM_WORLD,"MatMultTranspose err %g\n",err/nX);
236:     VecScale(Bx,-1.0);
237:     MatMultTransposeAdd(B,By,Bx,Bx);
238:     VecNorm(Bx,NORM_INFINITY,&err);
239:     if (err > PETSC_SMALL) {
240:       PetscPrintf(PETSC_COMM_WORLD,"MatMultTransposeAdd err %g\n",err);
241:     }
242:   }
243:   VecDestroy(&Ax);
244:   VecDestroy(&Ay);
245:   VecDestroy(&Bx);
246:   VecDestroy(&By);
248:   /* Test MatMatMult */
249:   if (ldc) {
250:     PetscMalloc1(nrhs*(n+ldc),&Cdata);
251:   }
252:   MatCreateDense(PETSC_COMM_WORLD,n,PETSC_DECIDE,N,nrhs,Cdata,&C);
253:   MatDenseSetLDA(C,n+ldc);
254:   MatSetRandom(C,r);
255:   if (cgpu) {
256:     MatConvert(C,MATDENSECUDA,MAT_INPLACE_MATRIX,&C);
257:   }
258:   for (nt = 0; nt < ntrials; nt++) {
259:     MatMatMult(B,C,nt ? MAT_REUSE_MATRIX : MAT_INITIAL_MATRIX,PETSC_DEFAULT,&D);
260:     MatViewFromOptions(D,NULL,"-bc_view");
261:     PetscObjectBaseTypeCompareAny((PetscObject)D,&flg,MATSEQDENSE,MATMPIDENSE,"");
262:     if (flg) {
263:       MatCreateVecs(B,&x,&y);
264:       MatCreateVecs(D,NULL,&v);
265:       for (i = 0; i < nrhs; i++) {
266:         MatGetColumnVector(D,v,i);
267:         MatGetColumnVector(C,x,i);
268:         MatMult(B,x,y);
269:         VecAXPY(y,-1.0,v);
270:         VecNorm(y,NORM_INFINITY,&err);
271:         if (err > PETSC_SMALL) { PetscPrintf(PETSC_COMM_WORLD,"MatMat err %D %g\n",i,err); }
272:       }
273:       VecDestroy(&y);
274:       VecDestroy(&x);
275:       VecDestroy(&v);
276:     }
277:   }
278:   MatDestroy(&D);
280:   /* Test MatTransposeMatMult */
281:   if (testtrans) { /* MatMultTranspose for nonsymmetric matrices not implemented */
282:     for (nt = 0; nt < ntrials; nt++) {
283:       MatTransposeMatMult(B,C,nt ? MAT_REUSE_MATRIX : MAT_INITIAL_MATRIX,PETSC_DEFAULT,&D);
284:       MatViewFromOptions(D,NULL,"-btc_view");
285:       PetscObjectBaseTypeCompareAny((PetscObject)D,&flg,MATSEQDENSE,MATMPIDENSE,"");
286:       if (flg) {
287:         MatCreateVecs(B,&y,&x);
288:         MatCreateVecs(D,NULL,&v);
289:         for (i = 0; i < nrhs; i++) {
290:           MatGetColumnVector(D,v,i);
291:           MatGetColumnVector(C,x,i);
292:           MatMultTranspose(B,x,y);
293:           VecAXPY(y,-1.0,v);
294:           VecNorm(y,NORM_INFINITY,&err);
295:           if (err > PETSC_SMALL) { PetscPrintf(PETSC_COMM_WORLD,"MatTransMat err %D %g\n",i,err); }
296:         }
297:         VecDestroy(&y);
298:         VecDestroy(&x);
299:         VecDestroy(&v);
300:       }
301:     }
302:     MatDestroy(&D);
303:   }
305:   /* check explicit operator */
306:   if (checkexpl) {
307:     Mat Be, Bet;
309:     MatComputeOperator(B,MATDENSE,&D);
310:     MatDuplicate(D,MAT_COPY_VALUES,&Be);
311:     MatNorm(D,NORM_FROBENIUS,&nB);
312:     MatViewFromOptions(D,NULL,"-expl_view");
313:     MatAXPY(D,-1.0,A,SAME_NONZERO_PATTERN);
314:     MatViewFromOptions(D,NULL,"-diff_view");
315:     MatNorm(D,NORM_FROBENIUS,&nD);
316:     MatNorm(A,NORM_FROBENIUS,&nA);
317:     PetscPrintf(PETSC_COMM_WORLD,"Approximation error %g (%g / %g, %g)\n",nD/nA,nD,nA,nB);
318:     MatDestroy(&D);
320:     if (testtrans) { /* MatMultTranspose for nonsymmetric matrices not implemented */
321:       MatTranspose(A,MAT_INPLACE_MATRIX,&A);
322:       MatComputeOperatorTranspose(B,MATDENSE,&D);
323:       MatDuplicate(D,MAT_COPY_VALUES,&Bet);
324:       MatNorm(D,NORM_FROBENIUS,&nB);
325:       MatViewFromOptions(D,NULL,"-expl_trans_view");
326:       MatAXPY(D,-1.0,A,SAME_NONZERO_PATTERN);
327:       MatViewFromOptions(D,NULL,"-diff_trans_view");
328:       MatNorm(D,NORM_FROBENIUS,&nD);
329:       MatNorm(A,NORM_FROBENIUS,&nA);
330:       PetscPrintf(PETSC_COMM_WORLD,"Approximation error transpose %g (%g / %g, %g)\n",nD/nA,nD,nA,nB);
331:       MatDestroy(&D);
333:       MatTranspose(Bet,MAT_INPLACE_MATRIX,&Bet);
334:       MatAXPY(Be,-1.0,Bet,SAME_NONZERO_PATTERN);
335:       MatViewFromOptions(Be,NULL,"-diff_expl_view");
336:       MatNorm(Be,NORM_FROBENIUS,&nB);
337:       PetscPrintf(PETSC_COMM_WORLD,"Approximation error B - (B^T)^T %g\n",nB);
338:       MatDestroy(&Be);
339:       MatDestroy(&Bet);
340:     }
341:   }
342:   MatDestroy(&A);
343:   MatDestroy(&B);
344:   MatDestroy(&C);
345:   PetscRandomDestroy(&r);
346:   PetscFree(Cdata);
347:   PetscFree(Adata);
348:   PetscFinalize();
349:   return ierr;
350: }
352: /*TEST
354:    build:
355:      requires: hara
357: #tests from kernel
358:    test:
359:      requires: hara
360:      nsize: 1
361:      suffix: 1
362:      args: -n {{17 33}} -kernel 1 -dim {{1 2 3}} -symm {{0 1}} -checkexpl -bgpu 0
364:    test:
365:      requires: hara
366:      nsize: 1
367:      suffix: 1_ld
368:      output_file: output/ex66_1.out
369:      args: -n 33 -kernel 1 -dim 1 -lda 13 -ldc 11 -symm 0 -checkexpl -bgpu 0
371:    test:
372:      requires: hara cuda
373:      nsize: 1
374:      suffix: 1_cuda
375:      output_file: output/ex66_1.out
376:      args: -n {{17 33}} -kernel 1 -dim {{1 2 3}} -symm {{0 1}} -checkexpl -bgpu 1
378:    test:
379:      requires: hara cuda
380:      nsize: 1
381:      suffix: 1_cuda_ld
382:      output_file: output/ex66_1.out
383:      args: -n 33 -kernel 1 -dim 1 -lda 13 -ldc 11 -symm 0 -checkexpl -bgpu 1
385:    test:
386:      requires: hara define(PETSC_HAVE_MPI_INIT_THREAD)
387:      nsize: 2
388:      suffix: 1_par
389:      args: -n 32 -kernel 1 -dim 1 -ldc 12 -testlayout {{0 1}} -bgpu 0 -cgpu 0
391:    test:
392:      requires: hara cuda define(PETSC_HAVE_MPI_INIT_THREAD)
393:      nsize: 2
394:      suffix: 1_par_cuda
395:      args: -n 32 -kernel 1 -dim 1 -ldc 12 -testlayout {{0 1}} -bgpu {{0 1}} -cgpu {{0 1}}
396:      output_file: output/ex66_1_par.out
398: #tests from matrix sampling (parallel or unsymmetric not supported)
399:    test:
400:      requires: hara
401:      nsize: 1
402:      suffix: 2
403:      args: -n {{17 33}} -kernel 0 -dim 2 -symm 1 -checkexpl -bgpu 0
405:    test:
406:      requires: hara cuda
407:      nsize: 1
408:      suffix: 2_cuda
409:      output_file: output/ex66_2.out
410:      args: -n {{17 33}} -kernel 0 -dim 2 -symm 1 -checkexpl -bgpu {{0 1}} -agpu {{0 1}}
412: TEST*/