Actual source code: ex60.c
  1: static char help[] = "Working out corner cases of the ASM preconditioner.\n\n";
  3: #include <petscksp.h>
  5: int main(int argc,char **args)
  6: {
  7:   KSP            ksp;
  8:   PC             pc;
  9:   Mat            A;
 10:   Vec            u, x, b;
 11:   PetscReal      error;
 12:   PetscMPIInt    rank, size, sized;
 13:   PetscInt       M = 8, N = 8, m, n, rstart, rend, r;
 14:   PetscBool      userSubdomains = PETSC_FALSE;
 17:   PetscInitialize(&argc, &args, NULL,help);if (ierr) return ierr;
 18:   PetscOptionsGetInt(NULL,NULL, "-M", &M, NULL);
 19:   PetscOptionsGetInt(NULL,NULL, "-N", &N, NULL);
 20:   PetscOptionsGetBool(NULL,NULL, "-user_subdomains", &userSubdomains, NULL);
 21:   /* Do parallel decomposition */
 22:   MPI_Comm_rank(PETSC_COMM_WORLD, &rank);
 23:   MPI_Comm_size(PETSC_COMM_WORLD, &size);
 24:   sized = (PetscMPIInt) PetscSqrtReal((PetscReal) size);
 25:   if (PetscSqr(sized) != size) SETERRQ1(PETSC_COMM_WORLD, PETSC_ERR_ARG_WRONG, "This test may only be run on a number of processes which is a perfect square, not %d", (int) size);
 26:   if (M % sized) SETERRQ2(PETSC_COMM_WORLD, PETSC_ERR_ARG_WRONG, "The number of x-vertices %D does not divide the number of x-processes %d", M, (int) sized);
 27:   if (N % sized) SETERRQ2(PETSC_COMM_WORLD, PETSC_ERR_ARG_WRONG, "The number of y-vertices %D does not divide the number of y-processes %d", N, (int) sized);
 28:   /* Assemble the matrix for the five point stencil, YET AGAIN
 29:        Every other process will be empty */
 30:   MatCreate(PETSC_COMM_WORLD, &A);
 31:   m    = (sized > 1) ? (rank % 2) ? 0 : 2*M/sized : M;
 32:   n    = N/sized;
 33:   MatSetSizes(A, m*n, m*n, M*N, M*N);
 34:   MatSetFromOptions(A);
 35:   MatSetUp(A);
 36:   MatGetOwnershipRange(A, &rstart, &rend);
 37:   for (r = rstart; r < rend; ++r) {
 38:     const PetscScalar diag = 4.0, offdiag = -1.0;
 39:     const PetscInt    i    = r/N;
 40:     const PetscInt    j    = r - i*N;
 41:     PetscInt          c;
 43:     if (i > 0)   {c = r - n; MatSetValues(A, 1, &r, 1, &c, &offdiag, INSERT_VALUES);}
 44:     if (i < M-1) {c = r + n; MatSetValues(A, 1, &r, 1, &c, &offdiag, INSERT_VALUES);}
 45:     if (j > 0)   {c = r - 1; MatSetValues(A, 1, &r, 1, &c, &offdiag, INSERT_VALUES);}
 46:     if (j < N-1) {c = r + 1; MatSetValues(A, 1, &r, 1, &c, &offdiag, INSERT_VALUES);}
 47:     MatSetValues(A, 1, &r, 1, &r, &diag, INSERT_VALUES);
 48:   }
 49:   MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
 50:   MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
 51:   /* Setup Solve */
 52:   MatCreateVecs(A, &x, &b);
 53:   VecDuplicate(x, &u);
 54:   VecSet(u, 1.0);
 55:   MatMult(A, u, b);
 56:   KSPCreate(PETSC_COMM_WORLD, &ksp);
 57:   KSPSetOperators(ksp, A, A);
 58:   KSPGetPC(ksp, &pc);
 59:   PCSetType(pc, PCASM);
 60:   /* Setup ASM by hand */
 61:   if (userSubdomains) {
 62:     IS        is;
 63:     PetscInt *rows;
 65:     /* Use no overlap for now */
 66:     PetscMalloc1(rend-rstart, &rows);
 67:     for (r = rstart; r < rend; ++r) rows[r-rstart] = r;
 68:     ISCreateGeneral(PETSC_COMM_SELF, rend-rstart, rows, PETSC_OWN_POINTER, &is);
 69:     PCASMSetLocalSubdomains(pc, 1, &is, &is);
 70:     ISDestroy(&is);
 71:   }
 72:   KSPSetFromOptions(ksp);
 73:   /* Solve and Compare */
 74:   KSPSolve(ksp, b, x);
 75:   VecAXPY(x, -1.0, u);
 76:   VecNorm(x, NORM_INFINITY, &error);
 77:   PetscPrintf(PETSC_COMM_WORLD, "Infinity norm of the error: %g\n", (double) error);
 78:   /* Cleanup */
 79:   KSPDestroy(&ksp);
 80:   MatDestroy(&A);
 81:   VecDestroy(&u);
 82:   VecDestroy(&x);
 83:   VecDestroy(&b);
 84:   PetscFinalize();
 85:   return ierr;
 86: }
 89: /*TEST
 91:    test:
 92:       suffix: 0
 93:       args: -ksp_view
 95:    test:
 96:       requires: kokkos_kernels
 97:       suffix: 0_kokkos
 98:       args: -ksp_view -mat_type aijkokkos
100:    test:
101:       requires: cuda
102:       suffix: 0_cuda
103:       args: -ksp_view -mat_type aijcusparse -sub_pc_factor_mat_solver_type cusparse
105:    test:
106:       suffix: 1
107:       nsize: 4
108:       args: -ksp_view
110:    test:
111:       requires: kokkos_kernels
112:       suffix: 1_kokkos
113:       nsize: 4
114:       args: -ksp_view -mat_type aijkokkos
116:    test:
117:       requires: cuda
118:       suffix: 1_cuda
119:       nsize: 4
120:       args: -ksp_view -mat_type aijcusparse -sub_pc_factor_mat_solver_type cusparse
122:    test:
123:       suffix: 2
124:       nsize: 4
125:       args: -user_subdomains -ksp_view
127:    test:
128:       requires: kokkos_kernels
129:       suffix: 2_kokkos
130:       nsize: 4
131:       args: -user_subdomains -ksp_view -mat_type aijkokkos
133:    test:
134:       requires: cuda
135:       suffix: 2_cuda
136:       nsize: 4
137:       args: -user_subdomains -ksp_view -mat_type aijcusparse -sub_pc_factor_mat_solver_type cusparse
139: TEST*/