Actual source code: ex158.c

  1: static char help[] = "Illustrate how to use mpi FFTW and PETSc-FFTW interface \n\n";

  3: /*
  4:  Usage:
  5:    mpiexec -n <np> ./ex158 -use_FFTW_interface NO
  6:    mpiexec -n <np> ./ex158 -use_FFTW_interface YES
  7: */

  9: #include <petscmat.h>
 10: #include <fftw3-mpi.h>

 12: int main(int argc, char **args)
 13: {
 14:   PetscMPIInt rank, size;
 15:   PetscInt    N0 = 50, N1 = 20, N = N0 * N1;
 16:   PetscRandom rdm;
 17:   PetscScalar a;
 18:   PetscReal   enorm;
 19:   Vec         x, y, z;
 20:   PetscBool   view = PETSC_FALSE, use_interface = PETSC_TRUE;

 22:   PetscFunctionBeginUser;
 23:   PetscCall(PetscInitialize(&argc, &args, NULL, help));
 24: #if defined(PETSC_USE_COMPLEX)
 25:   SETERRQ(PETSC_COMM_WORLD, PETSC_ERR_SUP, "This example requires real numbers. Your current scalar type is complex");
 26: #endif

 28:   PetscOptionsBegin(PETSC_COMM_WORLD, NULL, "FFTW Options", "ex158");
 29:   PetscCall(PetscOptionsBool("-use_FFTW_interface", "Use PETSc-FFTW interface", "ex158", use_interface, &use_interface, NULL));
 30:   PetscOptionsEnd();

 32:   PetscCallMPI(MPI_Comm_size(PETSC_COMM_WORLD, &size));
 33:   PetscCallMPI(MPI_Comm_rank(PETSC_COMM_WORLD, &rank));

 35:   PetscCall(PetscRandomCreate(PETSC_COMM_WORLD, &rdm));
 36:   PetscCall(PetscRandomSetFromOptions(rdm));

 38:   if (!use_interface) {
 39:     /* Use mpi FFTW without PETSc-FFTW interface, 2D case only */
 40:     /*---------------------------------------------------------*/
 41:     fftw_plan     fplan, bplan;
 42:     fftw_complex *data_in, *data_out, *data_out2;
 43:     ptrdiff_t     alloc_local, local_n0, local_0_start;

 45:     if (rank == 0) printf("Use FFTW without PETSc-FFTW interface\n");
 46:     fftw_mpi_init();
 47:     N           = N0 * N1;
 48:     alloc_local = fftw_mpi_local_size_2d(N0, N1, PETSC_COMM_WORLD, &local_n0, &local_0_start);

 50:     data_in   = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
 51:     data_out  = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
 52:     data_out2 = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);

 54:     PetscCall(VecCreateMPIWithArray(PETSC_COMM_WORLD, 1, (PetscInt)local_n0 * N1, (PetscInt)N, (const PetscScalar *)data_in, &x));
 55:     PetscCall(PetscObjectSetName((PetscObject)x, "Real Space vector"));
 56:     PetscCall(VecCreateMPIWithArray(PETSC_COMM_WORLD, 1, (PetscInt)local_n0 * N1, (PetscInt)N, (const PetscScalar *)data_out, &y));
 57:     PetscCall(PetscObjectSetName((PetscObject)y, "Frequency space vector"));
 58:     PetscCall(VecCreateMPIWithArray(PETSC_COMM_WORLD, 1, (PetscInt)local_n0 * N1, (PetscInt)N, (const PetscScalar *)data_out2, &z));
 59:     PetscCall(PetscObjectSetName((PetscObject)z, "Reconstructed vector"));

 61:     fplan = fftw_mpi_plan_dft_2d(N0, N1, data_in, data_out, PETSC_COMM_WORLD, FFTW_FORWARD, FFTW_ESTIMATE);
 62:     bplan = fftw_mpi_plan_dft_2d(N0, N1, data_out, data_out2, PETSC_COMM_WORLD, FFTW_BACKWARD, FFTW_ESTIMATE);

 64:     PetscCall(VecSetRandom(x, rdm));
 65:     if (view) PetscCall(VecView(x, PETSC_VIEWER_STDOUT_WORLD));

 67:     fftw_execute(fplan);
 68:     if (view) PetscCall(VecView(y, PETSC_VIEWER_STDOUT_WORLD));

 70:     fftw_execute(bplan);

 72:     /* Compare x and z. FFTW computes an unnormalized DFT, thus z = N*x */
 73:     a = 1.0 / (PetscReal)N;
 74:     PetscCall(VecScale(z, a));
 75:     if (view) PetscCall(VecView(z, PETSC_VIEWER_STDOUT_WORLD));
 76:     PetscCall(VecAXPY(z, -1.0, x));
 77:     PetscCall(VecNorm(z, NORM_1, &enorm));
 78:     if (enorm > 1.e-11) PetscCall(PetscPrintf(PETSC_COMM_SELF, "  Error norm of |x - z| %g\n", (double)enorm));

 80:     /* Free spaces */
 81:     fftw_destroy_plan(fplan);
 82:     fftw_destroy_plan(bplan);
 83:     fftw_free(data_in);
 84:     PetscCall(VecDestroy(&x));
 85:     fftw_free(data_out);
 86:     PetscCall(VecDestroy(&y));
 87:     fftw_free(data_out2);
 88:     PetscCall(VecDestroy(&z));

 90:   } else {
 91:     /* Use PETSc-FFTW interface                  */
 92:     /*-------------------------------------------*/
 93:     PetscInt i, *dim, k, DIM;
 94:     Mat      A;
 95:     Vec      input, output;

 97:     N = 30;
 98:     for (i = 2; i < 3; i++) { /* (i=3,4: -- error in VecScatterPetscToFFTW(A,input,x); */
 99:       DIM = i;
100:       PetscCall(PetscMalloc1(i, &dim));
101:       for (k = 0; k < i; k++) dim[k] = 30;
102:       N *= dim[i - 1];

104:       /* Create FFTW object */
105:       if (rank == 0) PetscCall(PetscPrintf(PETSC_COMM_SELF, "Use PETSc-FFTW interface...%d-DIM:%d \n", DIM, N));
106:       PetscCall(MatCreateFFT(PETSC_COMM_WORLD, DIM, dim, MATFFTW, &A));

108:       /* Create FFTW vectors that are compatible with parallel layout of A */
109:       PetscCall(MatCreateVecsFFTW(A, &x, &y, &z));
110:       PetscCall(PetscObjectSetName((PetscObject)x, "Real space vector"));
111:       PetscCall(PetscObjectSetName((PetscObject)y, "Frequency space vector"));
112:       PetscCall(PetscObjectSetName((PetscObject)z, "Reconstructed vector"));

114:       /* Create and set PETSc vector */
115:       PetscCall(VecCreate(PETSC_COMM_WORLD, &input));
116:       PetscCall(VecSetSizes(input, PETSC_DECIDE, N));
117:       PetscCall(VecSetFromOptions(input));
118:       PetscCall(VecSetRandom(input, rdm));
119:       PetscCall(VecDuplicate(input, &output));
120:       if (view) PetscCall(VecView(input, PETSC_VIEWER_STDOUT_WORLD));

122:       /* Vector input is copied to another vector x using VecScatterPetscToFFTW. This is because the user data
123:          can have any parallel layout. But FFTW requires special parallel layout of the data. Hence the original
124:          data which is in the vector "input" here, needs to be copied to a vector x, which has the correct parallel
125:          layout for FFTW. Also, during parallel real transform, this pads extra zeros automatically
126:          at the end of last  dimension. This padding is required by FFTW to perform parallel real D.F.T.  */
127:       PetscCall(VecScatterPetscToFFTW(A, input, x)); /* buggy for dim = 3, 4... */

129:       /* Apply FFTW_FORWARD and FFTW_BACKWARD */
130:       PetscCall(MatMult(A, x, y));
131:       if (view) PetscCall(VecView(y, PETSC_VIEWER_STDOUT_WORLD));
132:       PetscCall(MatMultTranspose(A, y, z));

134:       /* Output from Backward DFT needs to be modified to obtain user readable data the routine VecScatterFFTWToPetsc
135:          performs the job. In some sense this is the reverse operation of VecScatterPetscToFFTW. This routine gets rid of
136:          the extra spaces that were artificially padded to perform real parallel transform.    */
137:       PetscCall(VecScatterFFTWToPetsc(A, z, output));

139:       /* Compare x and z. FFTW computes an unnormalized DFT, thus z = N*x */
140:       a = 1.0 / (PetscReal)N;
141:       PetscCall(VecScale(output, a));
142:       if (view) PetscCall(VecView(output, PETSC_VIEWER_STDOUT_WORLD));
143:       PetscCall(VecAXPY(output, -1.0, input));
144:       PetscCall(VecNorm(output, NORM_1, &enorm));
145:       if (enorm > 1.e-09 && rank == 0) PetscCall(PetscPrintf(PETSC_COMM_SELF, "  Error norm of |x - z| %e\n", enorm));

147:       /* Free spaces */
148:       PetscCall(PetscFree(dim));
149:       PetscCall(VecDestroy(&input));
150:       PetscCall(VecDestroy(&output));
151:       PetscCall(VecDestroy(&x));
152:       PetscCall(VecDestroy(&y));
153:       PetscCall(VecDestroy(&z));
154:       PetscCall(MatDestroy(&A));
155:     }
156:   }
157:   PetscCall(PetscRandomDestroy(&rdm));
158:   PetscCall(PetscFinalize());
159:   return 0;
160: }

162: /*TEST

164:    build:
165:       requires: !mpiuni fftw !complex

167:    test:
168:       output_file: output/ex158.out

170:    test:
171:       suffix: 2
172:       nsize: 3

174: TEST*/