diff --git a/EnhancedSolver/2D-mpi/src/discretization.c b/EnhancedSolver/2D-mpi/src/discretization.c index 94c6acb..1b91d0d 100644 --- a/EnhancedSolver/2D-mpi/src/discretization.c +++ b/EnhancedSolver/2D-mpi/src/discretization.c @@ -114,7 +114,9 @@ void initDiscretiztion(Discretization* d, Parameter* params) d->f = allocate(64, size * sizeof(double)); d->g = allocate(64, size * sizeof(double)); d->grid.s = allocate(64, size * sizeof(double)); - + #ifdef _OPENMP + #pragma omp parallel for + #endif for (int i = 0; i < size; i++) { d->u[i] = params->u_init; d->v[i] = params->v_init; diff --git a/EnhancedSolver/2D-mpi/src/solver-mg.c b/EnhancedSolver/2D-mpi/src/solver-mg.c index 27eaa17..3b00823 100644 --- a/EnhancedSolver/2D-mpi/src/solver-mg.c +++ b/EnhancedSolver/2D-mpi/src/solver-mg.c @@ -6,6 +6,7 @@ */ #include #include +#include #include "allocate.h" #include "solver.h" @@ -53,7 +54,9 @@ static void restrictMG(Solver* s, int level, Comm* comm) #ifdef _MPI commExchange(comm, old); #endif - + #ifdef _OPENMP + #pragma omp parallel for + #endif for (int j = 1; j < (jmaxLocal / 2) + 1; j++) { for (int i = 1; i < (imaxLocal / 2) + 1; i++) { R(i, j) = (OLD(2 * i - 1, 2 * j - 1) + OLD(2 * i, 2 * j - 1) * 2 + @@ -73,7 +76,9 @@ static void prolongate(Solver* s, int level, Comm* comm) double* old = s->r[level + 1]; double* e = s->r[level]; - + #ifdef _OPENMP + #pragma omp parallel for + #endif for (int j = 2; j < jmaxLocal + 1; j += 2) { for (int i = 2; i < imaxLocal + 1; i += 2) { E(i, j) = OLD(i / 2, j / 2); @@ -87,6 +92,9 @@ static void correct(Solver* s, double* p, int level, Comm* comm) int imaxLocal = comm->imaxLocal; int jmaxLocal = comm->jmaxLocal; + #ifdef _OPENMP + #pragma omp parallel for + #endif for (int j = 1; j < jmaxLocal + 1; ++j) { for (int i = 1; i < imaxLocal + 1; ++i) { P(i, j) += E(i, j); @@ -97,25 +105,38 @@ static void correct(Solver* s, double* p, int level, Comm* comm) static void setBoundaryCondition(Solver* s, double* p, int imaxLocal, int jmaxLocal) { #ifdef _MPI + if (commIsBoundary(s->comm, B)) { // set bottom bc + #ifdef _OPENMP + #pragma omp parallel for + #endif for (int i = 1; i < imaxLocal + 1; i++) { P(i, 0) = P(i, 1); } } if (commIsBoundary(s->comm, T)) { // set top bc + #ifdef _OPENMP + #pragma omp parallel for + #endif for (int i = 1; i < imaxLocal + 1; i++) { P(i, jmaxLocal + 1) = P(i, jmaxLocal); } } if (commIsBoundary(s->comm, L)) { // set left bc + #ifdef _OPENMP + #pragma omp parallel for + #endif for (int j = 1; j < jmaxLocal + 1; j++) { P(0, j) = P(1, j); } } if (commIsBoundary(s->comm, R)) { // set right bc + #ifdef _OPENMP + #pragma omp parallel for + #endif for (int j = 1; j < jmaxLocal + 1; j++) { P(imaxLocal + 1, j) = P(imaxLocal, j); } @@ -160,17 +181,33 @@ static void smooth(Solver* s, double* p, double* rhs, int level, Comm* comm) #ifdef _MPI commExchange(comm, p); #endif - + #ifdef _OPENMP + #pragma message("Enabling OPENMP for loop") + #pragma omp parallel private(isw) + { + isw = jsw; + #pragma omp for + for (int j = 1; j < jmaxLocal + 1; j++) { + for (int i = isw; i < imaxLocal + 1; i += 2) { + P(i, j) -= factor * + (RHS(i, j) - + ((P(i + 1, j) - 2.0 * P(i, j) + P(i - 1, j)) * idx2 + + (P(i, j + 1) - 2.0 * P(i, j) + P(i, j - 1)) * idy2)); + } + isw = 3 - isw; + } + } + #else for (int j = 1; j < jmaxLocal + 1; j++) { - for (int i = isw; i < imaxLocal + 1; i += 2) { - + for (int i = isw; i < imaxLocal + 1; i += 2) { P(i, j) -= factor * - (RHS(i, j) - - ((P(i + 1, j) - 2.0 * P(i, j) + P(i - 1, j)) * idx2 + - (P(i, j + 1) - 2.0 * P(i, j) + P(i, j - 1)) * idy2)); + (RHS(i, j) - + ((P(i + 1, j) - 2.0 * P(i, j) + P(i - 1, j)) * idx2 + + (P(i, j + 1) - 2.0 * P(i, j) + P(i, j - 1)) * idy2)); } isw = 3 - isw; } + #endif jsw = 3 - jsw; } } @@ -199,7 +236,24 @@ static double calculateResidual(Solver* s, double* p, double* rhs, int level, Co #ifdef _MPI commExchange(comm, p); #endif + #ifdef _OPENMP + #pragma omp parallel private(isw) + { + isw = jsw; + #pragma omp for + for (int j = 1; j < jmaxLocal + 1; j++) { + for (int i = isw; i < imaxLocal + 1; i += 2) { + + R(i, j) = RHS(i, j) - + ((P(i + 1, j) - 2.0 * P(i, j) + P(i - 1, j)) * idx2 + + (P(i, j + 1) - 2.0 * P(i, j) + P(i, j - 1)) * idy2); + res += (R(i, j) * R(i, j)); + } + isw = 3 - isw; + } + } + #else for (int j = 1; j < jmaxLocal + 1; j++) { for (int i = isw; i < imaxLocal + 1; i += 2) { @@ -211,6 +265,7 @@ static double calculateResidual(Solver* s, double* p, double* rhs, int level, Co } isw = 3 - isw; } + #endif jsw = 3 - jsw; } @@ -297,6 +352,17 @@ void initSolver(Solver* s, Discretization* d, Parameter* p) int jmax = s->grid->jmax; int levels = s->levels; printf("Using Multigrid solver with %d levels\n", levels); + #ifdef _MPI + #ifdef _OPENMP + if (commIsMaster(s->comm)) { + #pragma omp parallel + { + #pragma omp single + printf("Detected %d threads per rank (%d)\n", omp_get_num_threads(), s->comm->size); + } + } + #endif + #endif s->r = malloc(levels * sizeof(double*)); s->e = malloc(levels * sizeof(double*));