diff --git a/PoissonSolver/2D-omp/src/main.c b/PoissonSolver/2D-omp/src/main.c index ac8bc85..e0e6dc2 100644 --- a/PoissonSolver/2D-omp/src/main.c +++ b/PoissonSolver/2D-omp/src/main.c @@ -25,7 +25,6 @@ int main(int argc, char** argv) Parameter params; Solver solver; initParameter(¶ms); - LIKWID_MARKER_INIT; #pragma omp parallel { if (dummy == 1 || omp_get_thread_num() == 0) @@ -39,10 +38,11 @@ int main(int argc, char** argv) readParameter(¶ms, argv[1]); initSolver(&solver, ¶ms, 2); - LIKWID_PROFILE("RB", solveRB); + startTime = getTimeStamp(); + solveRB(&solver); + endTime = getTimeStamp(); printf(" %.2fs\n", endTime - startTime); writeResult(&solver, "p.dat"); - LIKWID_MARKER_CLOSE; return EXIT_SUCCESS; } diff --git a/PoissonSolver/2D-omp/src/solver.c b/PoissonSolver/2D-omp/src/solver.c index b306265..ff9cc22 100644 --- a/PoissonSolver/2D-omp/src/solver.c +++ b/PoissonSolver/2D-omp/src/solver.c @@ -116,87 +116,95 @@ void initSolver(Solver* solver, Parameter* params, int problem) void solveRB(Solver* solver) { - int imax = solver->imax; - int jmax = solver->jmax; - double eps = solver->eps; - int itermax = solver->itermax; - double dx2 = solver->dx * solver->dx; - double dy2 = solver->dy * solver->dy; - double idx2 = 1.0 / dx2; - double idy2 = 1.0 / dy2; - double factor = solver->omega * 0.5 * (dx2 * dy2) / (dx2 + dy2); - double* p = solver->p; - double* rhs = solver->rhs; - double epssq = eps * eps; - int it = 0; - double res = 1.0; - int dim[2] = { 0 }; - int num_threads = 1; + + const int imax = solver->imax; + const int jmax = solver->jmax; + const int itermax = solver->itermax; + const double epssq = solver->eps * solver->eps; + + const double dx2 = solver->dx * solver->dx; + const double dy2 = solver->dy * solver->dy; + const double idx2 = 1.0 / dx2; + const double idy2 = 1.0 / dy2; + const double factor = solver->omega * 0.5 * (dx2 * dy2) / (dx2 + dy2); + + double* __restrict p = solver->p; + double* __restrict rhs = solver->rhs; + + int dim[2] = { 0 }; #pragma omp parallel +#pragma omp single { -#pragma omp critical - num_threads = omp_get_num_threads(); + omp_create_dim(omp_get_num_threads(), dim); } - omp_create_dim(num_threads, dim); - printf("%d: { %d, %d}\n", num_threads, dim[0], dim[1]); - while ((res >= epssq) && (it < itermax)) { -#pragma omp parallel + + double res = 0.0; + + for (int it = 0; it < itermax; ++it) { + + res = 0.0; + +#pragma omp parallel reduction(+ : res) { - res = 0.0; - int jsw, isw; - double local_res = 0.0; - int li_start = get_dim_start(get_x_choord(omp_get_thread_num(), dim), - dim[0], - solver->imax); - int lj_start = get_dim_start(get_y_choord(omp_get_thread_num(), dim), - dim[1], - solver->jmax); - int limax = li_start + distribute_dim(get_x_choord(omp_get_thread_num(), dim), - dim[0], - solver->imax); - int ljmax = lj_start + distribute_dim(get_y_choord(omp_get_thread_num(), dim), - dim[1], - solver->jmax); - jsw = ((li_start) % 2 == 0) == ((lj_start) % 2 == 0) ? 1 : 2; - for (int pass = 0; pass < 2; pass++) { - isw = jsw; - for (int i = li_start + 1; i < limax + 1; i++) { + const int tid = omp_get_thread_num(); + + const int li_start = get_dim_start(get_x_choord(tid, dim), dim[0], imax); + const int lj_start = get_dim_start(get_y_choord(tid, dim), dim[1], jmax); + + const int limax = li_start + + distribute_dim(get_x_choord(tid, dim), dim[0], imax); + const int ljmax = lj_start + + distribute_dim(get_y_choord(tid, dim), dim[1], jmax); + + + int jsw = ((li_start) % 2 == 0) == ((lj_start) % 2 == 0) ? 1 : 2; + + for (int pass = 0; pass < 2; ++pass) { + int isw = jsw; + for (int i = li_start + 1; i < limax + 1; ++i) { for (int j = lj_start + isw; j < ljmax + 1; j += 2) { + double r = RHS(i, j) - ((P(i + 1, j) - 2.0 * P(i, j) + P(i - 1, j)) * idx2 + (P(i, j + 1) - 2.0 * P(i, j) + P(i, j - 1)) * idy2); - P(i, j) -= (factor * r); - res += (r * r); + P(i, j) -= factor * r; + res += r * r; /* reduction variable */ } isw = 3 - isw; } +#pragma omp barrier jsw = 3 - jsw; } -#pragma omp critical - { - res += local_res; - } - } -#pragma omp parallel for - for (int i = 1; i < imax + 1; i++) { - P(i, 0) = P(i, 1); - P(i, jmax + 1) = P(i, jmax); - } -#pragma omp parallel for - for (int j = 1; j < jmax + 1; j++) { - P(0, j) = P(1, j); - P(imax + 1, j) = P(imax, j); - } + if (lj_start == 0) + for (int i = li_start + 1; i < limax + 1; i++) + P(i, 0) = P(i, 1); + if (ljmax == jmax) + for (int i = li_start + 1; i < limax + 1; i++) + P(i, ljmax + 1) = P(i, ljmax); + if (li_start == 0) + for (int j = lj_start + 1; j < ljmax + 1; j++) + P(0, j) = P(1, j); + if (limax == imax) + for (int j = lj_start + 1; j < ljmax + 1; j++) + P(limax + 1, j) = P(limax, j); + + } + + res /= (double)(imax * jmax); - res = res / (double)(imax * jmax); #ifdef DEBUG printf("%d Residuum: %e\n", it, res); #endif - it++; + + if (res < epssq) { + printf("Solver took %d iterations to reach %e\n", it + 1, sqrt(res)); + return; + } } - printf("Solver took %d iterations to reach %f\n", it, sqrt(res)); + + printf("Solver reached itermax (%d) with residual %e\n", itermax, sqrt(res)); } void writeResult(Solver* solver, char* filename)