From 946fefae5fa15bb3a08c5b70890472a14bbc4378 Mon Sep 17 00:00:00 2001 From: indiano Date: Sun, 4 May 2025 21:51:01 +0200 Subject: [PATCH] ft: OpenMP solver now solves over square subdomains --- PoissonSolver/2D-omp/src/main.c | 40 +---- PoissonSolver/2D-omp/src/solver.c | 287 +++++++++++------------------- 2 files changed, 112 insertions(+), 215 deletions(-) diff --git a/PoissonSolver/2D-omp/src/main.c b/PoissonSolver/2D-omp/src/main.c index 11b9a1d..ac8bc85 100644 --- a/PoissonSolver/2D-omp/src/main.c +++ b/PoissonSolver/2D-omp/src/main.c @@ -6,10 +6,10 @@ #include #include "likwid-marker.h" +#include "omp.h" #include "parameter.h" #include "solver.h" #include "timing.h" -#include "omp.h" #define LIKWID_PROFILE(tag, call) \ startTime = getTimeStamp(); \ @@ -18,21 +18,18 @@ LIKWID_MARKER_STOP(#tag); \ endTime = getTimeStamp(); -enum VARIANT { SOR = 1, RB, RBA }; - int main(int argc, char** argv) { int volatile dummy = 0; - int variant = RB; double startTime, endTime; Parameter params; Solver solver; initParameter(¶ms); LIKWID_MARKER_INIT; - #pragma omp parallel +#pragma omp parallel { - if(dummy==1 || omp_get_thread_num()==0) - printf("OMP_THREADS_DETECTED: %d\n",omp_get_num_threads()); + if (dummy == 1 || omp_get_thread_num() == 0) + printf("OMP_THREADS_DETECTED: %d\n", omp_get_num_threads()); } if (argc < 2) { printf("Usage: %s \n", argv[0]); @@ -40,36 +37,11 @@ int main(int argc, char** argv) } readParameter(¶ms, argv[1]); - // printParameter(¶ms); - if (argc == 3) { - variant = atoi(argv[2]); - } - if (argc == 4) { - sscanf("%lf", argv[3], ¶ms.omg); - } initSolver(&solver, ¶ms, 2); - writeResult(&solver, "p-0.dat"); - - switch (variant) { - case SOR: - printf("Plain SOR\n"); - fflush(stdout); - LIKWID_PROFILE("SOR", solve); - break; - case RB: - printf("Red-black SOR\n"); - fflush(stdout); - LIKWID_PROFILE("RB", solveRB); - break; - case RBA: - printf("Red-black SOR with acceleration\n"); - fflush(stdout); - LIKWID_PROFILE("RBA", solveRBA); - break; - } + LIKWID_PROFILE("RB", solveRB); printf(" %.2fs\n", endTime - startTime); - writeResult(&solver, "p-final.dat"); + writeResult(&solver, "p.dat"); LIKWID_MARKER_CLOSE; return EXIT_SUCCESS; diff --git a/PoissonSolver/2D-omp/src/solver.c b/PoissonSolver/2D-omp/src/solver.c index 0160892..d4b52f9 100644 --- a/PoissonSolver/2D-omp/src/solver.c +++ b/PoissonSolver/2D-omp/src/solver.c @@ -9,11 +9,42 @@ #include "allocate.h" #include "parameter.h" #include "solver.h" +#include +#include #define PI 3.14159265358979323846 -#define P(i, j) p[(j) * (imax + 2) + (i)] -#define RHS(i, j) rhs[(j) * (imax + 2) + (i)] +#define P(i, j) p[(i) * (jmax + 2) + (j)] +#define RHS(i, j) rhs[(i) * (jmax + 2) + (j)] +void omp_create_dim(int num_threads, int* dim) +{ + int center_int = (int)sqrt(num_threads); + dim[0] = num_threads; + dim[1] = 1; + for (int num = center_int; num != 1; num--) + if (num_threads % num == 0) { + dim[0] = num; + dim[1] = num_threads / num; + break; + } +} + +int distribute_dim(int dim_rank, int dim_comm_size, int dim_size) +{ + int base_size = dim_size / dim_comm_size; + return dim_rank < (dim_size % dim_comm_size) ? base_size + 1 : base_size; +} + +int get_dim_start(int dim_rank, int dim_comm_size, int dim_size) +{ + int dim_start = 0; + for (int i = 0; i < dim_rank; i++) + dim_start += distribute_dim(i, dim_comm_size, dim_size); + return dim_start; +} + +int get_x_choord(const int proc_num, const int const* dims) { return proc_num / dims[1]; } +int get_y_choord(const int proc_num, const int const* dims) { return proc_num % dims[1]; } void initSolver(Solver* solver, Parameter* params, int problem) { solver->imax = params->imax; @@ -35,132 +66,102 @@ void initSolver(Solver* solver, Parameter* params, int problem) double dy = solver->dy; double* p = solver->p; double* rhs = solver->rhs; - #pragma omp parallel for collapse(2) - for (int j = 0; j < jmax + 2; j++) { - for (int i = 0; i < imax + 2; i++) { - P(i, j) = sin(2.0 * PI * i * dx * 2.0) + sin(2.0 * PI * j * dy * 2.0); +#pragma omp parallel for collapse(2) + for (int i = 0; i < imax + 2; i++) { + for (int j = 0; j < jmax + 2; j++) { + // P(i, j) = sin(2.0 * PI * i * dx * 2.0) + sin(2.0 * PI * j * dy * 2.0); + P(i, j) = 0; } } if (problem == 2) { - #pragma omp parallel for collapse(2) - for (int j = 0; j < jmax + 2; j++) { - for (int i = 0; i < imax + 2; i++) { +#pragma omp parallel for collapse(2) + for (int i = 0; i < imax + 2; i++) { + for (int j = 0; j < jmax + 2; j++) { RHS(i, j) = sin(2.0 * PI * i * dx); } } } else { - #pragma omp parallel for collapse(2) - for (int j = 0; j < jmax + 2; j++) { - for (int i = 0; i < imax + 2; i++) { +#pragma omp parallel for collapse(2) + for (int i = 0; i < imax + 2; i++) { + for (int j = 0; j < jmax + 2; j++) { RHS(i, j) = 0.0; } } } } -void solve(Solver* solver) -{ - int imax = solver->imax; - int jmax = solver->jmax; - double eps = solver->eps; - int itermax = solver->itermax; - double dx2 = solver->dx * solver->dx; - double dy2 = solver->dy * solver->dy; - double idx2 = 1.0 / dx2; - double idy2 = 1.0 / dy2; - double factor = solver->omega * 0.5 * (dx2 * dy2) / (dx2 + dy2); - double* p = solver->p; - double* rhs = solver->rhs; - double epssq = eps * eps; - int it = 0; - double res = 1.0; - char filename[20]; - - while ((res >= epssq) && (it < itermax)) { - res = 0.0; - - for (int j = 1; j < jmax + 1; j++) { - for (int i = 1; i < imax + 1; i++) { - - double r = RHS(i, j) - - ((P(i - 1, j) - 2.0 * P(i, j) + P(i + 1, j)) * idx2 + - (P(i, j - 1) - 2.0 * P(i, j) + P(i, j + 1)) * idy2); - - P(i, j) -= (factor * r); - res += (r * r); - } - } - - for (int i = 1; i < imax + 1; i++) { - P(i, 0) = P(i, 1); - P(i, jmax + 1) = P(i, jmax); - } - - for (int j = 1; j < jmax + 1; j++) { - P(0, j) = P(1, j); - P(imax + 1, j) = P(imax, j); - } - - res = res / (double)(imax * jmax); -#ifdef DEBUG - printf("%d Residuum: %e\n", it, res); -#endif -#ifdef ANIMATE - sprintf(filename, "p-%d.dat", it); - writeResult(solver, filename); -#endif - it++; - } - - printf("%d, %f\n", it, solver->omega); -} - void solveRB(Solver* solver) { - int imax = solver->imax; - int jmax = solver->jmax; - double eps = solver->eps; - int itermax = solver->itermax; - double dx2 = solver->dx * solver->dx; - double dy2 = solver->dy * solver->dy; - double idx2 = 1.0 / dx2; - double idy2 = 1.0 / dy2; - double factor = solver->omega * 0.5 * (dx2 * dy2) / (dx2 + dy2); - double* p = solver->p; - double* rhs = solver->rhs; - double epssq = eps * eps; - int it = 0; - double res = 1.0; - int pass, jsw, isw; - + int imax = solver->imax; + int jmax = solver->jmax; + double eps = solver->eps; + int itermax = solver->itermax; + double dx2 = solver->dx * solver->dx; + double dy2 = solver->dy * solver->dy; + double idx2 = 1.0 / dx2; + double idy2 = 1.0 / dy2; + double factor = solver->omega * 0.5 * (dx2 * dy2) / (dx2 + dy2); + double* p = solver->p; + double* rhs = solver->rhs; + double epssq = eps * eps; + int it = 0; + double res = 1.0; + int dim[2] = { 0 }; + int num_threads = 1; +#pragma omp parallel + { +#pragma omp critical + num_threads = omp_get_num_threads(); + } + omp_create_dim(num_threads, dim); + printf("%d: { %d, %d}\n", num_threads, dim[0], dim[1]); while ((res >= epssq) && (it < itermax)) { res = 0.0; - jsw = 1; +#pragma omp parallel + { + int jsw, isw; + double local_res = 0.0; + int li_start = get_dim_start(get_x_choord(omp_get_thread_num(), dim), + dim[0], + solver->imax); + int lj_start = get_dim_start(get_y_choord(omp_get_thread_num(), dim), + dim[1], + solver->jmax); + int limax = li_start + distribute_dim(get_x_choord(omp_get_thread_num(), dim), + dim[0], + solver->imax); + int ljmax = lj_start + distribute_dim(get_y_choord(omp_get_thread_num(), dim), + dim[1], + solver->jmax); + jsw = ((li_start) % 2 == 0) == ((lj_start) % 2 == 0) ? 1 : 2; + for (int pass = 0; pass < 2; pass++) { + isw = jsw; + for (int i = li_start + 1; i < limax + 1; i++) { + for (int j = lj_start + isw; j < ljmax + 1; j += 2) { + double r = RHS(i, j) - + ((P(i + 1, j) - 2.0 * P(i, j) + P(i - 1, j)) * idx2 + + (P(i, j + 1) - 2.0 * P(i, j) + P(i, j - 1)) * + idy2); - for (pass = 0; pass < 2; pass++) { - isw = jsw; - #pragma omp parallel for firstprivate(isw) - for (int j = 1; j < jmax + 1; j++) { - for (int i = isw; i < imax + 1; i += 2) { - - double r = RHS(i, j) - - ((P(i + 1, j) - 2.0 * P(i, j) + P(i - 1, j)) * idx2 + - (P(i, j + 1) - 2.0 * P(i, j) + P(i, j - 1)) * idy2); - - P(i, j) -= (factor * r); - res += (r * r); + P(i, j) -= (factor * r); + res += (r * r); + } + isw = 3 - isw; } - isw = 3 - isw; + jsw = 3 - jsw; + } +#pragma omp critical + { + res += local_res; } - jsw = 3 - jsw; } - #pragma omp parallel for +#pragma omp parallel for for (int i = 1; i < imax + 1; i++) { P(i, 0) = P(i, 1); P(i, jmax + 1) = P(i, jmax); } - #pragma omp parallel for +#pragma omp parallel for for (int j = 1; j < jmax + 1; j++) { P(0, j) = P(1, j); P(imax + 1, j) = P(imax, j); @@ -169,86 +170,10 @@ void solveRB(Solver* solver) res = res / (double)(imax * jmax); #ifdef DEBUG printf("%d Residuum: %e\n", it, res); -#endif -// #ifdef ANIMATE -// sprintf(filename, "p-%d.dat", it); -// writeResult(solver, filename); -// #endif -// it++; - } - - printf("Solver took %d iterations to reach %f\n", it, sqrt(res)); - printf("%d, %f\n", it, solver->omega); -} - -void solveRBA(Solver* solver) -{ - int imax = solver->imax; - int jmax = solver->jmax; - double eps = solver->eps; - int itermax = solver->itermax; - double dx2 = solver->dx * solver->dx; - double dy2 = solver->dy * solver->dy; - double idx2 = 1.0 / dx2; - double idy2 = 1.0 / dy2; - double factor = 0.5 * (dx2 * dy2) / (dx2 + dy2); - double rho = solver->rho; - double* p = solver->p; - double* rhs = solver->rhs; - double epssq = eps * eps; - int it = 0; - double res = 1.0; - int pass, jsw, isw; - double omega = 1.0; - - while ((res >= epssq) && (it < itermax)) { - res = 0.0; - jsw = 1; - - for (pass = 0; pass < 2; pass++) { - isw = jsw; - - for (int j = 1; j < jmax + 1; j++) { - for (int i = isw; i < imax + 1; i += 2) { - - double r = RHS(i, j) - - ((P(i + 1, j) - 2.0 * P(i, j) + P(i - 1, j)) * idx2 + - (P(i, j + 1) - 2.0 * P(i, j) + P(i, j - 1)) * idy2); - - P(i, j) -= (omega * factor * r); - res += (r * r); - } - isw = 3 - isw; - } - jsw = 3 - jsw; - omega = (it == 0 && pass == 0 ? 1.0 / (1.0 - 0.5 * rho * rho) - : 1.0 / (1.0 - 0.25 * rho * rho * omega)); - } - - for (int i = 1; i < imax + 1; i++) { - P(i, 0) = P(i, 1); - P(i, jmax + 1) = P(i, jmax); - } - - for (int j = 1; j < jmax + 1; j++) { - P(0, j) = P(1, j); - P(imax + 1, j) = P(imax, j); - } - - res = res / (double)(imax * jmax); -#ifdef DEBUG - printf("%d Residuum: %e Omega: %e\n", it, res, omega); -#endif -#ifdef ANIMATE - sprintf(filename, "p-%d.dat", it); - writeResult(solver, filename); #endif it++; } - - // printf("Final omega: %f\n", omega); - // printf("Solver took %d iterations to reach %f\n", it, sqrt(res)); - printf("%d, %f\n", it, omega); + printf("Solver took %d iterations to reach %f\n", it, sqrt(res)); } void writeResult(Solver* solver, char* filename) @@ -265,8 +190,8 @@ void writeResult(Solver* solver, char* filename) exit(EXIT_FAILURE); } - for (int j = 0; j < jmax + 2; j++) { - for (int i = 0; i < imax + 2; i++) { + for (int i = 1; i < imax + 1; i++) { + for (int j = 1; j < jmax + 1; j++) { fprintf(fp, "%f ", P(i, j)); } fprintf(fp, "\n");