/* Copyright (C) NHR@FAU, University Erlangen-Nuremberg. * All rights reserved. * Use of this source code is governed by a MIT-style * license that can be found in the LICENSE file. */ #include "math.h" #include "stdio.h" #include "stdlib.h" #include "allocate.h" #include "parameter.h" #include "solver.h" #include #include #define PI 3.14159265358979323846 #define P(i, j) p[(i) * (jmax + 2) + (j)] #define RHS(i, j) rhs[(i) * (jmax + 2) + (j)] void omp_create_dim(int num_threads, int* dim) { int center_int = (int)sqrt(num_threads); dim[0] = num_threads; dim[1] = 1; for (int num = center_int; num != 1; num--) if (num_threads % num == 0) { dim[0] = num; dim[1] = num_threads / num; break; } } int distribute_dim(int dim_rank, int dim_comm_size, int dim_size) { int base_size = dim_size / dim_comm_size; return dim_rank < (dim_size % dim_comm_size) ? base_size + 1 : base_size; } int get_dim_start(int dim_rank, int dim_comm_size, int dim_size) { int dim_start = 0; for (int i = 0; i < dim_rank; i++) dim_start += distribute_dim(i, dim_comm_size, dim_size); return dim_start; } int get_x_choord(const int proc_num, const int const* dims) { return proc_num / dims[1]; } int get_y_choord(const int proc_num, const int const* dims) { return proc_num % dims[1]; } void initSolver(Solver* solver, Parameter* params, int problem) { solver->imax = params->imax; solver->jmax = params->jmax; solver->dx = params->xlength / params->imax; solver->dy = params->ylength / params->jmax; solver->eps = params->eps; solver->omega = params->omg; solver->rho = params->rho; solver->itermax = params->itermax; int imax = solver->imax; int jmax = solver->jmax; size_t bytesize = (imax + 2) * (jmax + 2) * sizeof(double); solver->p = allocate(64, bytesize); solver->rhs = allocate(64, bytesize); double dx = solver->dx; double dy = solver->dy; double* p = solver->p; double* rhs = solver->rhs; int dim[2] = { 0 }; int num_threads = 1; #pragma omp parallel { #pragma omp critical num_threads = omp_get_num_threads(); } omp_create_dim(num_threads, dim); printf("%d: { %d, %d}\n", num_threads, dim[0], dim[1]); #pragma omp parallel { int jsw, isw; double local_res = 0.0; int li_start = get_dim_start(get_x_choord(omp_get_thread_num(), dim), dim[0], solver->imax); int lj_start = get_dim_start(get_y_choord(omp_get_thread_num(), dim), dim[1], solver->jmax); int limax = li_start + distribute_dim(get_x_choord(omp_get_thread_num(), dim), dim[0], solver->imax); int ljmax = lj_start + distribute_dim(get_y_choord(omp_get_thread_num(), dim), dim[1], solver->jmax); for (int i = li_start; i < limax + 2; i++) { for (int j = lj_start; j < ljmax + 2; j++) { P(i, j) = sin(2.0 * PI * i * dx * 2.0) + sin(2.0 * PI * j * dy * 2.0); } } if (problem == 2) { for (int i = li_start; i < limax + 2; i++) { for (int j = lj_start; j < ljmax + 2; j++) { RHS(i, j) = sin(2.0 * PI * i * dx); } } } else { for (int i = li_start; i < limax + 2; i++) { for (int j = lj_start; j < ljmax + 2; j++) { RHS(i, j) = 0.0; } } } } } void solveRB(Solver* solver) { const int imax = solver->imax; const int jmax = solver->jmax; const int itermax = solver->itermax; const double epssq = solver->eps * solver->eps; const double dx2 = solver->dx * solver->dx; const double dy2 = solver->dy * solver->dy; const double idx2 = 1.0 / dx2; const double idy2 = 1.0 / dy2; const double factor = solver->omega * 0.5 * (dx2 * dy2) / (dx2 + dy2); double* __restrict p = solver->p; double* __restrict rhs = solver->rhs; int dim[2] = { 0 }; #pragma omp parallel #pragma omp single { omp_create_dim(omp_get_num_threads(), dim); } double res = 0.0; #pragma omp parallel shared(res) { const int tid = omp_get_thread_num(); const int li_start = get_dim_start(get_x_choord(tid, dim), dim[0], imax); const int lj_start = get_dim_start(get_y_choord(tid, dim), dim[1], jmax); const int limax = li_start + distribute_dim(get_x_choord(tid, dim), dim[0], imax); const int ljmax = lj_start + distribute_dim(get_y_choord(tid, dim), dim[1], jmax); for (int it = 0; it < itermax; ++it) { #pragma omp single { res = 0; } double local_res = 0; int jsw = ((li_start) % 2 == 0) == ((lj_start) % 2 == 0) ? 1 : 2; for (int pass = 0; pass < 2; ++pass) { int isw = jsw; for (int i = li_start + 1; i < limax + 1; ++i) { for (int j = lj_start + isw; j < ljmax + 1; j += 2) { double r = RHS(i, j) - ((P(i + 1, j) - 2.0 * P(i, j) + P(i - 1, j)) * idx2 + (P(i, j + 1) - 2.0 * P(i, j) + P(i, j - 1)) * idy2); P(i, j) -= factor * r; local_res += r * r; /* reduction variable */ } isw = 3 - isw; } #pragma omp barrier jsw = 3 - jsw; } #pragma omp critical { res += local_res; } if (lj_start == 0) for (int i = li_start + 1; i < limax + 1; i++) P(i, 0) = P(i, 1); if (ljmax == jmax) for (int i = li_start + 1; i < limax + 1; i++) P(i, ljmax + 1) = P(i, ljmax); if (li_start == 0) for (int j = lj_start + 1; j < ljmax + 1; j++) P(0, j) = P(1, j); if (limax == imax) for (int j = lj_start + 1; j < ljmax + 1; j++) P(limax + 1, j) = P(limax, j); #pragma omp single { res /= (double)(imax * jmax); #ifdef DEBUG printf("%d Residuum: %e\n", it, res); #endif } #pragma omp barrier if (res < epssq) { break; } #pragma omp barrier } } printf("Solver reached itermax (%d) with residual %e\n", itermax, sqrt(res)); } void writeResult(Solver* solver, char* filename) { int imax = solver->imax; int jmax = solver->jmax; double* p = solver->p; FILE* fp; fp = fopen(filename, "w"); if (fp == NULL) { printf("Error!\n"); exit(EXIT_FAILURE); } for (int i = 1; i < imax + 1; i++) { for (int j = 1; j < jmax + 1; j++) { fprintf(fp, "%f ", P(i, j)); } fprintf(fp, "\n"); } fclose(fp); }