forked from moebiusband/NuSiF-Solver
235 lines
7.2 KiB
C
235 lines
7.2 KiB
C
/* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
|
* All rights reserved.
|
|
* Use of this source code is governed by a MIT-style
|
|
* license that can be found in the LICENSE file. */
|
|
#include "math.h"
|
|
#include "stdio.h"
|
|
#include "stdlib.h"
|
|
|
|
#include "allocate.h"
|
|
#include "parameter.h"
|
|
#include "solver.h"
|
|
#include <omp.h>
|
|
#include <stddef.h>
|
|
|
|
#define PI 3.14159265358979323846
|
|
#define P(i, j) p[(i) * (jmax + 2) + (j)]
|
|
#define RHS(i, j) rhs[(i) * (jmax + 2) + (j)]
|
|
|
|
void omp_create_dim(int num_threads, int* dim)
|
|
{
|
|
int center_int = (int)sqrt(num_threads);
|
|
dim[0] = num_threads;
|
|
dim[1] = 1;
|
|
for (int num = center_int; num != 1; num--)
|
|
if (num_threads % num == 0) {
|
|
dim[0] = num;
|
|
dim[1] = num_threads / num;
|
|
break;
|
|
}
|
|
}
|
|
|
|
int distribute_dim(int dim_rank, int dim_comm_size, int dim_size)
|
|
{
|
|
int base_size = dim_size / dim_comm_size;
|
|
return dim_rank < (dim_size % dim_comm_size) ? base_size + 1 : base_size;
|
|
}
|
|
|
|
int get_dim_start(int dim_rank, int dim_comm_size, int dim_size)
|
|
{
|
|
int dim_start = 0;
|
|
for (int i = 0; i < dim_rank; i++)
|
|
dim_start += distribute_dim(i, dim_comm_size, dim_size);
|
|
return dim_start;
|
|
}
|
|
|
|
int get_x_choord(const int proc_num, const int const* dims) { return proc_num / dims[1]; }
|
|
int get_y_choord(const int proc_num, const int const* dims) { return proc_num % dims[1]; }
|
|
|
|
void initSolver(Solver* solver, Parameter* params, int problem)
|
|
{
|
|
solver->imax = params->imax;
|
|
solver->jmax = params->jmax;
|
|
solver->dx = params->xlength / params->imax;
|
|
solver->dy = params->ylength / params->jmax;
|
|
solver->eps = params->eps;
|
|
solver->omega = params->omg;
|
|
solver->rho = params->rho;
|
|
solver->itermax = params->itermax;
|
|
|
|
int imax = solver->imax;
|
|
int jmax = solver->jmax;
|
|
size_t bytesize = (imax + 2) * (jmax + 2) * sizeof(double);
|
|
solver->p = allocate(64, bytesize);
|
|
solver->rhs = allocate(64, bytesize);
|
|
|
|
double dx = solver->dx;
|
|
double dy = solver->dy;
|
|
double* p = solver->p;
|
|
double* rhs = solver->rhs;
|
|
int dim[2] = { 0 };
|
|
int num_threads = 1;
|
|
#pragma omp parallel
|
|
{
|
|
#pragma omp critical
|
|
num_threads = omp_get_num_threads();
|
|
}
|
|
omp_create_dim(num_threads, dim);
|
|
printf("%d: { %d, %d}\n", num_threads, dim[0], dim[1]);
|
|
#pragma omp parallel
|
|
{
|
|
int jsw, isw;
|
|
double local_res = 0.0;
|
|
int li_start = get_dim_start(get_x_choord(omp_get_thread_num(), dim),
|
|
dim[0],
|
|
solver->imax);
|
|
int lj_start = get_dim_start(get_y_choord(omp_get_thread_num(), dim),
|
|
dim[1],
|
|
solver->jmax);
|
|
int limax = li_start + distribute_dim(get_x_choord(omp_get_thread_num(), dim),
|
|
dim[0],
|
|
solver->imax);
|
|
int ljmax = lj_start + distribute_dim(get_y_choord(omp_get_thread_num(), dim),
|
|
dim[1],
|
|
solver->jmax);
|
|
|
|
for (int i = li_start; i < limax + 2; i++) {
|
|
for (int j = lj_start; j < ljmax + 2; j++) {
|
|
P(i, j) = sin(2.0 * PI * i * dx * 2.0) + sin(2.0 * PI * j * dy * 2.0);
|
|
}
|
|
}
|
|
|
|
if (problem == 2) {
|
|
for (int i = li_start; i < limax + 2; i++) {
|
|
for (int j = lj_start; j < ljmax + 2; j++) {
|
|
RHS(i, j) = sin(2.0 * PI * i * dx);
|
|
}
|
|
}
|
|
} else {
|
|
for (int i = li_start; i < limax + 2; i++) {
|
|
for (int j = lj_start; j < ljmax + 2; j++) {
|
|
RHS(i, j) = 0.0;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
void solveRB(Solver* solver)
|
|
{
|
|
|
|
const int imax = solver->imax;
|
|
const int jmax = solver->jmax;
|
|
const int itermax = solver->itermax;
|
|
const double epssq = solver->eps * solver->eps;
|
|
|
|
const double dx2 = solver->dx * solver->dx;
|
|
const double dy2 = solver->dy * solver->dy;
|
|
const double idx2 = 1.0 / dx2;
|
|
const double idy2 = 1.0 / dy2;
|
|
const double factor = solver->omega * 0.5 * (dx2 * dy2) / (dx2 + dy2);
|
|
|
|
double* __restrict p = solver->p;
|
|
double* __restrict rhs = solver->rhs;
|
|
|
|
int dim[2] = { 0 };
|
|
#pragma omp parallel
|
|
#pragma omp single
|
|
{
|
|
omp_create_dim(omp_get_num_threads(), dim);
|
|
}
|
|
|
|
double res = 0.0;
|
|
#pragma omp parallel shared(res)
|
|
{
|
|
const int tid = omp_get_thread_num();
|
|
const int li_start = get_dim_start(get_x_choord(tid, dim), dim[0], imax);
|
|
const int lj_start = get_dim_start(get_y_choord(tid, dim), dim[1], jmax);
|
|
|
|
const int limax = li_start + distribute_dim(get_x_choord(tid, dim), dim[0], imax);
|
|
const int ljmax = lj_start + distribute_dim(get_y_choord(tid, dim), dim[1], jmax);
|
|
|
|
for (int it = 0; it < itermax; ++it) {
|
|
|
|
#pragma omp single
|
|
{
|
|
res = 0;
|
|
}
|
|
double local_res = 0;
|
|
int jsw = ((li_start) % 2 == 0) == ((lj_start) % 2 == 0) ? 1 : 2;
|
|
|
|
for (int pass = 0; pass < 2; ++pass) {
|
|
int isw = jsw;
|
|
for (int i = li_start + 1; i < limax + 1; ++i) {
|
|
for (int j = lj_start + isw; j < ljmax + 1; j += 2) {
|
|
|
|
double r = RHS(i, j) -
|
|
((P(i + 1, j) - 2.0 * P(i, j) + P(i - 1, j)) * idx2 +
|
|
(P(i, j + 1) - 2.0 * P(i, j) + P(i, j - 1)) *
|
|
idy2);
|
|
|
|
P(i, j) -= factor * r;
|
|
local_res += r * r; /* reduction variable */
|
|
}
|
|
isw = 3 - isw;
|
|
}
|
|
#pragma omp barrier
|
|
jsw = 3 - jsw;
|
|
}
|
|
#pragma omp critical
|
|
{
|
|
res += local_res;
|
|
}
|
|
if (lj_start == 0)
|
|
for (int i = li_start + 1; i < limax + 1; i++)
|
|
P(i, 0) = P(i, 1);
|
|
if (ljmax == jmax)
|
|
for (int i = li_start + 1; i < limax + 1; i++)
|
|
P(i, ljmax + 1) = P(i, ljmax);
|
|
if (li_start == 0)
|
|
for (int j = lj_start + 1; j < ljmax + 1; j++)
|
|
P(0, j) = P(1, j);
|
|
if (limax == imax)
|
|
for (int j = lj_start + 1; j < ljmax + 1; j++)
|
|
P(limax + 1, j) = P(limax, j);
|
|
#pragma omp single
|
|
{
|
|
res /= (double)(imax * jmax);
|
|
#ifdef DEBUG
|
|
printf("%d Residuum: %e\n", it, res);
|
|
#endif
|
|
}
|
|
#pragma omp barrier
|
|
if (res < epssq) {
|
|
break;
|
|
}
|
|
#pragma omp barrier
|
|
}
|
|
}
|
|
printf("Solver reached itermax (%d) with residual %e\n", itermax, sqrt(res));
|
|
}
|
|
|
|
void writeResult(Solver* solver, char* filename)
|
|
{
|
|
int imax = solver->imax;
|
|
int jmax = solver->jmax;
|
|
double* p = solver->p;
|
|
|
|
FILE* fp;
|
|
fp = fopen(filename, "w");
|
|
|
|
if (fp == NULL) {
|
|
printf("Error!\n");
|
|
exit(EXIT_FAILURE);
|
|
}
|
|
|
|
for (int i = 1; i < imax + 1; i++) {
|
|
for (int j = 1; j < jmax + 1; j++) {
|
|
fprintf(fp, "%f ", P(i, j));
|
|
}
|
|
fprintf(fp, "\n");
|
|
}
|
|
|
|
fclose(fp);
|
|
}
|