Compare commits

..

4 Commits
main ... main

25 changed files with 3218 additions and 406 deletions

View File

@ -114,7 +114,9 @@ void initDiscretiztion(Discretization* d, Parameter* params)
d->f = allocate(64, size * sizeof(double));
d->g = allocate(64, size * sizeof(double));
d->grid.s = allocate(64, size * sizeof(double));
#ifdef _OPENMP
#pragma omp parallel for
#endif
for (int i = 0; i < size; i++) {
d->u[i] = params->u_init;
d->v[i] = params->v_init;

View File

@ -6,6 +6,7 @@
*/
#include <stdio.h>
#include <stdlib.h>
#include <omp.h>
#include "allocate.h"
#include "solver.h"
@ -53,7 +54,9 @@ static void restrictMG(Solver* s, int level, Comm* comm)
#ifdef _MPI
commExchange(comm, old);
#endif
#ifdef _OPENMP
#pragma omp parallel for
#endif
for (int j = 1; j < (jmaxLocal / 2) + 1; j++) {
for (int i = 1; i < (imaxLocal / 2) + 1; i++) {
R(i, j) = (OLD(2 * i - 1, 2 * j - 1) + OLD(2 * i, 2 * j - 1) * 2 +
@ -73,7 +76,9 @@ static void prolongate(Solver* s, int level, Comm* comm)
double* old = s->r[level + 1];
double* e = s->r[level];
#ifdef _OPENMP
#pragma omp parallel for
#endif
for (int j = 2; j < jmaxLocal + 1; j += 2) {
for (int i = 2; i < imaxLocal + 1; i += 2) {
E(i, j) = OLD(i / 2, j / 2);
@ -87,6 +92,9 @@ static void correct(Solver* s, double* p, int level, Comm* comm)
int imaxLocal = comm->imaxLocal;
int jmaxLocal = comm->jmaxLocal;
#ifdef _OPENMP
#pragma omp parallel for
#endif
for (int j = 1; j < jmaxLocal + 1; ++j) {
for (int i = 1; i < imaxLocal + 1; ++i) {
P(i, j) += E(i, j);
@ -97,25 +105,38 @@ static void correct(Solver* s, double* p, int level, Comm* comm)
static void setBoundaryCondition(Solver* s, double* p, int imaxLocal, int jmaxLocal)
{
#ifdef _MPI
if (commIsBoundary(s->comm, B)) { // set bottom bc
#ifdef _OPENMP
#pragma omp parallel for
#endif
for (int i = 1; i < imaxLocal + 1; i++) {
P(i, 0) = P(i, 1);
}
}
if (commIsBoundary(s->comm, T)) { // set top bc
#ifdef _OPENMP
#pragma omp parallel for
#endif
for (int i = 1; i < imaxLocal + 1; i++) {
P(i, jmaxLocal + 1) = P(i, jmaxLocal);
}
}
if (commIsBoundary(s->comm, L)) { // set left bc
#ifdef _OPENMP
#pragma omp parallel for
#endif
for (int j = 1; j < jmaxLocal + 1; j++) {
P(0, j) = P(1, j);
}
}
if (commIsBoundary(s->comm, R)) { // set right bc
#ifdef _OPENMP
#pragma omp parallel for
#endif
for (int j = 1; j < jmaxLocal + 1; j++) {
P(imaxLocal + 1, j) = P(imaxLocal, j);
}
@ -160,17 +181,33 @@ static void smooth(Solver* s, double* p, double* rhs, int level, Comm* comm)
#ifdef _MPI
commExchange(comm, p);
#endif
#ifdef _OPENMP
#pragma message("Enabling OPENMP for loop")
#pragma omp parallel private(isw)
{
isw = jsw;
#pragma omp for
for (int j = 1; j < jmaxLocal + 1; j++) {
for (int i = isw; i < imaxLocal + 1; i += 2) {
P(i, j) -= factor *
(RHS(i, j) -
((P(i + 1, j) - 2.0 * P(i, j) + P(i - 1, j)) * idx2 +
(P(i, j + 1) - 2.0 * P(i, j) + P(i, j - 1)) * idy2));
}
isw = 3 - isw;
}
}
#else
for (int j = 1; j < jmaxLocal + 1; j++) {
for (int i = isw; i < imaxLocal + 1; i += 2) {
for (int i = isw; i < imaxLocal + 1; i += 2) {
P(i, j) -= factor *
(RHS(i, j) -
((P(i + 1, j) - 2.0 * P(i, j) + P(i - 1, j)) * idx2 +
(P(i, j + 1) - 2.0 * P(i, j) + P(i, j - 1)) * idy2));
(RHS(i, j) -
((P(i + 1, j) - 2.0 * P(i, j) + P(i - 1, j)) * idx2 +
(P(i, j + 1) - 2.0 * P(i, j) + P(i, j - 1)) * idy2));
}
isw = 3 - isw;
}
#endif
jsw = 3 - jsw;
}
}
@ -199,7 +236,24 @@ static double calculateResidual(Solver* s, double* p, double* rhs, int level, Co
#ifdef _MPI
commExchange(comm, p);
#endif
#ifdef _OPENMP
#pragma omp parallel private(isw)
{
isw = jsw;
#pragma omp for
for (int j = 1; j < jmaxLocal + 1; j++) {
for (int i = isw; i < imaxLocal + 1; i += 2) {
R(i, j) = RHS(i, j) -
((P(i + 1, j) - 2.0 * P(i, j) + P(i - 1, j)) * idx2 +
(P(i, j + 1) - 2.0 * P(i, j) + P(i, j - 1)) * idy2);
res += (R(i, j) * R(i, j));
}
isw = 3 - isw;
}
}
#else
for (int j = 1; j < jmaxLocal + 1; j++) {
for (int i = isw; i < imaxLocal + 1; i += 2) {
@ -211,6 +265,7 @@ static double calculateResidual(Solver* s, double* p, double* rhs, int level, Co
}
isw = 3 - isw;
}
#endif
jsw = 3 - jsw;
}
@ -297,6 +352,17 @@ void initSolver(Solver* s, Discretization* d, Parameter* p)
int jmax = s->grid->jmax;
int levels = s->levels;
printf("Using Multigrid solver with %d levels\n", levels);
#ifdef _MPI
#ifdef _OPENMP
if (commIsMaster(s->comm)) {
#pragma omp parallel
{
#pragma omp single
printf("Detected %d threads per rank (%d)\n", omp_get_num_threads(), s->comm->size);
}
}
#endif
#endif
s->r = malloc(levels * sizeof(double*));
s->e = malloc(levels * sizeof(double*));

View File

@ -13,430 +13,422 @@
#include "parameter.h"
#include "solver.h"
#define PI 3.14159265358979323846
#define P(i, j) p[(j) * (imax + 2) + (i)]
#define PI 3.14159265358979323846
#define P(i, j) p[(j) * (imax + 2) + (i)]
#define RHS(i, j) rhs[(j) * (imax + 2) + (i)]
static int sizeOfRank(int rank, int size, int N)
{
return N / size + ((N % size > rank) ? 1 : 0);
/* Share of N items assigned to `rank` out of `size` ranks: every rank gets
 * N / size items and the first N % size ranks get one more. */
static int sizeOfRank(int rank, int size, int N) {
  int share = N / size;
  if (rank < N % size) {
    share += 1;
  }
  return share;
}
static void print(Solver* solver)
{
double* p = solver->p;
int imax = solver->imax;
static void print(Solver *solver) {
double *p = solver->p;
int imax = solver->imax;
printf("### RANK %d #######################################################\n",
solver->rank);
for (int j = 0; j < solver->jmaxLocal + 2; j++) {
printf("%02d: ", j);
for (int i = 0; i < solver->imax + 2; i++) {
printf("%12.8f ", P(i, j));
}
printf("\n");
printf(
"### RANK %d #######################################################\n",
solver->rank);
for (int j = 0; j < solver->jmaxLocal + 2; j++) {
printf("%02d: ", j);
for (int i = 0; i < solver->imax + 2; i++) {
printf("%12.8f ", P(i, j));
}
fflush(stdout);
printf("\n");
}
fflush(stdout);
}
static void exchange(Solver* solver)
{
MPI_Request requests[4] = { MPI_REQUEST_NULL,
MPI_REQUEST_NULL,
MPI_REQUEST_NULL,
MPI_REQUEST_NULL };
static void exchange(Solver *solver) {
MPI_Request requests[4] = {MPI_REQUEST_NULL, MPI_REQUEST_NULL,
MPI_REQUEST_NULL, MPI_REQUEST_NULL};
/* exchange ghost cells with top neighbor */
if (solver->rank + 1 < solver->size) {
int top = solver->rank + 1;
double* src = solver->p + (solver->jmaxLocal) * (solver->imax + 2) + 1;
double* dst = solver->p + (solver->jmaxLocal + 1) * (solver->imax + 2) + 1;
/* exchange ghost cells with top neighbor */
if (solver->rank + 1 < solver->size) {
int top = solver->rank + 1;
double *src = solver->p + (solver->jmaxLocal) * (solver->imax + 2) + 1;
double *dst = solver->p + (solver->jmaxLocal + 1) * (solver->imax + 2) + 1;
MPI_Isend(src, solver->imax, MPI_DOUBLE, top, 1, MPI_COMM_WORLD, &requests[0]);
MPI_Irecv(dst, solver->imax, MPI_DOUBLE, top, 2, MPI_COMM_WORLD, &requests[1]);
}
MPI_Isend(src, solver->imax, MPI_DOUBLE, top, 1, MPI_COMM_WORLD,
&requests[0]);
MPI_Irecv(dst, solver->imax, MPI_DOUBLE, top, 2, MPI_COMM_WORLD,
&requests[1]);
}
/* exchange ghost cells with bottom neighbor */
if (solver->rank > 0) {
int bottom = solver->rank - 1;
double* src = solver->p + (solver->imax + 2) + 1;
double* dst = solver->p + 1;
/* exchange ghost cells with bottom neighbor */
if (solver->rank > 0) {
int bottom = solver->rank - 1;
double *src = solver->p + (solver->imax + 2) + 1;
double *dst = solver->p + 1;
MPI_Isend(src, solver->imax, MPI_DOUBLE, bottom, 2, MPI_COMM_WORLD, &requests[2]);
MPI_Irecv(dst, solver->imax, MPI_DOUBLE, bottom, 1, MPI_COMM_WORLD, &requests[3]);
}
MPI_Isend(src, solver->imax, MPI_DOUBLE, bottom, 2, MPI_COMM_WORLD,
&requests[2]);
MPI_Irecv(dst, solver->imax, MPI_DOUBLE, bottom, 1, MPI_COMM_WORLD,
&requests[3]);
}
MPI_Waitall(4, requests, MPI_STATUSES_IGNORE);
MPI_Waitall(4, requests, MPI_STATUSES_IGNORE);
}
void getResult(Solver* solver)
{
double* Pall = NULL;
int *rcvCounts, *displs;
void getResult(Solver *solver) {
double *Pall = NULL;
int *rcvCounts, *displs;
if (solver->rank == 0) {
Pall = allocate(64, (solver->imax + 2) * (solver->jmax + 2) * sizeof(double));
rcvCounts = (int*)malloc(solver->size * sizeof(int));
displs = (int*)malloc(solver->size * sizeof(int));
rcvCounts[0] = solver->jmaxLocal * (solver->imax + 2);
displs[0] = 0;
int cursor = rcvCounts[0];
if (solver->rank == 0) {
Pall =
allocate(64, (solver->imax + 2) * (solver->jmax + 2) * sizeof(double));
rcvCounts = (int *)malloc(solver->size * sizeof(int));
displs = (int *)malloc(solver->size * sizeof(int));
rcvCounts[0] = (solver->jmaxLocal + 1) * (solver->imax + 2);
displs[0] = 0;
int cursor = rcvCounts[0];
for (int i = 1; i < solver->size; i++) {
rcvCounts[i] = sizeOfRank(i, solver->size, solver->jmax) * (solver->imax + 2);
displs[i] = cursor;
cursor += rcvCounts[i];
}
for (int i = 1; i < solver->size; i++) {
rcvCounts[i] =
sizeOfRank(i, solver->size, solver->jmax) * (solver->imax + 2);
displs[i] = cursor;
cursor += rcvCounts[i];
}
rcvCounts[solver->size - 1] =
(sizeOfRank(solver->size - 1, solver->size, solver->jmax)+1) *
(solver->imax + 2);
}
int cnt = solver->jmaxLocal * (solver->imax + 2);
double *sendbuffer = solver->p + (solver->imax + 2);
if(solver->rank == 0 || solver->rank == solver->size-1) cnt= (solver->jmaxLocal+1)*(solver->imax+2);
if(solver->rank == 0) sendbuffer = solver->p;
MPI_Gatherv(sendbuffer, cnt, MPI_DOUBLE, Pall, rcvCounts, displs, MPI_DOUBLE,
0, MPI_COMM_WORLD);
int cnt = solver->jmaxLocal * (solver->imax + 2);
double* sendbuffer = solver->p + (solver->imax + 2);
MPI_Gatherv(sendbuffer,
cnt,
MPI_DOUBLE,
Pall,
rcvCounts,
displs,
MPI_DOUBLE,
0,
MPI_COMM_WORLD);
if (solver->rank == 0) {
writeResult(solver, Pall, "p.dat");
}
if (solver->rank == 0) {
writeResult(solver, Pall, "p.dat");
}
}
void initSolver(Solver* solver, Parameter* params, int problem)
{
MPI_Comm_rank(MPI_COMM_WORLD, &(solver->rank));
MPI_Comm_size(MPI_COMM_WORLD, &(solver->size));
solver->imax = params->imax;
solver->jmax = params->jmax;
solver->jmaxLocal = sizeOfRank(solver->rank, solver->size, solver->jmax);
printf("RANK %d: imaxLocal : %d, jmaxLocal : %d\n",
solver->rank,
solver->imax,
solver->jmaxLocal);
void initSolver(Solver *solver, Parameter *params, int problem) {
MPI_Comm_rank(MPI_COMM_WORLD, &(solver->rank));
MPI_Comm_size(MPI_COMM_WORLD, &(solver->size));
solver->imax = params->imax;
solver->jmax = params->jmax;
solver->jmaxLocal = sizeOfRank(solver->rank, solver->size, solver->jmax);
printf("RANK %d: imaxLocal : %d, jmaxLocal : %d\n", solver->rank,
solver->imax, solver->jmaxLocal);
solver->dx = params->xlength / params->imax;
solver->dy = params->ylength / params->jmax;
solver->ys = solver->rank * solver->jmaxLocal * solver->dy;
solver->eps = params->eps;
solver->omega = params->omg;
solver->itermax = params->itermax;
solver->dx = params->xlength / params->imax;
solver->dy = params->ylength / params->jmax;
solver->ys = solver->rank * solver->jmaxLocal * solver->dy;
solver->eps = params->eps;
solver->omega = params->omg;
solver->itermax = params->itermax;
int imax = solver->imax;
int jmax = solver->jmax;
int jmaxLocal = solver->jmaxLocal;
solver->p = allocate(64, (imax + 2) * (jmaxLocal + 2) * sizeof(double));
solver->rhs = allocate(64, (imax + 2) * (jmax + 2) * sizeof(double));
int imax = solver->imax;
int jmax = solver->jmax;
int jmaxLocal = solver->jmaxLocal;
solver->p = allocate(64, (imax + 2) * (jmaxLocal + 2) * sizeof(double));
solver->rhs = allocate(64, (imax + 2) * (jmax + 2) * sizeof(double));
double dx = solver->dx;
double dy = solver->dy;
double* p = solver->p;
double* rhs = solver->rhs;
double dx = solver->dx;
double dy = solver->dy;
double *p = solver->p;
double *rhs = solver->rhs;
for (int j = 0; j < jmaxLocal + 2; j++) {
double y = solver->ys + j * dy;
for (int i = 0; i < imax + 2; i++) {
P(i, j) = sin(4.0 * PI * i * dx) + sin(4.0 * PI * y);
}
}
if (problem == 2) {
for (int j = 0; j < jmax + 2; j++) {
for (int i = 0; i < imax + 2; i++) {
RHS(i, j) = sin(2.0 * PI * i * dx);
}
}
} else {
for (int j = 0; j < jmax + 2; j++) {
for (int i = 0; i < imax + 2; i++) {
RHS(i, j) = 0.0;
}
}
}
}
void debug(Solver* solver)
{
int imax = solver->imax;
int rank = solver->rank;
double* p = solver->p;
/* for( int j=0; j < solver->jmaxLocal+2; j++ ) { */
/* for( int i=0; i < solver->imax+2; i++ ) { */
/* P(i, j) = (double) rank; */
/* } */
/* } */
/* for ( int i=0; i < solver->size; i++) { */
/* if ( i == rank ) { */
/* print(solver); */
/* } */
/* MPI_Barrier(MPI_COMM_WORLD); */
/* } */
/* if ( rank == 0 ) { */
/* printf("##########################################################\n"); */
/* printf("## Exchange ghost layers\n"); */
/* printf("##########################################################\n"); */
/* } */
/* exchange(solver); */
for (int i = 0; i < solver->size; i++) {
if (i == rank) {
print(solver);
}
MPI_Barrier(MPI_COMM_WORLD);
}
}
int solve(Solver* solver)
{
double r;
int it = 0;
double res, res1;
int imax = solver->imax;
int jmax = solver->jmax;
int jmaxLocal = solver->jmaxLocal;
double eps = solver->eps;
double omega = solver->omega;
int itermax = solver->itermax;
double dx2 = solver->dx * solver->dx;
double dy2 = solver->dy * solver->dy;
double idx2 = 1.0 / dx2;
double idy2 = 1.0 / dy2;
double factor = omega * 0.5 * (dx2 * dy2) / (dx2 + dy2);
double* p = solver->p;
double* rhs = solver->rhs;
double epssq = eps * eps;
res = eps + 1.0;
while ((res >= epssq) && (it < itermax)) {
res = 0.0;
exchange(solver);
for (int j = 1; j < jmaxLocal + 1; j++) {
for (int i = 1; i < imax + 1; i++) {
r = RHS(i, j) - ((P(i - 1, j) - 2.0 * P(i, j) + P(i + 1, j)) * idx2 +
(P(i, j - 1) - 2.0 * P(i, j) + P(i, j + 1)) * idy2);
P(i, j) -= (factor * r);
res += (r * r);
}
}
if (solver->rank == 0) {
for (int i = 1; i < imax + 1; i++) {
P(i, 0) = P(i, 1);
}
}
if (solver->rank == (solver->size - 1)) {
for (int i = 1; i < imax + 1; i++) {
P(i, jmaxLocal + 1) = P(i, jmaxLocal);
}
}
for (int j = 1; j < jmaxLocal + 1; j++) {
P(0, j) = P(1, j);
P(imax + 1, j) = P(imax, j);
}
MPI_Allreduce(&res, &res1, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
res = res1;
res = sqrt(res / (imax * jmax));
#ifdef DEBUG
if (solver->rank == 0) {
printf("%d Residuum: %e\n", it, res1);
}
#endif
it++;
}
if (solver->rank == 0) {
printf("Solver took %d iterations\n", it);
}
if (res < eps) {
return 1;
} else {
return 0;
}
}
int solveRB(Solver* solver)
{
double r;
int it = 0;
double res, res1;
int imax = solver->imax;
int jmax = solver->jmax;
int jmaxLocal = solver->jmaxLocal;
double eps = solver->eps;
double omega = solver->omega;
int itermax = solver->itermax;
double dx2 = solver->dx * solver->dx;
double dy2 = solver->dy * solver->dy;
double idx2 = 1.0 / dx2;
double idy2 = 1.0 / dy2;
double factor = omega * 0.5 * (dx2 * dy2) / (dx2 + dy2);
double* p = solver->p;
double* rhs = solver->rhs;
int pass, jsw, isw;
double epssq = eps * eps;
res = eps + 1.0;
while ((res >= epssq) && (it < itermax)) {
res = 0.0;
jsw = 1;
for (pass = 0; pass < 2; pass++) {
isw = jsw;
exchange(solver);
for (int j = 1; j < jmaxLocal + 1; j++) {
for (int i = isw; i < imax + 1; i += 2) {
double r = RHS(i, j) -
((P(i + 1, j) - 2.0 * P(i, j) + P(i - 1, j)) * idx2 +
(P(i, j + 1) - 2.0 * P(i, j) + P(i, j - 1)) * idy2);
P(i, j) -= (factor * r);
res += (r * r);
}
isw = 3 - isw;
}
jsw = 3 - jsw;
}
for (int i = 1; i < imax + 1; i++) {
P(i, 0) = P(i, 1);
P(i, jmaxLocal + 1) = P(i, jmaxLocal);
}
for (int j = 1; j < jmaxLocal + 1; j++) {
P(0, j) = P(1, j);
P(imax + 1, j) = P(imax, j);
}
MPI_Allreduce(&res, &res1, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
res = res1;
res = res / (double)(imax * jmax);
#ifdef DEBUG
printf("%d Residuum: %e\n", it, res);
#endif
it++;
}
if (solver->rank == 0) {
printf("Solver took %d iterations\n", it);
}
if (res < eps) {
return 1;
} else {
return 0;
}
}
int solveRBA(Solver* solver)
{
double r;
int it = 0;
double res;
int imax = solver->imax;
int jmax = solver->jmax;
int jmaxLocal = solver->jmaxLocal;
double eps = solver->eps;
double omega = solver->omega;
int itermax = solver->itermax;
double dx2 = solver->dx * solver->dx;
double dy2 = solver->dy * solver->dy;
double idx2 = 1.0 / dx2;
double idy2 = 1.0 / dy2;
double factor = omega * 0.5 * (dx2 * dy2) / (dx2 + dy2);
double* p = solver->p;
double* rhs = solver->rhs;
int pass, jsw, isw;
double rho = solver->rho;
double epssq = eps * eps;
res = eps + 1.0;
while ((res >= epssq) && (it < itermax)) {
res = 0.0;
jsw = 1;
for (pass = 0; pass < 2; pass++) {
isw = jsw;
exchange(solver);
for (int j = 1; j < jmaxLocal + 1; j++) {
for (int i = isw; i < imax + 1; i += 2) {
double r = RHS(i, j) -
((P(i + 1, j) - 2.0 * P(i, j) + P(i - 1, j)) * idx2 +
(P(i, j + 1) - 2.0 * P(i, j) + P(i, j - 1)) * idy2);
P(i, j) -= (omega * factor * r);
res += (r * r);
}
isw = 3 - isw;
}
jsw = 3 - jsw;
omega = (it == 0 && pass == 0 ? 1.0 / (1.0 - 0.5 * rho * rho)
: 1.0 / (1.0 - 0.25 * rho * rho * omega));
}
for (int i = 1; i < imax + 1; i++) {
P(i, 0) = P(i, 1);
P(i, jmaxLocal + 1) = P(i, jmaxLocal);
}
for (int j = 1; j < jmaxLocal + 1; j++) {
P(0, j) = P(1, j);
P(imax + 1, j) = P(imax, j);
}
res = res / (double)(imax * jmax);
#ifdef DEBUG
printf("%d Residuum: %e Omega: %e\n", it, res, omega);
#endif
it++;
}
printf("Final omega: %f\n", omega);
printf("Solver took %d iterations to reach %f\n", it, sqrt(res));
}
void writeResult(Solver* solver, double* m, char* filename)
{
int imax = solver->imax;
int jmax = solver->jmax;
double* p = solver->p;
FILE* fp;
fp = fopen(filename, "w");
if (fp == NULL) {
printf("Error!\n");
exit(EXIT_FAILURE);
for (int j = 0; j < jmaxLocal + 2; j++) {
double y = solver->ys + j * dy;
for (int i = 0; i < imax + 2; i++) {
P(i, j) = sin(4.0 * PI * i * dx) + sin(4.0 * PI * y);
}
}
if (problem == 2) {
for (int j = 0; j < jmax + 2; j++) {
for (int i = 0; i < imax + 2; i++) {
fprintf(fp, "%f ", m[j * (imax + 2) + i]);
}
fprintf(fp, "\n");
for (int i = 0; i < imax + 2; i++) {
RHS(i, j) = sin(2.0 * PI * i * dx);
}
}
} else {
for (int j = 0; j < jmax + 2; j++) {
for (int i = 0; i < imax + 2; i++) {
RHS(i, j) = 0.0;
}
}
}
}
/* Debug helper: every rank prints its local field via print(), one rank at a
 * time in rank order, with a barrier after each turn. */
void debug(Solver *solver) {
  /* imax and p are only consumed by the P(i, j) macro in the disabled
   * experiment below. */
  int imax = solver->imax;
  int rank = solver->rank;
  double *p = solver->p;

  /* Disabled experiment, kept for reference:
   *
   *   for (int j = 0; j < solver->jmaxLocal + 2; j++) {
   *     for (int i = 0; i < solver->imax + 2; i++) {
   *       P(i, j) = (double) rank;
   *     }
   *   }
   *   for (int i = 0; i < solver->size; i++) {
   *     if (i == rank) {
   *       print(solver);
   *     }
   *     MPI_Barrier(MPI_COMM_WORLD);
   *   }
   *   if (rank == 0) {
   *     printf("##########################################################\n");
   *     printf("## Exchange ghost layers\n");
   *     printf("##########################################################\n");
   *   }
   *   exchange(solver);
   */

  int turn = 0;
  while (turn < solver->size) {
    if (turn == rank) {
      print(solver);
    }
    MPI_Barrier(MPI_COMM_WORLD);
    turn++;
  }
}
/*
 * Plain SOR iteration on the local rows of the grid.
 *
 * Each iteration exchanges ghost rows, sweeps the local interior in place
 * (j outer, i inner), copies the boundary values, and combines the squared
 * residuals of all ranks with MPI_Allreduce.
 *
 * Returns 1 if the final residual is below solver->eps, otherwise 0.
 *
 * NOTE(review): the loop continues while res >= eps * eps although res holds
 * the square root of the mean squared residual, whereas the return value
 * compares res against eps. Confirm which tolerance is intended.
 */
int solve(Solver *solver) {
  double r;
  int it = 0;
  double res, res1;
  int imax = solver->imax;
  int jmax = solver->jmax;
  int jmaxLocal = solver->jmaxLocal;
  double eps = solver->eps;
  double omega = solver->omega;
  int itermax = solver->itermax;
  double dx2 = solver->dx * solver->dx;
  double dy2 = solver->dy * solver->dy;
  double idx2 = 1.0 / dx2;
  double idy2 = 1.0 / dy2;
  double factor = omega * 0.5 * (dx2 * dy2) / (dx2 + dy2);
  double *p = solver->p;     /* accessed through the P(i, j) macro */
  double *rhs = solver->rhs; /* accessed through the RHS(i, j) macro */
  double epssq = eps * eps;

  /* Any value >= epssq would do; it only has to let the loop start. */
  res = eps + 1.0;

  while ((res >= epssq) && (it < itermax)) {
    res = 0.0;
    exchange(solver);

    /* In-place sweep: cells updated earlier in the sweep are already used
     * by the stencil of later cells, so the loop order matters. */
    for (int j = 1; j < jmaxLocal + 1; j++) {
      for (int i = 1; i < imax + 1; i++) {
        r = RHS(i, j) - ((P(i - 1, j) - 2.0 * P(i, j) + P(i + 1, j)) * idx2 +
                         (P(i, j - 1) - 2.0 * P(i, j) + P(i, j + 1)) * idy2);

        P(i, j) -= (factor * r);
        res += (r * r);
      }
    }

    /* Bottom boundary row is only set on the first rank ... */
    if (solver->rank == 0) {
      for (int i = 1; i < imax + 1; i++) {
        P(i, 0) = P(i, 1);
      }
    }

    /* ... and the top boundary row only on the last rank. */
    if (solver->rank == (solver->size - 1)) {
      for (int i = 1; i < imax + 1; i++) {
        P(i, jmaxLocal + 1) = P(i, jmaxLocal);
      }
    }

    /* Left and right boundary columns are set on every rank. */
    for (int j = 1; j < jmaxLocal + 1; j++) {
      P(0, j) = P(1, j);
      P(imax + 1, j) = P(imax, j);
    }

    MPI_Allreduce(&res, &res1, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
    res = res1;
    res = sqrt(res / (imax * jmax));
#ifdef DEBUG
    /* Prints the global sum of squares (res1), not the normalised res. */
    if (solver->rank == 0) {
      printf("%d Residuum: %e\n", it, res1);
    }
#endif
    it++;
  }

  if (solver->rank == 0) {
    printf("Solver took %d iterations\n", it);
  }
  if (res < eps) {
    return 1;
  } else {
    return 0;
  }
}
/*
 * Red-black SOR iteration on the local rows of the grid.
 *
 * One iteration consists of two passes. A pass visits every second cell of
 * each row; the first column alternates from row to row and the two passes
 * start on different columns. Ghost rows are exchanged before each pass.
 * The squared residuals of all ranks are summed with MPI_Allreduce and
 * divided by imax * jmax.
 *
 * Returns 1 if the final value of that mean is below solver->eps, else 0.
 */
int solveRB(Solver *solver) {
  const int imax = solver->imax;
  const int jmax = solver->jmax;
  const int jmaxLocal = solver->jmaxLocal;
  const int itermax = solver->itermax;
  const double eps = solver->eps;
  const double epssq = eps * eps;
  const double omega = solver->omega;
  const double dx2 = solver->dx * solver->dx;
  const double dy2 = solver->dy * solver->dy;
  const double idx2 = 1.0 / dx2;
  const double idy2 = 1.0 / dy2;
  const double factor = omega * 0.5 * (dx2 * dy2) / (dx2 + dy2);
  double *p = solver->p;     /* accessed through the P(i, j) macro */
  double *rhs = solver->rhs; /* accessed through the RHS(i, j) macro */

  int sweeps = 0;
  double res = eps + 1.0; /* lets the loop start */

  while ((res >= epssq) && (sweeps < itermax)) {
    double localSum = 0.0;
    double globalSum;
    int jsw = 1;

    for (int pass = 0; pass < 2; pass++) {
      int isw = jsw;
      exchange(solver);

      for (int j = 1; j <= jmaxLocal; j++) {
        for (int i = isw; i <= imax; i += 2) {
          double r =
              RHS(i, j) - ((P(i + 1, j) - 2.0 * P(i, j) + P(i - 1, j)) * idx2 +
                           (P(i, j + 1) - 2.0 * P(i, j) + P(i, j - 1)) * idy2);

          P(i, j) -= (factor * r);
          localSum += (r * r);
        }
        isw = 3 - isw; /* 1 <-> 2 */
      }
      jsw = 3 - jsw;
    }

    /* Copy the outermost interior values into the surrounding layer,
     * on every rank and on all four sides. */
    for (int i = 1; i <= imax; i++) {
      P(i, 0) = P(i, 1);
      P(i, jmaxLocal + 1) = P(i, jmaxLocal);
    }
    for (int j = 1; j <= jmaxLocal; j++) {
      P(0, j) = P(1, j);
      P(imax + 1, j) = P(imax, j);
    }

    MPI_Allreduce(&localSum, &globalSum, 1, MPI_DOUBLE, MPI_SUM,
                  MPI_COMM_WORLD);
    res = globalSum / (double)(imax * jmax);
#ifdef DEBUG
    printf("%d Residuum: %e\n", sweeps, res);
#endif
    sweeps++;
  }

  if (solver->rank == 0) {
    printf("Solver took %d iterations\n", sweeps);
  }
  return (res < eps) ? 1 : 0;
}
/*
 * Red-black SOR iteration in which omega is recomputed from solver->rho
 * after every pass.
 *
 * Changes compared to the previous version:
 *  - The squared residual is summed over all ranks with MPI_Allreduce.
 *    Without it every rank tested its own partial sum, so ranks could leave
 *    the loop in different iterations while their neighbours were still
 *    waiting in exchange().
 *  - The function now returns a value (it is declared int but had no return
 *    statement): 1 if the loop's stopping tolerance eps * eps was reached,
 *    0 if it stopped because of itermax.
 *  - The summary is printed by rank 0 only, like the other solvers.
 *
 * NOTE(review): factor already contains the initial omega and the update
 * multiplies by the current omega again. Confirm that this is intended.
 */
int solveRBA(Solver *solver) {
  int it = 0;
  double res, res1;
  int imax = solver->imax;
  int jmax = solver->jmax;
  int jmaxLocal = solver->jmaxLocal;
  double eps = solver->eps;
  double omega = solver->omega;
  int itermax = solver->itermax;
  double dx2 = solver->dx * solver->dx;
  double dy2 = solver->dy * solver->dy;
  double idx2 = 1.0 / dx2;
  double idy2 = 1.0 / dy2;
  double factor = omega * 0.5 * (dx2 * dy2) / (dx2 + dy2);
  double *p = solver->p;     /* accessed through the P(i, j) macro */
  double *rhs = solver->rhs; /* accessed through the RHS(i, j) macro */
  int pass, jsw, isw;
  double rho = solver->rho;
  double epssq = eps * eps;

  res = eps + 1.0; /* lets the loop start */

  while ((res >= epssq) && (it < itermax)) {
    res = 0.0;
    jsw = 1;

    for (pass = 0; pass < 2; pass++) {
      isw = jsw;
      exchange(solver);

      for (int j = 1; j < jmaxLocal + 1; j++) {
        for (int i = isw; i < imax + 1; i += 2) {
          double r =
              RHS(i, j) - ((P(i + 1, j) - 2.0 * P(i, j) + P(i - 1, j)) * idx2 +
                           (P(i, j + 1) - 2.0 * P(i, j) + P(i, j - 1)) * idy2);

          P(i, j) -= (omega * factor * r);
          res += (r * r);
        }
        isw = 3 - isw;
      }
      jsw = 3 - jsw;

      /* The very first pass uses a different update formula for omega. */
      omega = (it == 0 && pass == 0 ? 1.0 / (1.0 - 0.5 * rho * rho)
                                    : 1.0 / (1.0 - 0.25 * rho * rho * omega));
    }

    for (int i = 1; i < imax + 1; i++) {
      P(i, 0) = P(i, 1);
      P(i, jmaxLocal + 1) = P(i, jmaxLocal);
    }
    for (int j = 1; j < jmaxLocal + 1; j++) {
      P(0, j) = P(1, j);
      P(imax + 1, j) = P(imax, j);
    }

    /* All ranks must see the same residual to agree on termination. */
    MPI_Allreduce(&res, &res1, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
    res = res1 / (double)(imax * jmax);
#ifdef DEBUG
    printf("%d Residuum: %e Omega: %e\n", it, res, omega);
#endif
    it++;
  }

  if (solver->rank == 0) {
    printf("Final omega: %f\n", omega);
    printf("Solver took %d iterations to reach %f\n", it, sqrt(res));
  }
  return (res < epssq) ? 1 : 0;
}
/*
 * Write the field m to the text file `filename`.
 *
 * m is read as (jmax + 2) rows of (imax + 2) values, with imax and jmax taken
 * from solver. Each row becomes one line of "%f " formatted values.
 * The process exits with EXIT_FAILURE if the file cannot be opened or if
 * closing it reports an error (buffered data could not be written).
 */
void writeResult(Solver *solver, double *m, char *filename) {
  int imax = solver->imax;
  int jmax = solver->jmax;

  FILE *fp = fopen(filename, "w");
  if (fp == NULL) {
    printf("Error!\n");
    exit(EXIT_FAILURE);
  }

  for (int j = 0; j < jmax + 2; j++) {
    for (int i = 0; i < imax + 2; i++) {
      fprintf(fp, "%f ", m[j * (imax + 2) + i]);
    }
    fprintf(fp, "\n");
  }

  /* fclose flushes the stream; a failure here means the file is incomplete. */
  if (fclose(fp) != 0) {
    fprintf(stderr, "Error: failed to write %s\n", filename);
    exit(EXIT_FAILURE);
  }
}

View File

@ -15,7 +15,7 @@ typedef struct {
int jmaxLocal;
int rank;
int size;
double rho;
double *p, *rhs;
double eps, omega;
int itermax;

View File

@ -0,0 +1,62 @@
#=======================================================================================
# Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
# All rights reserved.
# Use of this source code is governed by a MIT-style
# license that can be found in the LICENSE file.
#=======================================================================================
#CONFIGURE BUILD SYSTEM
TARGET = exe-$(TAG)
BUILD_DIR = ./$(TAG)
SRC_DIR = ./src
MAKE_DIR = ./
Q ?= @
#DO NOT EDIT BELOW
include $(MAKE_DIR)/config.mk
include $(MAKE_DIR)/include_$(TAG).mk
INCLUDES += -I$(SRC_DIR)/includes -I$(BUILD_DIR)
VPATH = $(SRC_DIR)
ASM = $(patsubst $(SRC_DIR)/%.c, $(BUILD_DIR)/%.s,$(wildcard $(SRC_DIR)/*.c))
OBJ = $(patsubst $(SRC_DIR)/%.c, $(BUILD_DIR)/%.o,$(wildcard $(SRC_DIR)/*.c))
CPPFLAGS := $(CPPFLAGS) $(DEFINES) $(OPTIONS) $(INCLUDES)
${TARGET}: $(BUILD_DIR) $(OBJ)
$(info ===> LINKING $(TARGET))
$(Q)${LINKER} ${LFLAGS} -o $(TARGET) $(OBJ) $(LIBS)
# Compile one object and generate its dependency file.
# The build directory is an order-only prerequisite: it must exist before the
# compiler writes into it (also with "make -j"), but a change of its timestamp
# must not trigger a recompilation.
$(BUILD_DIR)/%.o: %.c $(MAKE_DIR)/include_$(TAG).mk | $(BUILD_DIR)
	$(info ===> COMPILE $@)
	$(CC) -c $(CPPFLAGS) $(CFLAGS) $< -o $@
	$(Q)$(GCC) $(CPPFLAGS) -MT $(@:.d=.o) -MM $< > $(BUILD_DIR)/$*.d
$(BUILD_DIR)/%.s: %.c
$(info ===> GENERATE ASM $@)
$(CC) -S $(CPPFLAGS) $(CFLAGS) $< -o $@
.PHONY: clean distclean tags info asm
clean:
$(info ===> CLEAN)
@rm -rf $(BUILD_DIR)
@rm -f tags
distclean: clean
$(info ===> DIST CLEAN)
@rm -f $(TARGET)
info:
$(info $(CFLAGS))
$(Q)$(CC) $(VERSION)
asm: $(BUILD_DIR) $(ASM)
tags:
$(info ===> GENERATE TAGS)
$(Q)ctags -R
# -p: do not fail if the directory already exists
$(BUILD_DIR):
	@mkdir -p $(BUILD_DIR)
-include $(OBJ:.o=.d)

View File

@ -0,0 +1,48 @@
# C source skeleton
## Build
1. Configure the toolchain and additional options in `config.mk`:
```
# Supported: GCC, CLANG, ICC
TAG ?= GCC
ENABLE_OPENMP ?= false
OPTIONS += -DARRAY_ALIGNMENT=64
#OPTIONS += -DVERBOSE_AFFINITY
#OPTIONS += -DVERBOSE_DATASIZE
#OPTIONS += -DVERBOSE_TIMER
```
The verbosity options enable detailed output about affinity settings, allocation sizes and timer resolution.
2. Build with:
```
make
```
You can build with multiple toolchains in the same directory, but note that the Makefile only acts on the toolchain currently selected by `TAG`.
Intermediate build results are located in the `<TOOLCHAIN>` directory.
To output the executed commands use:
```
make Q=
```
3. Clean up with:
```
make clean
```
to clean intermediate build results.
```
make distclean
```
to clean intermediate build results and binary.
4. (Optional) Generate assembler:
```
make asm
```
The assembler files will also be located in the `<TOOLCHAIN>` directory.

View File

@ -0,0 +1,15 @@
# Render the files p-1.dat ... p-50.dat as surface plots, one PNG per file
# (001.png ... 050.png).
set term png size 1024,768 enhanced font ,12
set datafile separator whitespace
set grid
set hidden3d
# Fixed axis ranges so that all frames share the same view
set xrange [0:40]
set yrange [0:40]
set zrange [-2:2]
# File names of the n-th input data file and the n-th output image
input(n) = sprintf("p-%d.dat", n)
output(n) = sprintf("%03d.png", n)
do for [i=1:50] {
set output output(i)
# Each data file is read as a matrix of z values
splot input(i) matrix using 1:2:3 with lines
}

View File

@ -0,0 +1,9 @@
# Supported: GCC, CLANG, ICC
TAG ?= GCC
#Feature options
OPTIONS += -DARRAY_ALIGNMENT=64
#OPTIONS += -DVERBOSE_AFFINITY
#OPTIONS += -DVERBOSE_DATASIZE
#OPTIONS += -DVERBOSE_TIMER
# "?=" keeps a value given by the caller. "+=" would append to it, and the
# toolchain files test for the exact value "true".
ENABLE_OPENMP ?= true

View File

@ -0,0 +1,18 @@
CC   = clang
GCC  = cc
LINKER = $(CC)

# The math library goes into LIBS, which the link rule places after the
# object files; in LFLAGS it would precede them on the command line.
LIBS = -lm

ifeq ($(ENABLE_OPENMP),true)
OPENMP   = -fopenmp
#OPENMP   = -Xpreprocessor -fopenmp #required on Macos with homebrew libomp
#LIBS    += -lomp
endif

VERSION  = --version
CFLAGS   = -Ofast -std=c99 $(OPENMP)
#CFLAGS   = -Ofast -fnt-store=aggressive -std=c99 $(OPENMP) #AMD CLANG
LFLAGS   = $(OPENMP)
DEFINES  = -D_GNU_SOURCE
DEFINES  += -DANIMATE
# DEFINES  += -DDEBUG
INCLUDES =

View File

@ -0,0 +1,14 @@
CC = gcc
GCC = gcc
LINKER = $(CC)
ifeq ($(ENABLE_OPENMP),true)
OPENMP = -fopenmp
endif
VERSION = --version
CFLAGS = -Ofast -ffreestanding -std=c99 $(OPENMP)
LFLAGS = $(OPENMP)
DEFINES = -D_GNU_SOURCE
INCLUDES =
LIBS = -lm

View File

@ -0,0 +1,14 @@
CC = icc
GCC = gcc
LINKER = $(CC)
ifeq ($(ENABLE_OPENMP),true)
OPENMP = -qopenmp
endif
VERSION = --version
CFLAGS = -O3 -xHost -qopt-zmm-usage=high -std=c99 $(OPENMP)
LFLAGS = $(OPENMP)
DEFINES = -D_GNU_SOURCE
INCLUDES =
LIBS =

1918
PoissonSolver/2D-omp/p-0.dat Normal file

File diff suppressed because one or more lines are too long

View File

@ -0,0 +1,22 @@
# Problem specific Data:
# ---------------------
name poisson
# Geometry Data:
# -------------
xlength 1.0 # domain size in x-direction
ylength 1.0 # domain size in y-direction
imax 6000 # number of interior cells in x-direction
jmax 6000 # number of interior cells in y-direction
# Pressure Iteration Data:
# -----------------------
itermax 100000 # maximal number of pressure iteration in one time step
eps 0.000001 # stopping tolerance for pressure iteration
rho 0.99999 # rho parameter of the omega update in the accelerated red-black SOR variant
omg 1.2 # relaxation parameter for SOR iteration
#===============================================================================

View File

@ -0,0 +1,37 @@
/*
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
* All rights reserved.
* Use of this source code is governed by a MIT-style
* license that can be found in the LICENSE file.
*/
#include <stdlib.h>
#include <stdio.h>
#include <errno.h>
/*
 * Allocate bytesize bytes with posix_memalign, aligned to `alignment` bytes.
 *
 * Never returns NULL: on any failure (including a NULL result) an error
 * message is written to stderr and the process exits with EXIT_FAILURE.
 * The returned memory is released with free().
 */
void* allocate(int alignment, size_t bytesize)
{
    /* Initialised so that the NULL check below is well defined even when
     * posix_memalign fails without storing anything. */
    void* ptr     = NULL;
    int errorCode = posix_memalign(&ptr, (size_t)alignment, bytesize);

    if (errorCode == EINVAL) {
        fprintf(stderr, "Error: Alignment parameter is not a power of two\n");
        exit(EXIT_FAILURE);
    }

    if (errorCode == ENOMEM) {
        fprintf(stderr, "Error: Insufficient memory to fulfill the request\n");
        exit(EXIT_FAILURE);
    }

    /* Any other error code, or a NULL result, is a failure as well. */
    if (errorCode != 0 || ptr == NULL) {
        fprintf(stderr, "Error: posix_memalign failed!\n");
        exit(EXIT_FAILURE);
    }

    return ptr;
}

View File

@ -0,0 +1,11 @@
/* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
* All rights reserved.
* Use of this source code is governed by a MIT-style
* license that can be found in the LICENSE file. */
#ifndef __ALLOCATE_H_
#define __ALLOCATE_H_
#include <stdlib.h>
/* Allocate bytesize bytes aligned to `alignment` bytes.
 * Failure handling is up to the definition, which is not part of this header. */
extern void* allocate(int alignment, size_t bytesize);
#endif

View File

@ -0,0 +1,53 @@
/*
* =======================================================================================
*
* Author: Jan Eitzinger (je), jan.eitzinger@fau.de
* Copyright (c) 2020 RRZE, University Erlangen-Nuremberg
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* =======================================================================================
*/
/* Wrapper macros for the LIKWID marker API.
 * With LIKWID_PERFMON defined they forward to the likwid_marker* functions;
 * otherwise every macro expands to nothing, so instrumented code builds
 * without LIKWID. */
#ifndef LIKWID_MARKERS_H
#define LIKWID_MARKERS_H
#ifdef LIKWID_PERFMON
#include <likwid.h>
#define LIKWID_MARKER_INIT likwid_markerInit()
#define LIKWID_MARKER_THREADINIT likwid_markerThreadInit()
#define LIKWID_MARKER_SWITCH likwid_markerNextGroup()
#define LIKWID_MARKER_REGISTER(regionTag) likwid_markerRegisterRegion(regionTag)
#define LIKWID_MARKER_START(regionTag) likwid_markerStartRegion(regionTag)
#define LIKWID_MARKER_STOP(regionTag) likwid_markerStopRegion(regionTag)
#define LIKWID_MARKER_CLOSE likwid_markerClose()
#define LIKWID_MARKER_RESET(regionTag) likwid_markerResetRegion(regionTag)
#define LIKWID_MARKER_GET(regionTag, nevents, events, time, count) likwid_markerGetRegion(regionTag, nevents, events, time, count)
#else /* LIKWID_PERFMON */
/* No-op variants: the arguments are discarded and not evaluated. */
#define LIKWID_MARKER_INIT
#define LIKWID_MARKER_THREADINIT
#define LIKWID_MARKER_SWITCH
#define LIKWID_MARKER_REGISTER(regionTag)
#define LIKWID_MARKER_START(regionTag)
#define LIKWID_MARKER_STOP(regionTag)
#define LIKWID_MARKER_CLOSE
#define LIKWID_MARKER_GET(regionTag, nevents, events, time, count)
#define LIKWID_MARKER_RESET(regionTag)
#endif /* LIKWID_PERFMON */
#endif /*LIKWID_MARKERS_H*/

View File

@ -0,0 +1,76 @@
/* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
* All rights reserved.
* Use of this source code is governed by a MIT-style
* license that can be found in the LICENSE file. */
#include <stdio.h>
#include <stdlib.h>
#include "likwid-marker.h"
#include "parameter.h"
#include "solver.h"
#include "timing.h"
#include "omp.h"
/*
 * Time one solver run and bracket it with a LIKWID marker region.
 *
 * tag  - region name, given as a string literal (e.g. "RB"). It is handed to
 *        the marker macros unchanged; stringifying it again with # would put
 *        the quote characters themselves into the region name.
 * call - solver entry point, invoked as call(&solver).
 *
 * Relies on `solver`, `startTime` and `endTime` being in scope where the
 * macro is expanded. The do { } while (0) wrapper makes the expansion a
 * single statement, so it is safe inside unbraced if/else bodies.
 */
#define LIKWID_PROFILE(tag, call)                                              \
    do {                                                                       \
        startTime = getTimeStamp();                                            \
        LIKWID_MARKER_START(tag);                                              \
        call(&solver);                                                         \
        LIKWID_MARKER_STOP(tag);                                               \
        endTime = getTimeStamp();                                              \
    } while (0)
/* Solver variants; the numeric value is what the command line selects. */
enum VARIANT {
    SOR = 1, /* plain SOR */
    RB  = 2, /* red-black SOR */
    RBA = 3  /* red-black SOR with acceleration */
};
/*
 * Program entry point.
 *
 * Usage: <prog> <configFile> [variant] [omega]
 *   configFile - parameter file handed to readParameter()
 *   variant    - 1 = plain SOR, 2 = red-black SOR (default),
 *                3 = red-black SOR with acceleration
 *   omega      - overrides the relaxation factor from the parameter file
 *
 * Writes the initial field to "p-0.dat", runs the selected solver while
 * timing it, prints the elapsed time and writes the result to "p-final.dat".
 * Returns EXIT_SUCCESS after a completed run; exits with EXIT_FAILURE on an
 * unparsable omega or an unknown variant.
 */
int main(int argc, char** argv)
{
    /* Always 0; being volatile it is re-read at run time in the test below. */
    int volatile dummy = 0;
    int variant        = RB;
    /* Initialised so the timing printf never reads indeterminate values. */
    double startTime = 0.0;
    double endTime   = 0.0;
    Parameter params;
    Solver solver;

    initParameter(&params);
    LIKWID_MARKER_INIT;

    /* The OpenMP runtime calls only exist when compiling with OpenMP. */
#ifdef _OPENMP
#pragma omp parallel
    {
        if (dummy == 1 || omp_get_thread_num() == 0)
            printf("OMP_THREADS_DETECTED: %d\n", omp_get_num_threads());
    }
#endif

    if (argc < 2) {
        printf("Usage: %s <configFile>\n", argv[0]);
        exit(EXIT_SUCCESS);
    }

    readParameter(&params, argv[1]);
    // printParameter(&params);

    /* ">=" so that the variant is still honoured when omega is given too. */
    if (argc >= 3) {
        variant = atoi(argv[2]);
    }
    if (argc >= 4) {
        /* sscanf takes the input string first, then the format. */
        if (sscanf(argv[3], "%lf", &params.omg) != 1) {
            fprintf(stderr, "Invalid omega value: %s\n", argv[3]);
            exit(EXIT_FAILURE);
        }
    }

    initSolver(&solver, &params, 2);
    writeResult(&solver, "p-0.dat");

    switch (variant) {
    case SOR:
        printf("Plain SOR\n");
        fflush(stdout);
        LIKWID_PROFILE("SOR", solve);
        break;
    case RB:
        printf("Red-black SOR\n");
        fflush(stdout);
        LIKWID_PROFILE("RB", solveRB);
        break;
    case RBA:
        printf("Red-black SOR with acceleration\n");
        fflush(stdout);
        LIKWID_PROFILE("RBA", solveRBA);
        break;
    default:
        fprintf(stderr,
            "Unknown solver variant %d (expected %d, %d or %d)\n",
            variant,
            SOR,
            RB,
            RBA);
        LIKWID_MARKER_CLOSE;
        return EXIT_FAILURE;
    }

    printf(" %.2fs\n", endTime - startTime);
    writeResult(&solver, "p-final.dat");
    LIKWID_MARKER_CLOSE;
    return EXIT_SUCCESS;
}

View File

@ -0,0 +1,79 @@
/* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
* All rights reserved.
* Use of this source code is governed by a MIT-style
* license that can be found in the LICENSE file. */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
//---
#include "parameter.h"
#include "util.h"
/* Size in bytes of the line buffer used when reading the parameter file. */
#define MAXLINE 4096
/*
 * Store the built-in default settings in *param.
 *
 * Sets the domain size, grid resolution and iterative solver controls.
 * The `gamma` member of Parameter is not assigned here.
 */
void initParameter(Parameter* param)
{
    /* Iterative solver controls. */
    param->itermax = 1000;
    param->eps     = 0.0001;
    param->omg     = 1.8;
    param->rho     = 0.99;

    /* Number of cells per direction. */
    param->imax = 100;
    param->jmax = 100;

    /* Domain box size. */
    param->xlength = 1.0;
    param->ylength = 1.0;
}
/*
 * Read settings from the text file `filename` into *param.
 *
 * Each line has the form "<name> <value>"; everything from a '#' to the end
 * of the line is ignored. Recognised names are xlength, ylength, imax, jmax,
 * itermax, eps, omg and rho. A name matches when the first token starts with
 * it. Lines without both a name and a value are skipped, and fields that are
 * not mentioned keep their current value.
 *
 * Prints a message to stderr and exits with EXIT_FAILURE if the file cannot
 * be opened.
 */
void readParameter(Parameter* param, const char* filename)
{
    /* Blank, tab and line-end characters all separate tokens, so a value
     * never carries the trailing newline and "name \n" has no value. */
    static const char delims[] = " \t\r\n";
    char line[MAXLINE];

    FILE* fp = fopen(filename, "r");
    if (!fp) {
        fprintf(stderr, "Could not open parameter file: %s\n", filename);
        exit(EXIT_FAILURE);
    }

#define PARSE_PARAM(p, f)                                                      \
    if (strncmp(tok, #p, sizeof(#p) / sizeof(#p[0]) - 1) == 0) {               \
        param->p = f(val);                                                     \
    }
#define PARSE_INT(p)  PARSE_PARAM(p, atoi)
#define PARSE_REAL(p) PARSE_PARAM(p, atof)

    /* Loop on the result of fgets itself instead of polling feof(). */
    while (fgets(line, MAXLINE, fp) != NULL) {
        /* Cut the line at the start of a comment, if there is one. */
        char* comment = strchr(line, '#');
        if (comment != NULL) {
            *comment = '\0';
        }

        char* tok = strtok(line, delims);
        char* val = strtok(NULL, delims);
        if (tok == NULL || val == NULL) {
            continue;
        }

        PARSE_REAL(xlength);
        PARSE_REAL(ylength);
        PARSE_INT(imax);
        PARSE_INT(jmax);
        PARSE_INT(itermax);
        PARSE_REAL(eps);
        PARSE_REAL(omg);
        PARSE_REAL(rho);
    }

    /* Keep the helper macros local to this function. */
#undef PARSE_REAL
#undef PARSE_INT
#undef PARSE_PARAM

    fclose(fp);
}
/*
 * Print the geometry and iterative solver settings stored in *param to
 * stdout. Nothing is modified.
 */
void printParameter(Parameter* param)
{
    const double boxX    = param->xlength;
    const double boxY    = param->ylength;
    const int cellsX     = param->imax;
    const int cellsY     = param->jmax;
    const int iterations = param->itermax;
    const double tol     = param->eps;
    const double relax   = param->omg;

    printf("Parameters:\n");

    printf("Geometry data:\n");
    printf("\tDomain box size (x, y): %e, %e\n", boxX, boxY);
    printf("\tCells (x, y): %d, %d\n", cellsX, cellsY);

    printf("Iterative solver parameters:\n");
    printf("\tMax iterations: %d\n", iterations);
    printf("\tepsilon (stopping tolerance) : %e\n", tol);
    printf("\tomega (SOR relaxation): %e\n", relax);
}

View File

@ -0,0 +1,18 @@
/* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
* All rights reserved.
* Use of this source code is governed by a MIT-style
* license that can be found in the LICENSE file. */
#ifndef __PARAMETER_H_
#define __PARAMETER_H_
/* Run-time settings of the solver. */
typedef struct {
    double xlength; /* domain box size in x */
    double ylength; /* domain box size in y */
    int imax;       /* number of cells in x */
    int jmax;       /* number of cells in y */
    int itermax;    /* maximum number of iterations */
    double eps;     /* stopping tolerance */
    double omg;     /* SOR relaxation factor */
    double rho;     /* copied into Solver.rho by initSolver() */
    double gamma;   /* not set by initParameter() or readParameter() */
} Parameter;
/* Store the built-in default settings in *param. */
void initParameter(Parameter* param);

/* Override settings in *param with the values found in the file `filename`. */
void readParameter(Parameter* param, const char* filename);

/* Print the settings stored in *param to stdout. */
void printParameter(Parameter* param);
#endif

View File

@ -0,0 +1,276 @@
/* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
* All rights reserved.
* Use of this source code is governed by a MIT-style
* license that can be found in the LICENSE file. */
#include "math.h"
#include "stdio.h"
#include "stdlib.h"
#include "allocate.h"
#include "parameter.h"
#include "solver.h"
/* Value of pi used for the sine-based initial field and right-hand side. */
#define PI 3.14159265358979323846
/*
 * Accessors for the flat solution and right-hand-side arrays: entry (i, j)
 * lives at index j * (imax + 2) + i, i.e. each row holds imax + 2 values.
 * Both macros expect locals named `imax` and `p` (respectively `rhs`) to be
 * in scope where they are used.
 */
#define P(i, j) p[(j) * (imax + 2) + (i)]
#define RHS(i, j) rhs[(j) * (imax + 2) + (i)]
/*
 * Set up *solver from *params and allocate and fill its arrays.
 *
 * solver  - receives grid size, mesh widths, tolerances and the two arrays
 *           p and rhs, each holding (imax + 2) * (jmax + 2) doubles
 * params  - source of the settings
 * problem - 2 fills rhs with sin(2 * PI * i * dx); any other value fills it
 *           with zeros
 *
 * p is always initialised with a sum of two sine terms in i and j.
 */
void initSolver(Solver* solver, Parameter* params, int problem)
{
    solver->imax    = params->imax;
    solver->jmax    = params->jmax;
    solver->dx      = params->xlength / params->imax;
    solver->dy      = params->ylength / params->jmax;
    solver->eps     = params->eps;
    solver->omega   = params->omg;
    solver->rho     = params->rho;
    solver->itermax = params->itermax;

    int imax = solver->imax;
    int jmax = solver->jmax;

    /* Widen before multiplying: the element count is then computed in size_t
     * instead of int. Note that the P/RHS accessors still index with int. */
    size_t bytesize = (size_t)(imax + 2) * (size_t)(jmax + 2) * sizeof(double);
    solver->p       = allocate(64, bytesize);
    solver->rhs     = allocate(64, bytesize);

    double dx   = solver->dx;
    double dy   = solver->dy;
    double* p   = solver->p;
    double* rhs = solver->rhs;

#pragma omp parallel for collapse(2)
    for (int j = 0; j < jmax + 2; j++) {
        for (int i = 0; i < imax + 2; i++) {
            P(i, j) = sin(2.0 * PI * i * dx * 2.0) + sin(2.0 * PI * j * dy * 2.0);
        }
    }

    if (problem == 2) {
#pragma omp parallel for collapse(2)
        for (int j = 0; j < jmax + 2; j++) {
            for (int i = 0; i < imax + 2; i++) {
                RHS(i, j) = sin(2.0 * PI * i * dx);
            }
        }
    } else {
#pragma omp parallel for collapse(2)
        for (int j = 0; j < jmax + 2; j++) {
            for (int i = 0; i < imax + 2; i++) {
                RHS(i, j) = 0.0;
            }
        }
    }
}
/*
 * Plain SOR iteration on solver->p with right-hand side solver->rhs.
 *
 * Sweeps the interior points in row order and updates p in place. After each
 * sweep the outermost rows and columns are overwritten with their interior
 * neighbours. Iterates until the mean squared residual drops below eps^2 or
 * itermax sweeps have been done, then prints the iteration count and omega.
 *
 * With DEBUG defined the residual is printed every sweep; with ANIMATE
 * defined the field is written to "p-<it>.dat" every sweep.
 */
void solve(Solver* solver)
{
    int imax      = solver->imax;
    int jmax      = solver->jmax;
    double eps    = solver->eps;
    int itermax   = solver->itermax;
    double dx2    = solver->dx * solver->dx;
    double dy2    = solver->dy * solver->dy;
    double idx2   = 1.0 / dx2;
    double idy2   = 1.0 / dy2;
    double factor = solver->omega * 0.5 * (dx2 * dy2) / (dx2 + dy2);
    double* p     = solver->p;
    double* rhs   = solver->rhs;
    double epssq  = eps * eps;
    int it        = 0;
    double res    = 1.0;

    while ((res >= epssq) && (it < itermax)) {
        res = 0.0;

        for (int j = 1; j < jmax + 1; j++) {
            for (int i = 1; i < imax + 1; i++) {
                double r = RHS(i, j) -
                           ((P(i - 1, j) - 2.0 * P(i, j) + P(i + 1, j)) * idx2 +
                               (P(i, j - 1) - 2.0 * P(i, j) + P(i, j + 1)) * idy2);

                P(i, j) -= (factor * r);
                res += (r * r);
            }
        }

        /* Copy the adjacent interior values into the outer rows/columns. */
        for (int i = 1; i < imax + 1; i++) {
            P(i, 0)        = P(i, 1);
            P(i, jmax + 1) = P(i, jmax);
        }
        for (int j = 1; j < jmax + 1; j++) {
            P(0, j)        = P(1, j);
            P(imax + 1, j) = P(imax, j);
        }

        /* Multiply in double so the point count cannot overflow int. */
        res = res / ((double)imax * (double)jmax);
#ifdef DEBUG
        printf("%d Residuum: %e\n", it, res);
#endif
#ifdef ANIMATE
        {
            /* The buffer only exists in ANIMATE builds; snprintf bounds the write. */
            char filename[32];
            snprintf(filename, sizeof filename, "p-%d.dat", it);
            writeResult(solver, filename);
        }
#endif
        it++;
    }

    printf("%d, %f\n", it, solver->omega);
}
/*
 * Red-black SOR iteration on solver->p with right-hand side solver->rhs.
 *
 * Each iteration makes two passes over the interior. A pass updates every
 * second point of each row, and the starting column alternates from row to
 * row, so a point updated in one pass only reads points of the other colour.
 * After both passes the outermost rows and columns are overwritten with
 * their interior neighbours. Iterates until the mean squared residual drops
 * below eps^2 or itermax iterations have been done, then prints the
 * iteration count, the final residual and omega.
 *
 * The row loops are OpenMP work-shared. The residual is combined with a
 * reduction, so its last bits may vary with the thread count.
 */
void solveRB(Solver* solver)
{
    int imax      = solver->imax;
    int jmax      = solver->jmax;
    double eps    = solver->eps;
    int itermax   = solver->itermax;
    double dx2    = solver->dx * solver->dx;
    double dy2    = solver->dy * solver->dy;
    double idx2   = 1.0 / dx2;
    double idy2   = 1.0 / dy2;
    double factor = solver->omega * 0.5 * (dx2 * dy2) / (dx2 + dy2);
    double* p     = solver->p;
    double* rhs   = solver->rhs;
    double epssq  = eps * eps;
    int it        = 0;
    double res    = 1.0;

    while ((res >= epssq) && (it < itermax)) {
        res = 0.0;

        for (int pass = 0; pass < 2; pass++) {
            /* Starting column of row 1: 1 in the first pass, 2 in the second. */
            const int jsw = pass + 1;

            /* res is summed by all threads, hence the reduction. */
#pragma omp parallel for reduction(+ : res)
            for (int j = 1; j < jmax + 1; j++) {
                /* Derive the starting column from the row index itself. A
                 * per-thread toggle would depend on which rows a thread
                 * happens to get and break the colouring. */
                const int isw = 1 + ((j + jsw) % 2);

                for (int i = isw; i < imax + 1; i += 2) {
                    double r = RHS(i, j) -
                               ((P(i + 1, j) - 2.0 * P(i, j) + P(i - 1, j)) * idx2 +
                                   (P(i, j + 1) - 2.0 * P(i, j) + P(i, j - 1)) * idy2);

                    P(i, j) -= (factor * r);
                    res += (r * r);
                }
            }
        }

        /* Copy the adjacent interior values into the outer rows/columns. */
#pragma omp parallel for
        for (int i = 1; i < imax + 1; i++) {
            P(i, 0)        = P(i, 1);
            P(i, jmax + 1) = P(i, jmax);
        }
#pragma omp parallel for
        for (int j = 1; j < jmax + 1; j++) {
            P(0, j)        = P(1, j);
            P(imax + 1, j) = P(imax, j);
        }

        /* Multiply in double so the point count cannot overflow int. */
        res = res / ((double)imax * (double)jmax);
#ifdef DEBUG
        printf("%d Residuum: %e\n", it, res);
#endif
#ifdef ANIMATE
        {
            char filename[32];
            snprintf(filename, sizeof filename, "p-%d.dat", it);
            writeResult(solver, filename);
        }
#endif
        /* Without this increment itermax is never reached and the reported
         * iteration count stays 0. */
        it++;
    }

    printf("Solver took %d iterations to reach %f\n", it, sqrt(res));
    printf("%d, %f\n", it, solver->omega);
}
/*
 * Red-black SOR iteration with a relaxation factor that changes over time.
 *
 * Works like the red-black scheme: two passes per iteration, each updating
 * every second point of a row with the starting column alternating between
 * rows. omega starts at 1.0 and is recomputed from solver->rho after every
 * pass; solver->omega is not used. After both passes the outermost rows and
 * columns are overwritten with their interior neighbours. Iterates until the
 * mean squared residual drops below eps^2 or itermax iterations have been
 * done, then prints the iteration count and the final omega.
 *
 * With DEBUG defined the residual and omega are printed every iteration;
 * with ANIMATE defined the field is written to "p-<it>.dat" every iteration.
 */
void solveRBA(Solver* solver)
{
    int imax      = solver->imax;
    int jmax      = solver->jmax;
    double eps    = solver->eps;
    int itermax   = solver->itermax;
    double dx2    = solver->dx * solver->dx;
    double dy2    = solver->dy * solver->dy;
    double idx2   = 1.0 / dx2;
    double idy2   = 1.0 / dy2;
    double factor = 0.5 * (dx2 * dy2) / (dx2 + dy2);
    double rho    = solver->rho;
    double* p     = solver->p;
    double* rhs   = solver->rhs;
    double epssq  = eps * eps;
    int it        = 0;
    double res    = 1.0;
    int pass, jsw, isw;
    double omega = 1.0;

    while ((res >= epssq) && (it < itermax)) {
        res = 0.0;
        jsw = 1;

        for (pass = 0; pass < 2; pass++) {
            isw = jsw;

            for (int j = 1; j < jmax + 1; j++) {
                for (int i = isw; i < imax + 1; i += 2) {
                    double r = RHS(i, j) -
                               ((P(i + 1, j) - 2.0 * P(i, j) + P(i - 1, j)) * idx2 +
                                   (P(i, j + 1) - 2.0 * P(i, j) + P(i, j - 1)) * idy2);

                    P(i, j) -= (omega * factor * r);
                    res += (r * r);
                }
                isw = 3 - isw; /* alternate the starting column per row */
            }
            jsw = 3 - jsw; /* the second pass starts on the other colour */

            /* The very first pass uses a different update formula. */
            omega = (it == 0 && pass == 0 ? 1.0 / (1.0 - 0.5 * rho * rho)
                                          : 1.0 / (1.0 - 0.25 * rho * rho * omega));
        }

        /* Copy the adjacent interior values into the outer rows/columns. */
        for (int i = 1; i < imax + 1; i++) {
            P(i, 0)        = P(i, 1);
            P(i, jmax + 1) = P(i, jmax);
        }
        for (int j = 1; j < jmax + 1; j++) {
            P(0, j)        = P(1, j);
            P(imax + 1, j) = P(imax, j);
        }

        /* Multiply in double so the point count cannot overflow int. */
        res = res / ((double)imax * (double)jmax);
#ifdef DEBUG
        printf("%d Residuum: %e Omega: %e\n", it, res, omega);
#endif
#ifdef ANIMATE
        {
            /* Declared here: this function has no other `filename`, so the
             * ANIMATE build needs its own buffer to compile. */
            char filename[32];
            snprintf(filename, sizeof filename, "p-%d.dat", it);
            writeResult(solver, filename);
        }
#endif
        it++;
    }

    // printf("Final omega: %f\n", omega);
    // printf("Solver took %d iterations to reach %f\n", it, sqrt(res));
    printf("%d, %f\n", it, omega);
}
/*
 * Write the complete array solver->p, outermost rows and columns included,
 * to the text file `filename`.
 *
 * One line is written per j with imax + 2 values, each printed with "%f"
 * and followed by a blank. An existing file is overwritten.
 *
 * On failure to open or to close the file, a message naming the file is
 * printed to stderr and the program exits with EXIT_FAILURE.
 */
void writeResult(Solver* solver, char* filename)
{
    int imax  = solver->imax;
    int jmax  = solver->jmax;
    double* p = solver->p;

    FILE* fp = fopen(filename, "w");
    if (fp == NULL) {
        fprintf(stderr, "Error: could not open %s for writing\n", filename);
        exit(EXIT_FAILURE);
    }

    for (int j = 0; j < jmax + 2; j++) {
        for (int i = 0; i < imax + 2; i++) {
            fprintf(fp, "%f ", P(i, j));
        }
        fprintf(fp, "\n");
    }

    /* fclose flushes buffered output, so a failure here means lost data. */
    if (fclose(fp) != 0) {
        fprintf(stderr, "Error: could not finish writing %s\n", filename);
        exit(EXIT_FAILURE);
    }
}

View File

@ -0,0 +1,22 @@
/* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
* All rights reserved.
* Use of this source code is governed by a MIT-style
* license that can be found in the LICENSE file. */
#ifndef __SOLVER_H_
#define __SOLVER_H_
#include "parameter.h"
/* State of one solver instance. */
typedef struct {
    double dx;    /* mesh width in x: xlength / imax */
    double dy;    /* mesh width in y: ylength / jmax */
    int imax;     /* number of cells in x */
    int jmax;     /* number of cells in y */
    double* p;    /* solution array, (imax + 2) * (jmax + 2) values */
    double* rhs;  /* right-hand side, same size as p */
    double eps;   /* stopping tolerance */
    double omega; /* SOR relaxation factor */
    double rho;   /* drives the omega update in solveRBA() */
    int itermax;  /* maximum number of iterations */
} Solver;
/* Set up *solver from *params; problem == 2 selects the sine right-hand
 * side, any other value a zero right-hand side. */
void initSolver(Solver* solver, Parameter* params, int problem);

/* Write the complete array solver->p to the text file `filename`. */
void writeResult(Solver* solver, char* filename);

/* Plain SOR iteration. */
void solve(Solver* solver);

/* Red-black SOR iteration. */
void solveRB(Solver* solver);

/* Red-black SOR iteration with an omega that is updated from rho. */
void solveRBA(Solver* solver);
#endif

View File

@ -0,0 +1,22 @@
/* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
* All rights reserved.
* Use of this source code is governed by a MIT-style
* license that can be found in the LICENSE file. */
#include <stdlib.h>
#include <time.h>
/*
 * Return the current CLOCK_MONOTONIC reading in seconds as a double
 * (whole seconds plus the nanosecond part scaled by 1e-9).
 */
double getTimeStamp()
{
    struct timespec now;
    clock_gettime(CLOCK_MONOTONIC, &now);

    const double wholeSeconds = (double)now.tv_sec;
    const double nanoSeconds  = (double)now.tv_nsec;
    return wholeSeconds + nanoSeconds * 1.e-9;
}
/*
 * Return the resolution of CLOCK_MONOTONIC, as reported by clock_getres,
 * in seconds.
 */
double getTimeResolution()
{
    struct timespec step;
    clock_getres(CLOCK_MONOTONIC, &step);

    const double wholeSeconds = (double)step.tv_sec;
    const double nanoSeconds  = (double)step.tv_nsec;
    return wholeSeconds + nanoSeconds * 1.e-9;
}
/*
 * Alias of getTimeStamp() under a name with a trailing underscore.
 * NOTE(review): presumably there for callers that need that symbol name
 * (e.g. Fortran linkage) - confirm.
 */
double getTimeStamp_()
{
    const double stamp = getTimeStamp();
    return stamp;
}

View File

@ -0,0 +1,11 @@
/* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
* All rights reserved.
* Use of this source code is governed by a MIT-style
* license that can be found in the LICENSE file. */
#ifndef __TIMING_H_
#define __TIMING_H_
/*
 * Both functions take no arguments. Declaring them with (void) makes these
 * real prototypes, so the compiler rejects calls that pass arguments; an
 * empty parameter list would leave the arguments unchecked.
 */

/* Current reading of the monotonic clock, in seconds. */
extern double getTimeStamp(void);

/* Resolution of the monotonic clock, in seconds. */
extern double getTimeResolution(void);
#endif // __TIMING_H_

View File

@ -0,0 +1,20 @@
/* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
* All rights reserved.
* Use of this source code is governed by a MIT-style
* license that can be found in the LICENSE file. */
#ifndef __UTIL_H_
#define __UTIL_H_
/* Horizontal rule (a line of dashes ending in a newline) for text output. */
#define HLINE                                                                            \
"----------------------------------------------------------------------------\n"
/*
 * Generic helpers, only defined if no macro of the same name exists already.
 * Each one may evaluate an argument twice, so do not pass expressions with
 * side effects (e.g. MIN(i++, j)).
 */
#ifndef MIN
/* Smaller of x and y. */
#define MIN(x, y) ((x) < (y) ? (x) : (y))
#endif
#ifndef MAX
/* Larger of x and y. */
#define MAX(x, y) ((x) > (y) ? (x) : (y))
#endif
#ifndef ABS
/* a if a >= 0, otherwise -a. */
#define ABS(a) ((a) >= 0 ? (a) : -(a))
#endif
#endif // __UTIL_H_

View File

@ -0,0 +1,7 @@
# Plot the grid stored in 'p.dat' as a 3D surface and save it as 'p.png'.
# NOTE(review): the solver's main() writes "p-0.dat" and "p-final.dat";
# presumably one of them has to be copied or renamed to 'p.dat' first - confirm.

# PNG output, 1024x768 pixels, enhanced text mode, font size 12.
set terminal png size 1024,768 enhanced font ,12
set output 'p.png'
# Values in the data file are separated by whitespace.
set datafile separator whitespace
set grid
# Hide the parts of the surface that lie behind other parts.
set hidden3d
# Read the file as a matrix: one line per row, one value per column.
splot 'p.dat' matrix using 1:2:3 with lines