diff --git a/.clangd b/.clangd index 372227d..f226d9f 100644 --- a/.clangd +++ b/.clangd @@ -1,3 +1,3 @@ CompileFlags: - Add: [-I/usr/local/include, -I/opt/homebrew/include] + Add: [-I/usr/local/include, -I/opt/homebrew/include, -D_MPI] Compiler: clang diff --git a/BasicSolver/2D-mpi-v1/src/main.c b/BasicSolver/2D-mpi-v1/src/main.c index d685367..cfab771 100644 --- a/BasicSolver/2D-mpi-v1/src/main.c +++ b/BasicSolver/2D-mpi-v1/src/main.c @@ -15,12 +15,11 @@ #include "progress.h" #include "solver.h" #include "timing.h" -#include int main(int argc, char** argv) { - int rank; - double S, E; + int rank = 0; + double start, end; Parameter params; Solver solver; @@ -44,7 +43,7 @@ int main(int argc, char** argv) double te = solver.te; double t = 0.0; - S = getTimeStamp(); + start = getTimeStamp(); while (t <= te) { if (tau > 0.0) { computeTimestep(&solver); @@ -56,7 +55,6 @@ int main(int argc, char** argv) computeRHS(&solver); solve(&solver); adaptUV(&solver); - /* exit(EXIT_SUCCESS); */ t += solver.dt; #ifdef VERBOSE @@ -67,10 +65,10 @@ int main(int argc, char** argv) printProgress(t); #endif } - E = getTimeStamp(); + end = getTimeStamp(); stopProgress(); if (rank == 0) { - printf("Solution took %.2fs\n", E - S); + printf("Solution took %.2fs\n", end - start); } collectResult(&solver); diff --git a/BasicSolver/2D-mpi-v1/src/solver.c b/BasicSolver/2D-mpi-v1/src/solver.c index 6bc575d..a621b1c 100644 --- a/BasicSolver/2D-mpi-v1/src/solver.c +++ b/BasicSolver/2D-mpi-v1/src/solver.c @@ -108,64 +108,69 @@ static void shift(Solver* solver) MPI_Waitall(2, requests, MPI_STATUSES_IGNORE); } -void collectResult(Solver* solver) +static void gatherArray( + Solver* solver, int cnt, int* rcvCounts, int* displs, double* src, double* dst) { - double* Pall = NULL; - double* Uall = NULL; - double* Vall = NULL; - int *rcvCounts, *displs; + double* sendbuffer = src + (solver->imax + 2); if (solver->rank == 0) { - Pall = allocate(64, (solver->imax + 2) * (solver->jmax + 2) * sizeof(double)); - Uall = allocate(64, (solver->imax + 2) * (solver->jmax + 2) * sizeof(double)); - Vall = allocate(64, (solver->imax + 2) * (solver->jmax + 2) * sizeof(double)); + sendbuffer = src; + } + + MPI_Gatherv(sendbuffer, + cnt, + MPI_DOUBLE, + dst, + rcvCounts, + displs, + MPI_DOUBLE, + 0, + MPI_COMM_WORLD); +} + +void collectResult(Solver* solver) +{ + double* p = NULL; + double* u = NULL; + double* v = NULL; + int *rcvCounts, *displs; + int cnt = solver->jmaxLocal * (solver->imax + 2); + + if (solver->rank == 0) { + p = allocate(64, (solver->imax + 2) * (solver->jmax + 2) * sizeof(double)); + u = allocate(64, (solver->imax + 2) * (solver->jmax + 2) * sizeof(double)); + v = allocate(64, (solver->imax + 2) * (solver->jmax + 2) * sizeof(double)); rcvCounts = (int*)malloc(solver->size * sizeof(int)); displs = (int*)malloc(solver->size * sizeof(int)); - rcvCounts[0] = solver->jmaxLocal * (solver->imax + 2); + } + + if (solver->rank == 0 && solver->size == 1) { + cnt = (solver->jmaxLocal + 2) * (solver->imax + 2); + } else if (solver->rank == 0 || solver->rank == (solver->size - 1)) { + cnt = (solver->jmaxLocal + 1) * (solver->imax + 2); + } + + MPI_Gather(&cnt, 1, MPI_INTEGER, rcvCounts, 1, MPI_INTEGER, 0, MPI_COMM_WORLD); + + if (solver->rank == 0) { displs[0] = 0; int cursor = rcvCounts[0]; for (int i = 1; i < solver->size; i++) { - rcvCounts[i] = sizeOfRank(i, solver->size, solver->jmax) * (solver->imax + 2); displs[i] = cursor; cursor += rcvCounts[i]; } } - int cnt = solver->jmaxLocal * (solver->imax + 2); - double* sendbuffer = solver->p + (solver->imax + 2); - MPI_Gatherv(sendbuffer, - cnt, - MPI_DOUBLE, - Pall, - rcvCounts, - displs, - MPI_DOUBLE, - 0, - MPI_COMM_WORLD); - sendbuffer = solver->u + (solver->imax + 2); - MPI_Gatherv(sendbuffer, - cnt, - MPI_DOUBLE, - Uall, - rcvCounts, - displs, - MPI_DOUBLE, - 0, - MPI_COMM_WORLD); - sendbuffer = solver->v + (solver->imax + 2); - MPI_Gatherv(sendbuffer, - cnt, - MPI_DOUBLE, - Vall, - rcvCounts, - displs, - MPI_DOUBLE, - 0, - MPI_COMM_WORLD); + gatherArray(solver, cnt, rcvCounts, displs, solver->p, p); + gatherArray(solver, cnt, rcvCounts, displs, solver->u, u); + gatherArray(solver, cnt, rcvCounts, displs, solver->v, v); if (solver->rank == 0) { - writeResult(solver, Pall, Uall, Vall); + writeResult(solver, p, u, v); + free(p); + free(u); + free(v); } } @@ -253,8 +258,8 @@ void initSolver(Solver* solver, Parameter* params) double dx = solver->dx; double dy = solver->dy; - double inv_sqr_sum = 1.0 / (dx * dx) + 1.0 / (dy * dy); - solver->dtBound = 0.5 * solver->re * 1.0 / inv_sqr_sum; + double invSquareSum = 1.0 / (dx * dx) + 1.0 / (dy * dy); + solver->dtBound = 0.5 * solver->re * 1.0 / invSquareSum; #ifdef VERBOSE printConfig(solver); #endif @@ -678,10 +683,10 @@ void writeResult(Solver* solver, double* p, double* u, double* v) y = dy * (j - 0.5); for (int i = 1; i < imax + 1; i++) { x = dx * (i - 0.5); - double vel_u = (U(i, j) + U(i - 1, j)) / 2.0; - double vel_v = (V(i, j) + V(i, j - 1)) / 2.0; - double len = sqrt((vel_u * vel_u) + (vel_v * vel_v)); - fprintf(fp, "%.2f %.2f %f %f %f\n", x, y, vel_u, vel_v, len); + double vu = (U(i, j) + U(i - 1, j)) / 2.0; + double vv = (V(i, j) + V(i, j - 1)) / 2.0; + double len = sqrt((vu * vu) + (vv * vv)); + fprintf(fp, "%.2f %.2f %f %f %f\n", x, y, vu, vv, len); } } diff --git a/BasicSolver/3D-mpi-io/config.mk b/BasicSolver/3D-mpi-io/config.mk index 88fec3f..c163666 100644 --- a/BasicSolver/3D-mpi-io/config.mk +++ b/BasicSolver/3D-mpi-io/config.mk @@ -1,5 +1,6 @@ # Supported: GCC, CLANG, ICC -TAG ?= ICC +TAG ?= CLANG +ENABLE_MPI ?= true ENABLE_OPENMP ?= false #Feature options diff --git a/BasicSolver/3D-mpi-io/include_CLANG.mk b/BasicSolver/3D-mpi-io/include_CLANG.mk index 889fa93..a1e37e0 100644 --- a/BasicSolver/3D-mpi-io/include_CLANG.mk +++ b/BasicSolver/3D-mpi-io/include_CLANG.mk @@ -1,4 +1,10 @@ +ifeq ($(ENABLE_MPI),true) CC = mpicc +DEFINES = -D_MPI +else +CC = cc +endif + GCC = cc LINKER = $(CC) @@ -9,9 +15,7 @@ LIBS = # -lomp endif VERSION = --version -# CFLAGS = -O3 -std=c17 $(OPENMP) CFLAGS = -Ofast -std=c17 -#CFLAGS = -Ofast -fnt-store=aggressive -std=c99 $(OPENMP) #AMD CLANG LFLAGS = $(OPENMP) -lm -DEFINES = -D_GNU_SOURCE# -DDEBUG +DEFINES += -D_GNU_SOURCE# -DDEBUG INCLUDES = diff --git a/BasicSolver/3D-mpi-io/src/allocate.c b/BasicSolver/3D-mpi-io/src/allocate.c index 81e1e9d..6af6c7f 100644 --- a/BasicSolver/3D-mpi-io/src/allocate.c +++ b/BasicSolver/3D-mpi-io/src/allocate.c @@ -5,10 +5,13 @@ * license that can be found in the LICENSE file. */ #include +#include #include #include -void* allocate(int alignment, size_t bytesize) +#include "allocate.h" + +void* allocate(size_t alignment, size_t bytesize) { int errorCode; void* ptr; diff --git a/BasicSolver/3D-mpi-io/src/allocate.h b/BasicSolver/3D-mpi-io/src/allocate.h index 54cfe06..1537eb2 100644 --- a/BasicSolver/3D-mpi-io/src/allocate.h +++ b/BasicSolver/3D-mpi-io/src/allocate.h @@ -8,6 +8,6 @@ #define __ALLOCATE_H_ #include -extern void* allocate(int alignment, size_t bytesize); +extern void* allocate(size_t alignment, size_t bytesize); #endif diff --git a/BasicSolver/3D-mpi-io/src/comm.c b/BasicSolver/3D-mpi-io/src/comm.c index add6b20..c03688d 100644 --- a/BasicSolver/3D-mpi-io/src/comm.c +++ b/BasicSolver/3D-mpi-io/src/comm.c @@ -4,7 +4,9 @@ * Use of this source code is governed by a MIT style * license that can be found in the LICENSE file. */ +#if defined(_MPI) #include +#endif #include #include #include @@ -12,6 +14,7 @@ #include "allocate.h" #include "comm.h" +#if defined(_MPI) // subroutines local to this module static int sizeOfRank(int rank, int size, int N) { @@ -123,19 +126,23 @@ static int sum(int* sizes, int position) return sum; } +#endif // exported subroutines void commReduction(double* v, int op) { +#if defined(_MPI) if (op == MAX) { MPI_Allreduce(MPI_IN_PLACE, v, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD); } else if (op == SUM) { MPI_Allreduce(MPI_IN_PLACE, v, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); } +#endif } int commIsBoundary(Comm* c, Direction direction) { +#if defined(_MPI) switch (direction) { case LEFT: return c->coords[ICORD] == 0; @@ -159,12 +166,14 @@ int commIsBoundary(Comm* c, Direction direction) printf("ERROR!\n"); break; } +#endif - return 0; + return 1; } void commExchange(Comm* c, double* grid) { +#if defined(_MPI) int counts[6] = { 1, 1, 1, 1, 1, 1 }; MPI_Aint displs[6] = { 0, 0, 0, 0, 0, 0 }; @@ -177,10 +186,12 @@ void commExchange(Comm* c, double* grid) displs, c->rbufferTypes, c->comm); +#endif } void commShift(Comm* c, double* f, double* g, double* h) { +#if defined(_MPI) MPI_Request requests[6] = { MPI_REQUEST_NULL, MPI_REQUEST_NULL, MPI_REQUEST_NULL, @@ -228,10 +239,12 @@ void commShift(Comm* c, double* f, double* g, double* h) MPI_Isend(h, 1, c->sbufferTypes[BACK], c->neighbours[BACK], 2, c->comm, &requests[5]); MPI_Waitall(6, requests, MPI_STATUSES_IGNORE); +#endif } void commGetOffsets(Comm* c, int offsets[], int kmax, int jmax, int imax) { +#if defined(_MPI) int sum = 0; for (int i = 0; i < c->coords[ICORD]; i++) { @@ -250,10 +263,12 @@ void commGetOffsets(Comm* c, int offsets[], int kmax, int jmax, int imax) sum += sizeOfRank(i, c->dims[KCORD], kmax); } offsets[KDIM] = sum; +#endif } void commPrintConfig(Comm* c) { +#if defined(_MPI) fflush(stdout); MPI_Barrier(MPI_COMM_WORLD); if (commIsMaster(c)) { @@ -283,13 +298,21 @@ void commPrintConfig(Comm* c) } } MPI_Barrier(MPI_COMM_WORLD); +#endif } -void commInit(Comm* c, int kmax, int jmax, int imax) +void commInit(Comm* c, int argc, char** argv) { - /* setup communication */ +#if defined(_MPI) + MPI_Init(&argc, &argv); MPI_Comm_rank(MPI_COMM_WORLD, &(c->rank)); MPI_Comm_size(MPI_COMM_WORLD, &(c->size)); +#endif +} + +void commPartition(Comm* c, int kmax, int jmax, int imax) +{ +#if defined(_MPI) int dims[NDIMS] = { 0, 0, 0 }; int periods[NDIMS] = { 0, 0, 0 }; MPI_Dims_create(c->size, NDIMS, dims); @@ -316,12 +339,21 @@ void commInit(Comm* c, int kmax, int jmax, int imax) setupCommunication(c, FRONT, HALO); setupCommunication(c, BACK, BULK); setupCommunication(c, BACK, HALO); +#else + c->imaxLocal = imax; + c->jmaxLocal = jmax; + c->kmaxLocal = kmax; +#endif } -void commFree(Comm* c) +void commFinalize(Comm* c) { +#if defined(_MPI) for (int i = 0; i < NDIRS; i++) { MPI_Type_free(&c->sbufferTypes[i]); MPI_Type_free(&c->rbufferTypes[i]); } + + MPI_Finalize(); +#endif } diff --git a/BasicSolver/3D-mpi-io/src/comm.h b/BasicSolver/3D-mpi-io/src/comm.h index fead94f..017555c 100644 --- a/BasicSolver/3D-mpi-io/src/comm.h +++ b/BasicSolver/3D-mpi-io/src/comm.h @@ -1,12 +1,14 @@ /* - * Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg. + * Copyright (C) 2024 NHR@FAU, University Erlangen-Nuremberg. * All rights reserved. This file is part of nusif-solver. * Use of this source code is governed by a MIT style * license that can be found in the LICENSE file. */ #ifndef __COMM_H_ #define __COMM_H_ +#if defined(_MPI) #include +#endif /* * Spatial directions: * ICORD (0) from 0 (LEFT) to imax (RIGHT) @@ -24,17 +26,20 @@ enum op { MAX = 0, SUM }; typedef struct { int rank; int size; +#if defined(_MPI) MPI_Comm comm; MPI_Datatype sbufferTypes[NDIRS]; MPI_Datatype rbufferTypes[NDIRS]; +#endif int neighbours[NDIRS]; int coords[NDIMS], dims[NDIMS]; int imaxLocal, jmaxLocal, kmaxLocal; MPI_File fh; } Comm; -extern void commInit(Comm* comm, int kmax, int jmax, int imax); -extern void commFree(Comm* comm); +extern void commInit(Comm* c, int argc, char** argv); +extern void commPartition(Comm* c, int kmax, int jmax, int imax); +extern void commFinalize(Comm* comm); extern void commPrintConfig(Comm*); extern void commExchange(Comm*, double*); extern void commShift(Comm* c, double* f, double* g, double* h); diff --git a/BasicSolver/3D-mpi-io/src/main.c b/BasicSolver/3D-mpi-io/src/main.c index 17fc72f..8a1c88a 100644 --- a/BasicSolver/3D-mpi-io/src/main.c +++ b/BasicSolver/3D-mpi-io/src/main.c @@ -1,89 +1,84 @@ /* - * Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg. + * Copyright (C) 2024 NHR@FAU, University Erlangen-Nuremberg. * All rights reserved. * Use of this source code is governed by a MIT-style * license that can be found in the LICENSE file. */ #include #include -#include #include #include #include #include "allocate.h" +#include "comm.h" #include "parameter.h" #include "progress.h" #include "solver.h" -#include "test.h" #include "timing.h" #include "vtkWriter.h" int main(int argc, char** argv) { - int rank; double timeStart, timeStop; - Parameter params; - Solver solver; + Parameter p; + Solver s; - MPI_Init(&argc, &argv); - MPI_Comm_rank(MPI_COMM_WORLD, &rank); - initParameter(¶ms); + commInit(&s.comm, argc, argv); + initParameter(&p); if (argc != 2) { printf("Usage: %s \n", argv[0]); exit(EXIT_SUCCESS); } - readParameter(¶ms, argv[1]); - if (commIsMaster(&solver.comm)) { - printParameter(¶ms); + readParameter(&p, argv[1]); + commPartition(&s.comm, p.kmax, p.jmax, p.imax); + if (commIsMaster(&s.comm)) { + printParameter(&p); } - initSolver(&solver, ¶ms); + initSolver(&s, &p); #ifndef VERBOSE - initProgress(solver.te); + initProgress(s.te); #endif - double tau = solver.tau; - double te = solver.te; + double tau = s.tau; + double te = s.te; double t = 0.0; - int nt = 0; timeStart = getTimeStamp(); while (t <= te) { - if (tau > 0.0) computeTimestep(&solver); - setBoundaryConditions(&solver); - setSpecialBoundaryCondition(&solver); - computeFG(&solver); - computeRHS(&solver); - // if (nt % 100 == 0) normalizePressure(&solver); - solve(&solver); - adaptUV(&solver); - t += solver.dt; - nt++; + if (tau > 0.0) computeTimestep(&s); + setBoundaryConditions(&s); + setSpecialBoundaryCondition(&s); + computeFG(&s); + computeRHS(&s); + solve(&s); + adaptUV(&s); + t += s.dt; #ifdef VERBOSE - if (commIsMaster(&solver.comm)) { - printf("TIME %f , TIMESTEP %f\n", t, solver.dt); + if (commIsMaster(&s.comm)) { + printf("TIME %f , TIMESTEP %f\n", t, s.dt); } #else printProgress(t); #endif } timeStop = getTimeStamp(); +#ifndef VERBOSE stopProgress(); - if (commIsMaster(&solver.comm)) { +#endif + if (commIsMaster(&s.comm)) { printf("Solution took %.2fs\n", timeStop - timeStart); } - // testInit(&solver); - VtkOptions opts = { .grid = solver.grid, .comm = solver.comm }; - vtkOpen(&opts, solver.problem); - vtkScalar(&opts, "pressure", solver.p); - vtkVector(&opts, "velocity", (VtkVector) { solver.u, solver.v, solver.w }); + VtkOptions opts = { .grid = s.grid, .comm = s.comm }; + vtkOpen(&opts, s.problem); + vtkScalar(&opts, "pressure", s.p); + vtkVector(&opts, "velocity", (VtkVector) { s.u, s.v, s.w }); vtkClose(&opts); - commFree(&solver.comm); - MPI_Finalize(); + commFinalize(&s.comm); return EXIT_SUCCESS; } diff --git a/BasicSolver/3D-mpi-io/src/progress.h b/BasicSolver/3D-mpi-io/src/progress.h index 9ef2d96..4d02cdb 100644 --- a/BasicSolver/3D-mpi-io/src/progress.h +++ b/BasicSolver/3D-mpi-io/src/progress.h @@ -9,6 +9,6 @@ extern void initProgress(double); extern void printProgress(double); -extern void stopProgress(); +extern void stopProgress(void); #endif diff --git a/BasicSolver/3D-mpi-io/src/solver.c b/BasicSolver/3D-mpi-io/src/solver.c index 2a49504..1ba1042 100644 --- a/BasicSolver/3D-mpi-io/src/solver.c +++ b/BasicSolver/3D-mpi-io/src/solver.c @@ -104,7 +104,6 @@ void initSolver(Solver* s, Parameter* params) s->tau = params->tau; s->gamma = params->gamma; - commInit(&s->comm, s->grid.kmax, s->grid.jmax, s->grid.imax); /* allocate arrays */ int imaxLocal = s->comm.imaxLocal; int jmaxLocal = s->comm.jmaxLocal; @@ -199,18 +198,23 @@ void solve(Solver* s) double epssq = eps * eps; int it = 0; double res = 1.0; - commExchange(&s->comm, p); + int pass, ksw, jsw, isw; while ((res >= epssq) && (it < itermax)) { - res = 0.0; + ksw = 1; + + for (pass = 0; pass < 2; pass++) { + jsw = ksw; + commExchange(&s->comm, p); for (int k = 1; k < kmaxLocal + 1; k++) { + isw = jsw; for (int j = 1; j < jmaxLocal + 1; j++) { - for (int i = 1; i < imaxLocal + 1; i++) { + for (int i = isw; i < imaxLocal + 1; i += 2) { - double r = RHS(i, j, k) - - ((P(i + 1, j, k) - 2.0 * P(i, j, k) + P(i - 1, j, k)) * - idx2 + + double r = + RHS(i, j, k) - + ((P(i + 1, j, k) - 2.0 * P(i, j, k) + P(i - 1, j, k)) * idx2 + (P(i, j + 1, k) - 2.0 * P(i, j, k) + P(i, j - 1, k)) * idy2 + (P(i, j, k + 1) - 2.0 * P(i, j, k) + P(i, j, k - 1)) * @@ -219,7 +223,11 @@ void solve(Solver* s) P(i, j, k) -= (factor * r); res += (r * r); } + isw = 3 - isw; } + jsw = 3 - jsw; + } + ksw = 3 - ksw; } if (commIsBoundary(&s->comm, FRONT)) { diff --git a/BasicSolver/3D-mpi-io/src/solver.h b/BasicSolver/3D-mpi-io/src/solver.h index ac9f450..f951ec8 100644 --- a/BasicSolver/3D-mpi-io/src/solver.h +++ b/BasicSolver/3D-mpi-io/src/solver.h @@ -33,13 +33,13 @@ typedef struct { Comm comm; } Solver; -void initSolver(Solver*, Parameter*); -void computeRHS(Solver*); -void solve(Solver*); -void normalizePressure(Solver*); -void computeTimestep(Solver*); -void setBoundaryConditions(Solver*); -void setSpecialBoundaryCondition(Solver*); -void computeFG(Solver*); -void adaptUV(Solver*); +extern void initSolver(Solver*, Parameter*); +extern void computeRHS(Solver*); +extern void solve(Solver*); +extern void normalizePressure(Solver*); +extern void computeTimestep(Solver*); +extern void setBoundaryConditions(Solver*); +extern void setSpecialBoundaryCondition(Solver*); +extern void computeFG(Solver*); +extern void adaptUV(Solver*); #endif diff --git a/BasicSolver/3D-mpi-io/src/timing.c b/BasicSolver/3D-mpi-io/src/timing.c index c4025a4..e578f18 100644 --- a/BasicSolver/3D-mpi-io/src/timing.c +++ b/BasicSolver/3D-mpi-io/src/timing.c @@ -7,18 +7,16 @@ #include #include -double getTimeStamp() +double getTimeStamp(void) { struct timespec ts; clock_gettime(CLOCK_MONOTONIC, &ts); return (double)ts.tv_sec + (double)ts.tv_nsec * 1.e-9; } -double getTimeResolution() +double getTimeResolution(void) { struct timespec ts; clock_getres(CLOCK_MONOTONIC, &ts); return (double)ts.tv_sec + (double)ts.tv_nsec * 1.e-9; } - -double getTimeStamp_() { return getTimeStamp(); } diff --git a/BasicSolver/3D-mpi-io/src/timing.h b/BasicSolver/3D-mpi-io/src/timing.h index db95329..58fb5ac 100644 --- a/BasicSolver/3D-mpi-io/src/timing.h +++ b/BasicSolver/3D-mpi-io/src/timing.h @@ -7,8 +7,7 @@ #ifndef __TIMING_H_ #define __TIMING_H_ -extern double getTimeStamp(); -extern double getTimeResolution(); -extern double getTimeStamp_(); +extern double getTimeStamp(void); +extern double getTimeResolution(void); #endif // __TIMING_H_ diff --git a/BasicSolver/3D-mpi/config.mk b/BasicSolver/3D-mpi/config.mk index af5f1a0..c163666 100644 --- a/BasicSolver/3D-mpi/config.mk +++ b/BasicSolver/3D-mpi/config.mk @@ -1,5 +1,6 @@ # Supported: GCC, CLANG, ICC TAG ?= CLANG +ENABLE_MPI ?= true ENABLE_OPENMP ?= false #Feature options diff --git a/BasicSolver/3D-mpi/include_CLANG.mk b/BasicSolver/3D-mpi/include_CLANG.mk index 889fa93..5b036ab 100644 --- a/BasicSolver/3D-mpi/include_CLANG.mk +++ b/BasicSolver/3D-mpi/include_CLANG.mk @@ -1,4 +1,10 @@ +ifeq ($(ENABLE_MPI),true) CC = mpicc +DEFINES = -D_MPI +else +CC = cc +endif + GCC = cc LINKER = $(CC) @@ -9,9 +15,7 @@ LIBS = # -lomp endif VERSION = --version -# CFLAGS = -O3 -std=c17 $(OPENMP) CFLAGS = -Ofast -std=c17 -#CFLAGS = -Ofast -fnt-store=aggressive -std=c99 $(OPENMP) #AMD CLANG LFLAGS = $(OPENMP) -lm -DEFINES = -D_GNU_SOURCE# -DDEBUG +DEFINES += -D_GNU_SOURCE# -DDEBUG INCLUDES = diff --git a/BasicSolver/3D-mpi/src/allocate.c b/BasicSolver/3D-mpi/src/allocate.c index 81e1e9d..6af6c7f 100644 --- a/BasicSolver/3D-mpi/src/allocate.c +++ b/BasicSolver/3D-mpi/src/allocate.c @@ -5,10 +5,13 @@ * license that can be found in the LICENSE file. */ #include +#include #include #include -void* allocate(int alignment, size_t bytesize) +#include "allocate.h" + +void* allocate(size_t alignment, size_t bytesize) { int errorCode; void* ptr; diff --git a/BasicSolver/3D-mpi/src/allocate.h b/BasicSolver/3D-mpi/src/allocate.h index 54cfe06..1537eb2 100644 --- a/BasicSolver/3D-mpi/src/allocate.h +++ b/BasicSolver/3D-mpi/src/allocate.h @@ -8,6 +8,6 @@ #define __ALLOCATE_H_ #include -extern void* allocate(int alignment, size_t bytesize); +extern void* allocate(size_t alignment, size_t bytesize); #endif diff --git a/BasicSolver/3D-mpi/src/comm.c b/BasicSolver/3D-mpi/src/comm.c index 568b550..b9ad171 100644 --- a/BasicSolver/3D-mpi/src/comm.c +++ b/BasicSolver/3D-mpi/src/comm.c @@ -4,7 +4,9 @@ * Use of this source code is governed by a MIT style * license that can be found in the LICENSE file. */ +#if defined(_MPI) #include +#endif #include #include #include @@ -12,6 +14,7 @@ #include "allocate.h" #include "comm.h" +#if defined(_MPI) // subroutines local to this module static int sizeOfRank(int rank, int size, int N) { @@ -177,19 +180,23 @@ static int sum(int* sizes, int position) return sum; } +#endif // exported subroutines void commReduction(double* v, int op) { +#if defined(_MPI) if (op == MAX) { MPI_Allreduce(MPI_IN_PLACE, v, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD); } else if (op == SUM) { MPI_Allreduce(MPI_IN_PLACE, v, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); } +#endif } int commIsBoundary(Comm* c, Direction direction) { +#if defined(_MPI) switch (direction) { case LEFT: return c->coords[ICORD] == 0; @@ -213,12 +220,14 @@ int commIsBoundary(Comm* c, Direction direction) printf("ERROR!\n"); break; } +#endif - return 0; + return 1; } void commExchange(Comm* c, double* grid) { +#if defined(_MPI) int counts[6] = { 1, 1, 1, 1, 1, 1 }; MPI_Aint displs[6] = { 0, 0, 0, 0, 0, 0 }; @@ -231,10 +240,12 @@ void commExchange(Comm* c, double* grid) displs, c->rbufferTypes, c->comm); +#endif } void commShift(Comm* c, double* f, double* g, double* h) { +#if defined(_MPI) MPI_Request requests[6] = { MPI_REQUEST_NULL, MPI_REQUEST_NULL, MPI_REQUEST_NULL, @@ -282,6 +293,7 @@ void commShift(Comm* c, double* f, double* g, double* h) MPI_Isend(h, 1, c->sbufferTypes[BACK], c->neighbours[BACK], 2, c->comm, &requests[5]); MPI_Waitall(6, requests, MPI_STATUSES_IGNORE); +#endif } #define G(v, i, j, k) \ @@ -300,6 +312,7 @@ void commCollectResult(Comm* c, int jmax, int imax) { +#if defined(_MPI) int imaxLocal = c->imaxLocal; int jmaxLocal = c->jmaxLocal; int kmaxLocal = c->kmaxLocal; @@ -426,10 +439,12 @@ void commCollectResult(Comm* c, imax); free(tmp); +#endif } void commPrintConfig(Comm* c) { +#if defined(_MPI) fflush(stdout); MPI_Barrier(MPI_COMM_WORLD); if (commIsMaster(c)) { @@ -459,13 +474,24 @@ void commPrintConfig(Comm* c) } } MPI_Barrier(MPI_COMM_WORLD); +#endif } -void commInit(Comm* c, int kmax, int jmax, int imax) +void commInit(Comm* c, int argc, char** argv) { - /* setup communication */ +#if defined(_MPI) + MPI_Init(&argc, &argv); MPI_Comm_rank(MPI_COMM_WORLD, &(c->rank)); MPI_Comm_size(MPI_COMM_WORLD, &(c->size)); +#else + c->rank = 0; + c->size = 1; +#endif +} + +void commPartition(Comm* c, int kmax, int jmax, int imax) +{ +#if defined(_MPI) int dims[NDIMS] = { 0, 0, 0 }; int periods[NDIMS] = { 0, 0, 0 }; MPI_Dims_create(c->size, NDIMS, dims); @@ -492,12 +518,21 @@ void commInit(Comm* c, int kmax, int jmax, int imax) setupCommunication(c, FRONT, HALO); setupCommunication(c, BACK, BULK); setupCommunication(c, BACK, HALO); +#else + c->imaxLocal = imax; + c->jmaxLocal = jmax; + c->kmaxLocal = kmax; +#endif } -void commFree(Comm* c) +void commFinalize(Comm* c) { +#if defined(_MPI) for (int i = 0; i < NDIRS; i++) { MPI_Type_free(&c->sbufferTypes[i]); MPI_Type_free(&c->rbufferTypes[i]); } + + MPI_Finalize(); +#endif } diff --git a/BasicSolver/3D-mpi/src/comm.h b/BasicSolver/3D-mpi/src/comm.h index 243db14..64f7082 100644 --- a/BasicSolver/3D-mpi/src/comm.h +++ b/BasicSolver/3D-mpi/src/comm.h @@ -1,12 +1,14 @@ /* - * Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg. + * Copyright (C) 2024 NHR@FAU, University Erlangen-Nuremberg. * All rights reserved. This file is part of nusif-solver. * Use of this source code is governed by a MIT style * license that can be found in the LICENSE file. */ #ifndef __COMM_H_ #define __COMM_H_ +#if defined(_MPI) #include +#endif /* * Spatial directions: * ICORD (0) from 0 (LEFT) to imax (RIGHT) @@ -24,17 +26,19 @@ enum op { MAX = 0, SUM }; typedef struct { int rank; int size; +#if defined(_MPI) MPI_Comm comm; MPI_Datatype sbufferTypes[NDIRS]; MPI_Datatype rbufferTypes[NDIRS]; +#endif int neighbours[NDIRS]; int coords[NDIMS], dims[NDIMS]; int imaxLocal, jmaxLocal, kmaxLocal; - MPI_File fh; } Comm; -extern void commInit(Comm* comm, int kmax, int jmax, int imax); -extern void commFree(Comm* comm); +extern void commInit(Comm* c, int argc, char** argv); +extern void commPartition(Comm* c, int kmax, int jmax, int imax); +extern void commFinalize(Comm* comm); extern void commPrintConfig(Comm*); extern void commExchange(Comm*, double*); extern void commShift(Comm* c, double* f, double* g, double* h); diff --git a/BasicSolver/3D-mpi/src/main.c b/BasicSolver/3D-mpi/src/main.c index f53e2a1..ae7b165 100644 --- a/BasicSolver/3D-mpi/src/main.c +++ b/BasicSolver/3D-mpi/src/main.c @@ -1,90 +1,82 @@ /* - * Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg. + * Copyright (C) 2024 NHR@FAU, University Erlangen-Nuremberg. * All rights reserved. * Use of this source code is governed by a MIT-style * license that can be found in the LICENSE file. */ #include #include -#include #include #include #include #include "allocate.h" +#include "comm.h" #include "parameter.h" #include "progress.h" #include "solver.h" -#include "test.h" #include "timing.h" #include "vtkWriter.h" int main(int argc, char** argv) { - int rank; double timeStart, timeStop; - Parameter params; - Solver solver; + Parameter p; + Solver s; - MPI_Init(&argc, &argv); - MPI_Comm_rank(MPI_COMM_WORLD, &rank); - initParameter(¶ms); + commInit(&s.comm, argc, argv); + initParameter(&p); if (argc != 2) { printf("Usage: %s \n", argv[0]); exit(EXIT_SUCCESS); } - readParameter(¶ms, argv[1]); - if (commIsMaster(&solver.comm)) { - printParameter(¶ms); + readParameter(&p, argv[1]); + commPartition(&s.comm, p.kmax, p.jmax, p.imax); + if (commIsMaster(&s.comm)) { + printParameter(&p); } - initSolver(&solver, ¶ms); + initSolver(&s, &p); #ifndef VERBOSE - initProgress(solver.te); + initProgress(s.te); #endif - double tau = solver.tau; - double te = solver.te; + double tau = s.tau; + double te = s.te; double t = 0.0; - int nt = 0; timeStart = getTimeStamp(); while (t <= te) { - if (tau > 0.0) computeTimestep(&solver); - setBoundaryConditions(&solver); - setSpecialBoundaryCondition(&solver); - computeFG(&solver); - computeRHS(&solver); - // if (nt % 100 == 0) normalizePressure(&solver); - solve(&solver); - adaptUV(&solver); - t += solver.dt; - nt++; + if (tau > 0.0) computeTimestep(&s); + setBoundaryConditions(&s); + setSpecialBoundaryCondition(&s); + computeFG(&s); + computeRHS(&s); + solve(&s); + adaptUV(&s); + t += s.dt; #ifdef VERBOSE - if (commIsMaster(&solver.comm)) { - printf("TIME %f , TIMESTEP %f\n", t, solver.dt); + if (commIsMaster(&s.comm)) { + printf("TIME %f , TIMESTEP %f\n", t, s.dt); } #else printProgress(t); #endif } timeStop = getTimeStamp(); +#ifndef VERBOSE stopProgress(); - if (commIsMaster(&solver.comm)) { +#endif + if (commIsMaster(&s.comm)) { printf("Solution took %.2fs\n", timeStop - timeStart); } - // testInit(&solver); - // commExchange(&solver.comm, solver.p); - // testPrintHalo(&solver, solver.p); - double *pg, *ug, *vg, *wg; - if (commIsMaster(&solver.comm)) { - size_t bytesize = solver.grid.imax * solver.grid.jmax * solver.grid.kmax * - sizeof(double); + if (commIsMaster(&s.comm)) { + size_t bytesize = s.grid.imax * s.grid.jmax * s.grid.kmax * sizeof(double); pg = allocate(64, bytesize); ug = allocate(64, bytesize); @@ -92,26 +84,25 @@ int main(int argc, char** argv) wg = allocate(64, bytesize); } - commCollectResult(&solver.comm, + commCollectResult(&s.comm, ug, vg, wg, pg, - solver.u, - solver.v, - solver.w, - solver.p, - solver.grid.kmax, - solver.grid.jmax, - solver.grid.imax); + s.u, + s.v, + s.w, + s.p, + s.grid.kmax, + s.grid.jmax, + s.grid.imax); - VtkOptions opts = { .grid = solver.grid, .comm = solver.comm }; - vtkOpen(&opts, solver.problem); + VtkOptions opts = { .grid = s.grid, .comm = s.comm }; + vtkOpen(&opts, s.problem); vtkScalar(&opts, "pressure", pg); vtkVector(&opts, "velocity", (VtkVector) { ug, vg, wg }); vtkClose(&opts); - commFree(&solver.comm); - MPI_Finalize(); + commFinalize(&s.comm); return EXIT_SUCCESS; } diff --git a/BasicSolver/3D-mpi/src/progress.h b/BasicSolver/3D-mpi/src/progress.h index 9ef2d96..4d02cdb 100644 --- a/BasicSolver/3D-mpi/src/progress.h +++ b/BasicSolver/3D-mpi/src/progress.h @@ -9,6 +9,6 @@ extern void initProgress(double); extern void printProgress(double); -extern void stopProgress(); +extern void stopProgress(void); #endif diff --git a/BasicSolver/3D-mpi/src/solver.c b/BasicSolver/3D-mpi/src/solver.c index 2a49504..1a6282f 100644 --- a/BasicSolver/3D-mpi/src/solver.c +++ b/BasicSolver/3D-mpi/src/solver.c @@ -104,7 +104,6 @@ void initSolver(Solver* s, Parameter* params) s->tau = params->tau; s->gamma = params->gamma; - commInit(&s->comm, s->grid.kmax, s->grid.jmax, s->grid.imax); /* allocate arrays */ int imaxLocal = s->comm.imaxLocal; int jmaxLocal = s->comm.jmaxLocal; @@ -199,27 +198,36 @@ void solve(Solver* s) double epssq = eps * eps; int it = 0; double res = 1.0; - commExchange(&s->comm, p); + int pass, ksw, jsw, isw; while ((res >= epssq) && (it < itermax)) { - res = 0.0; + ksw = 1; - for (int k = 1; k < kmaxLocal + 1; k++) { - for (int j = 1; j < jmaxLocal + 1; j++) { - for (int i = 1; i < imaxLocal + 1; i++) { + for (pass = 0; pass < 2; pass++) { + jsw = ksw; + commExchange(&s->comm, p); - double r = RHS(i, j, k) - - ((P(i + 1, j, k) - 2.0 * P(i, j, k) + P(i - 1, j, k)) * - idx2 + - (P(i, j + 1, k) - 2.0 * P(i, j, k) + P(i, j - 1, k)) * - idy2 + - (P(i, j, k + 1) - 2.0 * P(i, j, k) + P(i, j, k - 1)) * - idz2); + for (int k = 1; k < kmaxLocal + 1; k++) { + isw = jsw; + for (int j = 1; j < jmaxLocal + 1; j++) { + for (int i = isw; i < imaxLocal + 1; i += 2) { - P(i, j, k) -= (factor * r); - res += (r * r); + double r = + RHS(i, j, k) - + ((P(i + 1, j, k) - 2.0 * P(i, j, k) + P(i - 1, j, k)) * idx2 + + (P(i, j + 1, k) - 2.0 * P(i, j, k) + P(i, j - 1, k)) * + idy2 + + (P(i, j, k + 1) - 2.0 * P(i, j, k) + P(i, j, k - 1)) * + idz2); + + P(i, j, k) -= (factor * r); + res += (r * r); + } + isw = 3 - isw; } + jsw = 3 - jsw; } + ksw = 3 - ksw; } if (commIsBoundary(&s->comm, FRONT)) { diff --git a/BasicSolver/3D-mpi/src/solver.h b/BasicSolver/3D-mpi/src/solver.h index ac9f450..f951ec8 100644 --- a/BasicSolver/3D-mpi/src/solver.h +++ b/BasicSolver/3D-mpi/src/solver.h @@ -33,13 +33,13 @@ typedef struct { Comm comm; } Solver; -void initSolver(Solver*, Parameter*); -void computeRHS(Solver*); -void solve(Solver*); -void normalizePressure(Solver*); -void computeTimestep(Solver*); -void setBoundaryConditions(Solver*); -void setSpecialBoundaryCondition(Solver*); -void computeFG(Solver*); -void adaptUV(Solver*); +extern void initSolver(Solver*, Parameter*); +extern void computeRHS(Solver*); +extern void solve(Solver*); +extern void normalizePressure(Solver*); +extern void computeTimestep(Solver*); +extern void setBoundaryConditions(Solver*); +extern void setSpecialBoundaryCondition(Solver*); +extern void computeFG(Solver*); +extern void adaptUV(Solver*); #endif diff --git a/BasicSolver/3D-mpi/src/timing.c b/BasicSolver/3D-mpi/src/timing.c index c4025a4..e578f18 100644 --- a/BasicSolver/3D-mpi/src/timing.c +++ b/BasicSolver/3D-mpi/src/timing.c @@ -7,18 +7,16 @@ #include #include -double getTimeStamp() +double getTimeStamp(void) { struct timespec ts; clock_gettime(CLOCK_MONOTONIC, &ts); return (double)ts.tv_sec + (double)ts.tv_nsec * 1.e-9; } -double getTimeResolution() +double getTimeResolution(void) { struct timespec ts; clock_getres(CLOCK_MONOTONIC, &ts); return (double)ts.tv_sec + (double)ts.tv_nsec * 1.e-9; } - -double getTimeStamp_() { return getTimeStamp(); } diff --git a/BasicSolver/3D-mpi/src/timing.h b/BasicSolver/3D-mpi/src/timing.h index db95329..58fb5ac 100644 --- a/BasicSolver/3D-mpi/src/timing.h +++ b/BasicSolver/3D-mpi/src/timing.h @@ -7,8 +7,7 @@ #ifndef __TIMING_H_ #define __TIMING_H_ -extern double getTimeStamp(); -extern double getTimeResolution(); -extern double getTimeStamp_(); +extern double getTimeStamp(void); +extern double getTimeResolution(void); #endif // __TIMING_H_ diff --git a/BasicSolver/3D-seq/src/main.c b/BasicSolver/3D-seq/src/main.c index 73f9df2..54cc9c9 100644 --- a/BasicSolver/3D-seq/src/main.c +++ b/BasicSolver/3D-seq/src/main.c @@ -68,18 +68,18 @@ static void createBulkArrays(Solver* s, double* pg, double* ug, double* vg, doub int main(int argc, char** argv) { double timeStart, timeStop; - Parameter params; + Parameter p; Solver s; - initParameter(¶ms); + initParameter(&p); if (argc != 2) { printf("Usage: %s \n", argv[0]); exit(EXIT_SUCCESS); } - readParameter(¶ms, argv[1]); - printParameter(¶ms); - initSolver(&s, ¶ms); + readParameter(&p, argv[1]); + printParameter(&p); + initSolver(&s, &p); #ifndef VERBOSE initProgress(s.te); #endif diff --git a/BasicSolver/3D-seq/src/solver.c b/BasicSolver/3D-seq/src/solver.c index c741ece..f7f154a 100644 --- a/BasicSolver/3D-seq/src/solver.c +++ b/BasicSolver/3D-seq/src/solver.c @@ -41,13 +41,19 @@ static void printConfig(Solver* s) s->grid.xlength, s->grid.ylength, s->grid.zlength); - printf("\tCells (x, y, z): %d, %d, %d\n", s->grid.imax, s->grid.jmax, s->grid.kmax); - printf("\tCell size (dx, dy, dz): %f, %f, %f\n", s->grid.dx, s->grid.dy, s->grid.dz); + printf("\tCells (x, y, z): %d, %d, %d\n", + s->grid.imax, + s->grid.jmax, + s->grid.kmax); + printf("\tCell size (dx, dy, dz): %f, %f, %f\n", + s->grid.dx, + s->grid.dy, + s->grid.dz); printf("Timestep parameters:\n"); printf("\tDefault stepsize: %.2f, Final time %.2f\n", s->dt, s->te); printf("\tdt bound: %.6f\n", s->dtBound); printf("\tTau factor: %.2f\n", s->tau); - printf("Iterative s parameters:\n"); + printf("Iterative parameters:\n"); printf("\tMax iterations: %d\n", s->itermax); printf("\tepsilon (stopping tolerance) : %f\n", s->eps); printf("\tgamma factor: %f\n", s->gamma); @@ -63,6 +69,7 @@ void initSolver(Solver* s, Parameter* params) s->bcTop = params->bcTop; s->bcFront = params->bcFront; s->bcBack = params->bcBack; + s->grid.imax = params->imax; s->grid.jmax = params->jmax; s->grid.kmax = params->kmax; @@ -72,6 +79,7 @@ void initSolver(Solver* s, Parameter* params) s->grid.dx = params->xlength / params->imax; s->grid.dy = params->ylength / params->jmax; s->grid.dz = params->zlength / params->kmax; + s->eps = params->eps; s->omega = params->omg; s->itermax = params->itermax; @@ -129,6 +137,7 @@ void computeRHS(Solver* s) double idy = 1.0 / s->grid.dy; double idz = 1.0 / s->grid.dz; double idt = 1.0 / s->dt; + double* rhs = s->rhs; double* f = s->f; double* g = s->g; @@ -151,6 +160,7 @@ void solve(Solver* s) int imax = s->grid.imax; int jmax = s->grid.jmax; int kmax = s->grid.kmax; + double eps = s->eps; int itermax = s->itermax; double dx2 = s->grid.dx * s->grid.dx; @@ -159,6 +169,7 @@ void solve(Solver* s) double idx2 = 1.0 / dx2; double idy2 = 1.0 / dy2; double idz2 = 1.0 / dz2; + double factor = s->omega * 0.5 * (dx2 * dy2 * dz2) / (dy2 * dz2 + dx2 * dz2 + dx2 * dy2); double* p = s->p; @@ -340,6 +351,7 @@ void computeTimestep(Solver* s) double dx = s->grid.dx; double dy = s->grid.dy; double dz = s->grid.dz; + double umax = maxElement(s, s->u); double vmax = maxElement(s, s->v); double wmax = maxElement(s, s->w); @@ -604,9 +616,9 @@ void computeFG(Solver* s) double gx = s->gx; double gy = s->gy; double gz = s->gz; - double gamma = s->gamma; double dt = s->dt; + double gamma = s->gamma; double inverseRe = 1.0 / s->re; double inverseDx = 1.0 / s->grid.dx; double inverseDy = 1.0 / s->grid.dy;