Initial checkin

This commit is contained in:
Jan Eitzinger 2023-02-05 07:34:23 +01:00
parent b41d8eb7cc
commit 213e633a4d
269 changed files with 41070 additions and 0 deletions

View File

@ -0,0 +1,71 @@
#=======================================================================================
# Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
# All rights reserved.
# Use of this source code is governed by a MIT-style
# license that can be found in the LICENSE file.
#=======================================================================================
#CONFIGURE BUILD SYSTEM
# TARGET and BUILD_DIR are derived from TAG, which is set in config.mk
# (included below); they are therefore kept as recursively expanded variables.
TARGET = exe-$(TAG)
BUILD_DIR = ./$(TAG)
SRC_DIR = ./src
MAKE_DIR = ./
# Prefix for recipe lines; run `make Q=` to echo the executed commands.
Q ?= @

#DO NOT EDIT BELOW
include $(MAKE_DIR)/config.mk
include $(MAKE_DIR)/include_$(TAG).mk
INCLUDES += -I$(SRC_DIR) -I$(BUILD_DIR)

VPATH = $(SRC_DIR)
SRC = $(wildcard $(SRC_DIR)/*.c)
ASM = $(patsubst $(SRC_DIR)/%.c, $(BUILD_DIR)/%.s, $(SRC))
OBJ = $(patsubst $(SRC_DIR)/%.c, $(BUILD_DIR)/%.o, $(SRC))
SOURCES = $(SRC) $(wildcard $(SRC_DIR)/*.h)
CPPFLAGS := $(CPPFLAGS) $(DEFINES) $(OPTIONS) $(INCLUDES)

# Default goal: link all objects into the executable.
# The build directory is an order-only prerequisite: it must exist, but its
# timestamp must not trigger a relink.
${TARGET}: $(OBJ) | $(BUILD_DIR)
	$(info ===> LINKING $(TARGET))
	$(Q)${LINKER} ${LFLAGS} -o $(TARGET) $(OBJ) $(LIBS)

# Compile one source file and write its header dependency file next to it.
# Every object needs the build directory itself (order-only), otherwise a
# parallel build may start compiling before the directory has been created.
$(BUILD_DIR)/%.o: %.c $(MAKE_DIR)/include_$(TAG).mk $(MAKE_DIR)/config.mk | $(BUILD_DIR)
	$(info ===> COMPILE $@)
	$(CC) -c $(CPPFLAGS) $(CFLAGS) $< -o $@
	$(Q)$(GCC) $(CPPFLAGS) -MT $(@:.d=.o) -MM $< > $(BUILD_DIR)/$*.d

# Generate assembler output for one source file.
$(BUILD_DIR)/%.s: %.c | $(BUILD_DIR)
	$(info ===> GENERATE ASM $@)
	$(CC) -S $(CPPFLAGS) $(CFLAGS) $< -o $@

.PHONY: clean distclean tags info asm format

# Remove intermediate build results of the currently selected toolchain.
clean:
	$(info ===> CLEAN)
	@rm -rf $(BUILD_DIR)
	@rm -f tags

# Additionally remove the executable.
distclean: clean
	$(info ===> DIST CLEAN)
	@rm -f $(TARGET)

# Show the compiler flags and the compiler version.
info:
	$(info $(CFLAGS))
	$(Q)$(CC) $(VERSION)

asm: $(ASM) | $(BUILD_DIR)

tags:
	$(info ===> GENERATE TAGS)
	$(Q)ctags -R

format:
	@for src in $(SOURCES) ; do \
		echo "Formatting $$src" ; \
		clang-format -i $$src ; \
	done
	@echo "Done"

# -p: do not fail if the directory already exists.
$(BUILD_DIR):
	@mkdir -p $(BUILD_DIR)

# Pull in the generated header dependencies; missing files are ignored.
-include $(OBJ:.o=.d)

View File

@ -0,0 +1,48 @@
# C source skeleton
## Build
1. Configure the toolchain and additional options in `config.mk`:
```
# Supported: GCC, CLANG, ICC
TAG ?= GCC
ENABLE_OPENMP ?= false
OPTIONS += -DARRAY_ALIGNMENT=64
#OPTIONS += -DVERBOSE_AFFINITY
#OPTIONS += -DVERBOSE_DATASIZE
#OPTIONS += -DVERBOSE_TIMER
```
The verbosity options enable detailed output about affinity settings, allocation sizes and timer resolution.
2. Build with:
```
make
```
You can build with multiple toolchains in the same directory, but note that the Makefile only acts on the toolchain currently selected by `TAG`.
Intermediate build results are located in the `<TOOLCHAIN>` directory.
To output the executed commands use:
```
make Q=
```
3. Clean up with:
```
make clean
```
to clean intermediate build results.
```
make distclean
```
to clean intermediate build results and binary.
4. (Optional) Generate assembler:
```
make asm
```
The assembler files will also be located in the `<TOOLCHAIN>` directory.

View File

@ -0,0 +1,46 @@
#==============================================================================
# Laminar Canal Flow
#==============================================================================
# Problem specific Data:
# ---------------------
name canal # name of flow setup
bcLeft 3 # flags for boundary conditions
bcRight 3 # 1 = no-slip 3 = outflow
bcBottom 1 # 2 = free-slip 4 = periodic
bcTop 1 #
gx 0.0 # Body forces (e.g. gravity)
gy 0.0 #
re 100.0 # Reynolds number
u_init 1.0 # initial value for velocity in x-direction
v_init 0.0 # initial value for velocity in y-direction
p_init 0.0 # initial value for pressure
# Geometry Data:
# -------------
xlength 30.0 # domain size in x-direction
ylength 4.0 # domain size in y-direction
imax 200 # number of interior cells in x-direction
jmax 50 # number of interior cells in y-direction
# Time Data:
# ---------
te 100.0 # final time
dt 0.02 # time stepsize
tau 0.5 # safety factor for time step size control (<=0: use constant dt)
# Pressure Iteration Data:
# -----------------------
itermax 500 # maximum number of pressure iterations per time step
eps 0.00001 # stopping tolerance for pressure iteration
omg 1.8 # relaxation parameter for SOR iteration
gamma 0.9 # upwind differencing factor gamma
#===============================================================================

View File

@ -0,0 +1,10 @@
# Supported: GCC, CLANG, ICC
# TAG selects the toolchain: the Makefile includes include_$(TAG).mk and
# places intermediate build results in ./$(TAG).
TAG ?= CLANG
# When set to true, the toolchain include file adds its OpenMP flag.
ENABLE_OPENMP ?= false

#Feature options
# OPTIONS is appended to CPPFLAGS by the Makefile.
OPTIONS += -DARRAY_ALIGNMENT=64
#OPTIONS += -DVERBOSE
#OPTIONS += -DVERBOSE_AFFINITY
#OPTIONS += -DVERBOSE_DATASIZE
#OPTIONS += -DVERBOSE_TIMER

View File

@ -0,0 +1,46 @@
#==============================================================================
# Driven Cavity
#==============================================================================
# Problem specific Data:
# ---------------------
name dcavity # name of flow setup
bcLeft 1 # flags for boundary conditions
bcRight 1 # 1 = no-slip 3 = outflow
bcBottom 1 # 2 = free-slip 4 = periodic
bcTop 1 #
gx 0.0 # Body forces (e.g. gravity)
gy 0.0 #
re 500.0 # Reynolds number
u_init 0.0 # initial value for velocity in x-direction
v_init 0.0 # initial value for velocity in y-direction
p_init 0.0 # initial value for pressure
# Geometry Data:
# -------------
xlength 1.0 # domain size in x-direction
ylength 1.0 # domain size in y-direction
imax 100 # number of interior cells in x-direction
jmax 100 # number of interior cells in y-direction
# Time Data:
# ---------
te 25.0 # final time
dt 0.02 # time stepsize
tau 0.5 # safety factor for time step size control (<=0: use constant dt)
# Pressure Iteration Data:
# -----------------------
itermax 1000 # maximum number of pressure iterations per time step
eps 0.001 # stopping tolerance for pressure iteration
omg 1.7 # relaxation parameter for SOR iteration
gamma 0.9 # upwind differencing factor gamma
#===============================================================================

View File

@ -0,0 +1,16 @@
# Compiler used for compiling and, through LINKER, for linking.
CC = mpicc
# Compiler the Makefile invokes with -MM to generate the .d dependency files.
GCC = cc
LINKER = $(CC)

# OPENMP and LIBS are only assigned when ENABLE_OPENMP is set to true.
ifeq ($(ENABLE_OPENMP),true)
OPENMP = -fopenmp
#OPENMP = -Xpreprocessor -fopenmp #required on Macos with homebrew libomp
LIBS = # -lomp
endif

# Option passed to $(CC) by the `info` target.
VERSION = --version
CFLAGS = -Ofast -std=c99 $(OPENMP)
#CFLAGS = -Ofast -fnt-store=aggressive -std=c99 $(OPENMP) #AMD CLANG
LFLAGS = $(OPENMP)
DEFINES = -D_GNU_SOURCE# -DDEBUG
INCLUDES = -I/usr/local/include

View File

@ -0,0 +1,14 @@
# The sources include <mpi.h> and call MPI routines while INCLUDES and LIBS
# are empty, so compiling and linking must go through the MPI compiler
# wrapper (as the other toolchain files do) instead of plain gcc.
CC = mpicc
# Compiler the Makefile invokes with -MM to generate the .d dependency files.
GCC = gcc
LINKER = $(CC)

# OPENMP is only assigned when ENABLE_OPENMP is set to true.
ifeq ($(ENABLE_OPENMP),true)
OPENMP = -fopenmp
endif

# Option passed to $(CC) by the `info` target.
VERSION = --version
CFLAGS = -Ofast -ffreestanding -std=c99 $(OPENMP)
LFLAGS = $(OPENMP)
DEFINES = -D_GNU_SOURCE
INCLUDES =
LIBS =

View File

@ -0,0 +1,14 @@
# Compiler used for compiling and, through LINKER, for linking.
CC = mpiicc
# Compiler the Makefile invokes with -MM to generate the .d dependency files.
GCC = gcc
LINKER = $(CC)

# OPENMP is only assigned when ENABLE_OPENMP is set to true.
ifeq ($(ENABLE_OPENMP),true)
OPENMP = -qopenmp
endif

# Option passed to $(CC) by the `info` target.
VERSION = --version
CFLAGS = -O3 -xHost -qopt-zmm-usage=high -std=c99 $(OPENMP)
LFLAGS = $(OPENMP)
DEFINES = -D_GNU_SOURCE
INCLUDES =
LIBS =

View File

@ -0,0 +1,61 @@
/*
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
* All rights reserved.
* Use of this source code is governed by a MIT-style
* license that can be found in the LICENSE file.
*/
#ifdef __linux__
#ifdef _OPENMP
#include <pthread.h>
#include <sched.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/syscall.h>
#include <sys/types.h>
#include <unistd.h>
#define MAX_NUM_THREADS 128
#define gettid() syscall(SYS_gettid)
/*
 * Returns the lowest index in [0, MAX_NUM_THREADS) that is set in cpu_set.
 * If none of these indices is set, MAX_NUM_THREADS is returned.
 */
static int getProcessorID(cpu_set_t* cpu_set)
{
    int id = 0;

    while (id < MAX_NUM_THREADS && !CPU_ISSET(id, cpu_set)) {
        id++;
    }

    return id;
}
/*
 * Queries the affinity mask of the calling thread (identified by gettid())
 * and returns the first processor index found in that mask.
 */
int affinity_getProcessorId()
{
    cpu_set_t mask;

    CPU_ZERO(&mask);
    sched_getaffinity(gettid(), sizeof(mask), &mask);

    return getProcessorID(&mask);
}
/*
 * Restricts the calling thread to the single processor processorId.
 * The result of pthread_setaffinity_np is not checked.
 */
void affinity_pinThread(int processorId)
{
    cpu_set_t mask;

    CPU_ZERO(&mask);
    CPU_SET(processorId, &mask);

    pthread_t self = pthread_self();
    pthread_setaffinity_np(self, sizeof(mask), &mask);
}
/*
 * Sets the affinity mask passed to sched_setaffinity for pid 0 to the single
 * processor processorId. The result of the call is not checked.
 */
void affinity_pinProcess(int processorId)
{
    cpu_set_t mask;

    CPU_ZERO(&mask);
    CPU_SET(processorId, &mask);
    sched_setaffinity(0, sizeof(mask), &mask);
}
#endif /*_OPENMP*/
#endif /*__linux__*/

View File

@ -0,0 +1,14 @@
/*
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
* All rights reserved.
* Use of this source code is governed by a MIT-style
* license that can be found in the LICENSE file.
*/
#ifndef AFFINITY_H
#define AFFINITY_H
/* Returns the first processor index in the calling thread's affinity mask. */
extern int affinity_getProcessorId();
/* Pins the process to the given processor index. */
extern void affinity_pinProcess(int);
/* Pins the calling thread to the given processor index. */
extern void affinity_pinThread(int);
#endif /*AFFINITY_H*/

View File

@ -0,0 +1,35 @@
/*
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
* All rights reserved.
* Use of this source code is governed by a MIT-style
* license that can be found in the LICENSE file.
*/
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
/*
 * Allocates bytesize bytes whose address is aligned to alignment, using
 * posix_memalign.
 *
 * On any failure a message is written to stderr and the program terminates
 * with EXIT_FAILURE, so the returned pointer is never NULL.
 */
void* allocate(int alignment, size_t bytesize)
{
    /* Start from NULL: posix_memalign does not promise to set the pointer on
     * failure, and the check below must not read an indeterminate value. */
    void* ptr     = NULL;
    int errorCode = posix_memalign(&ptr, alignment, bytesize);

    if (errorCode == EINVAL) {
        fprintf(stderr, "Error: Alignment parameter is not a power of two\n");
        exit(EXIT_FAILURE);
    }

    if (errorCode == ENOMEM) {
        fprintf(stderr, "Error: Insufficient memory to fulfill the request\n");
        exit(EXIT_FAILURE);
    }

    /* Any other error code, or a NULL result, is treated as a failure too. */
    if (errorCode != 0 || ptr == NULL) {
        fprintf(stderr, "Error: posix_memalign failed!\n");
        exit(EXIT_FAILURE);
    }

    return ptr;
}

View File

@ -0,0 +1,13 @@
/*
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
* All rights reserved.
* Use of this source code is governed by a MIT-style
* license that can be found in the LICENSE file.
*/
#ifndef __ALLOCATE_H_
#define __ALLOCATE_H_
#include <stdlib.h>
/* Returns bytesize bytes of memory aligned to alignment. */
extern void* allocate(int alignment, size_t bytesize);
#endif

View File

@ -0,0 +1,54 @@
/*
* =======================================================================================
*
* Author: Jan Eitzinger (je), jan.eitzinger@fau.de
* Copyright (c) 2020 RRZE, University Erlangen-Nuremberg
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"), to
* deal in the Software without restriction, including without limitation the
* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
* sell copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*
* =======================================================================================
*/
#ifndef LIKWID_MARKERS_H
#define LIKWID_MARKERS_H
/*
 * Wrapper macros around the likwid marker calls. With LIKWID_PERFMON defined
 * each macro expands to the corresponding likwid_marker* function call;
 * otherwise every macro expands to nothing.
 */
#ifdef LIKWID_PERFMON
#include <likwid.h>
#define LIKWID_MARKER_INIT likwid_markerInit()
#define LIKWID_MARKER_THREADINIT likwid_markerThreadInit()
#define LIKWID_MARKER_SWITCH likwid_markerNextGroup()
#define LIKWID_MARKER_REGISTER(regionTag) likwid_markerRegisterRegion(regionTag)
#define LIKWID_MARKER_START(regionTag) likwid_markerStartRegion(regionTag)
#define LIKWID_MARKER_STOP(regionTag) likwid_markerStopRegion(regionTag)
#define LIKWID_MARKER_CLOSE likwid_markerClose()
#define LIKWID_MARKER_RESET(regionTag) likwid_markerResetRegion(regionTag)
#define LIKWID_MARKER_GET(regionTag, nevents, events, time, count) \
    likwid_markerGetRegion(regionTag, nevents, events, time, count)
#else /* LIKWID_PERFMON */
/* Empty replacements so instrumented code compiles without likwid. */
#define LIKWID_MARKER_INIT
#define LIKWID_MARKER_THREADINIT
#define LIKWID_MARKER_SWITCH
#define LIKWID_MARKER_REGISTER(regionTag)
#define LIKWID_MARKER_START(regionTag)
#define LIKWID_MARKER_STOP(regionTag)
#define LIKWID_MARKER_CLOSE
#define LIKWID_MARKER_GET(regionTag, nevents, events, time, count)
#define LIKWID_MARKER_RESET(regionTag)
#endif /* LIKWID_PERFMON */
#endif /*LIKWID_MARKERS_H*/

View File

@ -0,0 +1,79 @@
/*
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
* All rights reserved.
* Use of this source code is governed by a MIT-style
* license that can be found in the LICENSE file.
*/
#include <float.h>
#include <limits.h>
#include <mpi.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include "parameter.h"
#include "progress.h"
#include "solver.h"
#include "timing.h"
#include <mpi.h>
/*
 * Program entry point.
 *
 * Expects exactly one argument, the parameter file. Reads the parameters,
 * initializes the solver and advances the simulation in time until the final
 * time te is passed, then gathers and writes the result.
 */
int main(int argc, char** argv)
{
    int rank;
    double S, E;
    Parameter params;
    Solver solver;

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    initParameter(&params);

    if (argc != 2) {
        /* Print the usage once instead of once per rank, and shut MPI down
         * properly before leaving. */
        if (rank == 0) {
            printf("Usage: %s <configFile>\n", argv[0]);
        }
        MPI_Finalize();
        return EXIT_SUCCESS;
    }

    readParameter(&params, argv[1]);
    if (rank == 0) {
        printParameter(&params);
    }
    initSolver(&solver, &params);
    initProgress(solver.te);

    double tau = solver.tau;
    double te  = solver.te;
    double t   = 0.0;

    S = getTimeStamp();
    while (t <= te) {
        /* A non-positive tau keeps the time step from the parameter file. */
        if (tau > 0.0) {
            computeTimestep(&solver);
        }

        setBoundaryConditions(&solver);
        setSpecialBoundaryCondition(&solver);
        computeFG(&solver);
        computeRHS(&solver);
        solve(&solver);
        adaptUV(&solver);
        /* exit(EXIT_SUCCESS); */
        t += solver.dt;

#ifdef VERBOSE
        if (rank == 0) {
            printf("TIME %f , TIMESTEP %f\n", t, solver.dt);
        }
#else
        printProgress(t);
#endif
    }
    E = getTimeStamp();
    stopProgress();

    if (rank == 0) {
        printf("Solution took %.2fs\n", E - S);
    }
    collectResult(&solver);

    MPI_Finalize();
    return EXIT_SUCCESS;
}

View File

@ -0,0 +1,111 @@
/*
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
* All rights reserved. This file is part of nusif-solver.
* Use of this source code is governed by a MIT style
* license that can be found in the LICENSE file.
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "parameter.h"
#include "util.h"
#define MAXLINE 4096
/*
 * Fills param with default values.
 *
 * Every member of Parameter is assigned, so that members missing from the
 * parameter file never hold indeterminate values when they are printed or
 * copied into the solver.
 */
void initParameter(Parameter* param)
{
    param->xlength = 1.0;
    param->ylength = 1.0;
    param->imax    = 100;
    param->jmax    = 100;
    param->itermax = 1000;
    param->eps     = 0.0001;
    param->omg     = 1.7;
    param->re      = 100.0;
    param->gamma   = 0.9;
    param->tau     = 0.5;

    /* Members without a meaningful default: neutral values. te and dt are
     * expected to be supplied by the parameter file. */
    param->te     = 0.0;
    param->dt     = 0.0;
    param->gx     = 0.0;
    param->gy     = 0.0;
    param->u_init = 0.0;
    param->v_init = 0.0;
    param->p_init = 0.0;

    /* Empty name instead of an indeterminate pointer; it is only read, never
     * written through, in the code visible here. */
    param->name = "";

    /* 1 is the no-slip flag according to the parameter file documentation. */
    param->bcLeft   = 1;
    param->bcRight  = 1;
    param->bcBottom = 1;
    param->bcTop    = 1;
}
/*
 * Reads "key value" pairs from the parameter file filename into param.
 *
 * Everything from a '#' to the end of the line is a comment. Lines without
 * both a key and a value, and unknown keys, are ignored. Terminates the
 * program with EXIT_FAILURE if the file cannot be opened.
 */
void readParameter(Parameter* param, const char* filename)
{
    FILE* fp = fopen(filename, "r");
    char line[MAXLINE];
    /* Blank, tab and line ending all separate tokens, so tab-separated files
     * work and a value at the end of a line carries no trailing newline. */
    const char* delimiters = " \t\r\n";

    if (!fp) {
        fprintf(stderr, "Could not open parameter file: %s\n", filename);
        exit(EXIT_FAILURE);
    }

/* Assign f(val) to param->p if the key token is exactly the member name. */
#define PARSE_PARAM(p, f)                                                                \
    if (strcmp(tok, #p) == 0) {                                                          \
        param->p = f(val);                                                               \
    }
#define PARSE_STRING(p) PARSE_PARAM(p, strdup)
#define PARSE_INT(p)    PARSE_PARAM(p, atoi)
#define PARSE_REAL(p)   PARSE_PARAM(p, atof)

    /* Loop on the fgets result rather than on feof(), which only becomes
     * true after a read has already failed. */
    while (fgets(line, MAXLINE, fp) != NULL) {
        /* Cut the line at the first comment character. */
        line[strcspn(line, "#")] = '\0';

        char* tok = strtok(line, delimiters);
        char* val = strtok(NULL, delimiters);

        if (tok != NULL && val != NULL) {
            PARSE_REAL(xlength);
            PARSE_REAL(ylength);
            PARSE_INT(imax);
            PARSE_INT(jmax);
            PARSE_INT(itermax);
            PARSE_REAL(eps);
            PARSE_REAL(omg);
            PARSE_REAL(re);
            PARSE_REAL(tau);
            PARSE_REAL(gamma);
            PARSE_REAL(dt);
            PARSE_REAL(te);
            PARSE_REAL(gx);
            PARSE_REAL(gy);
            PARSE_STRING(name);
            PARSE_INT(bcLeft);
            PARSE_INT(bcRight);
            PARSE_INT(bcBottom);
            PARSE_INT(bcTop);
            PARSE_REAL(u_init);
            PARSE_REAL(v_init);
            PARSE_REAL(p_init);
        }
    }

    fclose(fp);
}
/*
 * Prints the parameter set param to stdout in a human readable form.
 */
void printParameter(Parameter* param)
{
    printf("Parameters for %s\n", param->name);
    printf("Boundary conditions Left:%d Right:%d Bottom:%d Top:%d\n",
        param->bcLeft,
        param->bcRight,
        param->bcBottom,
        param->bcTop);
    printf("\tReynolds number: %.2f\n", param->re);
    printf("\tInit arrays: U:%.2f V:%.2f P:%.2f\n",
        param->u_init,
        param->v_init,
        param->p_init);
    printf("Geometry data:\n");
    printf("\tDomain box size (x, y): %.2f, %.2f\n", param->xlength, param->ylength);
    printf("\tCells (x, y): %d, %d\n", param->imax, param->jmax);
    printf("Timestep parameters:\n");
    printf("\tDefault stepsize: %.2f, Final time %.2f\n", param->dt, param->te);
    printf("\tTau factor: %.2f\n", param->tau);
    printf("Iterative solver parameters:\n");
    printf("\tMax iterations: %d\n", param->itermax);
    printf("\tepsilon (stopping tolerance) : %f\n", param->eps);
    /* gamma is the upwind differencing factor, not a stopping tolerance;
     * the label matches the one used by the solver's own configuration dump. */
    printf("\tgamma factor: %f\n", param->gamma);
    printf("\tomega (SOR relaxation): %f\n", param->omg);
}

View File

@ -0,0 +1,26 @@
/*
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
* All rights reserved. This file is part of nusif-solver.
* Use of this source code is governed by a MIT style
* license that can be found in the LICENSE file.
*/
#ifndef __PARAMETER_H_
#define __PARAMETER_H_
/* Run configuration, filled from the parameter file. */
typedef struct {
    double xlength, ylength;              /* domain size in x- and y-direction */
    int imax, jmax;                       /* number of interior cells in x- and y-direction */
    int itermax;                          /* maximum number of pressure iterations per time step */
    double eps, omg;                      /* stopping tolerance, SOR relaxation parameter */
    double re, tau, gamma;                /* Reynolds number, time step safety factor, upwind factor */
    double te, dt;                        /* final time, time step size */
    double gx, gy;                        /* body forces */
    char* name;                           /* name of the flow setup */
    int bcLeft, bcRight, bcBottom, bcTop; /* boundary condition flags */
    double u_init, v_init, p_init;        /* initial values for velocities and pressure */
} Parameter;

/* Sets default values. */
void initParameter(Parameter*);
/* Reads the parameter file with the given name into the structure. */
void readParameter(Parameter*, const char*);
/* Prints the parameters to stdout. */
void printParameter(Parameter*);
#endif

View File

@ -0,0 +1,60 @@
/*
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
* All rights reserved. This file is part of nusif-solver.
* Use of this source code is governed by a MIT style
* license that can be found in the LICENSE file.
*/
#include <math.h>
#include <mpi.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "progress.h"
static double _end;
static int _current;
static int _rank = -1;
/*
 * Initializes the progress bar for a run that ends at time end and, on
 * rank 0, draws the empty bar.
 */
void initProgress(double end)
{
    MPI_Comm_rank(MPI_COMM_WORLD, &_rank);
    _end     = end;
    _current = 0;

    if (_rank == 0) {
        /* Ten blanks: same width as the bar redrawn by printProgress, so the
         * closing bracket does not jump on the first update. */
        printf("[          ]");
        fflush(stdout);
    }
}
/*
 * Redraws the progress bar on rank 0 when the completed fraction
 * current / _end, rounded to tenths, has grown since the last redraw.
 * stdout is flushed on every call on rank 0.
 */
void printProgress(double current)
{
    if (_rank != 0) {
        return;
    }

    int filled = (int)rint((current / _end) * 10.0);

    if (filled > _current) {
        char bar[11];

        _current = filled;
        /* Ten cells: '#' for completed tenths, blank for the remainder. */
        for (int cell = 0; cell < 10; cell++) {
            bar[cell] = (cell < _current) ? '#' : ' ';
        }
        bar[10] = '\0';

        printf("\r[%s]", bar);
    }

    fflush(stdout);
}
/*
 * Terminates the progress bar line on rank 0; a no-op on all other ranks.
 */
void stopProgress()
{
    if (_rank != 0) {
        return;
    }

    printf("\n");
    fflush(stdout);
}

View File

@ -0,0 +1,14 @@
/*
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
* All rights reserved.
* Use of this source code is governed by a MIT-style
* license that can be found in the LICENSE file.
*/
#ifndef __PROGRESS_H_
#define __PROGRESS_H_
/* Sets the end value of the progress bar and draws the empty bar. */
extern void initProgress(double);
/* Updates the progress bar for the given current value. */
extern void printProgress(double);
/* Finishes the progress bar output. */
extern void stopProgress();
#endif

View File

@ -0,0 +1,689 @@
/*
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
* All rights reserved. This file is part of nusif-solver.
* Use of this source code is governed by a MIT style
* license that can be found in the LICENSE file.
*/
#include <float.h>
#include <math.h>
#include <mpi.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "allocate.h"
#include "parameter.h"
#include "solver.h"
#include "util.h"
/*
 * 2D access macros for the row-major grid arrays: element (i, j) is stored at
 * offset j * (imax + 2) + i. Each macro requires a local variable imax and a
 * local pointer with the lower-case array name (p, f, g, u, v, rhs) in scope.
 */
#define P(i, j)   p[(j) * (imax + 2) + (i)]
#define F(i, j)   f[(j) * (imax + 2) + (i)]
#define G(i, j)   g[(j) * (imax + 2) + (i)]
#define U(i, j)   u[(j) * (imax + 2) + (i)]
#define V(i, j)   v[(j) * (imax + 2) + (i)]
#define RHS(i, j) rhs[(j) * (imax + 2) + (i)]
/*
 * Returns how many of N items are assigned to rank when they are distributed
 * over size ranks: every rank gets N / size items and the first N % size
 * ranks get one additional item.
 */
static int sizeOfRank(int rank, int size, int N)
{
    int share = N / size;

    if (rank < N % size) {
        share++;
    }

    return share;
}
/*
 * Debug helper: prints the local part of grid, including the two extra rows
 * and columns, rank after rank. A barrier after each rank's turn keeps the
 * ranks in step.
 */
static void print(Solver* solver, double* grid)
{
    int rowLength = solver->imax + 2;

    for (int turn = 0; turn < solver->size; turn++) {
        if (turn == solver->rank) {
            printf("### RANK %d "
                   "#######################################################\n",
                solver->rank);

            for (int row = 0; row < solver->jmaxLocal + 2; row++) {
                printf("%02d: ", row);
                for (int col = 0; col < rowLength; col++) {
                    printf("%12.8f ", grid[row * rowLength + col]);
                }
                printf("\n");
            }
            fflush(stdout);
        }
        MPI_Barrier(MPI_COMM_WORLD);
    }
}
/*
 * Exchanges one row of imax values with each existing neighbor rank:
 * the last interior row goes to rank + 1 and the first interior row to
 * rank - 1; the received rows are stored in row jmaxLocal + 1 and row 0.
 * Returns after all posted sends and receives have completed.
 */
static void exchange(Solver* solver, double* grid)
{
    const int rank      = solver->rank;
    const int count     = solver->imax;
    const int rowLength = solver->imax + 2;
    const int lastRow   = solver->jmaxLocal;

    MPI_Request requests[4];
    for (int k = 0; k < 4; k++) {
        requests[k] = MPI_REQUEST_NULL;
    }

    /* exchange ghost cells with top neighbor */
    if (rank + 1 < solver->size) {
        int top         = rank + 1;
        double* sendRow = grid + lastRow * rowLength + 1;
        double* recvRow = grid + (lastRow + 1) * rowLength + 1;

        MPI_Isend(sendRow, count, MPI_DOUBLE, top, 1, MPI_COMM_WORLD, &requests[0]);
        MPI_Irecv(recvRow, count, MPI_DOUBLE, top, 2, MPI_COMM_WORLD, &requests[1]);
    }

    /* exchange ghost cells with bottom neighbor */
    if (rank > 0) {
        int bottom      = rank - 1;
        double* sendRow = grid + rowLength + 1;
        double* recvRow = grid + 1;

        MPI_Isend(sendRow, count, MPI_DOUBLE, bottom, 2, MPI_COMM_WORLD, &requests[2]);
        MPI_Irecv(recvRow, count, MPI_DOUBLE, bottom, 1, MPI_COMM_WORLD, &requests[3]);
    }

    MPI_Waitall(4, requests, MPI_STATUSES_IGNORE);
}
/*
 * Passes the last interior row of G upwards: every rank with a top neighbor
 * sends row jmaxLocal to rank + 1, every rank with a bottom neighbor receives
 * that row into its row 0.
 */
static void shift(Solver* solver)
{
    const int rank      = solver->rank;
    const int count     = solver->imax;
    const int rowLength = solver->imax + 2;
    double* g           = solver->g;

    MPI_Request requests[2];
    requests[0] = MPI_REQUEST_NULL;
    requests[1] = MPI_REQUEST_NULL;

    /* shift G */
    /* receive ghost cells from bottom neighbor */
    if (rank > 0) {
        MPI_Irecv(g + 1, count, MPI_DOUBLE, rank - 1, 0, MPI_COMM_WORLD, &requests[0]);
    }

    /* send ghost cells to top neighbor */
    if (rank + 1 < solver->size) {
        double* sendRow = g + solver->jmaxLocal * rowLength + 1;
        MPI_Isend(sendRow, count, MPI_DOUBLE, rank + 1, 0, MPI_COMM_WORLD, &requests[1]);
    }

    MPI_Waitall(2, requests, MPI_STATUSES_IGNORE);
}
/*
 * Gathers the interior rows of p, u and v from all ranks on rank 0 and hands
 * the assembled arrays to writeResult.
 *
 * Every rank contributes jmaxLocal rows of imax + 2 values, starting at its
 * local row 1. Must be called by all ranks.
 */
void collectResult(Solver* solver)
{
    double* Pall = NULL;
    double* Uall = NULL;
    double* Vall = NULL;
    /* Only significant on the root, but initialized everywhere so that no
     * indeterminate pointer is passed to MPI_Gatherv on the other ranks. */
    int* rcvCounts = NULL;
    int* displs    = NULL;
    int rowLength  = solver->imax + 2;

    if (solver->rank == 0) {
        size_t bytesize = (size_t)rowLength * (size_t)(solver->jmax + 2) * sizeof(double);

        Pall      = allocate(64, bytesize);
        Uall      = allocate(64, bytesize);
        Vall      = allocate(64, bytesize);
        rcvCounts = (int*)malloc(solver->size * sizeof(int));
        displs    = (int*)malloc(solver->size * sizeof(int));

        if (rcvCounts == NULL || displs == NULL) {
            fprintf(stderr, "Error: Insufficient memory to fulfill the request\n");
            exit(EXIT_FAILURE);
        }

        rcvCounts[0] = solver->jmaxLocal * rowLength;
        displs[0]    = 0;
        int cursor   = rcvCounts[0];

        for (int i = 1; i < solver->size; i++) {
            rcvCounts[i] = sizeOfRank(i, solver->size, solver->jmax) * rowLength;
            displs[i]    = cursor;
            cursor += rcvCounts[i];
        }
    }

    int cnt = solver->jmaxLocal * rowLength;

    /* Skip local row 0 in each send buffer. */
    double* sendbuffer = solver->p + rowLength;
    MPI_Gatherv(sendbuffer,
        cnt,
        MPI_DOUBLE,
        Pall,
        rcvCounts,
        displs,
        MPI_DOUBLE,
        0,
        MPI_COMM_WORLD);

    sendbuffer = solver->u + rowLength;
    MPI_Gatherv(sendbuffer,
        cnt,
        MPI_DOUBLE,
        Uall,
        rcvCounts,
        displs,
        MPI_DOUBLE,
        0,
        MPI_COMM_WORLD);

    sendbuffer = solver->v + rowLength;
    MPI_Gatherv(sendbuffer,
        cnt,
        MPI_DOUBLE,
        Vall,
        rcvCounts,
        displs,
        MPI_DOUBLE,
        0,
        MPI_COMM_WORLD);

    if (solver->rank == 0) {
        writeResult(solver, Pall, Uall, Vall);

        /* NOTE(review): assumes writeResult does not keep these pointers
         * after it returns - confirm against its implementation. */
        free(Pall);
        free(Uall);
        free(Vall);
        free(rcvCounts);
        free(displs);
    }
}
/*
 * Prints the solver configuration on rank 0, followed by one short block per
 * rank with its rank number and local domain size.
 */
static void printConfig(Solver* solver)
{
    const int myRank = solver->rank;

    if (myRank == 0) {
        printf("Parameters for #%s#\n", solver->problem);
        printf("Boundary conditions Left:%d Right:%d Bottom:%d Top:%d\n",
            solver->bcLeft,
            solver->bcRight,
            solver->bcBottom,
            solver->bcTop);
        printf("\tReynolds number: %.2f\n", solver->re);
        printf("\tGx Gy: %.2f %.2f\n", solver->gx, solver->gy);

        printf("Geometry data:\n");
        printf("\tDomain box size (x, y): %.2f, %.2f\n",
            solver->xlength,
            solver->ylength);
        printf("\tCells (x, y): %d, %d\n", solver->imax, solver->jmax);

        printf("Timestep parameters:\n");
        printf("\tDefault stepsize: %.2f, Final time %.2f\n", solver->dt, solver->te);
        printf("\tdt bound: %.6f\n", solver->dtBound);
        printf("\tTau factor: %.2f\n", solver->tau);

        printf("Iterative solver parameters:\n");
        printf("\tMax iterations: %d\n", solver->itermax);
        printf("\tepsilon (stopping tolerance) : %f\n", solver->eps);
        printf("\tgamma factor: %f\n", solver->gamma);
        printf("\tomega (SOR relaxation): %f\n", solver->omega);

        printf("Communication parameters:\n");
    }

    for (int turn = 0; turn < solver->size; turn++) {
        if (turn != myRank) {
            continue;
        }
        printf("\tRank %d of %d\n", myRank, solver->size);
        printf("\tLocal domain size: %dx%d\n", solver->imax, solver->jmaxLocal);
        fflush(stdout);
    }
}
/*
 * Initializes solver from params.
 *
 * Queries rank and size from MPI_COMM_WORLD, copies the parameters, derives
 * the local number of rows, the mesh widths and the time step bound, and
 * allocates and initializes the local arrays u, v, p, rhs, f and g with
 * (imax + 2) * (jmaxLocal + 2) elements each.
 */
void initSolver(Solver* solver, Parameter* params)
{
    MPI_Comm_rank(MPI_COMM_WORLD, &(solver->rank));
    MPI_Comm_size(MPI_COMM_WORLD, &(solver->size));

    solver->problem   = params->name;
    solver->bcLeft    = params->bcLeft;
    solver->bcRight   = params->bcRight;
    solver->bcBottom  = params->bcBottom;
    solver->bcTop     = params->bcTop;
    solver->imax      = params->imax;
    solver->jmax      = params->jmax;
    solver->jmaxLocal = sizeOfRank(solver->rank, solver->size, solver->jmax);
    solver->xlength   = params->xlength;
    solver->ylength   = params->ylength;
    solver->dx        = params->xlength / params->imax;
    solver->dy        = params->ylength / params->jmax;
    solver->eps       = params->eps;
    solver->omega     = params->omg;
    solver->itermax   = params->itermax;
    solver->re        = params->re;
    solver->gx        = params->gx;
    solver->gy        = params->gy;
    solver->dt        = params->dt;
    solver->te        = params->te;
    solver->tau       = params->tau;
    solver->gamma     = params->gamma;

    int imax      = solver->imax;
    int jmaxLocal = solver->jmaxLocal;

    /* Element count and byte size are computed in size_t; the product of the
     * int extents could otherwise overflow before being widened. */
    size_t count    = (size_t)(imax + 2) * (size_t)(jmaxLocal + 2);
    size_t bytesize = count * sizeof(double);

    solver->u   = allocate(64, bytesize);
    solver->v   = allocate(64, bytesize);
    solver->p   = allocate(64, bytesize);
    solver->rhs = allocate(64, bytesize);
    solver->f   = allocate(64, bytesize);
    solver->g   = allocate(64, bytesize);

    for (size_t i = 0; i < count; i++) {
        solver->u[i]   = params->u_init;
        solver->v[i]   = params->v_init;
        solver->p[i]   = params->p_init;
        solver->rhs[i] = 0.0;
        solver->f[i]   = 0.0;
        solver->g[i]   = 0.0;
    }

    double dx          = solver->dx;
    double dy          = solver->dy;
    double inv_sqr_sum = 1.0 / (dx * dx) + 1.0 / (dy * dy);
    solver->dtBound    = 0.5 * solver->re * 1.0 / inv_sqr_sum;

#ifdef VERBOSE
    printConfig(solver);
#endif
}
/*
 * Computes the right hand side of the pressure equation for all interior
 * cells from F and G. The G row needed from the bottom neighbor is obtained
 * with shift() before the sweep.
 */
void computeRHS(Solver* solver)
{
    int imax      = solver->imax; /* used by the access macros */
    int jmaxLocal = solver->jmaxLocal;

    const double invDx = 1.0 / solver->dx;
    const double invDy = 1.0 / solver->dy;
    const double invDt = 1.0 / solver->dt;

    double* rhs = solver->rhs;
    double* f   = solver->f;
    double* g   = solver->g;

    shift(solver);

    for (int j = 1; j <= jmaxLocal; j++) {
        for (int i = 1; i <= imax; i++) {
            double dFdx = (F(i, j) - F(i - 1, j)) * invDx;
            double dGdy = (G(i, j) - G(i, j - 1)) * invDy;

            RHS(i, j) = (dFdx + dGdy) * invDt;
        }
    }
}
/*
 * Iterates on the pressure field p until the global mean squared residual
 * drops below eps * eps or itermax iterations have been performed.
 *
 * Each iteration exchanges the ghost rows of p, sweeps over the local
 * interior cells updating p in place, copies the adjacent interior values
 * into the boundary cells, and sums the squared residuals over all ranks.
 * Must be called by all ranks.
 */
void solve(Solver* solver)
{
    int imax      = solver->imax;
    int jmax      = solver->jmax;
    int jmaxLocal = solver->jmaxLocal;
    double eps    = solver->eps;
    int itermax   = solver->itermax;
    double dx2    = solver->dx * solver->dx;
    double dy2    = solver->dy * solver->dy;
    double idx2   = 1.0 / dx2;
    double idy2   = 1.0 / dy2;
    /* relaxation factor applied to the residual of a cell */
    double factor = solver->omega * 0.5 * (dx2 * dy2) / (dx2 + dy2);
    double* p     = solver->p;
    double* rhs   = solver->rhs;
    /* res holds a squared quantity, so it is compared against eps squared */
    double epssq  = eps * eps;
    int it        = 0;
    double res    = 1.0;

    while ((res >= epssq) && (it < itermax)) {
        res = 0.0;
        exchange(solver, p);

        /* in-place sweep: cells updated earlier in the sweep are already
         * used by later cells, so the loop order matters */
        for (int j = 1; j < jmaxLocal + 1; j++) {
            for (int i = 1; i < imax + 1; i++) {

                double r = RHS(i, j) -
                           ((P(i + 1, j) - 2.0 * P(i, j) + P(i - 1, j)) * idx2 +
                               (P(i, j + 1) - 2.0 * P(i, j) + P(i, j - 1)) * idy2);

                P(i, j) -= (factor * r);
                res += (r * r);
            }
        }

        /* bottom boundary row exists only on the first rank */
        if (solver->rank == 0) {
            for (int i = 1; i < imax + 1; i++) {
                P(i, 0) = P(i, 1);
            }
        }

        /* top boundary row exists only on the last rank */
        if (solver->rank == (solver->size - 1)) {
            for (int i = 1; i < imax + 1; i++) {
                P(i, jmaxLocal + 1) = P(i, jmaxLocal);
            }
        }

        /* left and right boundary columns exist on every rank */
        for (int j = 1; j < jmaxLocal + 1; j++) {
            P(0, j)        = P(1, j);
            P(imax + 1, j) = P(imax, j);
        }

        /* global sum of squared residuals, normalized by the number of
         * interior cells of the whole domain */
        MPI_Allreduce(MPI_IN_PLACE, &res, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
        res = res / (double)(imax * jmax);
#ifdef DEBUG
        if (solver->rank == 0) {
            printf("%d Residuum: %e\n", it, res);
        }
#endif
        it++;
    }

#ifdef VERBOSE
    if (solver->rank == 0) {
        printf("Solver took %d iterations to reach %f\n", it, sqrt(res));
    }
#endif
}
/*
 * Returns the largest absolute value found in the array m over all ranks.
 * m is expected to hold (imax + 2) * (jmaxLocal + 2) elements on each rank.
 * Must be called by all ranks.
 */
static double maxElement(Solver* solver, double* m)
{
    int size = (solver->imax + 2) * (solver->jmaxLocal + 2);
    /* Absolute values are never negative, so 0.0 is the neutral start value.
     * DBL_MIN is the smallest positive normalized double and would be
     * reported as the maximum of an all-zero field. */
    double maxval = 0.0;

    for (int i = 0; i < size; i++) {
        maxval = MAX(maxval, fabs(m[i]));
    }

    MPI_Allreduce(MPI_IN_PLACE, &maxval, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD);
    return maxval;
}
/*
 * Subtracts the global average of the pressure field from every local
 * element of p, including the ghost rows. Must be called by all ranks.
 *
 * The average is taken over the (imax + 2) * (jmax + 2) cells of the global
 * grid. Each rank therefore sums only the rows it owns: its interior rows,
 * plus row 0 on the first rank and row jmaxLocal + 1 on the last rank. Ghost
 * rows between ranks are copies of a neighbor's row and are not counted.
 */
void normalizePressure(Solver* solver)
{
    int rowLength = solver->imax + 2;
    int size      = rowLength * (solver->jmaxLocal + 2);
    double* p     = solver->p;
    double avgP   = 0.0;

    int firstRow = (solver->rank == 0) ? 0 : 1;
    int lastRow  = (solver->rank == (solver->size - 1)) ? solver->jmaxLocal + 1
                                                        : solver->jmaxLocal;

    for (int j = firstRow; j <= lastRow; j++) {
        for (int i = 0; i < rowLength; i++) {
            avgP += p[j * rowLength + i];
        }
    }

    MPI_Allreduce(MPI_IN_PLACE, &avgP, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
    avgP /= (solver->imax + 2) * (solver->jmax + 2);

    for (int i = 0; i < size; i++) {
        p[i] = p[i] - avgP;
    }
}
/*
 * Chooses the next time step: the smallest of dtBound, dx / umax and
 * dy / vmax (the latter two only for a positive maximum), scaled by tau.
 * umax and vmax are the global maxima delivered by maxElement.
 */
void computeTimestep(Solver* solver)
{
    double umax  = maxElement(solver, solver->u);
    double vmax  = maxElement(solver, solver->v);
    double limit = solver->dtBound;

    if (umax > 0) {
        double limitX = solver->dx / umax;
        if (limit > limitX) {
            limit = limitX;
        }
    }

    if (vmax > 0) {
        double limitY = solver->dy / vmax;
        if (limit > limitY) {
            limit = limitY;
        }
    }

    solver->dt = limit * solver->tau;
}
/*
 * Sets the boundary values of u and v on all four sides of the domain
 * according to the configured boundary condition flags.
 *
 * The left and right boundaries are handled by every rank for its local
 * rows; the bottom boundary only by rank 0 and the top boundary only by the
 * last rank. The PERIODIC case leaves the values untouched on every side.
 */
void setBoundaryConditions(Solver* solver)
{
    int imax      = solver->imax;
    int jmaxLocal = solver->jmaxLocal;
    double* u     = solver->u;
    double* v     = solver->v;

    // Left boundary
    switch (solver->bcLeft) {
    case NOSLIP:
        for (int j = 1; j < jmaxLocal + 1; j++) {
            U(0, j) = 0.0;
            V(0, j) = -V(1, j);
        }
        break;
    case SLIP:
        for (int j = 1; j < jmaxLocal + 1; j++) {
            U(0, j) = 0.0;
            V(0, j) = V(1, j);
        }
        break;
    case OUTFLOW:
        for (int j = 1; j < jmaxLocal + 1; j++) {
            U(0, j) = U(1, j);
            V(0, j) = V(1, j);
        }
        break;
    case PERIODIC:
        break;
    }

    // Right boundary
    switch (solver->bcRight) {
    case NOSLIP:
        for (int j = 1; j < jmaxLocal + 1; j++) {
            U(imax, j)     = 0.0;
            V(imax + 1, j) = -V(imax, j);
        }
        break;
    case SLIP:
        for (int j = 1; j < jmaxLocal + 1; j++) {
            U(imax, j)     = 0.0;
            V(imax + 1, j) = V(imax, j);
        }
        break;
    case OUTFLOW:
        for (int j = 1; j < jmaxLocal + 1; j++) {
            U(imax, j)     = U(imax - 1, j);
            V(imax + 1, j) = V(imax, j);
        }
        break;
    case PERIODIC:
        break;
    }

    // Bottom boundary
    // only the first rank holds the bottom boundary row
    if (solver->rank == 0) {
        switch (solver->bcBottom) {
        case NOSLIP:
            for (int i = 1; i < imax + 1; i++) {
                V(i, 0) = 0.0;
                U(i, 0) = -U(i, 1);
            }
            break;
        case SLIP:
            for (int i = 1; i < imax + 1; i++) {
                V(i, 0) = 0.0;
                U(i, 0) = U(i, 1);
            }
            break;
        case OUTFLOW:
            for (int i = 1; i < imax + 1; i++) {
                U(i, 0) = U(i, 1);
                V(i, 0) = V(i, 1);
            }
            break;
        case PERIODIC:
            break;
        }
    }

    // Top boundary
    // only the last rank holds the top boundary row
    if (solver->rank == (solver->size - 1)) {
        switch (solver->bcTop) {
        case NOSLIP:
            for (int i = 1; i < imax + 1; i++) {
                V(i, jmaxLocal)     = 0.0;
                U(i, jmaxLocal + 1) = -U(i, jmaxLocal);
            }
            break;
        case SLIP:
            for (int i = 1; i < imax + 1; i++) {
                V(i, jmaxLocal)     = 0.0;
                U(i, jmaxLocal + 1) = U(i, jmaxLocal);
            }
            break;
        case OUTFLOW:
            for (int i = 1; i < imax + 1; i++) {
                U(i, jmaxLocal + 1) = U(i, jmaxLocal);
                V(i, jmaxLocal)     = V(i, jmaxLocal - 1);
            }
            break;
        case PERIODIC:
            break;
        }
    }
}
/*
 * Applies the problem specific boundary values for u.
 *
 * "dcavity": on the last rank the top ghost row of u is set to
 *            2.0 - U(i, jmaxLocal).
 * "canal":   the left boundary column of u receives a parabolic profile in
 *            y that vanishes at y = 0 and y = ylength and is 1 in the middle.
 * Any other problem name leaves u untouched.
 */
void setSpecialBoundaryCondition(Solver* solver)
{
    int imax      = solver->imax;
    int jmaxLocal = solver->jmaxLocal;
    double* u     = solver->u;

    if (strcmp(solver->problem, "dcavity") == 0) {
        if (solver->rank == (solver->size - 1)) {
            for (int i = 1; i < imax; i++) {
                U(i, jmaxLocal + 1) = 2.0 - U(i, jmaxLocal);
            }
        }
    } else if (strcmp(solver->problem, "canal") == 0) {
        double ylength = solver->ylength;
        double dy      = solver->dy;
        int rest       = solver->jmax % solver->size;
        /* number of rows owned by all lower ranks */
        int yc = solver->rank * (solver->jmax / solver->size) + MIN(rest, solver->rank);
        /* y coordinate of the lower edge of this rank's first interior row */
        double ys = dy * yc;
        double y;

        /* printf("RANK %d yc: %d ys: %f\n", solver->rank, yc, ys); */
        for (int j = 1; j < jmaxLocal + 1; j++) {
            /* Center of global row yc + j. An additional half cell offset in
             * ys would shift the profile upwards and make it asymmetric
             * between the two walls. */
            y       = ys + dy * (j - 0.5);
            U(0, j) = y * (ylength - y) * 4.0 / (ylength * ylength);
        }
    }
    /* print(solver, solver->u); */
}
/*
 * Compute the tentative velocity fields F and G from the current U and V.
 *
 * For every interior cell (i = 1..imax, j = 1..jmaxLocal):
 *   F = U + dt * ((d2u/dx2 + d2u/dy2) / re - d(u^2)/dx - d(uv)/dy + gx)
 *   G = V + dt * ((d2v/dx2 + d2v/dy2) / re - d(uv)/dx - d(v^2)/dy + gy)
 * Each convective term is the sum of a plain averaged difference and a second
 * difference built with fabs() that is weighted by gamma.
 * Afterwards F and G are set equal to U and V on the domain boundaries.
 */
void computeFG(Solver* solver)
{
double* u = solver->u;
double* v = solver->v;
double* f = solver->f;
double* g = solver->g;
int imax = solver->imax;
int jmaxLocal = solver->jmaxLocal;
double gx = solver->gx;
double gy = solver->gy;
double gamma = solver->gamma;
double dt = solver->dt;
double inverseRe = 1.0 / solver->re;
double inverseDx = 1.0 / solver->dx;
double inverseDy = 1.0 / solver->dy;
double du2dx, dv2dy, duvdx, duvdy;
double du2dx2, du2dy2, dv2dx2, dv2dy2;
// The stencils below read rows j - 1 and j + 1.
// NOTE(review): exchange() is defined outside this view; presumably it
// refreshes the ghost rows shared with neighbouring ranks - confirm.
exchange(solver, u);
exchange(solver, v);
for (int j = 1; j < jmaxLocal + 1; j++) {
for (int i = 1; i < imax + 1; i++) {
// --- terms entering F (x-momentum) ---
du2dx = inverseDx * 0.25 *
((U(i, j) + U(i + 1, j)) * (U(i, j) + U(i + 1, j)) -
(U(i, j) + U(i - 1, j)) * (U(i, j) + U(i - 1, j))) +
gamma * inverseDx * 0.25 *
(fabs(U(i, j) + U(i + 1, j)) * (U(i, j) - U(i + 1, j)) +
fabs(U(i, j) + U(i - 1, j)) * (U(i, j) - U(i - 1, j)));
duvdy = inverseDy * 0.25 *
((V(i, j) + V(i + 1, j)) * (U(i, j) + U(i, j + 1)) -
(V(i, j - 1) + V(i + 1, j - 1)) * (U(i, j) + U(i, j - 1))) +
gamma * inverseDy * 0.25 *
(fabs(V(i, j) + V(i + 1, j)) * (U(i, j) - U(i, j + 1)) +
fabs(V(i, j - 1) + V(i + 1, j - 1)) *
(U(i, j) - U(i, j - 1)));
// second differences of U in x and y
du2dx2 = inverseDx * inverseDx * (U(i + 1, j) - 2.0 * U(i, j) + U(i - 1, j));
du2dy2 = inverseDy * inverseDy * (U(i, j + 1) - 2.0 * U(i, j) + U(i, j - 1));
F(i, j) = U(i, j) + dt * (inverseRe * (du2dx2 + du2dy2) - du2dx - duvdy + gx);
// --- terms entering G (y-momentum) ---
duvdx = inverseDx * 0.25 *
((U(i, j) + U(i, j + 1)) * (V(i, j) + V(i + 1, j)) -
(U(i - 1, j) + U(i - 1, j + 1)) * (V(i, j) + V(i - 1, j))) +
gamma * inverseDx * 0.25 *
(fabs(U(i, j) + U(i, j + 1)) * (V(i, j) - V(i + 1, j)) +
fabs(U(i - 1, j) + U(i - 1, j + 1)) *
(V(i, j) - V(i - 1, j)));
dv2dy = inverseDy * 0.25 *
((V(i, j) + V(i, j + 1)) * (V(i, j) + V(i, j + 1)) -
(V(i, j) + V(i, j - 1)) * (V(i, j) + V(i, j - 1))) +
gamma * inverseDy * 0.25 *
(fabs(V(i, j) + V(i, j + 1)) * (V(i, j) - V(i, j + 1)) +
fabs(V(i, j) + V(i, j - 1)) * (V(i, j) - V(i, j - 1)));
// second differences of V in x and y
dv2dx2 = inverseDx * inverseDx * (V(i + 1, j) - 2.0 * V(i, j) + V(i - 1, j));
dv2dy2 = inverseDy * inverseDy * (V(i, j + 1) - 2.0 * V(i, j) + V(i, j - 1));
G(i, j) = V(i, j) + dt * (inverseRe * (dv2dx2 + dv2dy2) - duvdx - dv2dy + gy);
}
}
/* ----------------------------- boundary of F ---------------------------
*/
// columns 0 and imax of F take over the U values (every rank, all local rows)
for (int j = 1; j < jmaxLocal + 1; j++) {
F(0, j) = U(0, j);
F(imax, j) = U(imax, j);
}
/* ----------------------------- boundary of G ---------------------------
*/
// only the first rank holds the bottom row of the global domain
if (solver->rank == 0) {
for (int i = 1; i < imax + 1; i++) {
G(i, 0) = V(i, 0);
}
}
// only the last rank holds the top row of the global domain
if (solver->rank == (solver->size - 1)) {
for (int i = 1; i < imax + 1; i++) {
G(i, jmaxLocal) = V(i, jmaxLocal);
}
}
}
/*
 * Velocity update: for every interior cell subtract the pressure difference
 * to the next cell, scaled by dt/dx (U) respectively dt/dy (V), from the
 * tentative velocities F and G.
 */
void adaptUV(Solver* solver)
{
    double* p = solver->p;
    double* u = solver->u;
    double* v = solver->v;
    double* f = solver->f;
    double* g = solver->g;

    int imax      = solver->imax;
    int jmaxLocal = solver->jmaxLocal;

    const double factorX = solver->dt / solver->dx;
    const double factorY = solver->dt / solver->dy;

    for (int j = 1; j <= jmaxLocal; j++) {
        for (int i = 1; i <= imax; i++) {
            const double pressureStepX = P(i + 1, j) - P(i, j);
            const double pressureStepY = P(i, j + 1) - P(i, j);

            U(i, j) = F(i, j) - pressureStepX * factorX;
            V(i, j) = G(i, j) - pressureStepY * factorY;
        }
    }
}
/*
 * Write the assembled global fields to two text files in the current
 * working directory.
 *
 *   pressure.dat: one "x y p" line per cell with an empty line after every
 *                 grid row.
 *   velocity.dat: one "x y u v |(u,v)|" line per cell; u and v are the
 *                 averages of the two values stored on opposite cell faces.
 *
 * solver supplies imax, jmax, dx and dy; p, u and v are the arrays accessed
 * through the P/U/V macros. If a file cannot be opened the reason is reported
 * on stderr and the process exits with EXIT_FAILURE.
 */
void writeResult(Solver* solver, double* p, double* u, double* v)
{
    int imax  = solver->imax;
    int jmax  = solver->jmax;
    double dx = solver->dx;
    double dy = solver->dy;
    double x = 0.0, y = 0.0;
    FILE* fp;

    fp = fopen("pressure.dat", "w");
    if (fp == NULL) {
        /* perror() appends the system error text, e.g. "Permission denied" */
        perror("Error: cannot open pressure.dat for writing");
        exit(EXIT_FAILURE);
    }

    for (int j = 1; j < jmax + 1; j++) {
        y = (double)(j - 0.5) * dy; /* cell centre */
        for (int i = 1; i < imax + 1; i++) {
            x = (double)(i - 0.5) * dx;
            fprintf(fp, "%.2f %.2f %f\n", x, y, P(i, j));
        }
        fprintf(fp, "\n");
    }
    fclose(fp);

    fp = fopen("velocity.dat", "w");
    if (fp == NULL) {
        perror("Error: cannot open velocity.dat for writing");
        exit(EXIT_FAILURE);
    }

    for (int j = 1; j < jmax + 1; j++) {
        y = dy * (j - 0.5);
        for (int i = 1; i < imax + 1; i++) {
            x = dx * (i - 0.5);
            /* average the two face values to obtain a cell-centred velocity */
            double vel_u = (U(i, j) + U(i - 1, j)) / 2.0;
            double vel_v = (V(i, j) + V(i, j - 1)) / 2.0;
            double len   = sqrt((vel_u * vel_u) + (vel_v * vel_v));
            fprintf(fp, "%.2f %.2f %f %f %f\n", x, y, vel_u, vel_v, len);
        }
    }
    fclose(fp);
}

View File

@ -0,0 +1,49 @@
/*
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
* All rights reserved. This file is part of nusif-solver.
* Use of this source code is governed by a MIT style
* license that can be found in the LICENSE file.
*/
#ifndef __SOLVER_H_
#define __SOLVER_H_
#include "parameter.h"
/* Boundary condition kinds; the values are 1 = NOSLIP, 2 = SLIP, 3 = OUTFLOW,
 * 4 = PERIODIC. */
enum BC { NOSLIP = 1, SLIP, OUTFLOW, PERIODIC };
typedef struct {
/* geometry and grid information */
double dx, dy;
int imax, jmax;
int jmaxLocal;
double xlength, ylength;
/* arrays */
double *p, *rhs;
double *f, *g;
double *u, *v;
/* parameters */
double eps, omega;
double re, tau, gamma;
double gx, gy;
/* time stepping */
int itermax;
double dt, te;
double dtBound;
char* problem;
int bcLeft, bcRight, bcBottom, bcTop;
/* mpi */
int rank;
int size;
} Solver;
void initSolver(Solver*, Parameter*);
void computeRHS(Solver*);
void solve(Solver*);
void normalizePressure(Solver*);
void computeTimestep(Solver*);
void setBoundaryConditions(Solver*);
void setSpecialBoundaryCondition(Solver*);
void computeFG(Solver*);
void adaptUV(Solver*);
void collectResult(Solver*);
void writeResult(Solver*, double*, double*, double*);
#endif

View File

@ -0,0 +1,24 @@
/*
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
* All rights reserved.
* Use of this source code is governed by a MIT-style
* license that can be found in the LICENSE file.
*/
#include <stdlib.h>
#include <time.h>
/* Current reading of CLOCK_MONOTONIC, converted to seconds. */
double getTimeStamp()
{
    struct timespec now;
    clock_gettime(CLOCK_MONOTONIC, &now);

    const double seconds     = (double)now.tv_sec;
    const double nanoseconds = (double)now.tv_nsec;
    return seconds + nanoseconds * 1.e-9;
}
/* Resolution of CLOCK_MONOTONIC as reported by clock_getres(), in seconds. */
double getTimeResolution()
{
    struct timespec resolution;
    clock_getres(CLOCK_MONOTONIC, &resolution);

    const double seconds     = (double)resolution.tv_sec;
    const double nanoseconds = (double)resolution.tv_nsec;
    return seconds + nanoseconds * 1.e-9;
}
/* Alias of getTimeStamp() exported under a name with a trailing underscore. */
double getTimeStamp_()
{
    double stamp = getTimeStamp();
    return stamp;
}

View File

@ -0,0 +1,14 @@
/*
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
* All rights reserved.
* Use of this source code is governed by a MIT-style
* license that can be found in the LICENSE file.
*/
#ifndef __TIMING_H_
#define __TIMING_H_
extern double getTimeStamp();
extern double getTimeResolution();
extern double getTimeStamp_();
#endif // __TIMING_H_

View File

@ -0,0 +1,23 @@
/*
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
* All rights reserved.
* Use of this source code is governed by a MIT-style
* license that can be found in the LICENSE file.
*/
#ifndef __UTIL_H_
#define __UTIL_H_
/* Horizontal rule of dashes, terminated by a newline, for console output. */
#define HLINE \
"------------------------------------------------------------------------" \
"----\n"
/* MIN, MAX and ABS are only defined if no earlier header provides them.
 * Each macro expands its argument(s) more than once, so do not pass
 * expressions with side effects (e.g. MIN(i++, j)). */
#ifndef MIN
#define MIN(x, y) ((x) < (y) ? (x) : (y))
#endif
#ifndef MAX
#define MAX(x, y) ((x) > (y) ? (x) : (y))
#endif
#ifndef ABS
#define ABS(a) ((a) >= 0 ? (a) : -(a))
#endif
#endif // __UTIL_H_

View File

@ -0,0 +1,7 @@
# Render the pressure field as a 3D surface into p.png.
# Input: pressure.dat with "x y p" lines and an empty line after each grid row.
set terminal png size 1024,768 enhanced font ,12
set output 'p.png'
set datafile separator whitespace
set grid
# hide the parts of the surface that lie behind other parts
set hidden3d
# columns: 1 = x, 2 = y, 3 = pressure
splot 'pressure.dat' using 1:2:3 with lines

View File

@ -0,0 +1,5 @@
# Render the velocity field as coloured arrows into velocity.png.
# Input: velocity.dat with "x y u v magnitude" lines.
set terminal png size 1800,768 enhanced font ,12
set output 'velocity.png'
set datafile separator whitespace
# columns: 1:2 = arrow origin, 3:4 = arrow components, 5 = value for the colour palette
plot 'velocity.dat' using 1:2:3:4:5 with vectors filled head size 0.01,20,60 lc palette

View File

@ -0,0 +1,71 @@
#=======================================================================================
# Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
# All rights reserved.
# Use of this source code is governed by a MIT-style
# license that can be found in the LICENSE file.
#=======================================================================================
#CONFIGURE BUILD SYSTEM
# TARGET and BUILD_DIR stay recursively expanded (=) on purpose: TAG is only
# known after config.mk has been included below.
TARGET	   = exe-$(TAG)
BUILD_DIR  = ./$(TAG)
SRC_DIR    = ./src
MAKE_DIR   = ./
# Recipe prefix: '@' hides the executed commands, `make Q=` shows them.
Q         ?= @

#DO NOT EDIT BELOW
include $(MAKE_DIR)/config.mk
include $(MAKE_DIR)/include_$(TAG).mk
INCLUDES  += -I$(SRC_DIR) -I$(BUILD_DIR)

VPATH     = $(SRC_DIR)
# Simple expansion (:=): evaluate the wildcard/patsubst calls once, not on
# every reference.
SRC       := $(wildcard $(SRC_DIR)/*.c)
ASM       := $(patsubst $(SRC_DIR)/%.c, $(BUILD_DIR)/%.s, $(SRC))
OBJ       := $(patsubst $(SRC_DIR)/%.c, $(BUILD_DIR)/%.o, $(SRC))
SOURCES   := $(SRC) $(wildcard $(SRC_DIR)/*.h)
CPPFLAGS  := $(CPPFLAGS) $(DEFINES) $(OPTIONS) $(INCLUDES)

# Default goal: link the executable from all objects.
${TARGET}: $(OBJ)
	$(info ===> LINKING $(TARGET))
	$(Q)${LINKER} ${LFLAGS} -o $(TARGET) $(OBJ) $(LIBS)

# Objects are rebuilt when the source or the build configuration changes.
# The build directory is an order-only prerequisite (after '|'): it has to
# exist before compiling, also with `make -j`, but its timestamp, which changes
# with every file written into it, must not trigger rebuilds.
$(BUILD_DIR)/%.o: %.c $(MAKE_DIR)/include_$(TAG).mk $(MAKE_DIR)/config.mk | $(BUILD_DIR)
	$(info ===> COMPILE $@)
	$(Q)$(CC) -c $(CPPFLAGS) $(CFLAGS) $< -o $@
	@# Header dependencies for the -include at the end of this file; -MP adds
	@# a phony target per header so that a deleted header does not break make.
	$(Q)$(GCC) $(CPPFLAGS) -MT $@ -MP -MM $< > $(BUILD_DIR)/$*.d

$(BUILD_DIR)/%.s: %.c | $(BUILD_DIR)
	$(info ===> GENERATE ASM $@)
	$(Q)$(CC) -S $(CPPFLAGS) $(CFLAGS) $< -o $@

.PHONY: clean distclean tags info asm format

# Remove intermediate build results of the currently selected toolchain.
clean:
	$(info ===> CLEAN)
	@rm -rf $(BUILD_DIR)
	@rm -f tags

# Remove intermediate build results and the executable.
distclean: clean
	$(info ===> DIST CLEAN)
	@rm -f $(TARGET)

# Print the compiler flags and the compiler version.
info:
	$(info $(CFLAGS))
	$(Q)$(CC) $(VERSION)

# Generate assembly for every source file.
asm: $(ASM)

tags:
	$(info ===> GENERATE TAGS)
	$(Q)ctags -R

# Reformat all sources and headers in place.
format:
	@for src in $(SOURCES) ; do \
		echo "Formatting $$src" ; \
		clang-format -i "$$src" ; \
	done
	@echo "Done"

# -p: succeed if the directory already exists.
$(BUILD_DIR):
	@mkdir -p $@

# Leading '-': the .d files do not exist yet on the first build.
-include $(OBJ:.o=.d)

View File

@ -0,0 +1,48 @@
# C source skeleton
## Build
1. Configure the toolchain and additional options in `config.mk`:
```
# Supported: GCC, CLANG, ICC
TAG ?= GCC
ENABLE_OPENMP ?= false
OPTIONS += -DARRAY_ALIGNMENT=64
#OPTIONS += -DVERBOSE_AFFINITY
#OPTIONS += -DVERBOSE_DATASIZE
#OPTIONS += -DVERBOSE_TIMER
```
The verbosity options enable detailed output about affinity settings, allocation sizes and timer resolution.
2. Build with:
```
make
```
You can build with multiple toolchains in the same directory, but note that the Makefile only acts on the toolchain currently selected by `TAG`.
Intermediate build results are located in the `<TOOLCHAIN>` directory.
To output the executed commands use:
```
make Q=
```
3. Clean up with:
```
make clean
```
to clean intermediate build results.
```
make distclean
```
to clean intermediate build results and binary.
4. (Optional) Generate assembler:
```
make asm
```
The assembler files will also be located in the `<TOOLCHAIN>` directory.

View File

@ -0,0 +1,46 @@
#==============================================================================
# Laminar Canal Flow
#==============================================================================
# Problem specific Data:
# ---------------------
name canal # name of flow setup
bcN 1 # flags for boundary conditions
bcE 3 # 1 = no-slip 3 = outflow
bcS 1 # 2 = free-slip 4 = periodic
bcW 3 #
gx 0.0 # Body forces (e.g. gravity)
gy 0.0 #
re 100.0 # Reynolds number
u_init 1.0 # initial value for velocity in x-direction
v_init 0.0 # initial value for velocity in y-direction
p_init 0.0 # initial value for pressure
# Geometry Data:
# -------------
xlength 30.0 # domain size in x-direction
ylength 4.0 # domain size in y-direction
imax 200 # number of interior cells in x-direction
jmax 50 # number of interior cells in y-direction
# Time Data:
# ---------
te 100.0 # final time
dt 0.02 # time stepsize
tau 0.5 # safety factor for time stepsize control (<0: constant dt)
# Pressure Iteration Data:
# -----------------------
itermax 500 # maximal number of pressure iterations in one time step
eps 0.00001 # stopping tolerance for pressure iteration
omg 1.8 # relaxation parameter for SOR iteration
gamma 0.9 # upwind differencing factor gamma
#===============================================================================

View File

@ -0,0 +1,10 @@
# Supported: GCC, CLANG, ICC
TAG ?= CLANG
ENABLE_OPENMP ?= false
#Feature options
OPTIONS += -DARRAY_ALIGNMENT=64
# OPTIONS += -DVERBOSE
#OPTIONS += -DVERBOSE_AFFINITY
#OPTIONS += -DVERBOSE_DATASIZE
#OPTIONS += -DVERBOSE_TIMER

View File

@ -0,0 +1,46 @@
#==============================================================================
# Driven Cavity
#==============================================================================
# Problem specific Data:
# ---------------------
name dcavity # name of flow setup
bcN 1 # flags for boundary conditions
bcE 1 # 1 = no-slip 3 = outflow
bcS 1 # 2 = free-slip 4 = periodic
bcW 1 #
gx 0.0 # Body forces (e.g. gravity)
gy 0.0 #
re 1000.0 # Reynolds number
u_init 0.0 # initial value for velocity in x-direction
v_init 0.0 # initial value for velocity in y-direction
p_init 0.0 # initial value for pressure
# Geometry Data:
# -------------
xlength 1.0 # domain size in x-direction
ylength 1.0 # domain size in y-direction
imax 100 # number of interior cells in x-direction
jmax 100 # number of interior cells in y-direction
# Time Data:
# ---------
te 10.0 # final time
dt 0.02 # time stepsize
tau 0.5 # safety factor for time stepsize control (<0: constant dt)
# Pressure Iteration Data:
# -----------------------
itermax 1000 # maximal number of pressure iterations in one time step
eps 0.001 # stopping tolerance for pressure iteration
omg 1.7 # relaxation parameter for SOR iteration
gamma 0.9 # upwind differencing factor gamma
#===============================================================================

View File

@ -0,0 +1,16 @@
# Toolchain settings read by the Makefile when TAG=CLANG.
# CC compiles and links; GCC is only used to generate the .d dependency files.
CC = mpicc
GCC = cc
LINKER = $(CC)
# OPENMP and LIBS are only set when ENABLE_OPENMP is true; otherwise they
# expand to nothing.
ifeq ($(ENABLE_OPENMP),true)
OPENMP = -fopenmp
#OPENMP = -Xpreprocessor -fopenmp #required on Macos with homebrew libomp
LIBS = # -lomp
endif
# Argument passed to $(CC) by the `info` target.
VERSION = --version
CFLAGS = -Ofast -std=c99 $(OPENMP)
#CFLAGS = -Ofast -fnt-store=aggressive -std=c99 $(OPENMP) #AMD CLANG
LFLAGS = $(OPENMP)
DEFINES = -D_GNU_SOURCE# -DDEBUG
INCLUDES = -I/usr/local/include

View File

@ -0,0 +1,14 @@
# Toolchain settings read by the Makefile when TAG=GCC.
# CC has to be an MPI compiler wrapper: the sources include <mpi.h> and call
# MPI routines, which plain gcc can neither compile nor link with the empty
# INCLUDES and LIBS below.
CC = mpicc
# Only used to generate the .d dependency files.
GCC = gcc
LINKER = $(CC)

ifeq ($(ENABLE_OPENMP),true)
OPENMP = -fopenmp
endif

# Argument passed to $(CC) by the `info` target.
VERSION = --version
CFLAGS = -Ofast -ffreestanding -std=c99 $(OPENMP)
LFLAGS = $(OPENMP)
DEFINES = -D_GNU_SOURCE
INCLUDES =
LIBS =

View File

@ -0,0 +1,14 @@
# Toolchain settings read by the Makefile when TAG=ICC.
# CC compiles and links; GCC is only used to generate the .d dependency files.
CC = mpiicc
GCC = gcc
LINKER = $(CC)
# OPENMP is only set when ENABLE_OPENMP is true; otherwise it expands to nothing.
ifeq ($(ENABLE_OPENMP),true)
OPENMP = -qopenmp
endif
# Argument passed to $(CC) by the `info` target.
VERSION = --version
CFLAGS = -O3 -xHost -qopt-zmm-usage=high -std=c99 $(OPENMP)
LFLAGS = $(OPENMP)
DEFINES = -D_GNU_SOURCE
INCLUDES =
LIBS =

View File

@ -0,0 +1,61 @@
/*
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
* All rights reserved.
* Use of this source code is governed by a MIT-style
* license that can be found in the LICENSE file.
*/
#ifdef __linux__
#ifdef _OPENMP
#include <pthread.h>
#include <sched.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/syscall.h>
#include <sys/types.h>
#include <unistd.h>
#define MAX_NUM_THREADS 128
#define gettid() syscall(SYS_gettid)
/*
 * Return the index of the lowest CPU contained in cpu_set.
 *
 * All CPU_SETSIZE positions a cpu_set_t can hold are scanned, so CPU ids
 * above 127 are reported correctly as well. If the set is empty the function
 * returns CPU_SETSIZE, which is not a valid CPU index.
 */
static int getProcessorID(cpu_set_t* cpu_set)
{
    int processorId;

    for (processorId = 0; processorId < CPU_SETSIZE; processorId++) {
        if (CPU_ISSET(processorId, cpu_set)) {
            break;
        }
    }

    return processorId;
}
/*
 * Query the affinity mask of the calling thread and return the lowest CPU
 * index it contains.
 */
int affinity_getProcessorId()
{
    cpu_set_t mask;
    int lowestCpu;

    CPU_ZERO(&mask);
    sched_getaffinity(gettid(), sizeof(cpu_set_t), &mask);
    lowestCpu = getProcessorID(&mask);

    return lowestCpu;
}
/* Restrict the calling thread to the single CPU processorId. */
void affinity_pinThread(int processorId)
{
    cpu_set_t singleCpu;

    CPU_ZERO(&singleCpu);
    CPU_SET(processorId, &singleCpu);
    pthread_setaffinity_np(pthread_self(), sizeof(cpu_set_t), &singleCpu);
}
/*
 * Restrict the caller to the single CPU processorId; pid 0 addresses the
 * calling thread in sched_setaffinity().
 */
void affinity_pinProcess(int processorId)
{
    const pid_t self = 0;
    cpu_set_t singleCpu;

    CPU_ZERO(&singleCpu);
    CPU_SET(processorId, &singleCpu);
    sched_setaffinity(self, sizeof(cpu_set_t), &singleCpu);
}
#endif /*_OPENMP*/
#endif /*__linux__*/

View File

@ -0,0 +1,14 @@
/*
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
* All rights reserved.
* Use of this source code is governed by a MIT-style
* license that can be found in the LICENSE file.
*/
#ifndef AFFINITY_H
#define AFFINITY_H
extern int affinity_getProcessorId();
extern void affinity_pinProcess(int);
extern void affinity_pinThread(int);
#endif /*AFFINITY_H*/

View File

@ -0,0 +1,35 @@
/*
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
* All rights reserved.
* Use of this source code is governed by a MIT-style
* license that can be found in the LICENSE file.
*/
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
/*
 * Allocate bytesize bytes aligned to alignment bytes via posix_memalign().
 *
 * alignment has to be a power of two and a multiple of sizeof(void*).
 * Returns the aligned pointer, to be released with free(). Never returns on
 * failure: a message is written to stderr and the process exits with
 * EXIT_FAILURE.
 */
void* allocate(int alignment, size_t bytesize)
{
    /* posix_memalign() leaves ptr untouched on some failures, so give it a
     * defined value before it is inspected below. */
    void* ptr     = NULL;
    int errorCode = posix_memalign(&ptr, alignment, bytesize);

    if (errorCode == EINVAL) {
        fprintf(stderr,
            "Error: Alignment parameter is not a power of two "
            "or not a multiple of sizeof(void*)\n");
        exit(EXIT_FAILURE);
    }

    if (errorCode == ENOMEM) {
        fprintf(stderr, "Error: Insufficient memory to fulfill the request\n");
        exit(EXIT_FAILURE);
    }

    /* any other error code, or a NULL result despite reported success */
    if (errorCode != 0 || ptr == NULL) {
        fprintf(stderr, "Error: posix_memalign failed!\n");
        exit(EXIT_FAILURE);
    }

    return ptr;
}

View File

@ -0,0 +1,13 @@
/*
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
* All rights reserved.
* Use of this source code is governed by a MIT-style
* license that can be found in the LICENSE file.
*/
#ifndef __ALLOCATE_H_
#define __ALLOCATE_H_
#include <stdlib.h>
extern void* allocate(int alignment, size_t bytesize);
#endif

View File

@ -0,0 +1,54 @@
/*
* =======================================================================================
*
* Author: Jan Eitzinger (je), jan.eitzinger@fau.de
* Copyright (c) 2020 RRZE, University Erlangen-Nuremberg
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* =======================================================================================
*/
#ifndef LIKWID_MARKERS_H
#define LIKWID_MARKERS_H
/*
 * Wrapper macros for the LIKWID marker API. With LIKWID_PERFMON defined they
 * forward to the likwid_marker* functions declared in <likwid.h>; without it
 * they expand to nothing, so instrumented code builds without LIKWID.
 */
#ifdef LIKWID_PERFMON
#include <likwid.h>
#define LIKWID_MARKER_INIT likwid_markerInit()
#define LIKWID_MARKER_THREADINIT likwid_markerThreadInit()
#define LIKWID_MARKER_SWITCH likwid_markerNextGroup()
#define LIKWID_MARKER_REGISTER(regionTag) likwid_markerRegisterRegion(regionTag)
#define LIKWID_MARKER_START(regionTag) likwid_markerStartRegion(regionTag)
#define LIKWID_MARKER_STOP(regionTag) likwid_markerStopRegion(regionTag)
#define LIKWID_MARKER_CLOSE likwid_markerClose()
#define LIKWID_MARKER_RESET(regionTag) likwid_markerResetRegion(regionTag)
#define LIKWID_MARKER_GET(regionTag, nevents, events, time, count) \
likwid_markerGetRegion(regionTag, nevents, events, time, count)
#else /* LIKWID_PERFMON */
/* empty replacements: the macro arguments are discarded unevaluated */
#define LIKWID_MARKER_INIT
#define LIKWID_MARKER_THREADINIT
#define LIKWID_MARKER_SWITCH
#define LIKWID_MARKER_REGISTER(regionTag)
#define LIKWID_MARKER_START(regionTag)
#define LIKWID_MARKER_STOP(regionTag)
#define LIKWID_MARKER_CLOSE
#define LIKWID_MARKER_GET(regionTag, nevents, events, time, count)
#define LIKWID_MARKER_RESET(regionTag)
#endif /* LIKWID_PERFMON */
#endif /*LIKWID_MARKERS_H*/

View File

@ -0,0 +1,80 @@
/*
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
* All rights reserved.
* Use of this source code is governed by a MIT-style
* license that can be found in the LICENSE file.
*/
#include <float.h>
#include <limits.h>
#include <mpi.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include "parameter.h"
#include "progress.h"
#include "solver.h"
#include "timing.h"
/*
 * Program entry: read the parameter file named on the command line, set up
 * the solver and advance the simulation in time until t exceeds te, then
 * collect and write the result.
 *
 * Per time step: optional adaptive time step (only if tau > 0), boundary
 * conditions, F/G, right-hand side, pressure solve, velocity update.
 */
int main(int argc, char** argv)
{
    int rank;
    double S, E;
    Parameter params;
    Solver solver;

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    initParameter(&params);

    if (argc != 2) {
        /* Print the usage once instead of once per rank, and shut MPI down
         * properly: leaving after MPI_Init without MPI_Finalize is reported
         * by MPI launchers as an abnormal termination. */
        if (rank == 0) {
            printf("Usage: %s <configFile>\n", argv[0]);
        }
        MPI_Finalize();
        exit(EXIT_SUCCESS);
    }

    readParameter(&params, argv[1]);
    if (rank == 0) {
        printParameter(&params);
    }
    initSolver(&solver, &params);
    /* debugExchange(&solver); */
    /* debugBC(&solver); */
    /* exit(EXIT_SUCCESS); */
    initProgress(solver.te);

    double tau = solver.tau;
    double te  = solver.te;
    double t   = 0.0;

    S = getTimeStamp();
    while (t <= te) {
        if (tau > 0.0) {
            computeTimestep(&solver);
        }

        setBoundaryConditions(&solver);
        setSpecialBoundaryCondition(&solver);
        computeFG(&solver);
        computeRHS(&solver);
        solve(&solver);
        adaptUV(&solver);
        t += solver.dt;

#ifdef VERBOSE
        if (rank == 0) {
            printf("TIME %f , TIMESTEP %f\n", t, solver.dt);
        }
#else
        printProgress(t);
#endif
    }
    E = getTimeStamp();
    stopProgress();

    if (rank == 0) {
        printf("Solution took %.2fs\n", E - S);
    }
    collectResult(&solver);

    MPI_Finalize();
    return EXIT_SUCCESS;
}

View File

@ -0,0 +1,108 @@
/*
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
* All rights reserved. This file is part of nusif-solver.
* Use of this source code is governed by a MIT style
* license that can be found in the LICENSE file.
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "parameter.h"
#include "util.h"
#define MAXLINE 4096
/*
 * Put *param into a defined state before a parameter file is read.
 *
 * All fields are zeroed first (name becomes NULL), so that a key missing from
 * the parameter file yields 0 instead of an indeterminate value. Grid size,
 * domain size and the pressure solver settings then receive non-zero defaults.
 */
void initParameter(Parameter* param)
{
    *param = (Parameter) { 0 };

    param->xlength = 1.0;
    param->ylength = 1.0;
    param->imax    = 100;
    param->jmax    = 100;
    param->itermax = 1000;
    param->eps     = 0.0001;
    param->omg     = 1.8;
}
/*
 * Read "key value" pairs from the text file filename into *param.
 *
 * Everything from the first '#' of a line onwards is a comment. Key and value
 * are separated by spaces or tabs; the line ending is not part of the value.
 * Lines without both a key and a value are ignored, as are unknown keys.
 * A key matches a parameter if it starts with the parameter's name.
 * The string stored for `name` is allocated with strdup() and owned by *param.
 *
 * Exits with EXIT_FAILURE if the file cannot be opened.
 */
void readParameter(Parameter* param, const char* filename)
{
    /* Tabs and line endings delimit tokens as well; splitting on ' ' alone
     * left a trailing "\n" in the value of lines without a comment. */
    static const char* delimiters = " \t\r\n";
    FILE* fp                      = fopen(filename, "r");
    char line[MAXLINE];
    int i;

    if (!fp) {
        fprintf(stderr, "Could not open parameter file: %s\n", filename);
        exit(EXIT_FAILURE);
    }

#define PARSE_PARAM(p, f)                                                                \
    if (strncmp(tok, #p, sizeof(#p) / sizeof(#p[0]) - 1) == 0) {                         \
        param->p = f(val);                                                               \
    }
#define PARSE_STRING(p) PARSE_PARAM(p, strdup)
#define PARSE_INT(p)    PARSE_PARAM(p, atoi)
#define PARSE_REAL(p)   PARSE_PARAM(p, atof)

    /* fgets() drives the loop, so end of file and read errors end it directly */
    while (fgets(line, MAXLINE, fp) != NULL) {
        /* cut the line at the first '#' */
        for (i = 0; line[i] != '\0' && line[i] != '#'; i++)
            ;
        line[i] = '\0';

        char* tok = strtok(line, delimiters);
        char* val = strtok(NULL, delimiters);

        if (tok != NULL && val != NULL) {
            PARSE_REAL(xlength);
            PARSE_REAL(ylength);
            PARSE_INT(imax);
            PARSE_INT(jmax);
            PARSE_INT(itermax);
            PARSE_REAL(eps);
            PARSE_REAL(omg);
            PARSE_REAL(re);
            PARSE_REAL(tau);
            PARSE_REAL(gamma);
            PARSE_REAL(dt);
            PARSE_REAL(te);
            PARSE_REAL(gx);
            PARSE_REAL(gy);
            PARSE_STRING(name);
            PARSE_INT(bcN);
            PARSE_INT(bcS);
            PARSE_INT(bcE);
            PARSE_INT(bcW);
            PARSE_REAL(u_init);
            PARSE_REAL(v_init);
            PARSE_REAL(p_init);
        }
    }

    /* the helper macros are only meaningful inside this function */
#undef PARSE_REAL
#undef PARSE_INT
#undef PARSE_STRING
#undef PARSE_PARAM

    fclose(fp);
}
/*
 * Print all values of *param to stdout, grouped into problem setup, geometry,
 * time stepping and iterative solver settings.
 */
void printParameter(Parameter* param)
{
    printf("Parameters for %s\n", param->name);
    printf("Boundary conditions N:%d E:%d S:%d W:%d\n",
        param->bcN,
        param->bcE,
        param->bcS,
        param->bcW);
    printf("\tReynolds number: %.2f\n", param->re);
    printf("\tInit arrays: U:%.2f V:%.2f P:%.2f\n",
        param->u_init,
        param->v_init,
        param->p_init);
    printf("Geometry data:\n");
    printf("\tDomain box size (x, y): %.2f, %.2f\n", param->xlength, param->ylength);
    printf("\tCells (x, y): %d, %d\n", param->imax, param->jmax);
    printf("Timestep parameters:\n");
    printf("\tDefault stepsize: %.2f, Final time %.2f\n", param->dt, param->te);
    printf("\tTau factor: %.2f\n", param->tau);
    printf("Iterative solver parameters:\n");
    printf("\tMax iterations: %d\n", param->itermax);
    printf("\tepsilon (stopping tolerance) : %f\n", param->eps);
    /* gamma is the upwind differencing factor, not a stopping tolerance */
    printf("\tgamma (upwind differencing factor) : %f\n", param->gamma);
    printf("\tomega (SOR relaxation): %f\n", param->omg);
}

View File

@ -0,0 +1,26 @@
/*
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
* All rights reserved. This file is part of nusif-solver.
* Use of this source code is governed by a MIT style
* license that can be found in the LICENSE file.
*/
#ifndef __PARAMETER_H_
#define __PARAMETER_H_
/* Values read from a parameter file; every field name is also the key that
 * readParameter() looks for in the file. */
typedef struct {
/* domain size and number of cells in x and y */
double xlength, ylength;
int imax, jmax;
/* pressure iteration: iteration limit, stopping tolerance, SOR relaxation */
int itermax;
double eps, omg;
double re, tau, gamma;
double te, dt;
double gx, gy;
/* set via strdup() by readParameter() */
char* name;
int bcN, bcS, bcE, bcW;
/* initial values for the u, v and p arrays */
double u_init, v_init, p_init;
} Parameter;
void initParameter(Parameter*);
void readParameter(Parameter*, const char*);
void printParameter(Parameter*);
#endif

View File

@ -0,0 +1,60 @@
/*
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
* All rights reserved. This file is part of nusif-solver.
* Use of this source code is governed by a MIT style
* license that can be found in the LICENSE file.
*/
#include <math.h>
#include <mpi.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "progress.h"
static double _end;
static int _current;
static int _rank = -1;
/*
 * Start a progress bar that is complete once the value `end` is reached.
 * Stores the caller's rank in MPI_COMM_WORLD; only rank 0 prints.
 */
void initProgress(double end)
{
    _current = 0;
    _end     = end;
    MPI_Comm_rank(MPI_COMM_WORLD, &_rank);

    if (_rank != 0) {
        return;
    }

    printf("[ ]");
    fflush(stdout);
}
/*
 * Redraw the ten-segment progress bar on rank 0 whenever current / end,
 * rounded to tenths, exceeds the number of segments drawn so far.
 */
void printProgress(double current)
{
    if (_rank != 0) {
        return;
    }

    int reached = (int)rint((current / _end) * 10.0);

    if (reached > _current) {
        char bar[11];
        _current = reached;

        for (int slot = 0; slot < 10; slot++) {
            bar[slot] = (slot < _current) ? '#' : ' ';
        }
        bar[10] = '\0';

        printf("\r[%s]", bar);
    }
    fflush(stdout);
}
/* Finish the progress bar line; only rank 0 prints. */
void stopProgress()
{
    if (_rank != 0) {
        return;
    }

    printf("\n");
    fflush(stdout);
}

View File

@ -0,0 +1,14 @@
/*
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
* All rights reserved.
* Use of this source code is governed by a MIT-style
* license that can be found in the LICENSE file.
*/
#ifndef __PROGRESS_H_
#define __PROGRESS_H_
extern void initProgress(double);
extern void printProgress(double);
extern void stopProgress();
#endif

View File

@ -0,0 +1,900 @@
/*
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
* All rights reserved. This file is part of nusif-solver.
* Use of this source code is governed by a MIT style
* license that can be found in the LICENSE file.
*/
#include <float.h>
#include <math.h>
#include <mpi.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "allocate.h"
#include "parameter.h"
#include "solver.h"
#include "util.h"
#define P(i, j) p[(j) * (imaxLocal + 2) + (i)]
#define F(i, j) f[(j) * (imaxLocal + 2) + (i)]
#define G(i, j) g[(j) * (imaxLocal + 2) + (i)]
#define U(i, j) u[(j) * (imaxLocal + 2) + (i)]
#define V(i, j) v[(j) * (imaxLocal + 2) + (i)]
#define RHS(i, j) rhs[(j) * (imaxLocal + 2) + (i)]
#define NDIMS 2
#define IDIM 0
#define JDIM 1
/*
 * Share of N items that falls to `rank` when N items are split over `size`
 * ranks: everyone gets N / size, the first N % size ranks get one more.
 */
static int sizeOfRank(int rank, int size, int N)
{
    int share     = N / size;
    int leftovers = N % size;

    if (leftovers > rank) {
        share += 1;
    }

    return share;
}
/*
 * Debug helper: write a local grid including its ghost layer to stdout, one
 * rank after the other. Every caller passes solver->size barriers on
 * MPI_COMM_WORLD, so all ranks have to call this function together.
 */
void print(Solver* solver, double* grid)
{
    int imaxLocal = solver->imaxLocal;

    for (int turn = 0; turn < solver->size; turn++) {
        if (turn == solver->rank) {
            printf(
                "### RANK %d #######################################################\n",
                solver->rank);

            for (int row = 0; row < solver->jmaxLocal + 2; row++) {
                printf("%02d: ", row);
                for (int col = 0; col < solver->imaxLocal + 2; col++) {
                    printf("%12.8f ", grid[row * (imaxLocal + 2) + col]);
                }
                printf("\n");
            }
            fflush(stdout);
        }
        MPI_Barrier(MPI_COMM_WORLD);
    }
}
/*
 * Exchange the outermost layers of `grid` with the bottom/top and left/right
 * neighbours using non-blocking MPI calls, and wait for all of them.
 *
 * buf[] holds four receive/send pairs; each pair points to a ghost position
 * (receive) and the adjacent interior position (send).
 * NOTE(review): jBufferType and iBufferType are created outside this view;
 * presumably one describes a row and the other a column of the local grid.
 */
static void exchange(Solver* solver, double* grid)
{
double* buf[8];
MPI_Request requests[8];
for (int i = 0; i < 8; i++)
requests[i] = MPI_REQUEST_NULL;
buf[0] = grid + 1; // recv bottom
buf[1] = grid + (solver->imaxLocal + 2) + 1; // send bottom
buf[2] = grid + (solver->jmaxLocal + 1) * (solver->imaxLocal + 2) + 1; // recv top
buf[3] = grid + (solver->jmaxLocal) * (solver->imaxLocal + 2) + 1; // send top
buf[4] = grid + (solver->imaxLocal + 2); // recv left
buf[5] = grid + (solver->imaxLocal + 2) + 1; // send left
buf[6] = grid + (solver->imaxLocal + 2) + (solver->imaxLocal + 1); // recv right
buf[7] = grid + (solver->imaxLocal + 2) + (solver->imaxLocal); // send right
// i = 0 handles jNeighbours[0]/iNeighbours[0], i = 1 the opposite neighbours
for (int i = 0; i < 2; i++) {
// Messages are tagged with the rank of the sender: receives expect the
// neighbour's rank, sends use the own rank. MPI_PROC_NULL is not used as a
// tag; 0 is used instead.
int tag = 0;
if (solver->jNeighbours[i] != MPI_PROC_NULL) {
tag = solver->jNeighbours[i];
}
/* exchange ghost cells with bottom/top neighbor */
MPI_Irecv(buf[i * 2],
1,
solver->jBufferType,
solver->jNeighbours[i],
tag,
solver->comm,
&requests[i * 2]);
MPI_Isend(buf[(i * 2) + 1],
1,
solver->jBufferType,
solver->jNeighbours[i],
solver->rank,
solver->comm,
&requests[i * 2 + 1]);
tag = 0;
if (solver->iNeighbours[i] != MPI_PROC_NULL) {
tag = solver->iNeighbours[i];
}
/* exchange ghost cells with left/right neighbor */
MPI_Irecv(buf[i * 2 + 4],
1,
solver->iBufferType,
solver->iNeighbours[i],
tag,
solver->comm,
&requests[i * 2 + 4]);
MPI_Isend(buf[i * 2 + 5],
1,
solver->iBufferType,
solver->iNeighbours[i],
solver->rank,
solver->comm,
&requests[(i * 2) + 5]);
}
// complete all eight transfers before the caller touches the grid again
MPI_Waitall(8, requests, MPI_STATUSES_IGNORE);
}
/*
 * One-directional ghost update for G and F:
 *   G: send the row jmaxLocal to jNeighbours[1], receive row 0 from jNeighbours[0]
 *   F: send the column imaxLocal to iNeighbours[1], receive column 0 from iNeighbours[0]
 * Tag 0 is used for the G transfer and tag 1 for the F transfer.
 */
static void shift(Solver* solver)
{
MPI_Request requests[4] = { MPI_REQUEST_NULL,
MPI_REQUEST_NULL,
MPI_REQUEST_NULL,
MPI_REQUEST_NULL };
double* f = solver->f;
double* g = solver->g;
/* shift G */
// row 0, starting at column 1
double* buf = g + 1;
/* receive ghost cells from bottom neighbor */
MPI_Irecv(buf,
1,
solver->jBufferType,
solver->jNeighbours[0],
0,
solver->comm,
&requests[0]);
// row jmaxLocal, starting at column 1
buf = g + (solver->jmaxLocal) * (solver->imaxLocal + 2) + 1;
/* send ghost cells to top neighbor */
MPI_Isend(buf,
1,
solver->jBufferType,
solver->jNeighbours[1],
0,
solver->comm,
&requests[1]);
/* shift F */
// column 0, starting at row 1
buf = f + (solver->imaxLocal + 2);
/* receive ghost cells from left neighbor */
MPI_Irecv(buf,
1,
solver->iBufferType,
solver->iNeighbours[0],
1,
solver->comm,
&requests[2]);
// column imaxLocal, starting at row 1
buf = f + (solver->imaxLocal + 2) + (solver->imaxLocal);
/* send ghost cells to right neighbor */
MPI_Isend(buf,
1,
solver->iBufferType,
solver->iNeighbours[1],
1,
solver->comm,
&requests[3]);
MPI_Waitall(4, requests, MPI_STATUSES_IGNORE);
}
/*
 * Debug helper: fill the pressure array, ghost layer included, with values
 * that encode rank, column and row (rank + 0.01 * i + 0.0001 * j) and pass the
 * result to collectResult().
 */
void debugExchange(Solver* solver)
{
    int imaxLocal = solver->imaxLocal;
    int jmaxLocal = solver->jmaxLocal;
    int rowLength = imaxLocal + 2;

    for (int j = 0; j < jmaxLocal + 2; j++) {
        double* row = solver->p + j * rowLength;

        for (int i = 0; i < rowLength; i++) {
            row[i] = solver->rank + 0.01 * i + 0.0001 * j;
        }
    }

    collectResult(solver);
    /* print(solver, solver->p); */
}
/*
 * Debug helper: mark the ghost cells on each side of the global domain in V
 * with a side-specific value plus the rank (north 10, east 20, south 30,
 * west 40) and print the local V array of every rank.
 */
void debugBC(Solver* solver)
{
    int imaxLocal = solver->imaxLocal;
    int jmaxLocal = solver->jmaxLocal;
    double* v     = solver->v;

    /* which sides of the global domain does this rank touch? */
    const int atNorth = (solver->coords[JDIM] == (solver->dims[JDIM] - 1));
    const int atEast  = (solver->coords[IDIM] == (solver->dims[IDIM] - 1));
    const int atSouth = (solver->coords[JDIM] == 0);
    const int atWest  = (solver->coords[IDIM] == 0);

    if (atNorth) {
        for (int i = 1; i <= imaxLocal; i++) {
            V(i, jmaxLocal + 1) = 10.0 + solver->rank;
        }
    }

    if (atEast) {
        for (int j = 1; j <= jmaxLocal; j++) {
            V(imaxLocal + 1, j) = 20.0 + solver->rank;
        }
    }

    if (atSouth) {
        for (int i = 1; i <= imaxLocal; i++) {
            V(i, 0) = 30.0 + solver->rank;
        }
    }

    if (atWest) {
        for (int j = 1; j <= jmaxLocal; j++) {
            V(0, j) = 40.0 + solver->rank;
        }
    }

    print(solver, solver->v);
}
/*
 * Gather one distributed field on rank 0.
 *
 * Every rank, rank 0 included, sends the interior of its local array src
 * (without the ghost layer) to rank 0. Rank 0 receives the block of rank i at
 * dst + offset[i] as jmaxLocal[i] rows of imaxLocal[i] values with a row
 * stride of solver->imax.
 *
 * dst, imaxLocal, jmaxLocal and offset are only read on rank 0.
 * All temporary MPI datatypes and the request array are released before
 * returning.
 */
static void assembleResult(Solver* solver,
    double* src,
    double* dst,
    int imaxLocal[],
    int jmaxLocal[],
    int offset[])
{
    /* one send everywhere, plus one receive per rank on rank 0 */
    int numRequests       = (solver->rank == 0) ? solver->size + 1 : 1;
    MPI_Request* requests = (MPI_Request*)malloc(numRequests * sizeof(MPI_Request));

    if (requests == NULL) {
        fprintf(stderr, "Error: Insufficient memory for MPI requests\n");
        exit(EXIT_FAILURE);
    }

    /* all ranks send their bulk array */
    MPI_Datatype bulkType;
    /* Sized with the NDIMS macro: an array whose length is a `const int`
     * variable is a variable length array in C and may not be initialized. */
    int oldSizes[NDIMS] = { solver->jmaxLocal + 2, solver->imaxLocal + 2 };
    int newSizes[NDIMS] = { solver->jmaxLocal, solver->imaxLocal };
    int starts[NDIMS]   = { 1, 1 };
    MPI_Type_create_subarray(NDIMS,
        oldSizes,
        newSizes,
        starts,
        MPI_ORDER_C,
        MPI_DOUBLE,
        &bulkType);
    MPI_Type_commit(&bulkType);
    MPI_Isend(src, 1, bulkType, 0, 0, solver->comm, &requests[0]);

    /* rank 0 assembles the subdomains */
    if (solver->rank == 0) {
        for (int i = 0; i < solver->size; i++) {
            MPI_Datatype domainType;
            MPI_Type_vector(jmaxLocal[i],
                imaxLocal[i],
                solver->imax,
                MPI_DOUBLE,
                &domainType);
            MPI_Type_commit(&domainType);
            MPI_Irecv(dst + offset[i],
                1,
                domainType,
                i,
                0,
                solver->comm,
                &requests[i + 1]);
            /* MPI_Type_free only marks the type for deallocation; a pending
             * communication that uses it still completes normally. */
            MPI_Type_free(&domainType);
        }
    }

    MPI_Waitall(numRequests, requests, MPI_STATUSES_IGNORE);

    MPI_Type_free(&bulkType);
    free(requests);
}
/* Sum of the first `position` entries of sizes; 0 if position is not positive. */
static int sum(int* sizes, int position)
{
    int total = 0;

    for (int k = position - 1; k >= 0; k--) {
        total += sizes[k];
    }

    return total;
}
/*
 * Gather the distributed P, U and V fields on rank 0 and write them to disk.
 *
 * Rank 0 collects the local domain sizes of all ranks, derives for every rank
 * the position of its block inside the global imax x jmax array and receives
 * the three fields through assembleResult(). The global arrays are released
 * after writeResult() has written them.
 *
 * Has to be called by all ranks.
 *
 * NOTE(review): the sizes are gathered on MPI_COMM_WORLD while coordinates
 * and transfers use solver->comm; this assumes both communicators number the
 * ranks identically - confirm where solver->comm is created.
 */
void collectResult(Solver* solver)
{
    double* Pall = NULL;
    double* Uall = NULL;
    double* Vall = NULL;
    int offset[solver->size];
    int imaxLocal[solver->size];
    int jmaxLocal[solver->size];

    MPI_Gather(&solver->imaxLocal, 1, MPI_INT, imaxLocal, 1, MPI_INT, 0, MPI_COMM_WORLD);
    MPI_Gather(&solver->jmaxLocal, 1, MPI_INT, jmaxLocal, 1, MPI_INT, 0, MPI_COMM_WORLD);

    if (solver->rank == 0) {
        Pall = allocate(64, (solver->imax) * (solver->jmax) * sizeof(double));
        Uall = allocate(64, (solver->imax) * (solver->jmax) * sizeof(double));
        Vall = allocate(64, (solver->imax) * (solver->jmax) * sizeof(double));

        for (int i = 0; i < solver->size; i++) {
            int coords[2];
            MPI_Cart_coords(solver->comm, i, 2, coords);

            /* The size arrays are indexed by rank, not by grid coordinate.
             * The offset of a block is therefore the sum over the ranks that
             * actually sit left of (below) it in the process grid; summing
             * the first coords[] array entries is only correct while all
             * blocks have the same size. */
            int ioffset = 0;
            for (int c = 0; c < coords[0]; c++) {
                int position[2] = { c, coords[1] };
                int neighbour;
                MPI_Cart_rank(solver->comm, position, &neighbour);
                ioffset += imaxLocal[neighbour];
            }

            int joffset = 0;
            for (int c = 0; c < coords[1]; c++) {
                int position[2] = { coords[0], c };
                int neighbour;
                MPI_Cart_rank(solver->comm, position, &neighbour);
                joffset += jmaxLocal[neighbour];
            }

            offset[i] = (joffset * solver->imax) + ioffset;
            printf("Rank: %d, Coords(i,j): %d %d, Size(i,j): %d %d, Offset(i,j): %d %d\n",
                i,
                coords[0],
                coords[1],
                imaxLocal[i],
                jmaxLocal[i],
                ioffset,
                joffset);
        }
    }

    /* collect P */
    assembleResult(solver, solver->p, Pall, imaxLocal, jmaxLocal, offset);
    /* collect U */
    assembleResult(solver, solver->u, Uall, imaxLocal, jmaxLocal, offset);
    /* collect V */
    assembleResult(solver, solver->v, Vall, imaxLocal, jmaxLocal, offset);

    /* write to disk */
    if (solver->rank == 0) {
        writeResult(solver, Pall, Uall, Vall);
        /* allocate() hands out posix_memalign() memory, released with free() */
        free(Pall);
        free(Uall);
        free(Vall);
    }
}
/*
 * Print the solver configuration to stdout.
 * Rank 0 prints the global parameters; afterwards every rank prints its own
 * communication setup (neighbours, coordinates, local domain size).
 */
static void printConfig(Solver* solver)
{
    const int isRoot = (solver->rank == 0);

    if (isRoot) {
        printf("Parameters for #%s#\n", solver->problem);
        printf("Boundary conditions N:%d E:%d S:%d W:%d\n",
            solver->bcN,
            solver->bcE,
            solver->bcS,
            solver->bcW);
        printf("\tReynolds number: %.2f\n", solver->re);
        printf("\tGx Gy: %.2f %.2f\n", solver->gx, solver->gy);

        printf("Geometry data:\n");
        printf("\tDomain box size (x, y): %.2f, %.2f\n",
            solver->xlength,
            solver->ylength);
        printf("\tCells (x, y): %d, %d\n", solver->imax, solver->jmax);

        printf("Timestep parameters:\n");
        printf("\tDefault stepsize: %.2f, Final time %.2f\n", solver->dt, solver->te);
        printf("\tdt bound: %.6f\n", solver->dtBound);
        printf("\tTau factor: %.2f\n", solver->tau);

        printf("Iterative solver parameters:\n");
        printf("\tMax iterations: %d\n", solver->itermax);
        printf("\tepsilon (stopping tolerance) : %f\n", solver->eps);
        printf("\tgamma factor: %f\n", solver->gamma);
        printf("\tomega (SOR relaxation): %f\n", solver->omega);

        printf("Communication parameters:\n");
    }

    /* walk over all rank ids; only the slot matching this rank prints */
    for (int r = 0; r < solver->size; r++) {
        if (r != solver->rank) {
            continue;
        }

        printf("\tRank %d of %d\n", solver->rank, solver->size);
        printf("\tNeighbours (b, t, l, r): %d, %d, %d, %d\n",
            solver->jNeighbours[0],
            solver->jNeighbours[1],
            solver->iNeighbours[0],
            solver->iNeighbours[1]);
        printf("\tCoordinates %d,%d\n", solver->coords[0], solver->coords[1]);
        printf("\tLocal domain size: %dx%d\n", solver->imaxLocal, solver->jmaxLocal);
        fflush(stdout);
    }
}
/*
 * Initialise the solver from the parsed parameters.
 *
 * Copies the physical and numerical parameters, creates the 2D Cartesian
 * communicator with its neighbour ranks, commits the halo datatypes and
 * allocates and fills the local field arrays. Each array has
 * (imaxLocal + 2) * (jmaxLocal + 2) entries. Must be called by all ranks
 * after MPI_Init.
 */
void initSolver(Solver* solver, Parameter* params)
{
    solver->problem = params->name;
    solver->bcN     = params->bcN;
    solver->bcS     = params->bcS;
    solver->bcW     = params->bcW;
    solver->bcE     = params->bcE;
    solver->imax    = params->imax;
    solver->jmax    = params->jmax;
    solver->xlength = params->xlength;
    solver->ylength = params->ylength;
    solver->dx      = params->xlength / params->imax;
    solver->dy      = params->ylength / params->jmax;
    solver->eps     = params->eps;
    solver->omega   = params->omg;
    solver->itermax = params->itermax;
    solver->re      = params->re;
    solver->gx      = params->gx;
    solver->gy      = params->gy;
    solver->dt      = params->dt;
    solver->te      = params->te;
    solver->tau     = params->tau;
    solver->gamma   = params->gamma;

    /* setup communication */
    MPI_Comm_rank(MPI_COMM_WORLD, &(solver->rank));
    MPI_Comm_size(MPI_COMM_WORLD, &(solver->size));
    int dims[NDIMS]    = { 0, 0 };
    int periods[NDIMS] = { 0, 0 };
    MPI_Dims_create(solver->size, NDIMS, dims);
    MPI_Cart_create(MPI_COMM_WORLD, NDIMS, dims, periods, 0, &solver->comm);
    MPI_Cart_shift(solver->comm,
        IDIM,
        1,
        &solver->iNeighbours[0],
        &solver->iNeighbours[1]);
    MPI_Cart_shift(solver->comm,
        JDIM,
        1,
        &solver->jNeighbours[0],
        &solver->jNeighbours[1]);
    MPI_Cart_get(solver->comm, NDIMS, solver->dims, periods, solver->coords);

    /*
     * NOTE(review): sizeOfRank() receives the world rank although the second
     * argument is the process count of a single dimension; presumably the
     * coordinate solver->coords[IDIM] / solver->coords[JDIM] is meant. Verify
     * against the sizeOfRank() definition.
     */
    solver->imaxLocal = sizeOfRank(solver->rank, dims[IDIM], solver->imax);
    solver->jmaxLocal = sizeOfRank(solver->rank, dims[JDIM], solver->jmax);

    /* jBufferType: imaxLocal contiguous doubles */
    MPI_Type_contiguous(solver->imaxLocal, MPI_DOUBLE, &solver->jBufferType);
    MPI_Type_commit(&solver->jBufferType);

    /* iBufferType: jmaxLocal single doubles, one per row of stride imaxLocal + 2 */
    MPI_Type_vector(solver->jmaxLocal,
        1,
        solver->imaxLocal + 2,
        MPI_DOUBLE,
        &solver->iBufferType);
    MPI_Type_commit(&solver->iBufferType);

    /* allocate arrays */
    int imaxLocal = solver->imaxLocal;
    int jmaxLocal = solver->jmaxLocal;
    /* widen before multiplying so large local domains do not overflow int */
    size_t count    = (size_t)(imaxLocal + 2) * (size_t)(jmaxLocal + 2);
    size_t bytesize = count * sizeof(double);
    solver->u       = allocate(64, bytesize);
    solver->v       = allocate(64, bytesize);
    solver->p       = allocate(64, bytesize);
    solver->rhs     = allocate(64, bytesize);
    solver->f       = allocate(64, bytesize);
    solver->g       = allocate(64, bytesize);

    for (size_t i = 0; i < count; i++) {
        solver->u[i]   = params->u_init;
        solver->v[i]   = params->v_init;
        solver->p[i]   = params->p_init;
        solver->rhs[i] = 0.0;
        solver->f[i]   = 0.0;
        solver->g[i]   = 0.0;
    }

    /* upper bound for the time step, used as start value by computeTimestep() */
    double dx          = solver->dx;
    double dy          = solver->dy;
    double inv_sqr_sum = 1.0 / (dx * dx) + 1.0 / (dy * dy);
    solver->dtBound    = 0.5 * solver->re * 1.0 / inv_sqr_sum;
#ifdef VERBOSE
    printConfig(solver);
#endif
}
/*
 * Fill the right-hand side of the pressure equation for all local interior
 * cells from the backward differences of F (in i) and G (in j), scaled by
 * 1/dt. shift() is called first.
 */
void computeRHS(Solver* solver)
{
    const int imaxLocal = solver->imaxLocal;
    const int jmaxLocal = solver->jmaxLocal;
    const double idx    = 1.0 / solver->dx;
    const double idy    = 1.0 / solver->dy;
    const double idt    = 1.0 / solver->dt;
    double* rhs         = solver->rhs;
    double* f           = solver->f;
    double* g           = solver->g;

    shift(solver);

    for (int j = 1; j <= jmaxLocal; j++) {
        for (int i = 1; i <= imaxLocal; i++) {
            const double dFdx = (F(i, j) - F(i - 1, j)) * idx;
            const double dGdy = (G(i, j) - G(i, j - 1)) * idy;

            RHS(i, j) = (dFdx + dGdy) * idt;
        }
    }
}
/*
 * Iterate on the pressure field until the globally averaged squared residual
 * drops below eps^2 or itermax iterations have been performed.
 *
 * Each iteration exchanges the halo of p, relaxes all local interior cells
 * with the omega-weighted update, copies the adjacent interior values into
 * the ghost cells on physical domain boundaries and reduces the residual
 * over all ranks. Must be called by all ranks.
 *
 * Returns 0 if the stopping tolerance was reached, 1 otherwise.
 */
int solve(Solver* solver)
{
    int imax      = solver->imax;
    int jmax      = solver->jmax;
    int imaxLocal = solver->imaxLocal;
    int jmaxLocal = solver->jmaxLocal;
    double eps    = solver->eps;
    int itermax   = solver->itermax;
    double dx2    = solver->dx * solver->dx;
    double dy2    = solver->dy * solver->dy;
    double idx2   = 1.0 / dx2;
    double idy2   = 1.0 / dy2;
    double factor = solver->omega * 0.5 * (dx2 * dy2) / (dx2 + dy2);
    double* p     = solver->p;
    double* rhs   = solver->rhs;
    double epssq  = eps * eps;
    int it        = 0;
    double res    = 1.0;

    while ((res >= epssq) && (it < itermax)) {
        res = 0.0;
        exchange(solver, p);

        for (int j = 1; j < jmaxLocal + 1; j++) {
            for (int i = 1; i < imaxLocal + 1; i++) {

                double r = RHS(i, j) -
                           ((P(i + 1, j) - 2.0 * P(i, j) + P(i - 1, j)) * idx2 +
                               (P(i, j + 1) - 2.0 * P(i, j) + P(i, j - 1)) * idy2);

                P(i, j) -= (factor * r);
                res += (r * r);
            }
        }

        if (solver->coords[JDIM] == 0) { // set bottom bc
            for (int i = 1; i < imaxLocal + 1; i++) {
                P(i, 0) = P(i, 1);
            }
        }

        if (solver->coords[JDIM] == (solver->dims[JDIM] - 1)) { // set top bc
            for (int i = 1; i < imaxLocal + 1; i++) {
                P(i, jmaxLocal + 1) = P(i, jmaxLocal);
            }
        }

        if (solver->coords[IDIM] == 0) { // set left bc
            for (int j = 1; j < jmaxLocal + 1; j++) {
                P(0, j) = P(1, j);
            }
        }

        if (solver->coords[IDIM] == (solver->dims[IDIM] - 1)) { // set right bc
            for (int j = 1; j < jmaxLocal + 1; j++) {
                P(imaxLocal + 1, j) = P(imaxLocal, j);
            }
        }

        MPI_Allreduce(MPI_IN_PLACE, &res, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
        /* average over all global cells; multiply as double to avoid int overflow */
        res = res / ((double)imax * (double)jmax);
#ifdef DEBUG
        if (solver->rank == 0) {
            printf("%d Residuum: %e\n", it, res);
        }
#endif
        it++;
    }

#ifdef VERBOSE
    if (solver->rank == 0) {
        printf("Solver took %d iterations to reach %f\n", it, sqrt(res));
    }
#endif

    /* res is a squared quantity, so test it against the squared tolerance,
     * exactly like the loop condition above */
    return (res < epssq) ? 0 : 1;
}
/*
 * Return the largest absolute value found in the local array `m`
 * ((imaxLocal + 2) * (jmaxLocal + 2) entries), reduced with MPI_MAX over
 * all ranks. Must be called by all ranks.
 */
static double maxElement(Solver* solver, double* m)
{
    int size = (solver->imaxLocal + 2) * (solver->jmaxLocal + 2);
    /* absolute values are never negative, so 0.0 is the neutral start value;
     * DBL_MIN is the smallest positive double and would hide an all-zero field */
    double maxval = 0.0;

    for (int i = 0; i < size; i++) {
        maxval = MAX(maxval, fabs(m[i]));
    }

    MPI_Allreduce(MPI_IN_PLACE, &maxval, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD);
    return maxval;
}
/*
 * Choose the next time step: start from dtBound, limit it by dx/umax and
 * dy/vmax when the respective global maximum velocity is positive, then
 * scale the result by the safety factor tau and store it in solver->dt.
 */
void computeTimestep(Solver* solver)
{
    double step       = solver->dtBound;
    const double dx   = solver->dx;
    const double dy   = solver->dy;
    const double umax = maxElement(solver, solver->u);
    const double vmax = maxElement(solver, solver->v);

    if (umax > 0) {
        const double limit = dx / umax;
        if (step > limit) {
            step = limit;
        }
    }

    if (vmax > 0) {
        const double limit = dy / vmax;
        if (step > limit) {
            step = limit;
        }
    }

    solver->dt = step * solver->tau;
}
/*
 * Apply the configured boundary condition (bcN, bcS, bcE, bcW) to the
 * velocity fields U and V.
 * Only ranks whose Cartesian coordinate lies on the respective edge of the
 * process grid touch that boundary. PERIODIC is accepted but performs no
 * assignment here; a flag matching none of the cases is silently ignored.
 */
void setBoundaryConditions(Solver* solver)
{
int imaxLocal = solver->imaxLocal;
int jmaxLocal = solver->jmaxLocal;
double* u = solver->u;
double* v = solver->v;
// Northern boundary
if (solver->coords[JDIM] == (solver->dims[JDIM] - 1)) { // set top bc
switch (solver->bcN) {
case NOSLIP:
// V on the boundary is zero, ghost U is the negated interior U
for (int i = 1; i < imaxLocal + 1; i++) {
V(i, jmaxLocal) = 0.0;
U(i, jmaxLocal + 1) = -U(i, jmaxLocal);
}
break;
case SLIP:
// V on the boundary is zero, ghost U copies the interior U
for (int i = 1; i < imaxLocal + 1; i++) {
V(i, jmaxLocal) = 0.0;
U(i, jmaxLocal + 1) = U(i, jmaxLocal);
}
break;
case OUTFLOW:
// both components are copied from their inner neighbour
for (int i = 1; i < imaxLocal + 1; i++) {
U(i, jmaxLocal + 1) = U(i, jmaxLocal);
V(i, jmaxLocal) = V(i, jmaxLocal - 1);
}
break;
case PERIODIC:
break;
}
}
// Southern boundary
if (solver->coords[JDIM] == 0) { // set bottom bc
switch (solver->bcS) {
case NOSLIP:
for (int i = 1; i < imaxLocal + 1; i++) {
V(i, 0) = 0.0;
U(i, 0) = -U(i, 1);
}
break;
case SLIP:
for (int i = 1; i < imaxLocal + 1; i++) {
V(i, 0) = 0.0;
U(i, 0) = U(i, 1);
}
break;
case OUTFLOW:
for (int i = 1; i < imaxLocal + 1; i++) {
U(i, 0) = U(i, 1);
V(i, 0) = V(i, 1);
}
break;
case PERIODIC:
break;
}
}
// Eastern boundary
if (solver->coords[IDIM] == (solver->dims[IDIM] - 1)) { // set right bc
switch (solver->bcE) {
case NOSLIP:
// U on the boundary is zero, ghost V is the negated interior V
for (int j = 1; j < jmaxLocal + 1; j++) {
U(imaxLocal, j) = 0.0;
V(imaxLocal + 1, j) = -V(imaxLocal, j);
}
break;
case SLIP:
for (int j = 1; j < jmaxLocal + 1; j++) {
U(imaxLocal, j) = 0.0;
V(imaxLocal + 1, j) = V(imaxLocal, j);
}
break;
case OUTFLOW:
for (int j = 1; j < jmaxLocal + 1; j++) {
U(imaxLocal, j) = U(imaxLocal - 1, j);
V(imaxLocal + 1, j) = V(imaxLocal, j);
}
break;
case PERIODIC:
break;
}
}
// Western boundary
if (solver->coords[IDIM] == 0) { // set left bc
switch (solver->bcW) {
case NOSLIP:
for (int j = 1; j < jmaxLocal + 1; j++) {
U(0, j) = 0.0;
V(0, j) = -V(1, j);
}
break;
case SLIP:
for (int j = 1; j < jmaxLocal + 1; j++) {
U(0, j) = 0.0;
V(0, j) = V(1, j);
}
break;
case OUTFLOW:
for (int j = 1; j < jmaxLocal + 1; j++) {
U(0, j) = U(1, j);
V(0, j) = V(1, j);
}
break;
case PERIODIC:
break;
}
}
}
/*
 * Apply the problem-specific boundary condition on U.
 *
 * "dcavity": ranks on the top edge of the process grid set the ghost row
 *            to 2.0 - U(i, jmaxLocal).
 * "canal":   ranks on the left edge of the process grid prescribe a
 *            parabolic profile in y on U(0, j).
 * Any other problem name leaves the fields untouched.
 */
void setSpecialBoundaryCondition(Solver* solver)
{
    int imaxLocal = solver->imaxLocal;
    int jmaxLocal = solver->jmaxLocal;
    double* u     = solver->u;

    if (strcmp(solver->problem, "dcavity") == 0) {
        if (solver->coords[JDIM] == (solver->dims[JDIM] - 1)) { // set top bc
            for (int i = 1; i < imaxLocal + 1; i++) {
                U(i, jmaxLocal + 1) = 2.0 - U(i, jmaxLocal);
            }
        }
    } else if (strcmp(solver->problem, "canal") == 0) {
        if (solver->coords[IDIM] == 0) { // set left bc
            double ylength = solver->ylength;
            double dy      = solver->dy;

            /*
             * Global j index of the first local row. The domain is split in j
             * over dims[JDIM] processes, so the offset has to be derived from
             * this rank's j coordinate, not from the world rank and size.
             * Assumes the remainder rows go to the lowest coordinates.
             */
            int jprocs = solver->dims[JDIM];
            int jcoord = solver->coords[JDIM];
            int rest   = solver->jmax % jprocs;
            int yc     = jcoord * (solver->jmax / jprocs) + MIN(rest, jcoord);

            /* NOTE(review): ys + dy * (j - 0.5) evaluates to dy * (yc + j);
             * confirm whether the extra half cell in ys is intended. */
            double ys = dy * (yc + 0.5);
            double y;

            for (int j = 1; j < jmaxLocal + 1; j++) {
                y       = ys + dy * (j - 0.5);
                U(0, j) = y * (ylength - y) * 4.0 / (ylength * ylength);
            }
        }
    }
}
/*
 * Compute the intermediate fields F and G for all local interior cells.
 * The halos of U and V are exchanged first. F and G combine the current
 * velocity with dt times (diffusion scaled by 1/Re, minus the convective
 * terms, plus the body force gx / gy). gamma weights the fabs()-based part
 * of the convective terms. On physical domain boundaries F and G are set
 * to the boundary values of U and V.
 */
void computeFG(Solver* solver)
{
double* u = solver->u;
double* v = solver->v;
double* f = solver->f;
double* g = solver->g;
int imaxLocal = solver->imaxLocal;
int jmaxLocal = solver->jmaxLocal;
double gx = solver->gx;
double gy = solver->gy;
double gamma = solver->gamma;
double dt = solver->dt;
double inverseRe = 1.0 / solver->re;
double inverseDx = 1.0 / solver->dx;
double inverseDy = 1.0 / solver->dy;
double du2dx, dv2dy, duvdx, duvdy;
double du2dx2, du2dy2, dv2dx2, dv2dy2;
// the stencils below read neighbour cells, so refresh the halos first
exchange(solver, u);
exchange(solver, v);
for (int j = 1; j < jmaxLocal + 1; j++) {
for (int i = 1; i < imaxLocal + 1; i++) {
// convective terms of the U equation
du2dx = inverseDx * 0.25 *
((U(i, j) + U(i + 1, j)) * (U(i, j) + U(i + 1, j)) -
(U(i, j) + U(i - 1, j)) * (U(i, j) + U(i - 1, j))) +
gamma * inverseDx * 0.25 *
(fabs(U(i, j) + U(i + 1, j)) * (U(i, j) - U(i + 1, j)) +
fabs(U(i, j) + U(i - 1, j)) * (U(i, j) - U(i - 1, j)));
duvdy = inverseDy * 0.25 *
((V(i, j) + V(i + 1, j)) * (U(i, j) + U(i, j + 1)) -
(V(i, j - 1) + V(i + 1, j - 1)) * (U(i, j) + U(i, j - 1))) +
gamma * inverseDy * 0.25 *
(fabs(V(i, j) + V(i + 1, j)) * (U(i, j) - U(i, j + 1)) +
fabs(V(i, j - 1) + V(i + 1, j - 1)) *
(U(i, j) - U(i, j - 1)));
// second differences of U in x and y
du2dx2 = inverseDx * inverseDx * (U(i + 1, j) - 2.0 * U(i, j) + U(i - 1, j));
du2dy2 = inverseDy * inverseDy * (U(i, j + 1) - 2.0 * U(i, j) + U(i, j - 1));
F(i, j) = U(i, j) + dt * (inverseRe * (du2dx2 + du2dy2) - du2dx - duvdy + gx);
// convective terms of the V equation
duvdx = inverseDx * 0.25 *
((U(i, j) + U(i, j + 1)) * (V(i, j) + V(i + 1, j)) -
(U(i - 1, j) + U(i - 1, j + 1)) * (V(i, j) + V(i - 1, j))) +
gamma * inverseDx * 0.25 *
(fabs(U(i, j) + U(i, j + 1)) * (V(i, j) - V(i + 1, j)) +
fabs(U(i - 1, j) + U(i - 1, j + 1)) *
(V(i, j) - V(i - 1, j)));
dv2dy = inverseDy * 0.25 *
((V(i, j) + V(i, j + 1)) * (V(i, j) + V(i, j + 1)) -
(V(i, j) + V(i, j - 1)) * (V(i, j) + V(i, j - 1))) +
gamma * inverseDy * 0.25 *
(fabs(V(i, j) + V(i, j + 1)) * (V(i, j) - V(i, j + 1)) +
fabs(V(i, j) + V(i, j - 1)) * (V(i, j) - V(i, j - 1)));
// second differences of V in x and y
dv2dx2 = inverseDx * inverseDx * (V(i + 1, j) - 2.0 * V(i, j) + V(i - 1, j));
dv2dy2 = inverseDy * inverseDy * (V(i, j + 1) - 2.0 * V(i, j) + V(i, j - 1));
G(i, j) = V(i, j) + dt * (inverseRe * (dv2dx2 + dv2dy2) - duvdx - dv2dy + gy);
}
}
/* ----------------------------- boundary of F --------------------------- */
if (solver->coords[IDIM] == 0) { // set left bc
for (int j = 1; j < jmaxLocal + 1; j++) {
F(0, j) = U(0, j);
}
}
if (solver->coords[IDIM] == (solver->dims[IDIM] - 1)) { // set right bc
for (int j = 1; j < jmaxLocal + 1; j++) {
F(imaxLocal, j) = U(imaxLocal, j);
}
}
/* ----------------------------- boundary of G --------------------------- */
if (solver->coords[JDIM] == 0) { // set bottom bc
for (int i = 1; i < imaxLocal + 1; i++) {
G(i, 0) = V(i, 0);
}
}
if (solver->coords[JDIM] == (solver->dims[JDIM] - 1)) { // set top bc
for (int i = 1; i < imaxLocal + 1; i++) {
G(i, jmaxLocal) = V(i, jmaxLocal);
}
}
}
/*
 * Update the velocities of all local interior cells: subtract the forward
 * pressure difference, scaled by dt/dx (for U) or dt/dy (for V), from the
 * intermediate fields F and G.
 */
void adaptUV(Solver* solver)
{
    const int imaxLocal  = solver->imaxLocal;
    const int jmaxLocal  = solver->jmaxLocal;
    double* p            = solver->p;
    double* u            = solver->u;
    double* v            = solver->v;
    double* f            = solver->f;
    double* g            = solver->g;
    const double factorX = solver->dt / solver->dx;
    const double factorY = solver->dt / solver->dy;

    for (int j = 1; j <= jmaxLocal; j++) {
        for (int i = 1; i <= imaxLocal; i++) {
            const double gradX = (P(i + 1, j) - P(i, j)) * factorX;
            const double gradY = (P(i, j + 1) - P(i, j)) * factorY;

            U(i, j) = F(i, j) - gradX;
            V(i, j) = G(i, j) - gradY;
        }
    }
}
/*
 * Write the assembled global fields to "pressure.dat" and "velocity.dat" in
 * the current working directory.
 *
 * p, u and v are indexed as [j * imax + i]. Both loops start at index 1 and
 * stop before imax / jmax. The velocity file holds the average of each cell
 * value with its lower-index neighbour plus the resulting magnitude.
 * Terminates the process if a file cannot be opened.
 */
void writeResult(Solver* solver, double* p, double* u, double* v)
{
    const int imax  = solver->imax;
    const int jmax  = solver->jmax;
    const double dx = solver->dx;
    const double dy = solver->dy;
    FILE* out;

    /* pressure: one "x y p" line per cell, blank line after each row */
    out = fopen("pressure.dat", "w");
    if (out == NULL) {
        printf("Error!\n");
        exit(EXIT_FAILURE);
    }

    for (int j = 1; j < jmax; j++) {
        const double y = (double)(j - 0.5) * dy;

        for (int i = 1; i < imax; i++) {
            const double x = (double)(i - 0.5) * dx;
            fprintf(out, "%.2f %.2f %f\n", x, y, p[j * (imax) + i]);
        }
        fprintf(out, "\n");
    }
    fclose(out);

    /* velocity: one "x y u v |vel|" line per cell */
    out = fopen("velocity.dat", "w");
    if (out == NULL) {
        printf("Error!\n");
        exit(EXIT_FAILURE);
    }

    for (int j = 1; j < jmax; j++) {
        const double y = dy * (j - 0.5);

        for (int i = 1; i < imax; i++) {
            const double x     = dx * (i - 0.5);
            const double vel_u = (u[j * (imax) + i] + u[j * (imax) + (i - 1)]) / 2.0;
            const double vel_v = (v[j * (imax) + i] + v[(j - 1) * (imax) + i]) / 2.0;
            const double len   = sqrt((vel_u * vel_u) + (vel_v * vel_v));

            fprintf(out, "%.2f %.2f %f %f %f\n", x, y, vel_u, vel_v, len);
        }
    }
    fclose(out);
}

View File

@ -0,0 +1,56 @@
/*
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
* All rights reserved. This file is part of nusif-solver.
* Use of this source code is governed by a MIT style
* license that can be found in the LICENSE file.
*/
#ifndef __SOLVER_H_
#define __SOLVER_H_
#include "parameter.h"
#include <mpi.h>
/* Boundary condition kinds; values start at 1 and count up in declaration order. */
enum BC { NOSLIP = 1, SLIP, OUTFLOW, PERIODIC };
/* Complete state of the distributed solver on one MPI rank. */
typedef struct {
/* geometry and grid information */
/* dx, dy: cell width (xlength / imax, ylength / jmax); imax, jmax: global cell counts */
double dx, dy;
int imax, jmax;
double xlength, ylength;
/* arrays */
/* each array holds (imaxLocal + 2) * (jmaxLocal + 2) doubles */
double *p, *rhs;
double *f, *g;
double *u, *v;
/* parameters */
double eps, omega;
double re, tau, gamma;
double gx, gy;
/* time stepping */
int itermax;
double dt, te;
/* dtBound: start value for the time step in computeTimestep() */
double dtBound;
char* problem;
/* boundary condition flags, compared against enum BC */
int bcN, bcS, bcW, bcE;
/* mpi */
/* rank, size: taken from MPI_COMM_WORLD; comm: 2D Cartesian communicator */
int rank;
int size;
MPI_Comm comm;
/* iBufferType: jmaxLocal strided doubles; jBufferType: imaxLocal contiguous doubles */
MPI_Datatype iBufferType, jBufferType;
/* neighbour ranks in i and j direction as returned by MPI_Cart_shift */
int iNeighbours[2], jNeighbours[2];
/* position of this rank in the process grid and the grid extents */
int coords[2], dims[2];
/* number of interior cells owned by this rank */
int imaxLocal, jmaxLocal;
} Solver;
/* Set up communicator, datatypes and field arrays from the parameters. */
void initSolver(Solver*, Parameter*);
/* Fill the right-hand side of the pressure equation from F and G. */
void computeRHS(Solver*);
/* Pressure iteration; returns 0 on convergence, 1 otherwise. */
int solve(Solver*);
/* Choose the next time step and store it in the solver. */
void computeTimestep(Solver*);
/* Apply the configured boundary conditions to U and V. */
void setBoundaryConditions(Solver*);
/* Apply the boundary condition specific to the "dcavity" / "canal" problems. */
void setSpecialBoundaryCondition(Solver*);
/* Compute the intermediate fields F and G. */
void computeFG(Solver*);
/* Update U and V from F, G and the pressure differences. */
void adaptUV(Solver*);
/* Gather P, U and V on rank 0 and write them to disk. */
void collectResult(Solver*);
/* Write global pressure and velocity arrays to pressure.dat / velocity.dat. */
void writeResult(Solver*, double*, double*, double*);
/* Debug helpers; definitions are not part of this section of the sources. */
void debugExchange(Solver*);
void debugBC(Solver*);
void print(Solver*, double*);
#endif

View File

@ -0,0 +1,24 @@
/*
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
* All rights reserved.
* Use of this source code is governed by a MIT-style
* license that can be found in the LICENSE file.
*/
#include <stdlib.h>
#include <time.h>
/* Current reading of CLOCK_MONOTONIC in seconds. */
double getTimeStamp()
{
    struct timespec now;
    double seconds;

    clock_gettime(CLOCK_MONOTONIC, &now);
    seconds = (double)now.tv_sec;
    seconds += (double)now.tv_nsec * 1.e-9;

    return seconds;
}
/* Resolution of CLOCK_MONOTONIC in seconds, as reported by clock_getres(). */
double getTimeResolution()
{
    struct timespec res;
    double seconds;

    clock_getres(CLOCK_MONOTONIC, &res);
    seconds = (double)res.tv_sec;
    seconds += (double)res.tv_nsec * 1.e-9;

    return seconds;
}
/* Alias of getTimeStamp() under a name with a trailing underscore. */
double getTimeStamp_()
{
    double stamp = getTimeStamp();
    return stamp;
}

View File

@ -0,0 +1,14 @@
/*
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
* All rights reserved.
* Use of this source code is governed by a MIT-style
* license that can be found in the LICENSE file.
*/
#ifndef __TIMING_H_
#define __TIMING_H_
/* Monotonic clock reading in seconds. */
extern double getTimeStamp();
/* Resolution of the monotonic clock in seconds. */
extern double getTimeResolution();
/* Same as getTimeStamp(), exported with a trailing underscore. */
extern double getTimeStamp_();
#endif // __TIMING_H_

View File

@ -0,0 +1,22 @@
/*
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
* All rights reserved.
* Use of this source code is governed by a MIT-style
* license that can be found in the LICENSE file.
*/
#ifndef __UTIL_H_
#define __UTIL_H_
/* Horizontal separator line for console output (includes the newline). */
#define HLINE \
"----------------------------------------------------------------------------\n"
/* MIN/MAX/ABS are plain macros: their arguments are evaluated more than
 * once, so do not pass expressions with side effects. Each is only defined
 * if no macro of that name exists yet. */
#ifndef MIN
#define MIN(x, y) ((x) < (y) ? (x) : (y))
#endif
#ifndef MAX
#define MAX(x, y) ((x) > (y) ? (x) : (y))
#endif
#ifndef ABS
#define ABS(a) ((a) >= 0 ? (a) : -(a))
#endif
#endif // __UTIL_H_

View File

@ -0,0 +1,7 @@
# Render pressure.dat (columns: x y p) as a 3D surface plot into p.png.
set terminal png size 1024,768 enhanced font ,12
set output 'p.png'
set datafile separator whitespace
set grid
set hidden3d
# the data file separates grid rows by blank lines
splot 'pressure.dat' using 1:2:3 with lines

View File

@ -0,0 +1,5 @@
# Render velocity.dat (columns: x y u v |vel|) as a vector plot into velocity.png.
set terminal png size 1800,768 enhanced font ,12
set output 'velocity.png'
set datafile separator whitespace
# column 5 (velocity magnitude) selects the palette colour of each arrow
plot 'velocity.dat' using 1:2:3:4:5 with vectors filled head size 0.01,20,60 lc palette

View File

@ -0,0 +1,71 @@
#=======================================================================================
# Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
# All rights reserved.
# Use of this source code is governed by a MIT-style
# license that can be found in the LICENSE file.
#=======================================================================================
#CONFIGURE BUILD SYSTEM
# TARGET and BUILD_DIR stay recursively expanded: TAG is only known after
# config.mk has been included below.
TARGET = exe-$(TAG)
BUILD_DIR = ./$(TAG)
SRC_DIR = ./src
MAKE_DIR = ./
# Command prefix: '@' hides the executed commands, 'make Q=' shows them.
Q ?= @

#DO NOT EDIT BELOW
include $(MAKE_DIR)/config.mk

# An empty TAG would turn BUILD_DIR into './' and 'make clean' into 'rm -rf ./'.
ifeq ($(strip $(TAG)),)
$(error TAG is empty; select a toolchain in config.mk)
endif

include $(MAKE_DIR)/include_$(TAG).mk
INCLUDES += -I$(SRC_DIR) -I$(BUILD_DIR)

VPATH = $(SRC_DIR)
SRC := $(wildcard $(SRC_DIR)/*.c)
ASM := $(patsubst $(SRC_DIR)/%.c, $(BUILD_DIR)/%.s, $(SRC))
OBJ := $(patsubst $(SRC_DIR)/%.c, $(BUILD_DIR)/%.o, $(SRC))
SOURCES := $(SRC) $(wildcard $(SRC_DIR)/*.h)
CPPFLAGS := $(CPPFLAGS) $(DEFINES) $(OPTIONS) $(INCLUDES)

# Remove a target whose recipe failed half-way (e.g. dependency generation).
.DELETE_ON_ERROR:

# The build directory is an order-only prerequisite: it must exist, but its
# timestamp never triggers a rebuild.
$(TARGET): $(OBJ) | $(BUILD_DIR)
	$(info ===> LINKING $(TARGET))
	$(Q)$(LINKER) $(LFLAGS) -o $(TARGET) $(OBJ) $(LIBS)

# Objects are rebuilt when the toolchain or feature configuration changes.
# The second command writes the header dependencies included at the bottom.
$(BUILD_DIR)/%.o: %.c $(MAKE_DIR)/include_$(TAG).mk $(MAKE_DIR)/config.mk | $(BUILD_DIR)
	$(info ===> COMPILE $@)
	$(Q)$(CC) -c $(CPPFLAGS) $(CFLAGS) $< -o $@
	$(Q)$(GCC) $(CPPFLAGS) -MT $@ -MM $< > $(BUILD_DIR)/$*.d

$(BUILD_DIR)/%.s: %.c | $(BUILD_DIR)
	$(info ===> GENERATE ASM $@)
	$(Q)$(CC) -S $(CPPFLAGS) $(CFLAGS) $< -o $@

.PHONY: clean distclean tags info asm format

clean:
	$(info ===> CLEAN)
	@rm -rf $(BUILD_DIR)
	@rm -f tags

distclean: clean
	$(info ===> DIST CLEAN)
	@rm -f $(TARGET)

info:
	$(info $(CFLAGS))
	$(Q)$(CC) $(VERSION)

asm: $(ASM)

tags:
	$(info ===> GENERATE TAGS)
	$(Q)ctags -R

format:
	@for src in $(SOURCES) ; do \
		echo "Formatting $$src" ; \
		clang-format -i $$src ; \
	done
	@echo "Done"

$(BUILD_DIR):
	@mkdir -p $@

-include $(OBJ:.o=.d)

View File

@ -0,0 +1,48 @@
# C source skeleton
## Build
1. Configure the toolchain and additional options in `config.mk`:
```
# Supported: GCC, CLANG, ICC
TAG ?= GCC
ENABLE_OPENMP ?= false
OPTIONS += -DARRAY_ALIGNMENT=64
#OPTIONS += -DVERBOSE_AFFINITY
#OPTIONS += -DVERBOSE_DATASIZE
#OPTIONS += -DVERBOSE_TIMER
```
The verbosity options enable detailed output about affinity settings, allocation sizes and timer resolution.
2. Build with:
```
make
```
You can build with multiple toolchains in the same directory, but note that the Makefile only acts on the toolchain currently selected via `TAG`.
Intermediate build results are located in the `<TOOLCHAIN>` directory.
To output the executed commands use:
```
make Q=
```
3. Clean up with:
```
make clean
```
to clean intermediate build results.
```
make distclean
```
to clean intermediate build results and binary.
4. (Optional) Generate assembler:
```
make asm
```
The assembler files will also be located in the `<TOOLCHAIN>` directory.

View File

@ -0,0 +1,46 @@
#==============================================================================
# Laminar Canal Flow
#==============================================================================
# Problem specific Data:
# ---------------------
name canal # name of flow setup
bcN 1 # flags for boundary conditions
bcE 3 # 1 = no-slip 3 = outflow
bcS 1 # 2 = free-slip 4 = periodic
bcW 3 #
gx 0.0 # Body forces (e.g. gravity)
gy 0.0 #
re 100.0 # Reynolds number
u_init 1.0 # initial value for velocity in x-direction
v_init 0.0 # initial value for velocity in y-direction
p_init 0.0 # initial value for pressure
# Geometry Data:
# -------------
xlength 30.0 # domain size in x-direction
ylength 4.0 # domain size in y-direction
imax 200 # number of interior cells in x-direction
jmax 50 # number of interior cells in y-direction
# Time Data:
# ---------
te 100.0 # final time
dt 0.02 # time stepsize
tau 0.5 # safety factor for time step size control (<= 0: constant time step dt)
# Pressure Iteration Data:
# -----------------------
itermax 500 # maximal number of pressure iteration in one time step
eps 0.00001 # stopping tolerance for pressure iteration
omg 1.8 # relaxation parameter for SOR iteration
gamma 0.9 # upwind differencing factor gamma
#===============================================================================

View File

@ -0,0 +1,10 @@
# Supported: GCC, CLANG, ICC
# TAG selects the toolchain file include_$(TAG).mk and names the build
# directory and the executable (exe-$(TAG)).
TAG ?= CLANG
# When set to true, the toolchain files add their OpenMP compiler flag.
ENABLE_OPENMP ?= false
#Feature options
# OPTIONS are passed to the preprocessor for every source file.
OPTIONS += -DARRAY_ALIGNMENT=64
OPTIONS += -DVERBOSE
#OPTIONS += -DVERBOSE_AFFINITY
#OPTIONS += -DVERBOSE_DATASIZE
#OPTIONS += -DVERBOSE_TIMER

View File

@ -0,0 +1,46 @@
#==============================================================================
# Driven Cavity
#==============================================================================
# Problem specific Data:
# ---------------------
name dcavity # name of flow setup
bcTop 1 # flags for boundary conditions
bcBottom 1 # 1 = no-slip 3 = outflow
bcLeft 1 # 2 = free-slip 4 = periodic
bcRight 1 #
gx 0.0 # Body forces (e.g. gravity)
gy 0.0 #
re 10.0 # Reynolds number
u_init 0.0 # initial value for velocity in x-direction
v_init 0.0 # initial value for velocity in y-direction
p_init 0.0 # initial value for pressure
# Geometry Data:
# -------------
xlength 1.0 # domain size in x-direction
ylength 1.0 # domain size in y-direction
imax 100 # number of interior cells in x-direction
jmax 100 # number of interior cells in y-direction
# Time Data:
# ---------
te 5.0 # final time
dt 0.02 # time stepsize
tau 0.5 # safety factor for time step size control (<= 0: constant time step dt)
# Pressure Iteration Data:
# -----------------------
itermax 1000 # maximal number of pressure iteration in one time step
eps 0.001 # stopping tolerance for pressure iteration
omg 1.7 # relaxation parameter for SOR iteration
gamma 0.9 # upwind differencing factor gamma
#===============================================================================

View File

@ -0,0 +1,16 @@
# Toolchain settings for TAG=CLANG.
# CC compiles and links; GCC is only used to generate header dependencies.
CC = mpicc
GCC = cc
LINKER = $(CC)
ifeq ($(ENABLE_OPENMP),true)
OPENMP = -fopenmp
#OPENMP = -Xpreprocessor -fopenmp #required on Macos with homebrew libomp
LIBS = # -lomp
endif
# Argument passed to $(CC) by the 'info' target.
VERSION = --version
CFLAGS = -Ofast -std=c99 $(OPENMP)
#CFLAGS = -Ofast -fnt-store=aggressive -std=c99 $(OPENMP) #AMD CLANG
LFLAGS = $(OPENMP)
# Everything after '#' on the next line is a comment; append -DDEBUG to enable it.
DEFINES = -D_GNU_SOURCE# -DDEBUG
INCLUDES = -I/usr/local/include

View File

@ -0,0 +1,14 @@
# Toolchain settings for TAG=GCC.
# CC compiles and links; GCC is only used to generate header dependencies.
CC = gcc
GCC = gcc
LINKER = $(CC)
ifeq ($(ENABLE_OPENMP),true)
OPENMP = -fopenmp
endif
# Argument passed to $(CC) by the 'info' target.
VERSION = --version
CFLAGS = -Ofast -ffreestanding -std=c99 $(OPENMP)
LFLAGS = $(OPENMP)
DEFINES = -D_GNU_SOURCE
INCLUDES =
LIBS =

View File

@ -0,0 +1,14 @@
# Toolchain settings for TAG=ICC.
# CC compiles and links; GCC is only used to generate header dependencies.
CC = mpiicc
GCC = gcc
LINKER = $(CC)
ifeq ($(ENABLE_OPENMP),true)
OPENMP = -qopenmp
endif
# Argument passed to $(CC) by the 'info' target.
VERSION = --version
CFLAGS = -O3 -xHost -qopt-zmm-usage=high -std=c99 $(OPENMP)
LFLAGS = $(OPENMP)
DEFINES = -D_GNU_SOURCE
INCLUDES =
LIBS =

View File

@ -0,0 +1,61 @@
/*
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
* All rights reserved.
* Use of this source code is governed by a MIT-style
* license that can be found in the LICENSE file.
*/
#ifdef __linux__
#ifdef _OPENMP
#include <pthread.h>
#include <sched.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/syscall.h>
#include <sys/types.h>
#include <unistd.h>
#define MAX_NUM_THREADS 128
#define gettid() syscall(SYS_gettid)
/*
 * Return the lowest CPU id below MAX_NUM_THREADS that is set in `cpu_set`,
 * or MAX_NUM_THREADS if none of them is set.
 */
static int getProcessorID(cpu_set_t* cpu_set)
{
    int processorId = 0;

    while (processorId < MAX_NUM_THREADS && !CPU_ISSET(processorId, cpu_set)) {
        processorId++;
    }

    return processorId;
}
/*
 * Query the affinity mask of the calling thread and return the first CPU id
 * found in it (see getProcessorID()).
 */
int affinity_getProcessorId()
{
    cpu_set_t mask;

    CPU_ZERO(&mask);
    sched_getaffinity(gettid(), sizeof(mask), &mask);

    return getProcessorID(&mask);
}
/* Restrict the calling thread to the single CPU `processorId`. */
void affinity_pinThread(int processorId)
{
    cpu_set_t mask;

    CPU_ZERO(&mask);
    CPU_SET(processorId, &mask);
    pthread_setaffinity_np(pthread_self(), sizeof(mask), &mask);
}
/* Restrict the calling process (pid 0 = caller) to the single CPU `processorId`. */
void affinity_pinProcess(int processorId)
{
    cpu_set_t mask;

    CPU_ZERO(&mask);
    CPU_SET(processorId, &mask);
    sched_setaffinity(0, sizeof(mask), &mask);
}
#endif /*_OPENMP*/
#endif /*__linux__*/

View File

@ -0,0 +1,14 @@
/*
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
* All rights reserved.
* Use of this source code is governed by a MIT-style
* license that can be found in the LICENSE file.
*/
#ifndef AFFINITY_H
#define AFFINITY_H
/* First CPU id in the affinity mask of the calling thread. */
extern int affinity_getProcessorId();
/* Pin the calling process to the given CPU id. */
extern void affinity_pinProcess(int);
/* Pin the calling thread to the given CPU id. */
extern void affinity_pinThread(int);
#endif /*AFFINITY_H*/

View File

@ -0,0 +1,35 @@
/*
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
* All rights reserved.
* Use of this source code is governed by a MIT-style
* license that can be found in the LICENSE file.
*/
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
/*
 * Allocate `bytesize` bytes aligned to `alignment` bytes via posix_memalign().
 *
 * Never returns NULL: on any failure an error message is printed to stderr
 * and the process exits with EXIT_FAILURE. Release the memory with free().
 */
void* allocate(int alignment, size_t bytesize)
{
    /* posix_memalign() leaves ptr unspecified on failure, so start from a
     * defined value before it is inspected below */
    void* ptr     = NULL;
    int errorCode = posix_memalign(&ptr, alignment, bytesize);

    if (errorCode == EINVAL) {
        fprintf(stderr, "Error: Alignment parameter is not a power of two\n");
        exit(EXIT_FAILURE);
    }

    if (errorCode == ENOMEM) {
        fprintf(stderr, "Error: Insufficient memory to fulfill the request\n");
        exit(EXIT_FAILURE);
    }

    /* any other error code, or a NULL result, is treated as a failure too */
    if (errorCode != 0 || ptr == NULL) {
        fprintf(stderr, "Error: posix_memalign failed!\n");
        exit(EXIT_FAILURE);
    }

    return ptr;
}

View File

@ -0,0 +1,13 @@
/*
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
* All rights reserved.
* Use of this source code is governed by a MIT-style
* license that can be found in the LICENSE file.
*/
#ifndef __ALLOCATE_H_
#define __ALLOCATE_H_
#include <stdlib.h>
/* Allocate bytesize bytes aligned to alignment bytes; exits the process on failure. */
extern void* allocate(int alignment, size_t bytesize);
#endif

View File

@ -0,0 +1,54 @@
/*
* =======================================================================================
*
* Author: Jan Eitzinger (je), jan.eitzinger@fau.de
* Copyright (c) 2020 RRZE, University Erlangen-Nuremberg
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* =======================================================================================
*/
/*
 * Wrapper macros for the LIKWID marker API. With LIKWID_PERFMON defined they
 * forward to the likwid_marker* functions; otherwise they expand to nothing,
 * so instrumented code builds unchanged without LIKWID.
 */
#ifndef LIKWID_MARKERS_H
#define LIKWID_MARKERS_H
#ifdef LIKWID_PERFMON
#include <likwid.h>
#define LIKWID_MARKER_INIT likwid_markerInit()
#define LIKWID_MARKER_THREADINIT likwid_markerThreadInit()
#define LIKWID_MARKER_SWITCH likwid_markerNextGroup()
#define LIKWID_MARKER_REGISTER(regionTag) likwid_markerRegisterRegion(regionTag)
#define LIKWID_MARKER_START(regionTag) likwid_markerStartRegion(regionTag)
#define LIKWID_MARKER_STOP(regionTag) likwid_markerStopRegion(regionTag)
#define LIKWID_MARKER_CLOSE likwid_markerClose()
#define LIKWID_MARKER_RESET(regionTag) likwid_markerResetRegion(regionTag)
#define LIKWID_MARKER_GET(regionTag, nevents, events, time, count) \
likwid_markerGetRegion(regionTag, nevents, events, time, count)
#else /* LIKWID_PERFMON */
/* no-op variants */
#define LIKWID_MARKER_INIT
#define LIKWID_MARKER_THREADINIT
#define LIKWID_MARKER_SWITCH
#define LIKWID_MARKER_REGISTER(regionTag)
#define LIKWID_MARKER_START(regionTag)
#define LIKWID_MARKER_STOP(regionTag)
#define LIKWID_MARKER_CLOSE
#define LIKWID_MARKER_GET(regionTag, nevents, events, time, count)
#define LIKWID_MARKER_RESET(regionTag)
#endif /* LIKWID_PERFMON */
#endif /*LIKWID_MARKERS_H*/

View File

@ -0,0 +1,77 @@
/*
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
* All rights reserved.
* Use of this source code is governed by a MIT-style
* license that can be found in the LICENSE file.
*/
#include <float.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include "parameter.h"
#include "progress.h"
#include "solver.h"
#include "timing.h"
#include <mpi.h>
/*
 * Program entry point: read the parameter file named on the command line,
 * run the time loop until the final time te is exceeded, then collect and
 * write the result.
 *
 * With a wrong argument count, rank 0 prints a usage line and all ranks
 * finalize MPI before exiting (exit status unchanged: EXIT_SUCCESS).
 */
int main(int argc, char** argv)
{
    int rank;
    double S, E;
    Parameter params;
    Solver solver;

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    initParameter(&params);

    if (argc != 2) {
        /* one usage line is enough, and MPI must be shut down before exit */
        if (rank == 0) {
            printf("Usage: %s <configFile>\n", argv[0]);
        }
        MPI_Finalize();
        exit(EXIT_SUCCESS);
    }

    readParameter(&params, argv[1]);
    if (rank == 0) {
        printParameter(&params);
    }
    initSolver(&solver, &params);
    initProgress(solver.te);

    double tau = solver.tau;
    double te  = solver.te;
    double t   = 0.0;

    S = getTimeStamp();
    while (t <= te) {
        /* adaptive time step only for a positive safety factor */
        if (tau > 0.0) {
            computeTimestep(&solver);
        }

        setBoundaryConditions(&solver);
        setSpecialBoundaryCondition(&solver);
        computeFG(&solver);
        computeRHS(&solver);
        solve(&solver);
        adaptUV(&solver);
        t += solver.dt;

#ifdef VERBOSE
        if (rank == 0) {
            printf("TIME %f , TIMESTEP %f\n", t, solver.dt);
        }
#else
        printProgress(t);
#endif
    }
    E = getTimeStamp();
    stopProgress();

    if (rank == 0) {
        printf("Solution took %.2fs\n", E - S);
    }

    collectResult(&solver);
    MPI_Finalize();
    return EXIT_SUCCESS;
}

View File

@ -0,0 +1,111 @@
/*
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
* All rights reserved. This file is part of nusif-solver.
* Use of this source code is governed by a MIT style
* license that can be found in the LICENSE file.
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "parameter.h"
#include "util.h"
#define MAXLINE 4096
/*
 * Fill `param` with default values.
 *
 * Every field gets a defined value so that a parameter file omitting a key
 * never leaves uninitialised data behind (in particular the `name` pointer,
 * which is printed by printParameter()). Values found in the parameter file
 * overwrite these defaults in readParameter().
 */
void initParameter(Parameter* param)
{
    /* geometry */
    param->xlength = 1.0;
    param->ylength = 1.0;
    param->imax    = 100;
    param->jmax    = 100;

    /* pressure iteration */
    param->itermax = 1000;
    param->eps     = 0.0001;
    param->omg     = 1.7;

    /* flow parameters */
    param->re    = 100.0;
    param->gamma = 0.9;
    param->tau   = 0.5;

    /* previously left unset: neutral defaults */
    param->te       = 0.0;
    param->dt       = 0.0;
    param->gx       = 0.0;
    param->gy       = 0.0;
    param->name     = "";
    param->bcLeft   = 1; /* 1 = no-slip in the parameter file legend */
    param->bcRight  = 1;
    param->bcBottom = 1;
    param->bcTop    = 1;
    param->u_init   = 0.0;
    param->v_init   = 0.0;
    param->p_init   = 0.0;
}
/* Assign f(val) to param->p when the key token equals the field name exactly. */
#define PARSE_PARAM(p, f)                                                                \
    if (strcmp(tok, #p) == 0) {                                                          \
        param->p = f(val);                                                               \
    }
#define PARSE_STRING(p) PARSE_PARAM(p, strdup)
#define PARSE_INT(p)    PARSE_PARAM(p, atoi)
#define PARSE_REAL(p)   PARSE_PARAM(p, atof)

/*
 * Read "key value" pairs from the parameter file `filename` into `param`.
 *
 * Everything from '#' to the end of a line is ignored. Key and value may be
 * separated by spaces or tabs. Lines without both a key and a value, and
 * unknown keys, are skipped. Fields whose key does not occur keep their
 * previous value. Exits with EXIT_FAILURE if the file cannot be opened.
 */
void readParameter(Parameter* param, const char* filename)
{
    static const char* delimiters = " \t\r\n";
    FILE* fp                      = fopen(filename, "r");
    char line[MAXLINE];

    if (!fp) {
        fprintf(stderr, "Could not open parameter file: %s\n", filename);
        exit(EXIT_FAILURE);
    }

    /* let fgets() end the loop instead of testing feof() before reading */
    while (fgets(line, MAXLINE, fp) != NULL) {
        /* cut off a trailing comment */
        char* comment = strchr(line, '#');
        if (comment != NULL) {
            *comment = '\0';
        }

        /* newline and tab are delimiters too, so they never end up in a value */
        char* tok = strtok(line, delimiters);
        char* val = strtok(NULL, delimiters);

        if (tok == NULL || val == NULL) {
            continue;
        }

        PARSE_REAL(xlength);
        PARSE_REAL(ylength);
        PARSE_INT(imax);
        PARSE_INT(jmax);
        PARSE_INT(itermax);
        PARSE_REAL(eps);
        PARSE_REAL(omg);
        PARSE_REAL(re);
        PARSE_REAL(tau);
        PARSE_REAL(gamma);
        PARSE_REAL(dt);
        PARSE_REAL(te);
        PARSE_REAL(gx);
        PARSE_REAL(gy);
        PARSE_STRING(name);
        PARSE_INT(bcLeft);
        PARSE_INT(bcRight);
        PARSE_INT(bcBottom);
        PARSE_INT(bcTop);
        PARSE_REAL(u_init);
        PARSE_REAL(v_init);
        PARSE_REAL(p_init);
    }

    fclose(fp);
}
/*
 * Dump the content of *param to stdout as a human readable summary.
 */
void printParameter(Parameter* param)
{
    const Parameter* cfg = param;

    /* problem description */
    printf("Parameters for %s\n", cfg->name);
    printf("Boundary conditions Left:%d Right:%d Bottom:%d Top:%d\n",
        cfg->bcLeft,
        cfg->bcRight,
        cfg->bcBottom,
        cfg->bcTop);
    printf("\tReynolds number: %.2f\n", cfg->re);
    printf("\tInit arrays: U:%.2f V:%.2f P:%.2f\n",
        cfg->u_init,
        cfg->v_init,
        cfg->p_init);

    /* geometry */
    fputs("Geometry data:\n", stdout);
    printf("\tDomain box size (x, y): %.2f, %.2f\n", cfg->xlength, cfg->ylength);
    printf("\tCells (x, y): %d, %d\n", cfg->imax, cfg->jmax);

    /* time stepping */
    fputs("Timestep parameters:\n", stdout);
    printf("\tDefault stepsize: %.2f, Final time %.2f\n", cfg->dt, cfg->te);
    printf("\tTau factor: %.2f\n", cfg->tau);

    /* pressure iteration */
    fputs("Iterative solver parameters:\n", stdout);
    printf("\tMax iterations: %d\n", cfg->itermax);
    printf("\tepsilon (stopping tolerance) : %f\n", cfg->eps);
    printf("\tgamma (stopping tolerance) : %f\n", cfg->gamma);
    printf("\tomega (SOR relaxation): %f\n", cfg->omg);
}

View File

@ -0,0 +1,26 @@
/*
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
* All rights reserved. This file is part of nusif-solver.
* Use of this source code is governed by a MIT style
* license that can be found in the LICENSE file.
*/
#ifndef __PARAMETER_H_
#define __PARAMETER_H_
typedef struct {
double xlength, ylength;
int imax, jmax;
int itermax;
double eps, omg;
double re, tau, gamma;
double te, dt;
double gx, gy;
char* name;
int bcLeft, bcRight, bcBottom, bcTop;
double u_init, v_init, p_init;
} Parameter;
void initParameter(Parameter*);
void readParameter(Parameter*, const char*);
void printParameter(Parameter*);
#endif

View File

@ -0,0 +1,60 @@
/*
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
* All rights reserved. This file is part of nusif-solver.
* Use of this source code is governed by a MIT style
* license that can be found in the LICENSE file.
*/
#include <math.h>
#include <mpi.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "progress.h"
/* value that corresponds to a full bar; set by initProgress() */
static double _end;
/* number of bar segments drawn by the last update */
static int _current;
/* rank in MPI_COMM_WORLD; stays -1 until initProgress() was called */
static int _rank = -1;
/*
 * Reset the progress bar and remember `end` as the value of a full bar.
 * Queries the rank in MPI_COMM_WORLD; only rank 0 draws the empty bar.
 */
void initProgress(double end)
{
    MPI_Comm_rank(MPI_COMM_WORLD, &_rank);
    _current = 0;
    _end     = end;

    if (_rank != 0) {
        return;
    }

    printf("[ ]");
    fflush(stdout);
}
/*
 * Redraw the progress bar on rank 0 if `current` has advanced it by at least
 * one of its ten segments. stdout is flushed on every call on rank 0.
 */
void printProgress(double current)
{
    if (_rank != 0) {
        return;
    }

    int ticks = (int)rint((current / _end) * 10.0);

    if (ticks > _current) {
        char bar[11];

        _current = ticks;
        /* fill the ten slots directly instead of appending one by one */
        for (int slot = 0; slot < 10; slot++) {
            bar[slot] = (slot < _current) ? '#' : ' ';
        }
        bar[10] = '\0';
        printf("\r[%s]", bar);
    }

    fflush(stdout);
}
/*
 * Terminate the progress bar line on rank 0.
 */
void stopProgress()
{
    if (_rank != 0) {
        return;
    }

    printf("\n");
    fflush(stdout);
}

View File

@ -0,0 +1,14 @@
/*
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
* All rights reserved.
* Use of this source code is governed by a MIT-style
* license that can be found in the LICENSE file.
*/
#ifndef __PROGRESS_H_
#define __PROGRESS_H_
extern void initProgress(double);
extern void printProgress(double);
extern void stopProgress();
#endif

View File

@ -0,0 +1,832 @@
/*
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
* All rights reserved. This file is part of nusif-solver.
* Use of this source code is governed by a MIT style
* license that can be found in the LICENSE file.
*/
#include <float.h>
#include <math.h>
#include <mpi.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "allocate.h"
#include "parameter.h"
#include "solver.h"
#include "util.h"
/*
 * 2D accessors for the local arrays. Rows have imaxLocal + 2 entries, i.e.
 * the interior plus one extra cell on each side. Each macro expects a local
 * pointer of the lower-case name (p, f, g, u, v, rhs) and a local variable
 * imaxLocal to be in scope at the point of use.
 */
#define P(i, j) p[(j) * (imaxLocal + 2) + (i)]
#define F(i, j) f[(j) * (imaxLocal + 2) + (i)]
#define G(i, j) g[(j) * (imaxLocal + 2) + (i)]
#define U(i, j) u[(j) * (imaxLocal + 2) + (i)]
#define V(i, j) v[(j) * (imaxLocal + 2) + (i)]
#define RHS(i, j) rhs[(j) * (imaxLocal + 2) + (i)]
/* indices of the i- and j-dimension in the cartesian topology arrays */
#define IDIM 0
#define JDIM 1
/*
 * Number of the N items assigned to position `rank` when they are spread over
 * `size` positions: everybody gets N / size, and the first N % size positions
 * get one more.
 */
static int sizeOfRank(int rank, int size, int N)
{
    int share = N / size;

    if (rank < N % size) {
        share += 1;
    }
    return share;
}
/*
 * Debug helper: print the local array `grid` (all rows and columns including
 * the outer layer) of every rank to stdout. Ranks take turns, separated by a
 * barrier on MPI_COMM_WORLD, so all ranks have to call this function.
 */
void print(Solver* solver, double* grid)
{
    const int rowLength = solver->imaxLocal + 2;
    const int rowCount  = solver->jmaxLocal + 2;

    for (int turn = 0; turn < solver->size; turn++) {
        if (turn == solver->rank) {
            printf(
                "### RANK %d #######################################################\n",
                solver->rank);

            for (int j = 0; j < rowCount; j++) {
                const double* row = grid + j * rowLength;

                printf("%02d: ", j);
                for (int i = 0; i < rowLength; i++) {
                    printf("%12.8f ", row[i]);
                }
                printf("\n");
            }
            fflush(stdout);
        }
        MPI_Barrier(MPI_COMM_WORLD);
    }
}
/*
 * Exchange the boundary layers of `grid` with all four cartesian neighbours
 * in a single neighbourhood collective. Which cells are sent and where the
 * received cells are stored is described by the datatypes and byte
 * displacements prepared in initSolver().
 */
static void exchange(Solver* solver, double* grid)
{
    /* exactly one element of the respective derived datatype per neighbour */
    int onePerNeighbour[4] = { 1, 1, 1, 1 };

    MPI_Neighbor_alltoallw(grid,
        onePerNeighbour,
        solver->sdispls,
        solver->bufferTypes,
        grid,
        onePerNeighbour,
        solver->rdispls,
        solver->bufferTypes,
        solver->comm);
}
/*
 * One-directional halo update for F and G as needed by computeRHS(), which
 * reads F(i - 1, j) and G(i, j - 1): G is passed from the bottom neighbour to
 * the top neighbour, F from the left neighbour to the right neighbour.
 * bufferTypes[2] and bufferTypes[0] are the row and column datatypes set up
 * in initSolver(). All four operations are non-blocking and completed
 * together at the end.
 */
static void shift(Solver* solver)
{
MPI_Request requests[4] = { MPI_REQUEST_NULL,
MPI_REQUEST_NULL,
MPI_REQUEST_NULL,
MPI_REQUEST_NULL };
double* f = solver->f;
double* g = solver->g;
/* shift G */
/* row 0, starting at column 1 */
double* buf = g + 1;
/* receive ghost cells from bottom neighbor */
MPI_Irecv(buf,
1,
solver->bufferTypes[2],
solver->jNeighbours[0],
0,
solver->comm,
&requests[0]);
/* row jmaxLocal (last interior row), starting at column 1 */
buf = g + (solver->jmaxLocal) * (solver->imaxLocal + 2) + 1;
/* send ghost cells to top neighbor */
MPI_Isend(buf,
1,
solver->bufferTypes[2],
solver->jNeighbours[1],
0,
solver->comm,
&requests[1]);
/* shift F */
/* column 0, starting at row 1 */
buf = f + (solver->imaxLocal + 2);
/* receive ghost cells from left neighbor */
MPI_Irecv(buf,
1,
solver->bufferTypes[0],
solver->iNeighbours[0],
1,
solver->comm,
&requests[2]);
/* column imaxLocal (last interior column), starting at row 1 */
buf = f + (solver->imaxLocal + 2) + (solver->imaxLocal);
/* send ghost cells to right neighbor */
MPI_Isend(buf,
1,
solver->bufferTypes[0],
solver->iNeighbours[1],
1,
solver->comm,
&requests[3]);
MPI_Waitall(4, requests, MPI_STATUSES_IGNORE);
}
/*
 * Debug helper: overwrite the whole local pressure array with the own rank
 * number, run a halo exchange on it and print the result, so the received
 * cells show which neighbour they came from.
 */
void debugExchange(Solver* solver)
{
    const int cellCount = (solver->imaxLocal + 2) * (solver->jmaxLocal + 2);
    double* pressure    = solver->p;

    for (int k = 0; k < cellCount; k++) {
        pressure[k] = solver->rank;
    }

    exchange(solver, pressure);
    print(solver, pressure);
}
/*
 * Gather the interior cells of the local array `src` of every rank into the
 * global array `dst` on rank 0.
 *
 * solver     solver state (rank, size, communicator, local/global sizes)
 * src        local array with (imaxLocal + 2) x (jmaxLocal + 2) entries
 * dst        global array with imax x jmax entries; only used on rank 0
 * imaxLocal  per-rank interior size in i; only used on rank 0
 * jmaxLocal  per-rank interior size in j; only used on rank 0
 * offset     per-rank start index inside dst; only used on rank 0
 *
 * Every rank (including rank 0) sends its interior block; rank 0 posts one
 * strided receive per rank. The call returns after all transfers completed.
 */
static void assembleResult(Solver* solver,
    double* src,
    double* dst,
    int imaxLocal[],
    int jmaxLocal[],
    int offset[])
{
    /* one send on every rank, plus one receive per rank on rank 0 */
    int numRequests       = (solver->rank == 0) ? solver->size + 1 : 1;
    MPI_Request* requests = (MPI_Request*)malloc(numRequests * sizeof(MPI_Request));

    if (requests == NULL) {
        fprintf(stderr, "Could not allocate MPI request array\n");
        MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE);
    }

    /* all ranks send their bulk array */
    MPI_Datatype bulkType;
    /* NDIMS is a compile time constant; a VLA could not take an initializer */
    int oldSizes[NDIMS] = { solver->jmaxLocal + 2, solver->imaxLocal + 2 };
    int newSizes[NDIMS] = { solver->jmaxLocal, solver->imaxLocal };
    int starts[NDIMS]   = { 1, 1 };

    MPI_Type_create_subarray(NDIMS,
        oldSizes,
        newSizes,
        starts,
        MPI_ORDER_C,
        MPI_DOUBLE,
        &bulkType);
    MPI_Type_commit(&bulkType);
    MPI_Isend(src, 1, bulkType, 0, 0, solver->comm, &requests[0]);
    /* A type may be freed while a pending operation still uses it; MPI keeps
     * it alive until that operation completes. */
    MPI_Type_free(&bulkType);

    /* rank 0 assembles the subdomains */
    if (solver->rank == 0) {
        for (int i = 0; i < solver->size; i++) {
            MPI_Datatype domainType;

            /* jmaxLocal[i] rows of imaxLocal[i] values, one global row apart */
            MPI_Type_vector(jmaxLocal[i],
                imaxLocal[i],
                solver->imax,
                MPI_DOUBLE,
                &domainType);
            MPI_Type_commit(&domainType);
            MPI_Irecv(dst + offset[i],
                1,
                domainType,
                i,
                0,
                solver->comm,
                &requests[i + 1]);
            MPI_Type_free(&domainType);
        }
    }

    MPI_Waitall(numRequests, requests, MPI_STATUSES_IGNORE);
    free(requests);
}
static int sum(int* sizes, int position)
{
int sum = 0;
for (int i = 0; i < position; i++) {
sum += sizes[i];
}
return sum;
}
void collectResult(Solver* solver)
{
double* Pall = NULL;
double* Uall = NULL;
double* Vall = NULL;
int offset[solver->size];
int imaxLocal[solver->size];
int jmaxLocal[solver->size];
MPI_Gather(&solver->imaxLocal, 1, MPI_INT, imaxLocal, 1, MPI_INT, 0, MPI_COMM_WORLD);
MPI_Gather(&solver->jmaxLocal, 1, MPI_INT, jmaxLocal, 1, MPI_INT, 0, MPI_COMM_WORLD);
if (solver->rank == 0) {
Pall = allocate(64, (solver->imax) * (solver->jmax) * sizeof(double));
Uall = allocate(64, (solver->imax) * (solver->jmax) * sizeof(double));
Vall = allocate(64, (solver->imax) * (solver->jmax) * sizeof(double));
for (int i = 0; i < solver->size; i++) {
int coords[2];
MPI_Cart_coords(solver->comm, i, 2, coords);
int ioffset = sum(imaxLocal, coords[0]);
int joffset = sum(jmaxLocal, coords[1]);
offset[i] = (joffset * solver->imax) + ioffset;
printf("Rank: %d, Coords(i,j): %d %d, Size(i,j): %d %d, Offset(i,j): %d %d\n",
i,
coords[0],
coords[1],
imaxLocal[i],
jmaxLocal[i],
ioffset,
joffset);
}
}
/* collect P */
assembleResult(solver, solver->p, Pall, imaxLocal, jmaxLocal, offset);
/* collect U */
assembleResult(solver, solver->u, Uall, imaxLocal, jmaxLocal, offset);
/* collect V */
assembleResult(solver, solver->v, Vall, imaxLocal, jmaxLocal, offset);
/* write to disk */
if (solver->rank == 0) writeResult(solver, Pall, Uall, Vall);
}
/*
 * Print the solver configuration: the global settings once on rank 0,
 * followed by the communication setup of every rank. Has to be called by all
 * ranks because the per-rank part synchronises on MPI_COMM_WORLD.
 */
static void printConfig(Solver* solver)
{
    if (solver->rank == 0) {
        printf("Parameters for #%s#\n", solver->problem);
        printf("Boundary conditions Top:%d Bottom:%d Left:%d Right:%d\n",
            solver->bcTop,
            solver->bcBottom,
            solver->bcLeft,
            solver->bcRight);
        printf("\tReynolds number: %.2f\n", solver->re);
        printf("\tGx Gy: %.2f %.2f\n", solver->gx, solver->gy);
        printf("Geometry data:\n");
        printf("\tDomain box size (x, y): %.2f, %.2f\n",
            solver->xlength,
            solver->ylength);
        printf("\tCells (x, y): %d, %d\n", solver->imax, solver->jmax);
        printf("Timestep parameters:\n");
        printf("\tDefault stepsize: %.2f, Final time %.2f\n", solver->dt, solver->te);
        printf("\tdt bound: %.6f\n", solver->dtBound);
        printf("\tTau factor: %.2f\n", solver->tau);
        printf("Iterative solver parameters:\n");
        printf("\tMax iterations: %d\n", solver->itermax);
        printf("\tepsilon (stopping tolerance) : %f\n", solver->eps);
        printf("\tgamma factor: %f\n", solver->gamma);
        printf("\tomega (SOR relaxation): %f\n", solver->omega);
        printf("Communication parameters:\n");
        fflush(stdout);
    }

    for (int i = 0; i < solver->size; i++) {
        if (i == solver->rank) {
            printf("\tRank %d of %d\n", solver->rank, solver->size);
            printf("\tNeighbours (b, t, l, r): %d, %d, %d, %d\n",
                solver->jNeighbours[0],
                solver->jNeighbours[1],
                solver->iNeighbours[0],
                solver->iNeighbours[1]);
            printf("\tCoordinates %d,%d\n", solver->coords[0], solver->coords[1]);
            printf("\tLocal domain size: %dx%d\n", solver->imaxLocal, solver->jmaxLocal);
            fflush(stdout);
        }
        /* Without a barrier the loop does not order anything; wait here so
         * the ranks print one after another, as print() does. */
        MPI_Barrier(MPI_COMM_WORLD);
    }
}
/*
 * Initialise *solver from *params.
 *
 * Copies the run parameters, creates a non-periodic 2D cartesian communicator
 * over MPI_COMM_WORLD, determines neighbours, coordinates and the size of the
 * local subdomain, builds the datatypes and displacements used for the halo
 * exchange, allocates the local arrays (interior plus one cell on each side)
 * and fills them with their initial values. With VERBOSE defined the
 * resulting configuration is printed.
 *
 * Has to be called by all ranks after MPI_Init().
 */
void initSolver(Solver* solver, Parameter* params)
{
    solver->problem  = params->name;
    solver->bcTop    = params->bcTop;
    solver->bcBottom = params->bcBottom;
    solver->bcLeft   = params->bcLeft;
    solver->bcRight  = params->bcRight;
    solver->imax     = params->imax;
    solver->jmax     = params->jmax;
    solver->xlength  = params->xlength;
    solver->ylength  = params->ylength;
    solver->dx       = params->xlength / params->imax;
    solver->dy       = params->ylength / params->jmax;
    solver->eps      = params->eps;
    solver->omega    = params->omg;
    solver->itermax  = params->itermax;
    solver->re       = params->re;
    solver->gx       = params->gx;
    solver->gy       = params->gy;
    solver->dt       = params->dt;
    solver->te       = params->te;
    solver->tau      = params->tau;
    solver->gamma    = params->gamma;

    /* setup communication */
    MPI_Comm_rank(MPI_COMM_WORLD, &(solver->rank));
    MPI_Comm_size(MPI_COMM_WORLD, &(solver->size));
    int dims[NDIMS]    = { 0, 0 };
    int periods[NDIMS] = { 0, 0 };
    MPI_Dims_create(solver->size, NDIMS, dims);
    /* reorder = 0: ranks in solver->comm equal the ranks in MPI_COMM_WORLD */
    MPI_Cart_create(MPI_COMM_WORLD, NDIMS, dims, periods, 0, &solver->comm);
    MPI_Cart_shift(solver->comm,
        IDIM,
        1,
        &solver->iNeighbours[0],
        &solver->iNeighbours[1]);
    MPI_Cart_shift(solver->comm,
        JDIM,
        1,
        &solver->jNeighbours[0],
        &solver->jNeighbours[1]);
    MPI_Cart_get(solver->comm, NDIMS, solver->dims, periods, solver->coords);

    /* The share along a dimension depends on the position in that dimension,
     * not on the global rank: all ranks with the same i-coordinate need the
     * same imaxLocal, otherwise neighbouring subdomains do not line up and
     * the local sizes do not add up to imax/jmax for uneven splits. */
    solver->imaxLocal = sizeOfRank(solver->coords[IDIM],
        solver->dims[IDIM],
        solver->imax);
    solver->jmaxLocal = sizeOfRank(solver->coords[JDIM],
        solver->dims[JDIM],
        solver->jmax);

    /* one interior row: imaxLocal contiguous values */
    MPI_Datatype jBufferType;
    MPI_Type_contiguous(solver->imaxLocal, MPI_DOUBLE, &jBufferType);
    MPI_Type_commit(&jBufferType);

    /* one interior column: jmaxLocal values, one local row apart */
    MPI_Datatype iBufferType;
    MPI_Type_vector(solver->jmaxLocal,
        1,
        solver->imaxLocal + 2,
        MPI_DOUBLE,
        &iBufferType);
    MPI_Type_commit(&iBufferType);

    // in the order of the dimensions i->0, j->1
    // first negative direction, then positive direction
    size_t dblsize = sizeof(double);
    int imaxLocal  = solver->imaxLocal;
    int jmaxLocal  = solver->jmaxLocal;

    solver->bufferTypes[0] = iBufferType; // left
    solver->bufferTypes[1] = iBufferType; // right
    solver->bufferTypes[2] = jBufferType; // bottom
    solver->bufferTypes[3] = jBufferType; // top

    /* byte displacements relative to the start of a local array */
    solver->sdispls[0] = ((imaxLocal + 2) + 1) * dblsize;                 // send left
    solver->sdispls[1] = ((imaxLocal + 2) + imaxLocal) * dblsize;         // send right
    solver->sdispls[2] = ((imaxLocal + 2) + 1) * dblsize;                 // send bottom
    solver->sdispls[3] = ((jmaxLocal) * (imaxLocal + 2) + 1) * dblsize;   // send top

    solver->rdispls[0] = (imaxLocal + 2) * dblsize;                         // recv left
    solver->rdispls[1] = ((imaxLocal + 2) + (imaxLocal + 1)) * dblsize;     // recv right
    solver->rdispls[2] = 1 * dblsize;                                       // recv bottom
    solver->rdispls[3] = ((jmaxLocal + 1) * (imaxLocal + 2) + 1) * dblsize; // recv top

    /* allocate arrays; widen before multiplying so the byte count cannot
     * overflow int for large local grids */
    size_t bytesize = (size_t)(imaxLocal + 2) * (size_t)(jmaxLocal + 2) * sizeof(double);
    solver->u       = allocate(64, bytesize);
    solver->v       = allocate(64, bytesize);
    solver->p       = allocate(64, bytesize);
    solver->rhs     = allocate(64, bytesize);
    solver->f       = allocate(64, bytesize);
    solver->g       = allocate(64, bytesize);

    for (int i = 0; i < (imaxLocal + 2) * (jmaxLocal + 2); i++) {
        solver->u[i]   = params->u_init;
        solver->v[i]   = params->v_init;
        solver->p[i]   = params->p_init;
        solver->rhs[i] = 0.0;
        solver->f[i]   = 0.0;
        solver->g[i]   = 0.0;
    }

    /* upper bound for the time step used by computeTimestep() */
    double dx          = solver->dx;
    double dy          = solver->dy;
    double inv_sqr_sum = 1.0 / (dx * dx) + 1.0 / (dy * dy);
    solver->dtBound    = 0.5 * solver->re * 1.0 / inv_sqr_sum;
#ifdef VERBOSE
    printConfig(solver);
#endif
}
/*
 * Fill the right hand side of the pressure equation for all interior cells:
 * the discrete divergence of (F, G) divided by the time step. The required
 * F and G values of the left and bottom neighbour are fetched with shift()
 * first.
 */
void computeRHS(Solver* solver)
{
    const int imaxLocal = solver->imaxLocal;
    const int jmaxLocal = solver->jmaxLocal;
    const double idx    = 1.0 / solver->dx;
    const double idy    = 1.0 / solver->dy;
    const double idt    = 1.0 / solver->dt;
    double* rhs         = solver->rhs;
    double* f           = solver->f;
    double* g           = solver->g;

    shift(solver);

    for (int j = 1; j <= jmaxLocal; j++) {
        for (int i = 1; i <= imaxLocal; i++) {
            const double dFdx = (F(i, j) - F(i - 1, j)) * idx;
            const double dGdy = (G(i, j) - G(i, j - 1)) * idy;

            RHS(i, j) = (dFdx + dGdy) * idt;
        }
    }
}
/*
 * Solve the pressure equation with an SOR iteration on the local subdomain.
 *
 * Every iteration exchanges the pressure halo, sweeps over the interior
 * cells, copies the pressure onto the outer layer on physical domain
 * boundaries and reduces the squared residual over all ranks. The iteration
 * stops when the mean squared residual drops below eps * eps or after
 * itermax iterations.
 *
 * Returns 0 if the stopping tolerance was reached, 1 otherwise.
 */
int solve(Solver* solver)
{
    int imax      = solver->imax;
    int jmax      = solver->jmax;
    int imaxLocal = solver->imaxLocal;
    int jmaxLocal = solver->jmaxLocal;
    double eps    = solver->eps;
    int itermax   = solver->itermax;
    double dx2    = solver->dx * solver->dx;
    double dy2    = solver->dy * solver->dy;
    double idx2   = 1.0 / dx2;
    double idy2   = 1.0 / dy2;
    double factor = solver->omega * 0.5 * (dx2 * dy2) / (dx2 + dy2);
    double* p     = solver->p;
    double* rhs   = solver->rhs;
    /* the residual is compared in squared form to avoid a sqrt per sweep */
    double epssq = eps * eps;
    /* number of global cells as double: imax * jmax may overflow an int */
    double cells = (double)imax * (double)jmax;
    int it       = 0;
    double res   = 1.0;

    while ((res >= epssq) && (it < itermax)) {
        res = 0.0;
        exchange(solver, p);

        for (int j = 1; j < jmaxLocal + 1; j++) {
            for (int i = 1; i < imaxLocal + 1; i++) {

                double r = RHS(i, j) -
                           ((P(i + 1, j) - 2.0 * P(i, j) + P(i - 1, j)) * idx2 +
                               (P(i, j + 1) - 2.0 * P(i, j) + P(i, j - 1)) * idy2);

                P(i, j) -= (factor * r);
                res += (r * r);
            }
        }

        if (solver->coords[JDIM] == 0) { // set bottom bc
            for (int i = 1; i < imaxLocal + 1; i++) {
                P(i, 0) = P(i, 1);
            }
        }

        if (solver->coords[JDIM] == (solver->dims[JDIM] - 1)) { // set top bc
            for (int i = 1; i < imaxLocal + 1; i++) {
                P(i, jmaxLocal + 1) = P(i, jmaxLocal);
            }
        }

        if (solver->coords[IDIM] == 0) { // set left bc
            for (int j = 1; j < jmaxLocal + 1; j++) {
                P(0, j) = P(1, j);
            }
        }

        if (solver->coords[IDIM] == (solver->dims[IDIM] - 1)) { // set right bc
            for (int j = 1; j < jmaxLocal + 1; j++) {
                P(imaxLocal + 1, j) = P(imaxLocal, j);
            }
        }

        MPI_Allreduce(MPI_IN_PLACE, &res, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
        res = res / cells;
#ifdef DEBUG
        if (solver->rank == 0) {
            printf("%d Residuum: %e\n", it, res);
        }
#endif
        it++;
    }

#ifdef VERBOSE
    if (solver->rank == 0) {
        printf("Solver took %d iterations to reach %f\n", it, sqrt(res));
    }
#endif

    /* Use the same squared criterion as the loop. Comparing against eps
     * would report success for runs that stopped at itermax with
     * eps * eps <= res < eps. */
    if (res < epssq) {
        return 0;
    } else {
        return 1;
    }
}
/*
 * Largest absolute value in the array `m` over all ranks.
 *
 * The local scan covers the complete local array, i.e. all
 * (imaxLocal + 2) * (jmaxLocal + 2) entries; the result is then reduced with
 * MPI_MAX over MPI_COMM_WORLD, so every rank has to call this function.
 */
static double maxElement(Solver* solver, double* m)
{
    int size = (solver->imaxLocal + 2) * (solver->jmaxLocal + 2);
    /* Absolute values are never negative, so 0.0 is the neutral start value.
     * DBL_MIN is the smallest positive normal double, not the most negative
     * one, and would be returned as "maximum" of an all-zero array. */
    double maxval = 0.0;

    for (int i = 0; i < size; i++) {
        double val = fabs(m[i]);
        if (val > maxval) {
            maxval = val;
        }
    }

    MPI_Allreduce(MPI_IN_PLACE, &maxval, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD);
    return maxval;
}
/*
 * Choose the next time step: the smallest of dtBound, dx / max|u| and
 * dy / max|v|, scaled by the safety factor tau. The velocity limits are only
 * applied for a non-zero maximum. Has to be called by all ranks, because the
 * maxima are reduced globally.
 */
void computeTimestep(Solver* solver)
{
    double step = solver->dtBound;

    /* keep the order: each call is a collective operation */
    const double umax = maxElement(solver, solver->u);
    const double vmax = maxElement(solver, solver->v);

    if (umax > 0) {
        const double limitX = solver->dx / umax;
        if (step > limitX) {
            step = limitX;
        }
    }

    if (vmax > 0) {
        const double limitY = solver->dy / vmax;
        if (step > limitY) {
            step = limitY;
        }
    }

    solver->dt = step * solver->tau;
}
/*
 * Apply the configured velocity boundary conditions (bcTop, bcBottom,
 * bcRight, bcLeft) to u and v. Each side is only handled by the ranks whose
 * cartesian coordinate places them at that side of the process grid.
 *
 * Per side: NOSLIP sets the wall-normal velocity to zero and the outer
 * tangential value to the negative of its interior neighbour; SLIP sets the
 * wall-normal velocity to zero and copies the tangential value; OUTFLOW
 * copies both components from the neighbouring interior values; PERIODIC
 * does nothing here. Any other flag value leaves the side untouched.
 */
void setBoundaryConditions(Solver* solver)
{
int imaxLocal = solver->imaxLocal;
int jmaxLocal = solver->jmaxLocal;
double* u = solver->u;
double* v = solver->v;
// Northern boundary
if (solver->coords[JDIM] == (solver->dims[JDIM] - 1)) { // set top bc
switch (solver->bcTop) {
case NOSLIP:
for (int i = 1; i < imaxLocal + 1; i++) {
V(i, jmaxLocal) = 0.0;
U(i, jmaxLocal + 1) = -U(i, jmaxLocal);
}
break;
case SLIP:
for (int i = 1; i < imaxLocal + 1; i++) {
V(i, jmaxLocal) = 0.0;
U(i, jmaxLocal + 1) = U(i, jmaxLocal);
}
break;
case OUTFLOW:
for (int i = 1; i < imaxLocal + 1; i++) {
U(i, jmaxLocal + 1) = U(i, jmaxLocal);
V(i, jmaxLocal) = V(i, jmaxLocal - 1);
}
break;
case PERIODIC:
break;
}
}
// Southern boundary
if (solver->coords[JDIM] == 0) { // set bottom bc
switch (solver->bcBottom) {
case NOSLIP:
for (int i = 1; i < imaxLocal + 1; i++) {
V(i, 0) = 0.0;
U(i, 0) = -U(i, 1);
}
break;
case SLIP:
for (int i = 1; i < imaxLocal + 1; i++) {
V(i, 0) = 0.0;
U(i, 0) = U(i, 1);
}
break;
case OUTFLOW:
for (int i = 1; i < imaxLocal + 1; i++) {
U(i, 0) = U(i, 1);
V(i, 0) = V(i, 1);
}
break;
case PERIODIC:
break;
}
}
// Eastern boundary
if (solver->coords[IDIM] == (solver->dims[IDIM] - 1)) { // set right bc
switch (solver->bcRight) {
case NOSLIP:
for (int j = 1; j < jmaxLocal + 1; j++) {
U(imaxLocal, j) = 0.0;
V(imaxLocal + 1, j) = -V(imaxLocal, j);
}
break;
case SLIP:
for (int j = 1; j < jmaxLocal + 1; j++) {
U(imaxLocal, j) = 0.0;
V(imaxLocal + 1, j) = V(imaxLocal, j);
}
break;
case OUTFLOW:
for (int j = 1; j < jmaxLocal + 1; j++) {
U(imaxLocal, j) = U(imaxLocal - 1, j);
V(imaxLocal + 1, j) = V(imaxLocal, j);
}
break;
case PERIODIC:
break;
}
}
// Western boundary
if (solver->coords[IDIM] == 0) { // set left bc
switch (solver->bcLeft) {
case NOSLIP:
for (int j = 1; j < jmaxLocal + 1; j++) {
U(0, j) = 0.0;
V(0, j) = -V(1, j);
}
break;
case SLIP:
for (int j = 1; j < jmaxLocal + 1; j++) {
U(0, j) = 0.0;
V(0, j) = V(1, j);
}
break;
case OUTFLOW:
for (int j = 1; j < jmaxLocal + 1; j++) {
U(0, j) = U(1, j);
V(0, j) = V(1, j);
}
break;
case PERIODIC:
break;
}
}
}
/*
 * Apply the boundary values that are specific to the configured problem:
 *
 *  "dcavity"  the ranks at the top of the process grid set the outer u row
 *             so that the mean of outer and inner value is 1.0 (moving lid)
 *  "canal"    the ranks at the left of the process grid prescribe a parabolic
 *             inflow profile for u
 *
 * Any other problem name leaves the fields untouched.
 */
void setSpecialBoundaryCondition(Solver* solver)
{
    int imaxLocal = solver->imaxLocal;
    int jmaxLocal = solver->jmaxLocal;
    double* u     = solver->u;

    if (strcmp(solver->problem, "dcavity") == 0) {
        if (solver->coords[JDIM] == (solver->dims[JDIM] - 1)) { // set top bc
            for (int i = 1; i < imaxLocal + 1; i++) {
                U(i, jmaxLocal + 1) = 2.0 - U(i, jmaxLocal);
            }
        }
    } else if (strcmp(solver->problem, "canal") == 0) {
        if (solver->coords[IDIM] == 0) { // set left bc
            double ylength = solver->ylength;
            double dy      = solver->dy;
            /* Number of global rows below this subdomain. The rows are split
             * over the ranks along j only, so position and count have to come
             * from the cartesian topology; the total number of ranks is only
             * correct for a process grid with a single column. */
            int jprocs = solver->dims[JDIM];
            int jcoord = solver->coords[JDIM];
            int rest   = solver->jmax % jprocs;
            int yc     = jcoord * (solver->jmax / jprocs) + MIN(rest, jcoord);
            /* NOTE(review): together with the (j - 0.5) below, the extra 0.5
             * puts the first cell at y = dy instead of 0.5 * dy - confirm
             * whether this half cell shift is intended. */
            double ys = dy * (yc + 0.5);
            double y;

            /* printf("RANK %d yc: %d ys: %f\n", solver->rank, yc, ys); */
            for (int j = 1; j < jmaxLocal + 1; j++) {
                y       = ys + dy * (j - 0.5);
                U(0, j) = y * (ylength - y) * 4.0 / (ylength * ylength);
            }
        }
    }
    /* print(solver, solver->u); */
}
/*
 * Compute the intermediate fields F and G for all interior cells from the
 * current velocities: velocity plus dt times (diffusion scaled by 1/re, minus
 * the convective terms, plus the body force gx resp. gy). The convective
 * terms are a blend of a centered part and a part weighted with gamma that
 * uses the absolute value of the transporting velocity.
 *
 * The halos of u and v are exchanged first. Ranks at a physical boundary
 * afterwards overwrite F resp. G on that boundary with the velocity itself.
 */
void computeFG(Solver* solver)
{
double* u = solver->u;
double* v = solver->v;
double* f = solver->f;
double* g = solver->g;
int imaxLocal = solver->imaxLocal;
int jmaxLocal = solver->jmaxLocal;
double gx = solver->gx;
double gy = solver->gy;
double gamma = solver->gamma;
double dt = solver->dt;
double inverseRe = 1.0 / solver->re;
double inverseDx = 1.0 / solver->dx;
double inverseDy = 1.0 / solver->dy;
double du2dx, dv2dy, duvdx, duvdy;
double du2dx2, du2dy2, dv2dx2, dv2dy2;
/* the stencils below read neighbours in all four directions */
exchange(solver, u);
exchange(solver, v);
for (int j = 1; j < jmaxLocal + 1; j++) {
for (int i = 1; i < imaxLocal + 1; i++) {
/* convective terms of the u equation */
du2dx = inverseDx * 0.25 *
((U(i, j) + U(i + 1, j)) * (U(i, j) + U(i + 1, j)) -
(U(i, j) + U(i - 1, j)) * (U(i, j) + U(i - 1, j))) +
gamma * inverseDx * 0.25 *
(fabs(U(i, j) + U(i + 1, j)) * (U(i, j) - U(i + 1, j)) +
fabs(U(i, j) + U(i - 1, j)) * (U(i, j) - U(i - 1, j)));
duvdy = inverseDy * 0.25 *
((V(i, j) + V(i + 1, j)) * (U(i, j) + U(i, j + 1)) -
(V(i, j - 1) + V(i + 1, j - 1)) * (U(i, j) + U(i, j - 1))) +
gamma * inverseDy * 0.25 *
(fabs(V(i, j) + V(i + 1, j)) * (U(i, j) - U(i, j + 1)) +
fabs(V(i, j - 1) + V(i + 1, j - 1)) *
(U(i, j) - U(i, j - 1)));
/* second differences of u in x and y */
du2dx2 = inverseDx * inverseDx * (U(i + 1, j) - 2.0 * U(i, j) + U(i - 1, j));
du2dy2 = inverseDy * inverseDy * (U(i, j + 1) - 2.0 * U(i, j) + U(i, j - 1));
F(i, j) = U(i, j) + dt * (inverseRe * (du2dx2 + du2dy2) - du2dx - duvdy + gx);
/* convective terms of the v equation */
duvdx = inverseDx * 0.25 *
((U(i, j) + U(i, j + 1)) * (V(i, j) + V(i + 1, j)) -
(U(i - 1, j) + U(i - 1, j + 1)) * (V(i, j) + V(i - 1, j))) +
gamma * inverseDx * 0.25 *
(fabs(U(i, j) + U(i, j + 1)) * (V(i, j) - V(i + 1, j)) +
fabs(U(i - 1, j) + U(i - 1, j + 1)) *
(V(i, j) - V(i - 1, j)));
dv2dy = inverseDy * 0.25 *
((V(i, j) + V(i, j + 1)) * (V(i, j) + V(i, j + 1)) -
(V(i, j) + V(i, j - 1)) * (V(i, j) + V(i, j - 1))) +
gamma * inverseDy * 0.25 *
(fabs(V(i, j) + V(i, j + 1)) * (V(i, j) - V(i, j + 1)) +
fabs(V(i, j) + V(i, j - 1)) * (V(i, j) - V(i, j - 1)));
/* second differences of v in x and y */
dv2dx2 = inverseDx * inverseDx * (V(i + 1, j) - 2.0 * V(i, j) + V(i - 1, j));
dv2dy2 = inverseDy * inverseDy * (V(i, j + 1) - 2.0 * V(i, j) + V(i, j - 1));
G(i, j) = V(i, j) + dt * (inverseRe * (dv2dx2 + dv2dy2) - duvdx - dv2dy + gy);
}
}
/* ----------------------------- boundary of F --------------------------- */
if (solver->coords[IDIM] == 0) { // set left bc
for (int j = 1; j < jmaxLocal + 1; j++) {
F(0, j) = U(0, j);
}
}
if (solver->coords[IDIM] == (solver->dims[IDIM] - 1)) { // set right bc
for (int j = 1; j < jmaxLocal + 1; j++) {
F(imaxLocal, j) = U(imaxLocal, j);
}
}
/* ----------------------------- boundary of G --------------------------- */
if (solver->coords[JDIM] == 0) { // set bottom bc
for (int i = 1; i < imaxLocal + 1; i++) {
G(i, 0) = V(i, 0);
}
}
if (solver->coords[JDIM] == (solver->dims[JDIM] - 1)) { // set top bc
for (int i = 1; i < imaxLocal + 1; i++) {
G(i, jmaxLocal) = V(i, jmaxLocal);
}
}
}
/*
 * Update the velocities of all interior cells from F and G and the forward
 * pressure differences scaled by dt/dx resp. dt/dy.
 */
void adaptUV(Solver* solver)
{
    const int imaxLocal = solver->imaxLocal;
    const int jmaxLocal = solver->jmaxLocal;
    double* p           = solver->p;
    double* u           = solver->u;
    double* v           = solver->v;
    double* f           = solver->f;
    double* g           = solver->g;
    const double dtByDx = solver->dt / solver->dx;
    const double dtByDy = solver->dt / solver->dy;

    for (int j = 1; j <= jmaxLocal; j++) {
        for (int i = 1; i <= imaxLocal; i++) {
            const double dpdi = P(i + 1, j) - P(i, j);
            const double dpdj = P(i, j + 1) - P(i, j);

            U(i, j) = F(i, j) - dpdi * dtByDx;
            V(i, j) = G(i, j) - dpdj * dtByDy;
        }
    }
}
/* Open `path` for writing; print "Error!" and exit if that fails. */
static FILE* openResultFile(const char* path)
{
    FILE* fp = fopen(path, "w");

    if (fp == NULL) {
        printf("Error!\n");
        exit(EXIT_FAILURE);
    }
    return fp;
}

/*
 * Write the assembled global fields to "pressure.dat" (x y p, with a blank
 * line after every row) and "velocity.dat" (x y u v |(u,v)|). The arrays are
 * indexed as [j * imax + i]; both files cover the indices 1 .. imax-1 and
 * 1 .. jmax-1, and the velocity components are averaged with the neighbour
 * at i - 1 resp. j - 1.
 */
void writeResult(Solver* solver, double* p, double* u, double* v)
{
    const int imax  = solver->imax;
    const int jmax  = solver->jmax;
    const double dx = solver->dx;
    const double dy = solver->dy;
    FILE* out;

    /* pressure */
    out = openResultFile("pressure.dat");
    for (int j = 1; j < jmax; j++) {
        const double y = (double)(j - 0.5) * dy;

        for (int i = 1; i < imax; i++) {
            const double x = (double)(i - 0.5) * dx;

            fprintf(out, "%.2f %.2f %f\n", x, y, p[j * (imax) + i]);
        }
        fprintf(out, "\n");
    }
    fclose(out);

    /* velocity */
    out = openResultFile("velocity.dat");
    for (int j = 1; j < jmax; j++) {
        const double y = dy * (j - 0.5);

        for (int i = 1; i < imax; i++) {
            const double x   = dx * (i - 0.5);
            const int center = j * (imax) + i;
            double vel_u     = (u[center] + u[j * (imax) + (i - 1)]) / 2.0;
            double vel_v     = (v[center] + v[(j - 1) * (imax) + i]) / 2.0;
            double len       = sqrt((vel_u * vel_u) + (vel_v * vel_v));

            fprintf(out, "%.2f %.2f %f %f %f\n", x, y, vel_u, vel_v, len);
        }
    }
    fclose(out);
}

View File

@ -0,0 +1,58 @@
/*
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
* All rights reserved. This file is part of nusif-solver.
* Use of this source code is governed by a MIT style
* license that can be found in the LICENSE file.
*/
#ifndef __SOLVER_H_
#define __SOLVER_H_
#include "parameter.h"
#include <mpi.h>
/* number of dimensions of the cartesian process grid */
#define NDIMS 2
/* boundary condition flags as used in the parameter files:
 * 1 = no-slip, 2 = free-slip, 3 = outflow, 4 = periodic */
enum BC { NOSLIP = 1, SLIP, OUTFLOW, PERIODIC };
/* Complete state of one rank of the solver; set up by initSolver(). */
typedef struct {
/* geometry and grid information */
/* mesh width, global number of cells, domain size */
double dx, dy;
int imax, jmax;
double xlength, ylength;
/* arrays */
/* local fields, each (imaxLocal + 2) * (jmaxLocal + 2) doubles */
double *p, *rhs;
double *f, *g;
double *u, *v;
/* parameters */
double eps, omega;
double re, tau, gamma;
double gx, gy;
/* time stepping */
int itermax;
double dt, te;
/* upper bound for dt, computed in initSolver() */
double dtBound;
/* problem name; points to the string owned by the Parameter struct */
char* problem;
int bcLeft, bcRight, bcBottom, bcTop;
/* mpi */
/* rank and size in MPI_COMM_WORLD */
int rank;
int size;
/* cartesian communicator */
MPI_Comm comm;
/* halo datatypes and byte displacements, ordered left, right, bottom, top */
MPI_Datatype bufferTypes[NDIMS * 2];
MPI_Aint sdispls[NDIMS * 2], rdispls[NDIMS * 2];
/* neighbour ranks per dimension: [0] negative, [1] positive direction */
int iNeighbours[NDIMS], jNeighbours[NDIMS];
/* own position in and extent of the process grid */
int coords[NDIMS], dims[NDIMS];
/* number of interior cells of the local subdomain */
int imaxLocal, jmaxLocal;
} Solver;
/* setup: copy parameters, create the topology, allocate and fill the arrays */
void initSolver(Solver*, Parameter*);
/* steps of one time step, in the order the main loop calls them after
 * the boundary condition functions: computeFG, computeRHS, solve, adaptUV */
void computeRHS(Solver*);
int solve(Solver*);
void computeTimestep(Solver*);
void setBoundaryConditions(Solver*);
void setSpecialBoundaryCondition(Solver*);
void computeFG(Solver*);
void adaptUV(Solver*);
/* output: gather the fields on rank 0 and write them to disk */
void collectResult(Solver*);
void writeResult(Solver*, double*, double*, double*);
/* debugging helpers */
void debugExchange(Solver*);
void print(Solver*, double*);
#endif

View File

@ -0,0 +1,24 @@
/*
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
* All rights reserved.
* Use of this source code is governed by a MIT-style
* license that can be found in the LICENSE file.
*/
#include <stdlib.h>
#include <time.h>
/*
 * Current reading of CLOCK_MONOTONIC in seconds.
 */
double getTimeStamp()
{
    struct timespec now;
    double seconds;
    double fraction;

    clock_gettime(CLOCK_MONOTONIC, &now);
    seconds  = (double)now.tv_sec;
    fraction = (double)now.tv_nsec * 1.e-9;
    return seconds + fraction;
}
/*
 * Resolution of CLOCK_MONOTONIC in seconds, as reported by clock_getres().
 */
double getTimeResolution()
{
    struct timespec res;
    double seconds;
    double fraction;

    clock_getres(CLOCK_MONOTONIC, &res);
    seconds  = (double)res.tv_sec;
    fraction = (double)res.tv_nsec * 1.e-9;
    return seconds + fraction;
}
/* Same as getTimeStamp(), exported under a name with trailing underscore. */
double getTimeStamp_()
{
    const double stamp = getTimeStamp();

    return stamp;
}

View File

@ -0,0 +1,14 @@
/*
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
* All rights reserved.
* Use of this source code is governed by a MIT-style
* license that can be found in the LICENSE file.
*/
#ifndef __TIMING_H_
#define __TIMING_H_
/* monotonic clock reading in seconds */
extern double getTimeStamp();
/* resolution of the monotonic clock in seconds */
extern double getTimeResolution();
/* forwards to getTimeStamp() */
extern double getTimeStamp_();
#endif // __TIMING_H_

View File

@ -0,0 +1,22 @@
/*
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
* All rights reserved.
* Use of this source code is governed by a MIT-style
* license that can be found in the LICENSE file.
*/
#ifndef __UTIL_H_
#define __UTIL_H_
/* horizontal separator line for console output */
#define HLINE \
"----------------------------------------------------------------------------\n"
/* The helpers below are only defined if no other header provided them.
 * They are plain macros: an argument may be evaluated twice, so do not pass
 * expressions with side effects. */
#ifndef MIN
#define MIN(x, y) ((x) < (y) ? (x) : (y))
#endif
#ifndef MAX
#define MAX(x, y) ((x) > (y) ? (x) : (y))
#endif
#ifndef ABS
#define ABS(a) ((a) >= 0 ? (a) : -(a))
#endif
#endif // __UTIL_H_

View File

@ -0,0 +1,7 @@
# Render the pressure field written by the solver as a 3D surface.
# Input:  pressure.dat with the columns x, y, p and a blank line after
#         every grid row
# Output: p.png
set terminal png size 1024,768 enhanced font ,12
set output 'p.png'
set datafile separator whitespace
set grid
set hidden3d
splot 'pressure.dat' using 1:2:3 with lines

View File

@ -0,0 +1,5 @@
# Render the velocity field written by the solver as a vector plot.
# Input:  velocity.dat with the columns x, y, u, v, length of (u, v);
#         the fifth column selects the palette colour of each arrow
# Output: velocity.png
set terminal png size 1800,768 enhanced font ,12
set output 'velocity.png'
set datafile separator whitespace
plot 'velocity.dat' using 1:2:3:4:5 with vectors filled head size 0.01,20,60 lc palette

View File

@ -0,0 +1,71 @@
#=======================================================================================
# Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
# All rights reserved.
# Use of this source code is governed by a MIT-style
# license that can be found in the LICENSE file.
#=======================================================================================
#CONFIGURE BUILD SYSTEM
# TAG is set in config.mk (included below) or on the command line. TARGET and
# BUILD_DIR are recursively expanded, so they pick up TAG when they are used.
TARGET = exe-$(TAG)
BUILD_DIR = ./$(TAG)
SRC_DIR = ./src
MAKE_DIR = ./
# Q prefixes the recipe lines that should stay quiet; run "make Q=" to see them.
Q ?= @
#DO NOT EDIT BELOW
# config.mk selects the toolchain (TAG) and the feature OPTIONS,
# include_$(TAG).mk provides the compiler, linker and flags for that toolchain.
include $(MAKE_DIR)/config.mk
include $(MAKE_DIR)/include_$(TAG).mk
INCLUDES += -I$(SRC_DIR) -I$(BUILD_DIR)
# lets the pattern rules find "%.c" inside SRC_DIR
VPATH = $(SRC_DIR)
SRC = $(wildcard $(SRC_DIR)/*.c)
ASM = $(patsubst $(SRC_DIR)/%.c, $(BUILD_DIR)/%.s, $(SRC))
OBJ = $(patsubst $(SRC_DIR)/%.c, $(BUILD_DIR)/%.o, $(SRC))
# all sources and headers, used by the format target
SOURCES = $(SRC) $(wildcard $(SRC_DIR)/*.h)
CPPFLAGS := $(CPPFLAGS) $(DEFINES) $(OPTIONS) $(INCLUDES)
# Link the executable. This is the first rule and therefore the default goal.
# The build directory is an order-only prerequisite (after the "|"): it has to
# exist, but its timestamp, which changes with every file written into it,
# must not trigger a relink.
${TARGET}: $(OBJ) | $(BUILD_DIR)
	$(info ===> LINKING $(TARGET))
	$(Q)${LINKER} ${LFLAGS} -o $(TARGET) $(OBJ) $(LIBS)

# Compile one source file and write its header dependencies to a .d file next
# to the object. Objects are rebuilt when the toolchain configuration changes.
# The order-only prerequisite makes sure the directory exists before any
# compile starts, also with "make -j".
$(BUILD_DIR)/%.o: %.c $(MAKE_DIR)/include_$(TAG).mk $(MAKE_DIR)/config.mk | $(BUILD_DIR)
	$(info ===> COMPILE $@)
	$(CC) -c $(CPPFLAGS) $(CFLAGS) $< -o $@
	$(Q)$(GCC) $(CPPFLAGS) -MT $@ -MM $< > $(BUILD_DIR)/$*.d

# Generate assembly for one source file.
$(BUILD_DIR)/%.s: %.c | $(BUILD_DIR)
	$(info ===> GENERATE ASM $@)
	$(CC) -S $(CPPFLAGS) $(CFLAGS) $< -o $@
.PHONY: clean distclean tags info asm format

# Remove the intermediate build results of the current toolchain.
clean:
	$(info ===> CLEAN)
	@rm -rf $(BUILD_DIR)
	@rm -f tags

# Additionally remove the executable of the current toolchain.
distclean: clean
	$(info ===> DIST CLEAN)
	@rm -f $(TARGET)

# Show the compiler flags and the compiler version.
info:
	$(info $(CFLAGS))
	$(Q)$(CC) $(VERSION)

# Generate assembly for all sources; the directory only has to exist.
asm: $(ASM) | $(BUILD_DIR)

tags:
	$(info ===> GENERATE TAGS)
	$(Q)ctags -R

# Run clang-format in place on all sources and headers.
format:
	@for src in $(SOURCES) ; do \
		echo "Formatting $$src" ; \
		clang-format -i $$src ; \
	done
	@echo "Done"

# -p: do not fail if the directory already exists
$(BUILD_DIR):
	@mkdir -p $(BUILD_DIR)

# Header dependencies written by the compile rule; missing on a fresh build.
-include $(OBJ:.o=.d)

View File

@ -0,0 +1,48 @@
# C source skeleton
## Build
1. Configure the toolchain and additional options in `config.mk`:
```
# Supported: GCC, CLANG, ICC
TAG ?= GCC
ENABLE_OPENMP ?= false
OPTIONS += -DARRAY_ALIGNMENT=64
#OPTIONS += -DVERBOSE_AFFINITY
#OPTIONS += -DVERBOSE_DATASIZE
#OPTIONS += -DVERBOSE_TIMER
```
The verbosity options enable detailed output about affinity settings, allocation sizes and timer resolution.
2. Build with:
```
make
```
You can build with multiple toolchains in the same directory, but note that the Makefile only acts on the toolchain currently selected by `TAG`.
Intermediate build results are located in the `<TOOLCHAIN>` directory.
To output the executed commands use:
```
make Q=
```
3. Clean up with:
```
make clean
```
to clean intermediate build results.
```
make distclean
```
to clean intermediate build results and binary.
4. (Optional) Generate assembler:
```
make asm
```
The assembler files will also be located in the `<TOOLCHAIN>` directory.

View File

@ -0,0 +1,46 @@
#==============================================================================
# Laminar Canal Flow
#==============================================================================
# Problem specific Data:
# ---------------------
name canal # name of flow setup
# The parameter reader only knows the keys bcTop, bcRight, bcBottom and
# bcLeft; the former keys bcN, bcE, bcS and bcW were silently ignored.
bcTop 1 # flags for boundary conditions
bcRight 3 # 1 = no-slip 3 = outflow
bcBottom 1 # 2 = free-slip 4 = periodic
bcLeft 3 #
gx 0.0 # Body forces (e.g. gravity)
gy 0.0 #
re 100.0 # Reynolds number
u_init 1.0 # initial value for velocity in x-direction
v_init 0.0 # initial value for velocity in y-direction
p_init 0.0 # initial value for pressure
# Geometry Data:
# -------------
xlength 30.0 # domain size in x-direction
ylength 4.0 # domain size in y-direction
imax 200 # number of interior cells in x-direction
jmax 50 # number of interior cells in y-direction
# Time Data:
# ---------
te 100.0 # final time
dt 0.02 # time stepsize
tau 0.5 # safety factor for adaptive time step control (<= 0: keep the constant step dt)
# Pressure Iteration Data:
# -----------------------
itermax 500 # maximal number of pressure iteration in one time step
eps 0.00001 # stopping tolerance for pressure iteration
omg 1.8 # relaxation parameter for SOR iteration
gamma 0.9 # upwind differencing factor gamma
#===============================================================================

View File

@ -0,0 +1,10 @@
# Toolchain selection: the Makefile includes include_$(TAG).mk and places
# objects in ./$(TAG) and the executable in exe-$(TAG).
# Supported: GCC, CLANG, ICC
TAG ?= CLANG
# "true" adds the OpenMP flags defined in the toolchain include file
ENABLE_OPENMP ?= false
#Feature options
# OPTIONS end up in CPPFLAGS, i.e. on every compile command line
OPTIONS += -DARRAY_ALIGNMENT=64
# VERBOSE prints the solver configuration and the time of every step
#OPTIONS += -DVERBOSE
#OPTIONS += -DVERBOSE_AFFINITY
#OPTIONS += -DVERBOSE_DATASIZE
#OPTIONS += -DVERBOSE_TIMER

View File

@ -0,0 +1,46 @@
#==============================================================================
# Driven Cavity
#==============================================================================
# Problem specific Data:
# ---------------------
name dcavity # name of flow setup
bcTop 1 # flags for boundary conditions
bcBottom 1 # 1 = no-slip 3 = outflow
bcLeft 1 # 2 = free-slip 4 = periodic
bcRight 1 #
gx 0.0 # Body forces (e.g. gravity)
gy 0.0 #
re 10.0 # Reynolds number
u_init 0.0 # initial value for velocity in x-direction
v_init 0.0 # initial value for velocity in y-direction
p_init 0.0 # initial value for pressure
# Geometry Data:
# -------------
xlength 1.0 # domain size in x-direction
ylength 1.0 # domain size in y-direction
imax 100 # number of interior cells in x-direction
jmax 100 # number of interior cells in y-direction
# Time Data:
# ---------
te 5.0 # final time
dt 0.02 # time stepsize
tau 0.5 # safety factor for time stepsize control (<0 constant delt)
# Pressure Iteration Data:
# -----------------------
itermax 1000 # maximal number of pressure iteration in one time step
eps 0.001 # stopping tolerance for pressure iteration
omg 1.7 # relaxation parameter for SOR iteration
gamma 0.9 # upwind differencing factor gamma
#===============================================================================

BIN
BasicSolver/2D-mpi/exe-CLANG Executable file

Binary file not shown.

View File

@ -0,0 +1,16 @@
# Toolchain settings for TAG=CLANG, included by the Makefile.
# MPI compiler wrapper used to compile (and, via LINKER, to link) the sources.
CC = mpicc
# Compiler the Makefile invokes with -MM to generate the .d dependency files.
GCC = cc
LINKER = $(CC)
# OpenMP flags are only defined when ENABLE_OPENMP (see config.mk) is true;
# otherwise $(OPENMP) expands to nothing.
ifeq ($(ENABLE_OPENMP),true)
OPENMP = -fopenmp
#OPENMP = -Xpreprocessor -fopenmp #required on Macos with homebrew libomp
LIBS = # -lomp
endif
# Argument passed to $(CC) by the "info" target.
VERSION = --version
CFLAGS = -Ofast -std=c99 $(OPENMP)
#CFLAGS = -Ofast -fnt-store=aggressive -std=c99 $(OPENMP) #AMD CLANG
LFLAGS = $(OPENMP)
# Append -DDEBUG here to enable the DEBUG-guarded output in the sources.
DEFINES = -D_GNU_SOURCE# -DDEBUG
INCLUDES = -I/usr/local/include

View File

@ -0,0 +1,14 @@
# Toolchain settings for TAG=GCC, included by the Makefile.
# The sources include <mpi.h> and call MPI routines, so they must be compiled
# and linked through the MPI compiler wrapper (as the CLANG and ICC
# configurations do); plain gcc neither finds the header nor links the library.
CC = mpicc
# Compiler the Makefile invokes with -MM to generate the .d dependency files.
GCC = gcc
LINKER = $(CC)
# OpenMP flag is only defined when ENABLE_OPENMP (see config.mk) is true.
ifeq ($(ENABLE_OPENMP),true)
OPENMP = -fopenmp
endif
# Argument passed to $(CC) by the "info" target.
VERSION = --version
CFLAGS = -Ofast -ffreestanding -std=c99 $(OPENMP)
LFLAGS = $(OPENMP)
DEFINES = -D_GNU_SOURCE
INCLUDES =
LIBS =

View File

@ -0,0 +1,14 @@
# Toolchain settings for TAG=ICC, included by the Makefile.
# MPI compiler wrapper used to compile (and, via LINKER, to link) the sources.
CC = mpiicc
# Compiler the Makefile invokes with -MM to generate the .d dependency files.
GCC = gcc
LINKER = $(CC)
# OpenMP flag is only defined when ENABLE_OPENMP (see config.mk) is true.
ifeq ($(ENABLE_OPENMP),true)
OPENMP = -qopenmp
endif
# Argument passed to $(CC) by the "info" target.
VERSION = --version
CFLAGS = -O3 -xHost -qopt-zmm-usage=high -std=c99 $(OPENMP)
LFLAGS = $(OPENMP)
DEFINES = -D_GNU_SOURCE
INCLUDES =
LIBS =

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,61 @@
/*
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
* All rights reserved.
* Use of this source code is governed by a MIT-style
* license that can be found in the LICENSE file.
*/
#ifdef __linux__
#ifdef _OPENMP
#include <pthread.h>
#include <sched.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/syscall.h>
#include <sys/types.h>
#include <unistd.h>
#define MAX_NUM_THREADS 128
#define gettid() syscall(SYS_gettid)
/*
 * Return the lowest CPU id in [0, MAX_NUM_THREADS) that is set in cpu_set.
 * If none of these ids is set, MAX_NUM_THREADS is returned.
 */
static int getProcessorID(cpu_set_t* cpu_set)
{
    int id = 0;

    while (id < MAX_NUM_THREADS && !CPU_ISSET(id, cpu_set)) {
        id++;
    }

    return id;
}
/*
 * Query the affinity mask of the calling thread and return the first CPU id
 * found in it (see getProcessorID for the scan range).
 */
int affinity_getProcessorId()
{
    cpu_set_t mask;

    CPU_ZERO(&mask);
    sched_getaffinity(gettid(), sizeof(cpu_set_t), &mask);

    return getProcessorID(&mask);
}
/*
 * Restrict the calling thread to the single CPU processorId.
 * The result of pthread_setaffinity_np is not checked.
 */
void affinity_pinThread(int processorId)
{
    cpu_set_t mask;

    CPU_ZERO(&mask);
    CPU_SET(processorId, &mask);
    pthread_setaffinity_np(pthread_self(), sizeof(cpu_set_t), &mask);
}
/*
 * Restrict the caller (pid 0 in sched_setaffinity) to the single CPU
 * processorId. The result of sched_setaffinity is not checked.
 */
void affinity_pinProcess(int processorId)
{
    cpu_set_t mask;

    CPU_ZERO(&mask);
    CPU_SET(processorId, &mask);

    sched_setaffinity(0, sizeof(cpu_set_t), &mask);
}
#endif /*_OPENMP*/
#endif /*__linux__*/

View File

@ -0,0 +1,14 @@
/*
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
* All rights reserved.
* Use of this source code is governed by a MIT-style
* license that can be found in the LICENSE file.
*/
#ifndef AFFINITY_H
#define AFFINITY_H
extern int affinity_getProcessorId();
extern void affinity_pinProcess(int);
extern void affinity_pinThread(int);
#endif /*AFFINITY_H*/

View File

@ -0,0 +1,35 @@
/*
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
* All rights reserved.
* Use of this source code is governed by a MIT-style
* license that can be found in the LICENSE file.
*/
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
/*
 * Allocate bytesize bytes aligned to alignment bytes via posix_memalign.
 *
 * alignment: requested alignment in bytes
 * bytesize:  number of bytes to allocate
 *
 * Returns the aligned pointer. On any failure an error message is written to
 * stderr and the process exits with EXIT_FAILURE; the function never returns
 * NULL.
 */
void* allocate(int alignment, size_t bytesize)
{
    /* Initialised so the final check never reads an indeterminate value:
     * posix_memalign leaves the pointer unspecified when it fails. */
    void* ptr     = NULL;
    int errorCode = posix_memalign(&ptr, (size_t)alignment, bytesize);

    if (errorCode == EINVAL) {
        fprintf(stderr, "Error: Alignment parameter is not a power of two\n");
        exit(EXIT_FAILURE);
    }

    if (errorCode == ENOMEM) {
        fprintf(stderr, "Error: Insufficient memory to fulfill the request\n");
        exit(EXIT_FAILURE);
    }

    /* Any other error code, or a NULL result, is also treated as fatal. */
    if (errorCode != 0 || ptr == NULL) {
        fprintf(stderr, "Error: posix_memalign failed!\n");
        exit(EXIT_FAILURE);
    }

    return ptr;
}

View File

@ -0,0 +1,13 @@
/*
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
* All rights reserved.
* Use of this source code is governed by a MIT-style
* license that can be found in the LICENSE file.
*/
#ifndef __ALLOCATE_H_
#define __ALLOCATE_H_
#include <stdlib.h>
/*
 * Allocate bytesize bytes aligned to alignment bytes.
 * The implementation terminates the process on failure instead of
 * returning NULL.
 */
extern void* allocate(int alignment, size_t bytesize);
#endif

View File

@ -0,0 +1,326 @@
/*
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
* All rights reserved. This file is part of nusif-solver.
* Use of this source code is governed by a MIT style
* license that can be found in the LICENSE file.
*/
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include "comm.h"
// subroutines local to this module
/*
 * Number of cells assigned to position `rank` when N cells are split over
 * `size` positions: every position gets N / size cells and the first
 * N % size positions get one extra cell.
 */
static int sizeOfRank(int rank, int size, int N)
{
    int share     = N / size;
    int remainder = N % size;

    if (remainder > rank) {
        share += 1;
    }

    return share;
}
/*
 * Create and commit the MPI subarray datatype describing one edge line of the
 * local (jmaxLocal + 2) x (imaxLocal + 2) array and store it in c.
 *
 * direction: LEFT, RIGHT, BOTTOM or TOP edge
 * layer:     HALO selects the outermost line (index 0 or max + 1) and stores
 *            the type in c->rbufferTypes[direction]; BULK selects the adjacent
 *            interior line (index 1 or max) and stores it in
 *            c->sbufferTypes[direction]
 *
 * Corner cells are never part of the described line (it starts at index 1 and
 * spans imaxLocal resp. jmaxLocal cells).
 */
static void setupCommunication(Comm* c, int direction, int layer)
{
MPI_Datatype type;
/* NOTE(review): dblsize is never used in this function. */
size_t dblsize = sizeof(double);
int imaxLocal = c->imaxLocal;
int jmaxLocal = c->jmaxLocal;
int sizes[NDIMS];
int subSizes[NDIMS];
int starts[NDIMS];
int offset = 0;
/* full local array extent including one ghost layer on each side */
sizes[IDIM] = imaxLocal + 2;
sizes[JDIM] = jmaxLocal + 2;
/* offset shifts the selected line from the interior edge to the ghost layer */
if (layer == HALO) {
offset = 1;
}
/* no default case: subSizes/starts stay uninitialised for any other direction */
switch (direction) {
case LEFT:
subSizes[IDIM] = 1;
subSizes[JDIM] = jmaxLocal;
starts[IDIM] = 1 - offset;
starts[JDIM] = 1;
break;
case RIGHT:
subSizes[IDIM] = 1;
subSizes[JDIM] = jmaxLocal;
starts[IDIM] = imaxLocal + offset;
starts[JDIM] = 1;
break;
case BOTTOM:
subSizes[IDIM] = imaxLocal;
subSizes[JDIM] = 1;
starts[IDIM] = 1;
starts[JDIM] = 1 - offset;
break;
case TOP:
subSizes[IDIM] = imaxLocal;
subSizes[JDIM] = 1;
starts[IDIM] = 1;
starts[JDIM] = jmaxLocal + offset;
break;
}
MPI_Type_create_subarray(NDIMS,
sizes,
subSizes,
starts,
MPI_ORDER_C,
MPI_DOUBLE,
&type);
MPI_Type_commit(&type);
/* halo lines are receive targets, bulk lines are send sources */
if (layer == HALO) {
c->rbufferTypes[direction] = type;
} else if (layer == BULK) {
c->sbufferTypes[direction] = type;
}
}
/*
 * Gather one distributed field on rank 0.
 *
 * Every rank (including rank 0) sends the interior of its local array `src`
 * (without the ghost layer) to rank 0. Rank 0 receives each subdomain directly
 * into its place inside the global jmax x imax array `dst`.
 *
 * imaxLocal, jmaxLocal: per-rank interior sizes (only read on rank 0)
 * offset:               per-rank start indices, stored as
 *                       offset[rank * NDIMS + JDIM/IDIM] (only read on rank 0)
 *
 * The request array and all temporary datatypes are released before returning.
 */
static void assembleResult(Comm* c,
    double* src,
    double* dst,
    int imaxLocal[],
    int jmaxLocal[],
    int offset[],
    int jmax,
    int imax)
{
    /* one send on every rank, plus one receive per rank on rank 0 */
    int numRequests = (c->rank == 0) ? c->size + 1 : 1;
    MPI_Request* requests =
        (MPI_Request*)malloc(numRequests * sizeof(MPI_Request));

    if (requests == NULL) {
        fprintf(stderr, "Error: could not allocate MPI request array\n");
        exit(EXIT_FAILURE);
    }

    /* all ranks send their bulk array */
    MPI_Datatype bulkType;
    int oldSizes[NDIMS] = { c->jmaxLocal + 2, c->imaxLocal + 2 };
    int newSizes[NDIMS] = { c->jmaxLocal, c->imaxLocal };
    int starts[NDIMS]   = { 1, 1 };

    MPI_Type_create_subarray(NDIMS,
        oldSizes,
        newSizes,
        starts,
        MPI_ORDER_C,
        MPI_DOUBLE,
        &bulkType);
    MPI_Type_commit(&bulkType);
    MPI_Isend(src, 1, bulkType, 0, 0, c->comm, &requests[0]);
    /* A datatype may be freed while a communication using it is pending;
     * the pending operation still completes normally. */
    MPI_Type_free(&bulkType);

    /* rank 0 assembles the subdomains */
    if (c->rank == 0) {
        for (int i = 0; i < c->size; i++) {
            MPI_Datatype domainType;
            int globalSizes[NDIMS] = { jmax, imax };
            int domainSizes[NDIMS] = { jmaxLocal[i], imaxLocal[i] };
            int domainStart[NDIMS] = { offset[i * NDIMS + JDIM],
                offset[i * NDIMS + IDIM] };

            MPI_Type_create_subarray(NDIMS,
                globalSizes,
                domainSizes,
                domainStart,
                MPI_ORDER_C,
                MPI_DOUBLE,
                &domainType);
            MPI_Type_commit(&domainType);
            MPI_Irecv(dst, 1, domainType, i, 0, c->comm, &requests[i + 1]);
            MPI_Type_free(&domainType);
        }
    }

    MPI_Waitall(numRequests, requests, MPI_STATUSES_IGNORE);
    free(requests);
}
/* Sum of the first `position` entries of `sizes`; 0 when position <= 0. */
static int sum(int* sizes, int position)
{
    int total     = 0;
    int remaining = position;

    while (remaining > 0) {
        remaining--;
        total += sizes[remaining];
    }

    return total;
}
// exported subroutines
void commReduction(double* v, int op)
{
if (op == MAX) {
MPI_Allreduce(MPI_IN_PLACE, v, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD);
} else if (op == SUM) {
MPI_Allreduce(MPI_IN_PLACE, v, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
}
}
/*
 * Return non-zero when this rank's subdomain touches the global domain edge
 * in the given direction, judged by its Cartesian coordinates.
 * Unknown directions yield 0.
 */
int commIsBoundary(Comm* c, int direction)
{
    int onEdge = 0;

    if (direction == LEFT) {
        onEdge = (c->coords[IDIM] == 0);
    } else if (direction == RIGHT) {
        onEdge = (c->coords[IDIM] == (c->dims[IDIM] - 1));
    } else if (direction == BOTTOM) {
        onEdge = (c->coords[JDIM] == 0);
    } else if (direction == TOP) {
        onEdge = (c->coords[JDIM] == (c->dims[JDIM] - 1));
    }

    return onEdge;
}
/*
 * Exchange the ghost layer of `grid` with all four Cartesian neighbours:
 * the interior edge lines (sbufferTypes) are sent and the ghost lines
 * (rbufferTypes) are received in place.
 *
 * For a Cartesian communicator MPI_Neighbor_alltoallw orders the neighbours
 * by dimension, negative direction first. Dimension 0 is JDIM and dimension 1
 * is IDIM here, so the neighbour sequence is BOTTOM, TOP, LEFT, RIGHT. The
 * type arrays in Comm are indexed by `enum direction` (LEFT, RIGHT, BOTTOM,
 * TOP) and therefore have to be permuted before they are passed on.
 */
void commExchange(Comm* c, double* grid)
{
    static const int neighbourOrder[NDIRS] = { BOTTOM, TOP, LEFT, RIGHT };
    int counts[NDIRS]                      = { 1, 1, 1, 1 };
    MPI_Aint displs[NDIRS]                 = { 0, 0, 0, 0 };
    MPI_Datatype sendTypes[NDIRS];
    MPI_Datatype recvTypes[NDIRS];

    for (int n = 0; n < NDIRS; n++) {
        sendTypes[n] = c->sbufferTypes[neighbourOrder[n]];
        recvTypes[n] = c->rbufferTypes[neighbourOrder[n]];
    }

    MPI_Neighbor_alltoallw(grid,
        counts,
        displs,
        sendTypes,
        grid,
        counts,
        displs,
        recvTypes,
        c->comm);
}
/*
 * One-directional halo shift used before the RHS computation:
 *   g: bottom ghost line is received from the bottom neighbour while the top
 *      interior line is sent to the top neighbour (tag 0)
 *   f: left ghost line is received from the left neighbour while the right
 *      interior line is sent to the right neighbour (tag 1)
 * Returns after all four non-blocking operations have completed.
 */
void commShift(Comm* c, double* f, double* g)
{
    enum { RECV_G = 0, SEND_G, RECV_F, SEND_F, NUM_REQUESTS };
    MPI_Request pending[NUM_REQUESTS];

    for (int r = 0; r < NUM_REQUESTS; r++) {
        pending[r] = MPI_REQUEST_NULL;
    }

    /* shift G upwards */
    MPI_Irecv(g, 1, c->rbufferTypes[BOTTOM], c->neighbours[BOTTOM], 0, c->comm,
        &pending[RECV_G]);
    MPI_Isend(g, 1, c->sbufferTypes[TOP], c->neighbours[TOP], 0, c->comm,
        &pending[SEND_G]);

    /* shift F to the right */
    MPI_Irecv(f, 1, c->rbufferTypes[LEFT], c->neighbours[LEFT], 1, c->comm,
        &pending[RECV_F]);
    MPI_Isend(f, 1, c->sbufferTypes[RIGHT], c->neighbours[RIGHT], 1, c->comm,
        &pending[SEND_F]);

    MPI_Waitall(NUM_REQUESTS, pending, MPI_STATUSES_IGNORE);
}
/*
 * Collect the distributed fields u, v and p on rank 0.
 *
 * ug, vg, pg: global jmax x imax destination arrays (written on rank 0)
 * u, v, p:    local arrays including the ghost layer
 *
 * Rank 0 gathers the local interior sizes of all ranks, derives the start
 * offset of every subdomain in the global array and prints this layout; then
 * each field is assembled with assembleResult.
 */
void commCollectResult(Comm* c,
    double* ug,
    double* vg,
    double* pg,
    double* u,
    double* v,
    double* p,
    int jmax,
    int imax)
{
    int offset[c->size * NDIMS];
    int imaxLocal[c->size];
    int jmaxLocal[c->size];

    MPI_Gather(&c->imaxLocal, 1, MPI_INT, imaxLocal, 1, MPI_INT, 0, MPI_COMM_WORLD);
    MPI_Gather(&c->jmaxLocal, 1, MPI_INT, jmaxLocal, 1, MPI_INT, 0, MPI_COMM_WORLD);

    if (c->rank == 0) {
        for (int i = 0; i < c->size; i++) {
            int coords[NDIMS];
            MPI_Cart_coords(c->comm, i, NDIMS, coords);

            /* I offset: Cartesian ranks are numbered row-major with IDIM
             * varying fastest, so ranks 0..coords[IDIM]-1 are exactly the
             * subdomains to the left (taken from the first row). */
            offset[i * NDIMS + IDIM] = sum(imaxLocal, coords[IDIM]);

            /* J offset: the subdomains below are NOT consecutive ranks; look
             * up the rank of every subdomain below in the same column and
             * accumulate its height. */
            int jOffset = 0;
            for (int cj = 0; cj < coords[JDIM]; cj++) {
                int below[NDIMS];
                int belowRank;
                below[JDIM] = cj;
                below[IDIM] = coords[IDIM];
                MPI_Cart_rank(c->comm, below, &belowRank);
                jOffset += jmaxLocal[belowRank];
            }
            offset[i * NDIMS + JDIM] = jOffset;

            printf("Rank: %d, Coords(j,i): %d %d, Size(j,i): %d %d "
                   "Offset(j,i): %d %d\n",
                i,
                coords[JDIM],
                coords[IDIM],
                jmaxLocal[i],
                imaxLocal[i],
                offset[i * NDIMS + JDIM],
                offset[i * NDIMS + IDIM]);
        }
    }

    /* collect P */
    assembleResult(c, p, pg, imaxLocal, jmaxLocal, offset, jmax, imax);

    /* collect U */
    assembleResult(c, u, ug, imaxLocal, jmaxLocal, offset, jmax, imax);

    /* collect V */
    assembleResult(c, v, vg, imaxLocal, jmaxLocal, offset, jmax, imax);
}
/*
 * Print the communication setup (rank, neighbours, Cartesian coordinates and
 * local domain size) of every rank to stdout.
 *
 * The ranks take turns: a barrier after each turn keeps a rank from printing
 * before the previous one has flushed its output. The order in which the MPI
 * runtime forwards the output is still best effort.
 */
void commPrintConfig(Comm* c)
{
    fflush(stdout);
    MPI_Barrier(MPI_COMM_WORLD);

    if (commIsMaster(c)) {
        printf("Communication setup:\n");
    }

    for (int i = 0; i < c->size; i++) {
        if (i == c->rank) {
            printf("\tRank %d of %d\n", c->rank, c->size);
            printf("\tNeighbours (bottom, top, left, right): %d %d, %d, %d\n",
                c->neighbours[BOTTOM],
                c->neighbours[TOP],
                c->neighbours[LEFT],
                c->neighbours[RIGHT]);
            printf("\tCoordinates (j,i) %d %d\n", c->coords[JDIM], c->coords[IDIM]);
            printf("\tLocal domain size (j,i) %dx%d\n", c->jmaxLocal, c->imaxLocal);
            fflush(stdout);
        }
        /* hand over to the next rank only after this one has printed */
        MPI_Barrier(MPI_COMM_WORLD);
    }
}
/*
 * Set up the 2D Cartesian process grid and everything derived from it:
 * rank and size, the Cartesian communicator (non-periodic, no reordering),
 * the four neighbour ranks, this rank's grid coordinates, the local interior
 * domain size and the send/receive datatypes for all four edges.
 *
 * jmax, imax: global number of interior cells in j and i direction
 */
void commInit(Comm* c, int jmax, int imax)
{
    /* setup communication */
    MPI_Comm_rank(MPI_COMM_WORLD, &(c->rank));
    MPI_Comm_size(MPI_COMM_WORLD, &(c->size));

    int dims[NDIMS]    = { 0, 0 };
    int periods[NDIMS] = { 0, 0 };
    MPI_Dims_create(c->size, NDIMS, dims);
    MPI_Cart_create(MPI_COMM_WORLD, NDIMS, dims, periods, 0, &c->comm);
    MPI_Cart_shift(c->comm, IDIM, 1, &c->neighbours[LEFT], &c->neighbours[RIGHT]);
    MPI_Cart_shift(c->comm, JDIM, 1, &c->neighbours[BOTTOM], &c->neighbours[TOP]);
    MPI_Cart_get(c->comm, NDIMS, c->dims, periods, c->coords);

    /* The share of cells in each dimension depends on the position of this
     * rank within that dimension of the process grid, not on its global rank;
     * otherwise the remainder cells are distributed inconsistently whenever
     * imax or jmax is not divisible by the grid extent. */
    c->imaxLocal = sizeOfRank(c->coords[IDIM], dims[IDIM], imax);
    c->jmaxLocal = sizeOfRank(c->coords[JDIM], dims[JDIM], jmax);

    /* setup buffer types for communication: for every edge one type for the
     * interior line that is sent and one for the ghost line that is received */
    for (int dir = LEFT; dir < NDIRS; dir++) {
        setupCommunication(c, dir, BULK);
        setupCommunication(c, dir, HALO);
    }
}

View File

@ -0,0 +1,44 @@
/*
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
* All rights reserved. This file is part of nusif-solver.
* Use of this source code is governed by a MIT style
* license that can be found in the LICENSE file.
*/
#ifndef __COMM_H_
#define __COMM_H_
#include <mpi.h>
/* Edge/neighbour identifiers; used as index into the per-direction arrays of
 * Comm. NDIRS is the number of directions. */
enum direction { LEFT = 0, RIGHT, BOTTOM, TOP, NDIRS };
/* Index of the j and i dimension in coords/dims and in the Cartesian
 * communicator. NDIMS is the number of dimensions. */
enum dimension { JDIM = 0, IDIM, NDIMS };
/* Selects the ghost line (HALO) or the adjacent interior line (BULK) of an edge. */
enum layer { HALO = 0, BULK };
/* Reduction operation selector for commReduction. */
enum op { MAX = 0, SUM };
/* Per-process communication state, filled by commInit. */
typedef struct {
/* rank and number of processes in MPI_COMM_WORLD */
int rank;
int size;
/* Cartesian communicator */
MPI_Comm comm;
/* datatypes of the interior edge lines that are sent, indexed by direction */
MPI_Datatype sbufferTypes[NDIRS];
/* datatypes of the ghost lines that are received, indexed by direction */
MPI_Datatype rbufferTypes[NDIRS];
/* neighbour ranks as returned by MPI_Cart_shift, indexed by direction */
int neighbours[NDIRS];
/* position of this rank in the process grid and extent of the process grid */
int coords[NDIMS], dims[NDIMS];
/* number of interior cells of the local subdomain */
int imaxLocal, jmaxLocal;
} Comm;
/* Initialise c for a global domain of jmax x imax interior cells. */
extern void commInit(Comm* c, int jmax, int imax);
/* Print the communication setup of all ranks. */
extern void commPrintConfig(Comm*);
/* Exchange the ghost layer of a local array with all neighbours. */
extern void commExchange(Comm*, double*);
/* Shift f from left to right and g from bottom to top by one line. */
extern void commShift(Comm* c, double* f, double* g);
/* In-place all-reduce of *v with the operation selected by op (MAX or SUM). */
extern void commReduction(double* v, int op);
/* Non-zero if the local subdomain touches the global domain edge in direction. */
extern int commIsBoundary(Comm* c, int direction);
/* Assemble the global fields ug, vg, pg on rank 0 from the local u, v, p. */
extern void commCollectResult(Comm* c,
double* ug,
double* vg,
double* pg,
double* u,
double* v,
double* p,
int jmax,
int imax);
/* Non-zero on rank 0. */
static inline int commIsMaster(Comm* c) { return c->rank == 0; }
#endif // __COMM_H_

View File

@ -0,0 +1,54 @@
/*
* =======================================================================================
*
* Author: Jan Eitzinger (je), jan.eitzinger@fau.de
* Copyright (c) 2020 RRZE, University Erlangen-Nuremberg
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* =======================================================================================
*/
#ifndef LIKWID_MARKERS_H
#define LIKWID_MARKERS_H
/*
 * Wrapper macros for the LIKWID marker API. With LIKWID_PERFMON defined they
 * forward to the corresponding likwid_marker* functions from <likwid.h>;
 * without it they expand to nothing, so instrumented code builds unchanged
 * when LIKWID is not used.
 */
#ifdef LIKWID_PERFMON
#include <likwid.h>
#define LIKWID_MARKER_INIT likwid_markerInit()
#define LIKWID_MARKER_THREADINIT likwid_markerThreadInit()
#define LIKWID_MARKER_SWITCH likwid_markerNextGroup()
#define LIKWID_MARKER_REGISTER(regionTag) likwid_markerRegisterRegion(regionTag)
#define LIKWID_MARKER_START(regionTag) likwid_markerStartRegion(regionTag)
#define LIKWID_MARKER_STOP(regionTag) likwid_markerStopRegion(regionTag)
#define LIKWID_MARKER_CLOSE likwid_markerClose()
#define LIKWID_MARKER_RESET(regionTag) likwid_markerResetRegion(regionTag)
#define LIKWID_MARKER_GET(regionTag, nevents, events, time, count) \
likwid_markerGetRegion(regionTag, nevents, events, time, count)
#else /* LIKWID_PERFMON */
/* LIKWID disabled: every marker macro is a no-op */
#define LIKWID_MARKER_INIT
#define LIKWID_MARKER_THREADINIT
#define LIKWID_MARKER_SWITCH
#define LIKWID_MARKER_REGISTER(regionTag)
#define LIKWID_MARKER_START(regionTag)
#define LIKWID_MARKER_STOP(regionTag)
#define LIKWID_MARKER_CLOSE
#define LIKWID_MARKER_GET(regionTag, nevents, events, time, count)
#define LIKWID_MARKER_RESET(regionTag)
#endif /* LIKWID_PERFMON */
#endif /*LIKWID_MARKERS_H*/

View File

@ -0,0 +1,95 @@
/*
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
* All rights reserved.
* Use of this source code is governed by a MIT-style
* license that can be found in the LICENSE file.
*/
#include <float.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include "allocate.h"
#include "parameter.h"
#include "progress.h"
#include "solver.h"
#include "timing.h"
#include <mpi.h>
/*
 * Program entry: read the parameter file given as the only argument, run the
 * time-stepping loop until the final time is reached, then collect the
 * distributed result on rank 0 and write it out.
 */
int main(int argc, char** argv)
{
    int rank;
    double S, E;
    Parameter params;
    Solver solver;

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    if (argc != 2) {
        /* print the usage once instead of once per rank, and shut MPI down
         * properly before leaving */
        if (rank == 0) {
            printf("Usage: %s <configFile>\n", argv[0]);
        }
        MPI_Finalize();
        exit(EXIT_SUCCESS);
    }

    initParameter(&params);
    readParameter(&params, argv[1]);
    if (rank == 0) {
        printParameter(&params);
    }
    initSolver(&solver, &params);
    initProgress(solver.te);

    double tau = solver.tau;
    double te  = solver.te;
    double t   = 0.0;

    S = getTimeStamp();
    while (t <= te) {
        /* adaptive time step only when the safety factor is positive */
        if (tau > 0.0) {
            computeTimestep(&solver);
        }

        setBoundaryConditions(&solver);
        setSpecialBoundaryCondition(&solver);
        computeFG(&solver);
        computeRHS(&solver);
        solve(&solver);
        adaptUV(&solver);
        t += solver.dt;

#ifdef VERBOSE
        if (rank == 0) {
            printf("TIME %f , TIMESTEP %f\n", t, solver.dt);
        }
#else
        printProgress(t);
#endif
    }
    E = getTimeStamp();
    stopProgress();

    if (rank == 0) {
        printf("Solution took %.2fs\n", E - S);
    }

    /* global result arrays; widen before multiplying so large grids do not
     * overflow int arithmetic */
    size_t bytesize = (size_t)solver.imax * (size_t)solver.jmax * sizeof(double);

    double* ug = allocate(64, bytesize);
    double* vg = allocate(64, bytesize);
    double* pg = allocate(64, bytesize);

    commCollectResult(&solver.comm,
        ug,
        vg,
        pg,
        solver.u,
        solver.v,
        solver.p,
        solver.jmax,
        solver.imax);
    writeResult(&solver, ug, vg, pg);

    MPI_Finalize();
    return EXIT_SUCCESS;
}

View File

@ -0,0 +1,108 @@
/*
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
* All rights reserved. This file is part of nusif-solver.
* Use of this source code is governed by a MIT style
* license that can be found in the LICENSE file.
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "parameter.h"
#include "util.h"
#define MAXLINE 4096
/*
 * Fill *param with defaults so that every field has a defined value even if
 * the parameter file does not mention it.
 *
 * The first group keeps the defaults this function has always set. The second
 * group was previously left uninitialised (including the `name` pointer, which
 * is later printed and compared as a string); those values are conservative
 * placeholders and are expected to be overridden by the parameter file.
 */
void initParameter(Parameter* param)
{
    param->xlength = 1.0;
    param->ylength = 1.0;
    param->imax    = 100;
    param->jmax    = 100;
    param->itermax = 1000;
    param->eps     = 0.0001;
    param->omg     = 1.8;

    /* previously uninitialised fields */
    param->re       = 100.0;
    param->tau      = 0.5;
    param->gamma    = 0.9;
    param->te       = 1.0;
    param->dt       = 0.02;
    param->gx       = 0.0;
    param->gy       = 0.0;
    param->name     = "";
    param->bcLeft   = 1; /* 1 = no-slip according to the parameter files */
    param->bcRight  = 1;
    param->bcBottom = 1;
    param->bcTop    = 1;
    param->u_init   = 0.0;
    param->v_init   = 0.0;
    param->p_init   = 0.0;
}
/*
 * Read "key value" pairs from the text file `filename` into *param.
 *
 * Everything from '#' to the end of a line is a comment. Key and value are
 * separated by blanks or tabs; lines without both tokens are skipped. Keys
 * are matched by prefix against the known parameter names (unchanged
 * behaviour). Unknown keys are ignored.
 *
 * Exits with EXIT_FAILURE if the file cannot be opened.
 */
void readParameter(Parameter* param, const char* filename)
{
    /* Also split on tab, CR and LF: otherwise a value that ends its line
     * without a trailing comment keeps the newline (which, for example, makes
     * a string value such as the problem name unusable for comparisons). */
    const char* delimiters = " \t\r\n";
    FILE* fp               = fopen(filename, "r");
    char line[MAXLINE];
    int i;

    if (!fp) {
        fprintf(stderr, "Could not open parameter file: %s\n", filename);
        exit(EXIT_FAILURE);
    }

    /* Loop on the result of fgets so that EOF and read errors both end the
     * loop. */
    while (fgets(line, MAXLINE, fp) != NULL) {
        /* cut the line at the first comment character */
        for (i = 0; line[i] != '\0' && line[i] != '#'; i++)
            ;
        line[i] = '\0';

        char* tok = strtok(line, delimiters);
        char* val = strtok(NULL, delimiters);

#define PARSE_PARAM(p, f)                                                                \
    if (strncmp(tok, #p, sizeof(#p) / sizeof(#p[0]) - 1) == 0) {                         \
        param->p = f(val);                                                               \
    }
#define PARSE_STRING(p) PARSE_PARAM(p, strdup)
#define PARSE_INT(p)    PARSE_PARAM(p, atoi)
#define PARSE_REAL(p)   PARSE_PARAM(p, atof)

        if (tok != NULL && val != NULL) {
            PARSE_REAL(xlength);
            PARSE_REAL(ylength);
            PARSE_INT(imax);
            PARSE_INT(jmax);
            PARSE_INT(itermax);
            PARSE_REAL(eps);
            PARSE_REAL(omg);
            PARSE_REAL(re);
            PARSE_REAL(tau);
            PARSE_REAL(gamma);
            PARSE_REAL(dt);
            PARSE_REAL(te);
            PARSE_REAL(gx);
            PARSE_REAL(gy);
            PARSE_STRING(name);
            PARSE_INT(bcLeft);
            PARSE_INT(bcRight);
            PARSE_INT(bcBottom);
            PARSE_INT(bcTop);
            PARSE_REAL(u_init);
            PARSE_REAL(v_init);
            PARSE_REAL(p_init);
        }
    }

    fclose(fp);
}
/*
 * Print all parameters in *param to stdout, grouped into problem, geometry,
 * time step and iterative solver settings.
 */
void printParameter(Parameter* param)
{
    printf("Parameters for %s\n", param->name);
    printf("Boundary conditions Left:%d Right:%d Bottom:%d Top:%d\n",
        param->bcLeft,
        param->bcRight,
        param->bcBottom,
        param->bcTop);
    printf("\tReynolds number: %.2f\n", param->re);
    printf("\tInit arrays: U:%.2f V:%.2f P:%.2f\n",
        param->u_init,
        param->v_init,
        param->p_init);
    printf("Geometry data:\n");
    printf("\tDomain box size (x, y): %.2f, %.2f\n", param->xlength, param->ylength);
    printf("\tCells (x, y): %d, %d\n", param->imax, param->jmax);
    printf("Timestep parameters:\n");
    printf("\tDefault stepsize: %.2f, Final time %.2f\n", param->dt, param->te);
    printf("\tTau factor: %.2f\n", param->tau);
    printf("Iterative solver parameters:\n");
    printf("\tMax iterations: %d\n", param->itermax);
    printf("\tepsilon (stopping tolerance) : %f\n", param->eps);
    /* gamma is the upwind differencing factor, not a stopping tolerance */
    printf("\tgamma (upwind differencing factor) : %f\n", param->gamma);
    printf("\tomega (SOR relaxation): %f\n", param->omg);
}

View File

@ -0,0 +1,26 @@
/*
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
* All rights reserved. This file is part of nusif-solver.
* Use of this source code is governed by a MIT style
* license that can be found in the LICENSE file.
*/
#ifndef __PARAMETER_H_
#define __PARAMETER_H_
/* Simulation parameters as read from the parameter file. */
typedef struct {
/* domain size in x and y direction */
double xlength, ylength;
/* number of interior cells in x and y direction */
int imax, jmax;
/* maximum number of pressure iterations per time step */
int itermax;
/* stopping tolerance and SOR relaxation parameter of the pressure iteration */
double eps, omg;
/* Reynolds number, time step safety factor, upwind differencing factor */
double re, tau, gamma;
/* final time and time step size */
double te, dt;
/* body forces */
double gx, gy;
/* name of the flow setup */
char* name;
/* boundary condition flags per domain edge */
int bcLeft, bcRight, bcBottom, bcTop;
/* initial values for the velocities and the pressure */
double u_init, v_init, p_init;
} Parameter;
/* Set default values. */
void initParameter(Parameter*);
/* Read parameters from the given file. */
void readParameter(Parameter*, const char*);
/* Print all parameters to stdout. */
void printParameter(Parameter*);
#endif

View File

@ -0,0 +1,60 @@
/*
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
* All rights reserved. This file is part of nusif-solver.
* Use of this source code is governed by a MIT style
* license that can be found in the LICENSE file.
*/
#include <math.h>
#include <mpi.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "progress.h"
static double _end;
static int _current;
static int _rank = -1;
/*
 * Start the progress display: remember the end value, reset the progress
 * state and let rank 0 print the empty bar.
 */
void initProgress(double end)
{
    MPI_Comm_rank(MPI_COMM_WORLD, &_rank);
    _current = 0;
    _end     = end;

    if (_rank != 0) {
        return;
    }

    printf("[ ]");
    fflush(stdout);
}
/*
 * Redraw the ten-segment progress bar on rank 0 whenever `current`, relative
 * to the end value given to initProgress, has advanced by at least one more
 * segment. Other ranks do nothing.
 */
void printProgress(double current)
{
    if (_rank != 0) {
        return;
    }

    int segments = (int)rint((current / _end) * 10.0);

    if (segments > _current) {
        char bar[11];

        _current = segments;
        for (int k = 0; k < 10; k++) {
            bar[k] = (k < _current) ? '#' : ' ';
        }
        bar[10] = '\0';

        printf("\r[%s]", bar);
    }

    fflush(stdout);
}
/* Finish the progress display: rank 0 terminates the bar line. */
void stopProgress()
{
    if (_rank != 0) {
        return;
    }

    printf("\n");
    fflush(stdout);
}

View File

@ -0,0 +1,14 @@
/*
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
* All rights reserved.
* Use of this source code is governed by a MIT-style
* license that can be found in the LICENSE file.
*/
#ifndef __PROGRESS_H_
#define __PROGRESS_H_
/* Start the progress display; the argument is the value that means 100%. */
extern void initProgress(double);
/* Update the progress display with the current value. */
extern void printProgress(double);
/* Finish the progress display. */
extern void stopProgress();
#endif

View File

@ -0,0 +1,546 @@
/*
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
* All rights reserved. This file is part of nusif-solver.
* Use of this source code is governed by a MIT style
* license that can be found in the LICENSE file.
*/
#include <float.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "allocate.h"
#include "comm.h"
#include "parameter.h"
#include "solver.h"
#include "util.h"
#define P(i, j) p[(j) * (imaxLocal + 2) + (i)]
#define F(i, j) f[(j) * (imaxLocal + 2) + (i)]
#define G(i, j) g[(j) * (imaxLocal + 2) + (i)]
#define U(i, j) u[(j) * (imaxLocal + 2) + (i)]
#define V(i, j) v[(j) * (imaxLocal + 2) + (i)]
#define RHS(i, j) rhs[(j) * (imaxLocal + 2) + (i)]
/*
 * Print the solver configuration on the master rank, followed by the
 * communication setup of all ranks.
 */
static void printConfig(Solver* s)
{
    if (commIsMaster(&s->comm)) {
        printf("Parameters for #%s#\n", s->problem);
        printf("BC Left:%d Right:%d Bottom:%d Top:%d\n",
            s->bcLeft,
            s->bcRight,
            s->bcBottom,
            s->bcTop);
        printf("\tReynolds number: %.2f\n", s->re);
        printf("\tGx Gy: %.2f %.2f\n", s->gx, s->gy);
        printf("Geometry data:\n");
        printf("\tDomain box size (x, y): %.2f, %.2f\n", s->xlength, s->ylength);
        printf("\tCells (x, y): %d, %d\n", s->imax, s->jmax);
        printf("\tCell size (dx, dy): %f, %f\n", s->dx, s->dy);
        printf("Timestep parameters:\n");
        printf("\tDefault stepsize: %.2f, Final time %.2f\n", s->dt, s->te);
        printf("\tdt bound: %.6f\n", s->dtBound);
        printf("\tTau factor: %.2f\n", s->tau);
        /* heading used to read "Iterative s parameters:" */
        printf("Iterative solver parameters:\n");
        printf("\tMax iterations: %d\n", s->itermax);
        printf("\tepsilon (stopping tolerance) : %f\n", s->eps);
        printf("\tgamma factor: %f\n", s->gamma);
        printf("\tomega (SOR relaxation): %f\n", s->omega);
    }
    commPrintConfig(&s->comm);
}
/*
 * Initialise the solver from the parameters: copy the settings, derive the
 * cell sizes, set up the communication, allocate the local arrays (including
 * one ghost layer) and fill them with their initial values, and compute the
 * time step bound.
 */
void initSolver(Solver* s, Parameter* params)
{
    s->problem  = params->name;
    s->bcLeft   = params->bcLeft;
    s->bcRight  = params->bcRight;
    s->bcBottom = params->bcBottom;
    s->bcTop    = params->bcTop;
    s->imax     = params->imax;
    s->jmax     = params->jmax;
    s->xlength  = params->xlength;
    s->ylength  = params->ylength;
    s->dx       = params->xlength / params->imax;
    s->dy       = params->ylength / params->jmax;
    s->eps      = params->eps;
    s->omega    = params->omg;
    s->itermax  = params->itermax;
    s->re       = params->re;
    s->gx       = params->gx;
    s->gy       = params->gy;
    s->dt       = params->dt;
    s->te       = params->te;
    s->tau      = params->tau;
    s->gamma    = params->gamma;

    commInit(&s->comm, s->jmax, s->imax);

    /* allocate arrays */
    int imaxLocal = s->comm.imaxLocal;
    int jmaxLocal = s->comm.jmaxLocal;
    /* widen before multiplying so the element count cannot overflow int */
    size_t size     = (size_t)(imaxLocal + 2) * (size_t)(jmaxLocal + 2);
    size_t bytesize = size * sizeof(double);

    s->u   = allocate(64, bytesize);
    s->v   = allocate(64, bytesize);
    s->p   = allocate(64, bytesize);
    s->rhs = allocate(64, bytesize);
    s->f   = allocate(64, bytesize);
    s->g   = allocate(64, bytesize);

    /* index type matches `size`, avoiding a signed/unsigned comparison */
    for (size_t i = 0; i < size; i++) {
        s->u[i]   = params->u_init;
        s->v[i]   = params->v_init;
        s->p[i]   = params->p_init;
        s->rhs[i] = 0.0;
        s->f[i]   = 0.0;
        s->g[i]   = 0.0;
    }

    double dx        = s->dx;
    double dy        = s->dy;
    double invSqrSum = 1.0 / (dx * dx) + 1.0 / (dy * dy);
    s->dtBound       = 0.5 * s->re * 1.0 / invSqrSum;

#ifdef VERBOSE
    printConfig(s);
#endif
}
/*
 * Compute the right-hand side of the pressure equation on the local interior:
 * RHS = ((F(i,j) - F(i-1,j)) / dx + (G(i,j) - G(i,j-1)) / dy) / dt.
 * The ghost values of F (left) and G (bottom) are fetched from the
 * neighbours first.
 */
void computeRHS(Solver* s)
{
    /* the names rhs, f, g and imaxLocal are required by the access macros */
    double* rhs   = s->rhs;
    double* f     = s->f;
    double* g     = s->g;
    int imaxLocal = s->comm.imaxLocal;
    int jmaxLocal = s->comm.jmaxLocal;

    const double invDx = 1.0 / s->dx;
    const double invDy = 1.0 / s->dy;
    const double invDt = 1.0 / s->dt;

    commShift(&s->comm, f, g);

    for (int j = 1; j <= jmaxLocal; j++) {
        for (int i = 1; i <= imaxLocal; i++) {
            double dFdx = (F(i, j) - F(i - 1, j)) * invDx;
            double dGdy = (G(i, j) - G(i, j - 1)) * invDy;

            RHS(i, j) = (dFdx + dGdy) * invDt;
        }
    }
}
/*
 * Solve the pressure Poisson equation with SOR on the local subdomain.
 *
 * Iterates until the globally averaged squared residual drops below eps^2 or
 * itermax iterations have been performed.
 *
 * Returns 0 if the stopping tolerance was reached, 1 otherwise.
 */
int solve(Solver* s)
{
    int imax      = s->imax;
    int jmax      = s->jmax;
    int imaxLocal = s->comm.imaxLocal;
    int jmaxLocal = s->comm.jmaxLocal;
    double eps    = s->eps;
    int itermax   = s->itermax;
    double dx2    = s->dx * s->dx;
    double dy2    = s->dy * s->dy;
    double idx2   = 1.0 / dx2;
    double idy2   = 1.0 / dy2;
    double factor = s->omega * 0.5 * (dx2 * dy2) / (dx2 + dy2);
    double* p     = s->p;
    double* rhs   = s->rhs;
    /* res holds the mean SQUARED residual, hence compare against eps^2 */
    double epssq = eps * eps;
    int it       = 0;
    double res   = 1.0;

    while ((res >= epssq) && (it < itermax)) {
        res = 0.0;

        /* Refresh the ghost layer in every iteration: the neighbours update
         * their edge values in each sweep, so exchanging only once before the
         * loop would leave the subdomains coupled to stale data. */
        commExchange(&s->comm, p);

        for (int j = 1; j < jmaxLocal + 1; j++) {
            for (int i = 1; i < imaxLocal + 1; i++) {

                double r = RHS(i, j) -
                           ((P(i + 1, j) - 2.0 * P(i, j) + P(i - 1, j)) * idx2 +
                               (P(i, j + 1) - 2.0 * P(i, j) + P(i, j - 1)) * idy2);

                P(i, j) -= (factor * r);
                res += (r * r);
            }
        }

        /* copy the edge values into the ghost layer on physical boundaries */
        if (commIsBoundary(&s->comm, BOTTOM)) { // set bottom bc
            for (int i = 1; i < imaxLocal + 1; i++) {
                P(i, 0) = P(i, 1);
            }
        }

        if (commIsBoundary(&s->comm, TOP)) { // set top bc
            for (int i = 1; i < imaxLocal + 1; i++) {
                P(i, jmaxLocal + 1) = P(i, jmaxLocal);
            }
        }

        if (commIsBoundary(&s->comm, LEFT)) { // set left bc
            for (int j = 1; j < jmaxLocal + 1; j++) {
                P(0, j) = P(1, j);
            }
        }

        if (commIsBoundary(&s->comm, RIGHT)) { // set right bc
            for (int j = 1; j < jmaxLocal + 1; j++) {
                P(imaxLocal + 1, j) = P(imaxLocal, j);
            }
        }

        /* global mean of the squared residual */
        commReduction(&res, SUM);
        res = res / (double)(imax * jmax);
#ifdef DEBUG
        if (commIsMaster(&s->comm)) {
            printf("%d Residuum: %e\n", it, res);
        }
#endif
        it++;
    }

#ifdef VERBOSE
    if (commIsMaster(&s->comm)) {
        printf("Solver took %d iterations to reach %f\n", it, sqrt(res));
    }
#endif

    /* same criterion as the loop; comparing against eps would report
     * convergence for residuals between eps^2 and eps */
    if (res < epssq) {
        return 0;
    } else {
        return 1;
    }
}
/*
 * Return the largest absolute value found in the local array m (including the
 * ghost layer), maximised over all ranks.
 */
static double maxElement(Solver* s, double* m)
{
    int imaxLocal = s->comm.imaxLocal;
    int jmaxLocal = s->comm.jmaxLocal;
    int size      = (imaxLocal + 2) * (jmaxLocal + 2);
    /* Absolute values are never negative, so 0.0 is the neutral start value.
     * DBL_MIN is the smallest POSITIVE normalised double and made an all-zero
     * field report a non-zero maximum. */
    double maxval = 0.0;

    for (int i = 0; i < size; i++) {
        maxval = MAX(maxval, fabs(m[i]));
    }

    commReduction(&maxval, MAX);
    return maxval;
}
/*
 * Choose the next time step: the minimum of the precomputed bound dtBound and
 * the limits dx / max|u| and dy / max|v| (each only applied when the maximum
 * is positive), scaled by the safety factor tau.
 */
void computeTimestep(Solver* s)
{
    double umax  = maxElement(s, s->u);
    double vmax  = maxElement(s, s->v);
    double limit = s->dtBound;

    if (umax > 0) {
        double xLimit = s->dx / umax;
        if (limit > xLimit) {
            limit = xLimit;
        }
    }

    if (vmax > 0) {
        double yLimit = s->dy / vmax;
        if (limit > yLimit) {
            limit = yLimit;
        }
    }

    s->dt = limit * s->tau;
}
/*
 * Apply the configured velocity boundary condition on every edge of the local
 * subdomain that lies on the global domain boundary.
 *
 * For each such edge the flag (bcTop, bcBottom, bcRight, bcLeft) selects:
 *   NOSLIP:   normal velocity on the edge set to zero, tangential ghost value
 *             set to the negated interior value
 *   SLIP:     normal velocity on the edge set to zero, tangential ghost value
 *             copied from the interior
 *   OUTFLOW:  both components copied from the neighbouring interior values
 *   PERIODIC: nothing is done here
 */
void setBoundaryConditions(Solver* s)
{
int imaxLocal = s->comm.imaxLocal;
int jmaxLocal = s->comm.jmaxLocal;
double* u = s->u;
double* v = s->v;
/* top edge: normal component is V at row jmaxLocal, U ghost row is jmaxLocal + 1 */
if (commIsBoundary(&s->comm, TOP)) {
switch (s->bcTop) {
case NOSLIP:
for (int i = 1; i < imaxLocal + 1; i++) {
V(i, jmaxLocal) = 0.0;
U(i, jmaxLocal + 1) = -U(i, jmaxLocal);
}
break;
case SLIP:
for (int i = 1; i < imaxLocal + 1; i++) {
V(i, jmaxLocal) = 0.0;
U(i, jmaxLocal + 1) = U(i, jmaxLocal);
}
break;
case OUTFLOW:
for (int i = 1; i < imaxLocal + 1; i++) {
U(i, jmaxLocal + 1) = U(i, jmaxLocal);
V(i, jmaxLocal) = V(i, jmaxLocal - 1);
}
break;
case PERIODIC:
break;
}
}
/* bottom edge: both components are set in row 0 */
if (commIsBoundary(&s->comm, BOTTOM)) {
switch (s->bcBottom) {
case NOSLIP:
for (int i = 1; i < imaxLocal + 1; i++) {
V(i, 0) = 0.0;
U(i, 0) = -U(i, 1);
}
break;
case SLIP:
for (int i = 1; i < imaxLocal + 1; i++) {
V(i, 0) = 0.0;
U(i, 0) = U(i, 1);
}
break;
case OUTFLOW:
for (int i = 1; i < imaxLocal + 1; i++) {
U(i, 0) = U(i, 1);
V(i, 0) = V(i, 1);
}
break;
case PERIODIC:
break;
}
}
/* right edge: normal component is U at column imaxLocal, V ghost column is imaxLocal + 1 */
if (commIsBoundary(&s->comm, RIGHT)) {
switch (s->bcRight) {
case NOSLIP:
for (int j = 1; j < jmaxLocal + 1; j++) {
U(imaxLocal, j) = 0.0;
V(imaxLocal + 1, j) = -V(imaxLocal, j);
}
break;
case SLIP:
for (int j = 1; j < jmaxLocal + 1; j++) {
U(imaxLocal, j) = 0.0;
V(imaxLocal + 1, j) = V(imaxLocal, j);
}
break;
case OUTFLOW:
for (int j = 1; j < jmaxLocal + 1; j++) {
U(imaxLocal, j) = U(imaxLocal - 1, j);
V(imaxLocal + 1, j) = V(imaxLocal, j);
}
break;
case PERIODIC:
break;
}
}
/* left edge: both components are set in column 0 */
if (commIsBoundary(&s->comm, LEFT)) {
switch (s->bcLeft) {
case NOSLIP:
for (int j = 1; j < jmaxLocal + 1; j++) {
U(0, j) = 0.0;
V(0, j) = -V(1, j);
}
break;
case SLIP:
for (int j = 1; j < jmaxLocal + 1; j++) {
U(0, j) = 0.0;
V(0, j) = V(1, j);
}
break;
case OUTFLOW:
for (int j = 1; j < jmaxLocal + 1; j++) {
U(0, j) = U(1, j);
V(0, j) = V(1, j);
}
break;
case PERIODIC:
break;
}
}
}
/*
 * Apply the problem-specific boundary condition selected by the problem name:
 *   "dcavity": on the top edge the U ghost row is set to 2.0 - U(i, jmaxLocal)
 *   "canal":   on the left edge U(0, j) is set to the parabolic profile
 *              4 * y * (ylength - y) / ylength^2
 * Any other problem name leaves the fields untouched.
 */
void setSpecialBoundaryCondition(Solver* s)
{
int imaxLocal = s->comm.imaxLocal;
int jmaxLocal = s->comm.jmaxLocal;
double* u = s->u;
if (strcmp(s->problem, "dcavity") == 0) {
if (commIsBoundary(&s->comm, TOP)) {
for (int i = 1; i < imaxLocal + 1; i++) {
U(i, jmaxLocal + 1) = 2.0 - U(i, jmaxLocal);
}
}
} else if (strcmp(s->problem, "canal") == 0) {
if (commIsBoundary(&s->comm, LEFT)) {
double ylength = s->ylength;
double dy = s->dy;
/* NOTE(review): the global row offset yc is derived from the global rank
 * and the total number of ranks rather than from coords[JDIM] and
 * dims[JDIM] of the 2D process grid - confirm this is intended for
 * decompositions where jmax is not divisible by the number of ranks. */
int rest = s->jmax % s->comm.size;
int yc = s->comm.rank * (s->jmax / s->comm.size) + MIN(rest, s->comm.rank);
/* NOTE(review): ys already contains half a cell and (j - 0.5) is added
 * below, so y evaluates to dy * (yc + j) - verify the intended position. */
double ys = dy * (yc + 0.5);
double y;
/* printf("RANK %d yc: %d ys: %f\n", solver->rank, yc, ys); */
for (int j = 1; j < jmaxLocal + 1; j++) {
y = ys + dy * (j - 0.5);
U(0, j) = y * (ylength - y) * 4.0 / (ylength * ylength);
}
}
}
/* print(solver, solver->u); */
}
/*
 * Compute the fields F and G from the current velocities u and v.
 *
 * For every local cell (i, j) with 1 <= i <= imaxLocal, 1 <= j <= jmaxLocal:
 *   F = u + dt * (1/re * (d2u/dx2 + d2u/dy2) - d(u^2)/dx - d(uv)/dy + gx)
 *   G = v + dt * (1/re * (d2v/dx2 + d2v/dy2) - d(uv)/dx - d(v^2)/dy + gy)
 * Each first-derivative term is the sum of a difference of squared/product
 * averages and a second part, scaled by gamma, that weights velocity
 * differences with fabs() of the averaged transporting velocity.
 *
 * Afterwards, on ranks for which commIsBoundary() reports the side, F is set
 * equal to u on the LEFT/RIGHT columns and G equal to v on the BOTTOM/TOP rows.
 *
 * The local names u, v, f, g and imaxLocal are presumably what the U/V/F/G
 * accessor macros (defined outside this function) expand to -- do not rename.
 */
void computeFG(Solver* s)
{
double* u = s->u;
double* v = s->v;
double* f = s->f;
double* g = s->g;
int imaxLocal = s->comm.imaxLocal;
int jmaxLocal = s->comm.jmaxLocal;
double gx = s->gx;
double gy = s->gy;
double gamma = s->gamma;
double dt = s->dt;
/* reciprocals are formed once so the stencil below only multiplies */
double inverseRe = 1.0 / s->re;
double inverseDx = 1.0 / s->dx;
double inverseDy = 1.0 / s->dy;
double du2dx, dv2dy, duvdx, duvdy;
double du2dx2, du2dy2, dv2dx2, dv2dy2;
/* The stencil reads neighbours at i-1, i+1, j-1 and j+1; u and v are passed
 * through commExchange first (presumably to refresh the ghost layers owned
 * by neighbouring ranks -- confirm in the comm module). */
commExchange(&s->comm, u);
commExchange(&s->comm, v);
for (int j = 1; j < jmaxLocal + 1; j++) {
for (int i = 1; i < imaxLocal + 1; i++) {
/* d(u^2)/dx: difference of squared averages plus gamma-weighted part */
du2dx = inverseDx * 0.25 *
((U(i, j) + U(i + 1, j)) * (U(i, j) + U(i + 1, j)) -
(U(i, j) + U(i - 1, j)) * (U(i, j) + U(i - 1, j))) +
gamma * inverseDx * 0.25 *
(fabs(U(i, j) + U(i + 1, j)) * (U(i, j) - U(i + 1, j)) +
fabs(U(i, j) + U(i - 1, j)) * (U(i, j) - U(i - 1, j)));
/* d(uv)/dy as seen by the u equation */
duvdy = inverseDy * 0.25 *
((V(i, j) + V(i + 1, j)) * (U(i, j) + U(i, j + 1)) -
(V(i, j - 1) + V(i + 1, j - 1)) * (U(i, j) + U(i, j - 1))) +
gamma * inverseDy * 0.25 *
(fabs(V(i, j) + V(i + 1, j)) * (U(i, j) - U(i, j + 1)) +
fabs(V(i, j - 1) + V(i + 1, j - 1)) *
(U(i, j) - U(i, j - 1)));
/* three-point second differences of u in x and y */
du2dx2 = inverseDx * inverseDx * (U(i + 1, j) - 2.0 * U(i, j) + U(i - 1, j));
du2dy2 = inverseDy * inverseDy * (U(i, j + 1) - 2.0 * U(i, j) + U(i, j - 1));
F(i, j) = U(i, j) + dt * (inverseRe * (du2dx2 + du2dy2) - du2dx - duvdy + gx);
/* d(uv)/dx as seen by the v equation */
duvdx = inverseDx * 0.25 *
((U(i, j) + U(i, j + 1)) * (V(i, j) + V(i + 1, j)) -
(U(i - 1, j) + U(i - 1, j + 1)) * (V(i, j) + V(i - 1, j))) +
gamma * inverseDx * 0.25 *
(fabs(U(i, j) + U(i, j + 1)) * (V(i, j) - V(i + 1, j)) +
fabs(U(i - 1, j) + U(i - 1, j + 1)) *
(V(i, j) - V(i - 1, j)));
/* d(v^2)/dy: difference of squared averages plus gamma-weighted part */
dv2dy = inverseDy * 0.25 *
((V(i, j) + V(i, j + 1)) * (V(i, j) + V(i, j + 1)) -
(V(i, j) + V(i, j - 1)) * (V(i, j) + V(i, j - 1))) +
gamma * inverseDy * 0.25 *
(fabs(V(i, j) + V(i, j + 1)) * (V(i, j) - V(i, j + 1)) +
fabs(V(i, j) + V(i, j - 1)) * (V(i, j) - V(i, j - 1)));
/* three-point second differences of v in x and y */
dv2dx2 = inverseDx * inverseDx * (V(i + 1, j) - 2.0 * V(i, j) + V(i - 1, j));
dv2dy2 = inverseDy * inverseDy * (V(i, j + 1) - 2.0 * V(i, j) + V(i, j - 1));
G(i, j) = V(i, j) + dt * (inverseRe * (dv2dx2 + dv2dy2) - duvdx - dv2dy + gy);
}
}
/* ----------------------------- boundary of F --------------------------- */
/* F copies u on columns 0 and imaxLocal; the latter overwrites the value
 * the interior loop above stored at i == imaxLocal. */
if (commIsBoundary(&s->comm, LEFT)) {
for (int j = 1; j < jmaxLocal + 1; j++) {
F(0, j) = U(0, j);
}
}
if (commIsBoundary(&s->comm, RIGHT)) {
for (int j = 1; j < jmaxLocal + 1; j++) {
F(imaxLocal, j) = U(imaxLocal, j);
}
}
/* ----------------------------- boundary of G --------------------------- */
/* G copies v on rows 0 and jmaxLocal; the latter overwrites the value
 * the interior loop above stored at j == jmaxLocal. */
if (commIsBoundary(&s->comm, BOTTOM)) {
for (int i = 1; i < imaxLocal + 1; i++) {
G(i, 0) = V(i, 0);
}
}
if (commIsBoundary(&s->comm, TOP)) {
for (int i = 1; i < imaxLocal + 1; i++) {
G(i, jmaxLocal) = V(i, jmaxLocal);
}
}
}
/*
 * Update the velocities from F, G and the pressure field:
 *   u(i,j) = F(i,j) - dt/dx * (p(i+1,j) - p(i,j))
 *   v(i,j) = G(i,j) - dt/dy * (p(i,j+1) - p(i,j))
 * for all local cells 1 <= i <= imaxLocal, 1 <= j <= jmaxLocal.
 */
void adaptUV(Solver* s)
{
    /* p, u, v, f, g and imaxLocal keep their names: the P/U/V/F/G accessor
     * macros are defined outside this function and presumably expand to
     * them. */
    int imaxLocal = s->comm.imaxLocal;
    int jmaxLocal = s->comm.jmaxLocal;
    double* p     = s->p;
    double* u     = s->u;
    double* v     = s->v;
    double* f     = s->f;
    double* g     = s->g;

    const double dtOverDx = s->dt / s->dx;
    const double dtOverDy = s->dt / s->dy;

    for (int row = 1; row <= jmaxLocal; row++) {
        for (int col = 1; col <= imaxLocal; col++) {
            /* forward pressure difference in x, then correct u */
            const double dpX = P(col + 1, row) - P(col, row);
            U(col, row)      = F(col, row) - dpX * dtOverDx;

            /* forward pressure difference in y, then correct v */
            const double dpY = P(col, row + 1) - P(col, row);
            V(col, row)      = G(col, row) - dpY * dtOverDy;
        }
    }
}
/*
 * Write the result fields as plain text into the current directory.
 *
 *   pressure.dat: one line "x y p" per cell, with an empty line after each
 *                 row of constant j.
 *   velocity.dat: one line "x y vel_u vel_v len" per cell, where vel_u is
 *                 the mean of u at (i, j) and (i-1, j), vel_v the mean of v
 *                 at (i, j) and (i, j-1), and len their Euclidean norm.
 *
 * s        supplies imax, jmax, dx and dy.
 * u, v, p  are read as row-major arrays with row stride imax, for
 *          1 <= i < imax and 1 <= j < jmax.
 *          NOTE(review): stride and loop bounds must match the layout of the
 *          arrays the caller passes in -- verify against the call site.
 *
 * If a file cannot be opened, the file name and the system error text are
 * printed to stderr and the process exits with EXIT_FAILURE. A failing
 * fclose() (e.g. data could not be flushed) is reported on stderr as well.
 */
void writeResult(Solver* s, double* u, double* v, double* p)
{
    const char* pressureFile = "pressure.dat";
    const char* velocityFile = "velocity.dat";
    int imax                 = s->imax;
    int jmax                 = s->jmax;
    double dx                = s->dx;
    double dy                = s->dy;
    FILE* fp;

    fp = fopen(pressureFile, "w");
    if (fp == NULL) {
        /* perror reports which file failed and why (errno text) */
        perror(pressureFile);
        exit(EXIT_FAILURE);
    }

    for (int j = 1; j < jmax; j++) {
        double y = (j - 0.5) * dy;

        for (int i = 1; i < imax; i++) {
            double x = (i - 0.5) * dx;
            fprintf(fp, "%.2f %.2f %f\n", x, y, p[j * imax + i]);
        }
        /* empty line separates rows of constant j */
        fprintf(fp, "\n");
    }

    if (fclose(fp) != 0) {
        perror(pressureFile);
    }

    fp = fopen(velocityFile, "w");
    if (fp == NULL) {
        perror(velocityFile);
        exit(EXIT_FAILURE);
    }

    for (int j = 1; j < jmax; j++) {
        double y = dy * (j - 0.5);

        for (int i = 1; i < imax; i++) {
            double x     = dx * (i - 0.5);
            double vel_u = (u[j * imax + i] + u[j * imax + (i - 1)]) / 2.0;
            double vel_v = (v[j * imax + i] + v[(j - 1) * imax + i]) / 2.0;
            double len   = sqrt((vel_u * vel_u) + (vel_v * vel_v));
            fprintf(fp, "%.2f %.2f %f %f %f\n", x, y, vel_u, vel_v, len);
        }
    }

    if (fclose(fp) != 0) {
        perror(velocityFile);
    }
}

View File

@ -0,0 +1,47 @@
/*
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
* All rights reserved. This file is part of nusif-solver.
* Use of this source code is governed by a MIT style
* license that can be found in the LICENSE file.
*/
#ifndef __SOLVER_H_
#define __SOLVER_H_
#include "comm.h"
#include "parameter.h"
/*
 * Boundary-condition kinds that can be selected per domain side (see the
 * bcLeft/bcRight/bcBottom/bcTop members of Solver). Numbering starts at 1.
 */
enum BC {
    NOSLIP   = 1,
    SLIP     = 2,
    OUTFLOW  = 3,
    PERIODIC = 4
};
/*
 * State of one solver instance: grid description, field arrays, physical and
 * numerical parameters, and the communication handle.
 */
typedef struct {
    /* geometry and grid information */
    double dx;      /* grid spacing in x */
    double dy;      /* grid spacing in y */
    int imax;       /* grid extent in i */
    int jmax;       /* grid extent in j */
    double xlength; /* presumably domain length in x -- confirm */
    double ylength; /* domain length in y (used by the "canal" profile) */

    /* arrays */
    double* p;   /* pressure */
    double* rhs; /* presumably right-hand side for the pressure solve */
    double* f;   /* F field written by computeFG */
    double* g;   /* G field written by computeFG */
    double* u;   /* velocity component in x */
    double* v;   /* velocity component in y */

    /* parameters */
    double eps;   /* presumably convergence tolerance -- confirm in solve() */
    double omega; /* presumably relaxation factor -- confirm in solve() */
    double re;    /* its reciprocal scales the second-derivative terms */
    double tau;   /* presumably time-step safety factor -- confirm */
    double gamma; /* weight of the fabs() part of the first derivatives */
    double gx;    /* source term added to F */
    double gy;    /* source term added to G */

    /* time stepping */
    int itermax;    /* presumably iteration limit of the pressure solve */
    double dt;      /* current time-step size */
    double te;      /* presumably end time of the simulation -- confirm */
    double dtBound; /* presumably bound used by computeTimestep -- confirm */

    char* problem; /* test-case name, e.g. "dcavity" or "canal" */

    /* boundary-condition kind (enum BC) for each side of the domain */
    int bcLeft;
    int bcRight;
    int bcBottom;
    int bcTop;

    /* communication */
    Comm comm;
} Solver;
/* Set-up (definition not shown in this header). */
void initSolver(Solver* s, Parameter* params);

/* Pressure solve and time-step control (definitions not shown here). */
void computeRHS(Solver* s);
int solve(Solver* s);
void normalizePressure(Solver* s);
void computeTimestep(Solver* s);

/* Boundary handling: generic per-side conditions selected by
 * bcLeft/bcRight/bcBottom/bcTop, then the problem-specific values chosen
 * by Solver.problem. */
void setBoundaryConditions(Solver* s);
void setSpecialBoundaryCondition(Solver* s);

/* Velocity update: computeFG fills f and g from u and v, adaptUV derives the
 * new u and v from f, g and p. */
void computeFG(Solver* s);
void adaptUV(Solver* s);

/* Writes pressure.dat and velocity.dat from the given arrays. */
void writeResult(Solver* s, double* u, double* v, double* p);
#endif

View File

@ -0,0 +1,24 @@
/*
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
* All rights reserved.
* Use of this source code is governed by a MIT-style
* license that can be found in the LICENSE file.
*/
#include <stdlib.h>
#include <time.h>
/*
 * Return the current CLOCK_MONOTONIC reading in seconds as a double.
 * Intended for measuring intervals by subtracting two readings.
 */
double getTimeStamp()
{
    struct timespec now;
    clock_gettime(CLOCK_MONOTONIC, &now);

    /* combine whole seconds with the nanosecond remainder */
    const double wholeSeconds = (double)now.tv_sec;
    const double fraction     = (double)now.tv_nsec * 1.e-9;
    return wholeSeconds + fraction;
}
/*
 * Return the resolution of CLOCK_MONOTONIC, as reported by clock_getres(),
 * in seconds.
 */
double getTimeResolution()
{
    struct timespec res;
    clock_getres(CLOCK_MONOTONIC, &res);

    /* combine whole seconds with the nanosecond remainder */
    const double wholeSeconds = (double)res.tv_sec;
    const double fraction     = (double)res.tv_nsec * 1.e-9;
    return wholeSeconds + fraction;
}
/*
 * Alias of getTimeStamp() under a name with a trailing underscore
 * (presumably so it can be called from Fortran -- confirm).
 */
double getTimeStamp_()
{
    double stamp = getTimeStamp();
    return stamp;
}

Some files were not shown because too many files have changed in this diff Show More