forked from moebiusband/NuSiF-Solver
		
	Merge 2D mpi versions. Cleanup.
This commit is contained in:
		@@ -1,71 +0,0 @@
 | 
				
			|||||||
#=======================================================================================
 | 
					 | 
				
			||||||
# Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
 | 
					 | 
				
			||||||
# All rights reserved.
 | 
					 | 
				
			||||||
# Use of this source code is governed by a MIT-style
 | 
					 | 
				
			||||||
# license that can be found in the LICENSE file.
 | 
					 | 
				
			||||||
#=======================================================================================
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
#CONFIGURE BUILD SYSTEM
 | 
					 | 
				
			||||||
TARGET	   = exe-$(TAG)
 | 
					 | 
				
			||||||
BUILD_DIR  = ./$(TAG)
 | 
					 | 
				
			||||||
SRC_DIR    = ./src
 | 
					 | 
				
			||||||
MAKE_DIR   = ./
 | 
					 | 
				
			||||||
Q         ?= @
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
#DO NOT EDIT BELOW
 | 
					 | 
				
			||||||
include $(MAKE_DIR)/config.mk
 | 
					 | 
				
			||||||
include $(MAKE_DIR)/include_$(TAG).mk
 | 
					 | 
				
			||||||
INCLUDES  += -I$(SRC_DIR) -I$(BUILD_DIR)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
VPATH     = $(SRC_DIR)
 | 
					 | 
				
			||||||
SRC       = $(wildcard $(SRC_DIR)/*.c)
 | 
					 | 
				
			||||||
ASM       = $(patsubst $(SRC_DIR)/%.c, $(BUILD_DIR)/%.s, $(SRC))
 | 
					 | 
				
			||||||
OBJ       = $(patsubst $(SRC_DIR)/%.c, $(BUILD_DIR)/%.o, $(SRC))
 | 
					 | 
				
			||||||
SOURCES   = $(SRC) $(wildcard $(SRC_DIR)/*.h)
 | 
					 | 
				
			||||||
CPPFLAGS := $(CPPFLAGS) $(DEFINES) $(OPTIONS) $(INCLUDES)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
${TARGET}: $(BUILD_DIR) $(OBJ)
 | 
					 | 
				
			||||||
	$(info ===>  LINKING  $(TARGET))
 | 
					 | 
				
			||||||
	$(Q)${LINKER} ${LFLAGS} -o $(TARGET) $(OBJ) $(LIBS)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
$(BUILD_DIR)/%.o:  %.c $(MAKE_DIR)/include_$(TAG).mk $(MAKE_DIR)/config.mk
 | 
					 | 
				
			||||||
	$(info ===>  COMPILE  $@)
 | 
					 | 
				
			||||||
	$(CC) -c $(CPPFLAGS) $(CFLAGS) $< -o $@
 | 
					 | 
				
			||||||
	$(Q)$(GCC) $(CPPFLAGS) -MT $(@:.d=.o) -MM  $< > $(BUILD_DIR)/$*.d
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
$(BUILD_DIR)/%.s:  %.c
 | 
					 | 
				
			||||||
	$(info ===>  GENERATE ASM  $@)
 | 
					 | 
				
			||||||
	$(CC) -S $(CPPFLAGS) $(CFLAGS) $< -o $@
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
.PHONY: clean distclean tags info asm format
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
clean:
 | 
					 | 
				
			||||||
	$(info ===>  CLEAN)
 | 
					 | 
				
			||||||
	@rm -rf $(BUILD_DIR)
 | 
					 | 
				
			||||||
	@rm -f tags
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
distclean: clean
 | 
					 | 
				
			||||||
	$(info ===>  DIST CLEAN)
 | 
					 | 
				
			||||||
	@rm -f $(TARGET)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
info:
 | 
					 | 
				
			||||||
	$(info $(CFLAGS))
 | 
					 | 
				
			||||||
	$(Q)$(CC) $(VERSION)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
asm:  $(BUILD_DIR) $(ASM)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
tags:
 | 
					 | 
				
			||||||
	$(info ===>  GENERATE TAGS)
 | 
					 | 
				
			||||||
	$(Q)ctags -R
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
format:
 | 
					 | 
				
			||||||
	@for src in $(SOURCES) ; do \
 | 
					 | 
				
			||||||
		echo "Formatting $$src" ; \
 | 
					 | 
				
			||||||
		clang-format -i $$src ; \
 | 
					 | 
				
			||||||
	done
 | 
					 | 
				
			||||||
	@echo "Done"
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
$(BUILD_DIR):
 | 
					 | 
				
			||||||
	@mkdir $(BUILD_DIR)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
-include $(OBJ:.o=.d)
 | 
					 | 
				
			||||||
@@ -1,48 +0,0 @@
 | 
				
			|||||||
# C source skeleton
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
## Build
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
1. Configure the toolchain and additional options in `config.mk`:
 | 
					 | 
				
			||||||
```
 | 
					 | 
				
			||||||
# Supported: GCC, CLANG, ICC
 | 
					 | 
				
			||||||
TAG ?= GCC
 | 
					 | 
				
			||||||
ENABLE_OPENMP ?= false
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
OPTIONS +=  -DARRAY_ALIGNMENT=64
 | 
					 | 
				
			||||||
#OPTIONS +=  -DVERBOSE_AFFINITY
 | 
					 | 
				
			||||||
#OPTIONS +=  -DVERBOSE_DATASIZE
 | 
					 | 
				
			||||||
#OPTIONS +=  -DVERBOSE_TIMER
 | 
					 | 
				
			||||||
```
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
The verbosity options enable detailed output about affinity settings, allocation sizes and timer resolution.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
2. Build with:
 | 
					 | 
				
			||||||
```
 | 
					 | 
				
			||||||
make
 | 
					 | 
				
			||||||
```
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
You can build multiple toolchains in the same directory, but notice that the Makefile is only acting on the one currently set.
 | 
					 | 
				
			||||||
Intermediate build results are located in the `<TOOLCHAIN>` directory.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
To output the executed commands use:
 | 
					 | 
				
			||||||
```
 | 
					 | 
				
			||||||
make Q=
 | 
					 | 
				
			||||||
```
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
3. Clean up with:
 | 
					 | 
				
			||||||
```
 | 
					 | 
				
			||||||
make clean
 | 
					 | 
				
			||||||
```
 | 
					 | 
				
			||||||
to clean intermediate build results.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
```
 | 
					 | 
				
			||||||
make distclean
 | 
					 | 
				
			||||||
```
 | 
					 | 
				
			||||||
to clean intermediate build results and binary.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
4. (Optional) Generate assembler:
 | 
					 | 
				
			||||||
```
 | 
					 | 
				
			||||||
make asm
 | 
					 | 
				
			||||||
```
 | 
					 | 
				
			||||||
The assembler files will also be located in the `<TOOLCHAIN>` directory.
 | 
					 | 
				
			||||||
@@ -1,46 +0,0 @@
 | 
				
			|||||||
#==============================================================================
 | 
					 | 
				
			||||||
#                            Laminar Canal Flow
 | 
					 | 
				
			||||||
#==============================================================================
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
# Problem specific Data:
 | 
					 | 
				
			||||||
# ---------------------
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
name canal             # name of flow setup
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
bcLeft    3            #  flags for boundary conditions
 | 
					 | 
				
			||||||
bcRight   3            #  1 = no-slip      3 = outflow
 | 
					 | 
				
			||||||
bcBottom  1            #  2 = free-slip    4 = periodic
 | 
					 | 
				
			||||||
bcTop     1            #
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
gx     0.0      # Body forces (e.g. gravity)
 | 
					 | 
				
			||||||
gy     0.0      #
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
re            100.0	   # Reynolds number
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
u_init        1.0      # initial value for velocity in x-direction
 | 
					 | 
				
			||||||
v_init        0.0      # initial value for velocity in y-direction
 | 
					 | 
				
			||||||
p_init        0.0      # initial value for pressure
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
# Geometry Data:
 | 
					 | 
				
			||||||
# -------------
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
xlength       30.0     # domain size in x-direction
 | 
					 | 
				
			||||||
ylength       4.0	   # domain size in y-direction
 | 
					 | 
				
			||||||
imax          200      # number of interior cells in x-direction
 | 
					 | 
				
			||||||
jmax          50	   # number of interior cells in y-direction
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
# Time Data:
 | 
					 | 
				
			||||||
# ---------
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
te       100.0   # final time
 | 
					 | 
				
			||||||
dt       0.02    # time stepsize
 | 
					 | 
				
			||||||
tau      0.5     # safety factor for time stepsize control (<0 constant delt)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
# Pressure Iteration Data:
 | 
					 | 
				
			||||||
# -----------------------
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
itermax       500       # maximal number of pressure iteration in one time step
 | 
					 | 
				
			||||||
eps           0.00001   # stopping tolerance for pressure iteration
 | 
					 | 
				
			||||||
omg           1.8       # relaxation parameter for SOR iteration
 | 
					 | 
				
			||||||
gamma         0.9       # upwind differencing factor gamma
 | 
					 | 
				
			||||||
#===============================================================================
 | 
					 | 
				
			||||||
@@ -1,10 +0,0 @@
 | 
				
			|||||||
# Supported: GCC, CLANG, ICC
 | 
					 | 
				
			||||||
TAG ?= CLANG
 | 
					 | 
				
			||||||
ENABLE_OPENMP ?= false
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
#Feature options
 | 
					 | 
				
			||||||
OPTIONS +=  -DARRAY_ALIGNMENT=64
 | 
					 | 
				
			||||||
#OPTIONS +=  -DVERBOSE
 | 
					 | 
				
			||||||
#OPTIONS +=  -DVERBOSE_AFFINITY
 | 
					 | 
				
			||||||
#OPTIONS +=  -DVERBOSE_DATASIZE
 | 
					 | 
				
			||||||
#OPTIONS +=  -DVERBOSE_TIMER
 | 
					 | 
				
			||||||
@@ -1,46 +0,0 @@
 | 
				
			|||||||
#==============================================================================
 | 
					 | 
				
			||||||
#                              Driven Cavity
 | 
					 | 
				
			||||||
#==============================================================================
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
# Problem specific Data:
 | 
					 | 
				
			||||||
# ---------------------
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
name dcavity        # name of flow setup
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
bcLeft   1			#  flags for boundary conditions
 | 
					 | 
				
			||||||
bcRight  1			#  1 = no-slip      3 = outflow
 | 
					 | 
				
			||||||
bcBottom 1			#  2 = free-slip    4 = periodic
 | 
					 | 
				
			||||||
bcTop    1			#
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
gx    0.0			# Body forces (e.g. gravity)
 | 
					 | 
				
			||||||
gy    0.0			#
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
re    500.0		    # Reynolds number
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
u_init    0.0		# initial value for velocity in x-direction
 | 
					 | 
				
			||||||
v_init    0.0		# initial value for velocity in y-direction
 | 
					 | 
				
			||||||
p_init    0.0		# initial value for pressure
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
# Geometry Data:
 | 
					 | 
				
			||||||
# -------------
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
xlength    1.0		# domain size in x-direction
 | 
					 | 
				
			||||||
ylength    1.0		# domain size in y-direction
 | 
					 | 
				
			||||||
imax       100		# number of interior cells in x-direction
 | 
					 | 
				
			||||||
jmax       100		# number of interior cells in y-direction
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
# Time Data:
 | 
					 | 
				
			||||||
# ---------
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
te      25.0		# final time
 | 
					 | 
				
			||||||
dt     0.02	    # time stepsize
 | 
					 | 
				
			||||||
tau     0.5		# safety factor for time stepsize control (<0 constant delt)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
# Pressure Iteration Data:
 | 
					 | 
				
			||||||
# -----------------------
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
itermax  1000		# maximal number of pressure iteration in one time step
 | 
					 | 
				
			||||||
eps      0.001		# stopping tolerance for pressure iteration
 | 
					 | 
				
			||||||
omg      1.7		# relaxation parameter for SOR iteration
 | 
					 | 
				
			||||||
gamma    0.9		# upwind differencing factor gamma
 | 
					 | 
				
			||||||
#===============================================================================
 | 
					 | 
				
			||||||
@@ -1,16 +0,0 @@
 | 
				
			|||||||
CC   = mpicc
 | 
					 | 
				
			||||||
GCC  = cc
 | 
					 | 
				
			||||||
LINKER = $(CC)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
ifeq ($(ENABLE_OPENMP),true)
 | 
					 | 
				
			||||||
OPENMP   = -fopenmp
 | 
					 | 
				
			||||||
#OPENMP   = -Xpreprocessor -fopenmp #required on Macos with homebrew libomp
 | 
					 | 
				
			||||||
LIBS     = # -lomp
 | 
					 | 
				
			||||||
endif
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
VERSION  = --version
 | 
					 | 
				
			||||||
CFLAGS   = -Ofast -std=c99 $(OPENMP)
 | 
					 | 
				
			||||||
#CFLAGS   = -Ofast -fnt-store=aggressive  -std=c99 $(OPENMP) #AMD CLANG
 | 
					 | 
				
			||||||
LFLAGS   = $(OPENMP)
 | 
					 | 
				
			||||||
DEFINES  = -D_GNU_SOURCE# -DDEBUG
 | 
					 | 
				
			||||||
INCLUDES = -I/usr/local/include
 | 
					 | 
				
			||||||
@@ -1,14 +0,0 @@
 | 
				
			|||||||
CC   = gcc
 | 
					 | 
				
			||||||
GCC  = gcc
 | 
					 | 
				
			||||||
LINKER = $(CC)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
ifeq ($(ENABLE_OPENMP),true)
 | 
					 | 
				
			||||||
OPENMP   = -fopenmp
 | 
					 | 
				
			||||||
endif
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
VERSION  = --version
 | 
					 | 
				
			||||||
CFLAGS   = -Ofast -ffreestanding -std=c99 $(OPENMP)
 | 
					 | 
				
			||||||
LFLAGS   = $(OPENMP)
 | 
					 | 
				
			||||||
DEFINES  = -D_GNU_SOURCE
 | 
					 | 
				
			||||||
INCLUDES =
 | 
					 | 
				
			||||||
LIBS     =
 | 
					 | 
				
			||||||
@@ -1,14 +0,0 @@
 | 
				
			|||||||
CC   = mpiicc
 | 
					 | 
				
			||||||
GCC  = gcc
 | 
					 | 
				
			||||||
LINKER = $(CC)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
ifeq ($(ENABLE_OPENMP),true)
 | 
					 | 
				
			||||||
OPENMP   = -qopenmp
 | 
					 | 
				
			||||||
endif
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
VERSION  = --version
 | 
					 | 
				
			||||||
CFLAGS   =  -O3 -xHost -qopt-zmm-usage=high -std=c99 $(OPENMP)
 | 
					 | 
				
			||||||
LFLAGS   = $(OPENMP)
 | 
					 | 
				
			||||||
DEFINES  = -D_GNU_SOURCE
 | 
					 | 
				
			||||||
INCLUDES =
 | 
					 | 
				
			||||||
LIBS     =
 | 
					 | 
				
			||||||
@@ -1,61 +0,0 @@
 | 
				
			|||||||
/*
 | 
					 | 
				
			||||||
 * Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
 | 
					 | 
				
			||||||
 * All rights reserved.
 | 
					 | 
				
			||||||
 * Use of this source code is governed by a MIT-style
 | 
					 | 
				
			||||||
 * license that can be found in the LICENSE file.
 | 
					 | 
				
			||||||
 */
 | 
					 | 
				
			||||||
#ifdef __linux__
 | 
					 | 
				
			||||||
#ifdef _OPENMP
 | 
					 | 
				
			||||||
#include <pthread.h>
 | 
					 | 
				
			||||||
#include <sched.h>
 | 
					 | 
				
			||||||
#include <stdio.h>
 | 
					 | 
				
			||||||
#include <stdlib.h>
 | 
					 | 
				
			||||||
#include <sys/syscall.h>
 | 
					 | 
				
			||||||
#include <sys/types.h>
 | 
					 | 
				
			||||||
#include <unistd.h>
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
#define MAX_NUM_THREADS 128
 | 
					 | 
				
			||||||
#define gettid()        syscall(SYS_gettid)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
static int getProcessorID(cpu_set_t* cpu_set)
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
    int processorId;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    for (processorId = 0; processorId < MAX_NUM_THREADS; processorId++) {
 | 
					 | 
				
			||||||
        if (CPU_ISSET(processorId, cpu_set)) {
 | 
					 | 
				
			||||||
            break;
 | 
					 | 
				
			||||||
        }
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
    return processorId;
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
int affinity_getProcessorId()
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
    cpu_set_t cpu_set;
 | 
					 | 
				
			||||||
    CPU_ZERO(&cpu_set);
 | 
					 | 
				
			||||||
    sched_getaffinity(gettid(), sizeof(cpu_set_t), &cpu_set);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    return getProcessorID(&cpu_set);
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
void affinity_pinThread(int processorId)
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
    cpu_set_t cpuset;
 | 
					 | 
				
			||||||
    pthread_t thread;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    thread = pthread_self();
 | 
					 | 
				
			||||||
    CPU_ZERO(&cpuset);
 | 
					 | 
				
			||||||
    CPU_SET(processorId, &cpuset);
 | 
					 | 
				
			||||||
    pthread_setaffinity_np(thread, sizeof(cpu_set_t), &cpuset);
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
void affinity_pinProcess(int processorId)
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
    cpu_set_t cpuset;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    CPU_ZERO(&cpuset);
 | 
					 | 
				
			||||||
    CPU_SET(processorId, &cpuset);
 | 
					 | 
				
			||||||
    sched_setaffinity(0, sizeof(cpu_set_t), &cpuset);
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
#endif /*_OPENMP*/
 | 
					 | 
				
			||||||
#endif /*__linux__*/
 | 
					 | 
				
			||||||
@@ -1,14 +0,0 @@
 | 
				
			|||||||
/*
 | 
					 | 
				
			||||||
 * Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
 | 
					 | 
				
			||||||
 * All rights reserved.
 | 
					 | 
				
			||||||
 * Use of this source code is governed by a MIT-style
 | 
					 | 
				
			||||||
 * license that can be found in the LICENSE file.
 | 
					 | 
				
			||||||
 */
 | 
					 | 
				
			||||||
#ifndef AFFINITY_H
 | 
					 | 
				
			||||||
#define AFFINITY_H
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
extern int affinity_getProcessorId();
 | 
					 | 
				
			||||||
extern void affinity_pinProcess(int);
 | 
					 | 
				
			||||||
extern void affinity_pinThread(int);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
#endif /*AFFINITY_H*/
 | 
					 | 
				
			||||||
@@ -1,35 +0,0 @@
 | 
				
			|||||||
/*
 | 
					 | 
				
			||||||
 * Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
 | 
					 | 
				
			||||||
 * All rights reserved.
 | 
					 | 
				
			||||||
 * Use of this source code is governed by a MIT-style
 | 
					 | 
				
			||||||
 * license that can be found in the LICENSE file.
 | 
					 | 
				
			||||||
 */
 | 
					 | 
				
			||||||
#include <errno.h>
 | 
					 | 
				
			||||||
#include <stdio.h>
 | 
					 | 
				
			||||||
#include <stdlib.h>
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
void* allocate(int alignment, size_t bytesize)
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
    int errorCode;
 | 
					 | 
				
			||||||
    void* ptr;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    errorCode = posix_memalign(&ptr, alignment, bytesize);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    if (errorCode) {
 | 
					 | 
				
			||||||
        if (errorCode == EINVAL) {
 | 
					 | 
				
			||||||
            fprintf(stderr, "Error: Alignment parameter is not a power of two\n");
 | 
					 | 
				
			||||||
            exit(EXIT_FAILURE);
 | 
					 | 
				
			||||||
        }
 | 
					 | 
				
			||||||
        if (errorCode == ENOMEM) {
 | 
					 | 
				
			||||||
            fprintf(stderr, "Error: Insufficient memory to fulfill the request\n");
 | 
					 | 
				
			||||||
            exit(EXIT_FAILURE);
 | 
					 | 
				
			||||||
        }
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    if (ptr == NULL) {
 | 
					 | 
				
			||||||
        fprintf(stderr, "Error: posix_memalign failed!\n");
 | 
					 | 
				
			||||||
        exit(EXIT_FAILURE);
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    return ptr;
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
@@ -1,13 +0,0 @@
 | 
				
			|||||||
/*
 | 
					 | 
				
			||||||
 * Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
 | 
					 | 
				
			||||||
 * All rights reserved.
 | 
					 | 
				
			||||||
 * Use of this source code is governed by a MIT-style
 | 
					 | 
				
			||||||
 * license that can be found in the LICENSE file.
 | 
					 | 
				
			||||||
 */
 | 
					 | 
				
			||||||
#ifndef __ALLOCATE_H_
 | 
					 | 
				
			||||||
#define __ALLOCATE_H_
 | 
					 | 
				
			||||||
#include <stdlib.h>
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
extern void* allocate(int alignment, size_t bytesize);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
#endif
 | 
					 | 
				
			||||||
@@ -1,54 +0,0 @@
 | 
				
			|||||||
/*
 | 
					 | 
				
			||||||
 * =======================================================================================
 | 
					 | 
				
			||||||
 *
 | 
					 | 
				
			||||||
 *      Author:   Jan Eitzinger (je), jan.eitzinger@fau.de
 | 
					 | 
				
			||||||
 *      Copyright (c) 2020 RRZE, University Erlangen-Nuremberg
 | 
					 | 
				
			||||||
 *
 | 
					 | 
				
			||||||
 *      Permission is hereby granted, free of charge, to any person obtaining a
 | 
					 | 
				
			||||||
 * copy of this software and associated documentation files (the "Software"), to
 | 
					 | 
				
			||||||
 * deal in the Software without restriction, including without limitation the
 | 
					 | 
				
			||||||
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 | 
					 | 
				
			||||||
 * sell copies of the Software, and to permit persons to whom the Software is
 | 
					 | 
				
			||||||
 *      furnished to do so, subject to the following conditions:
 | 
					 | 
				
			||||||
 *
 | 
					 | 
				
			||||||
 *      The above copyright notice and this permission notice shall be included
 | 
					 | 
				
			||||||
 * in all copies or substantial portions of the Software.
 | 
					 | 
				
			||||||
 *
 | 
					 | 
				
			||||||
 *      THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 | 
					 | 
				
			||||||
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 | 
					 | 
				
			||||||
 *      FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 | 
					 | 
				
			||||||
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 | 
					 | 
				
			||||||
 *      LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 | 
					 | 
				
			||||||
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 | 
					 | 
				
			||||||
 * IN THE SOFTWARE.
 | 
					 | 
				
			||||||
 *
 | 
					 | 
				
			||||||
 * =======================================================================================
 | 
					 | 
				
			||||||
 */
 | 
					 | 
				
			||||||
#ifndef LIKWID_MARKERS_H
 | 
					 | 
				
			||||||
#define LIKWID_MARKERS_H
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
#ifdef LIKWID_PERFMON
 | 
					 | 
				
			||||||
#include <likwid.h>
 | 
					 | 
				
			||||||
#define LIKWID_MARKER_INIT                likwid_markerInit()
 | 
					 | 
				
			||||||
#define LIKWID_MARKER_THREADINIT          likwid_markerThreadInit()
 | 
					 | 
				
			||||||
#define LIKWID_MARKER_SWITCH              likwid_markerNextGroup()
 | 
					 | 
				
			||||||
#define LIKWID_MARKER_REGISTER(regionTag) likwid_markerRegisterRegion(regionTag)
 | 
					 | 
				
			||||||
#define LIKWID_MARKER_START(regionTag)    likwid_markerStartRegion(regionTag)
 | 
					 | 
				
			||||||
#define LIKWID_MARKER_STOP(regionTag)     likwid_markerStopRegion(regionTag)
 | 
					 | 
				
			||||||
#define LIKWID_MARKER_CLOSE               likwid_markerClose()
 | 
					 | 
				
			||||||
#define LIKWID_MARKER_RESET(regionTag)    likwid_markerResetRegion(regionTag)
 | 
					 | 
				
			||||||
#define LIKWID_MARKER_GET(regionTag, nevents, events, time, count)                       \
 | 
					 | 
				
			||||||
    likwid_markerGetRegion(regionTag, nevents, events, time, count)
 | 
					 | 
				
			||||||
#else /* LIKWID_PERFMON */
 | 
					 | 
				
			||||||
#define LIKWID_MARKER_INIT
 | 
					 | 
				
			||||||
#define LIKWID_MARKER_THREADINIT
 | 
					 | 
				
			||||||
#define LIKWID_MARKER_SWITCH
 | 
					 | 
				
			||||||
#define LIKWID_MARKER_REGISTER(regionTag)
 | 
					 | 
				
			||||||
#define LIKWID_MARKER_START(regionTag)
 | 
					 | 
				
			||||||
#define LIKWID_MARKER_STOP(regionTag)
 | 
					 | 
				
			||||||
#define LIKWID_MARKER_CLOSE
 | 
					 | 
				
			||||||
#define LIKWID_MARKER_GET(regionTag, nevents, events, time, count)
 | 
					 | 
				
			||||||
#define LIKWID_MARKER_RESET(regionTag)
 | 
					 | 
				
			||||||
#endif /* LIKWID_PERFMON */
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
#endif /*LIKWID_MARKERS_H*/
 | 
					 | 
				
			||||||
@@ -1,77 +0,0 @@
 | 
				
			|||||||
/*
 | 
					 | 
				
			||||||
 * Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
 | 
					 | 
				
			||||||
 * All rights reserved.
 | 
					 | 
				
			||||||
 * Use of this source code is governed by a MIT-style
 | 
					 | 
				
			||||||
 * license that can be found in the LICENSE file.
 | 
					 | 
				
			||||||
 */
 | 
					 | 
				
			||||||
#include <float.h>
 | 
					 | 
				
			||||||
#include <limits.h>
 | 
					 | 
				
			||||||
#include <mpi.h>
 | 
					 | 
				
			||||||
#include <stdio.h>
 | 
					 | 
				
			||||||
#include <stdlib.h>
 | 
					 | 
				
			||||||
#include <unistd.h>
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
#include "parameter.h"
 | 
					 | 
				
			||||||
#include "progress.h"
 | 
					 | 
				
			||||||
#include "solver.h"
 | 
					 | 
				
			||||||
#include "timing.h"
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
int main(int argc, char** argv)
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
    int rank = 0;
 | 
					 | 
				
			||||||
    double start, end;
 | 
					 | 
				
			||||||
    Parameter params;
 | 
					 | 
				
			||||||
    Solver solver;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    MPI_Init(&argc, &argv);
 | 
					 | 
				
			||||||
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
 | 
					 | 
				
			||||||
    initParameter(¶ms);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    if (argc != 2) {
 | 
					 | 
				
			||||||
        printf("Usage: %s <configFile>\n", argv[0]);
 | 
					 | 
				
			||||||
        exit(EXIT_SUCCESS);
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    readParameter(¶ms, argv[1]);
 | 
					 | 
				
			||||||
    if (rank == 0) {
 | 
					 | 
				
			||||||
        printParameter(¶ms);
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
    initSolver(&solver, ¶ms);
 | 
					 | 
				
			||||||
    initProgress(solver.te);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    double tau = solver.tau;
 | 
					 | 
				
			||||||
    double te  = solver.te;
 | 
					 | 
				
			||||||
    double t   = 0.0;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    start = getTimeStamp();
 | 
					 | 
				
			||||||
    while (t <= te) {
 | 
					 | 
				
			||||||
        if (tau > 0.0) {
 | 
					 | 
				
			||||||
            computeTimestep(&solver);
 | 
					 | 
				
			||||||
        }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        setBoundaryConditions(&solver);
 | 
					 | 
				
			||||||
        setSpecialBoundaryCondition(&solver);
 | 
					 | 
				
			||||||
        computeFG(&solver);
 | 
					 | 
				
			||||||
        computeRHS(&solver);
 | 
					 | 
				
			||||||
        solve(&solver);
 | 
					 | 
				
			||||||
        adaptUV(&solver);
 | 
					 | 
				
			||||||
        t += solver.dt;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
#ifdef VERBOSE
 | 
					 | 
				
			||||||
        if (rank == 0) {
 | 
					 | 
				
			||||||
            printf("TIME %f , TIMESTEP %f\n", t, solver.dt);
 | 
					 | 
				
			||||||
        }
 | 
					 | 
				
			||||||
#else
 | 
					 | 
				
			||||||
        printProgress(t);
 | 
					 | 
				
			||||||
#endif
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
    end = getTimeStamp();
 | 
					 | 
				
			||||||
    stopProgress();
 | 
					 | 
				
			||||||
    if (rank == 0) {
 | 
					 | 
				
			||||||
        printf("Solution took %.2fs\n", end - start);
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
    collectResult(&solver);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    MPI_Finalize();
 | 
					 | 
				
			||||||
    return EXIT_SUCCESS;
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
@@ -1,111 +0,0 @@
 | 
				
			|||||||
/*
 | 
					 | 
				
			||||||
 * Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
 | 
					 | 
				
			||||||
 * All rights reserved. This file is part of nusif-solver.
 | 
					 | 
				
			||||||
 * Use of this source code is governed by a MIT style
 | 
					 | 
				
			||||||
 * license that can be found in the LICENSE file.
 | 
					 | 
				
			||||||
 */
 | 
					 | 
				
			||||||
#include <stdio.h>
 | 
					 | 
				
			||||||
#include <stdlib.h>
 | 
					 | 
				
			||||||
#include <string.h>
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
#include "parameter.h"
 | 
					 | 
				
			||||||
#include "util.h"
 | 
					 | 
				
			||||||
#define MAXLINE 4096
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
void initParameter(Parameter* param)
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
    param->xlength = 1.0;
 | 
					 | 
				
			||||||
    param->ylength = 1.0;
 | 
					 | 
				
			||||||
    param->imax    = 100;
 | 
					 | 
				
			||||||
    param->jmax    = 100;
 | 
					 | 
				
			||||||
    param->itermax = 1000;
 | 
					 | 
				
			||||||
    param->eps     = 0.0001;
 | 
					 | 
				
			||||||
    param->omg     = 1.7;
 | 
					 | 
				
			||||||
    param->re      = 100.0;
 | 
					 | 
				
			||||||
    param->gamma   = 0.9;
 | 
					 | 
				
			||||||
    param->tau     = 0.5;
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
void readParameter(Parameter* param, const char* filename)
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
    FILE* fp = fopen(filename, "r");
 | 
					 | 
				
			||||||
    char line[MAXLINE];
 | 
					 | 
				
			||||||
    int i;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    if (!fp) {
 | 
					 | 
				
			||||||
        fprintf(stderr, "Could not open parameter file: %s\n", filename);
 | 
					 | 
				
			||||||
        exit(EXIT_FAILURE);
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    while (!feof(fp)) {
 | 
					 | 
				
			||||||
        line[0] = '\0';
 | 
					 | 
				
			||||||
        fgets(line, MAXLINE, fp);
 | 
					 | 
				
			||||||
        for (i = 0; line[i] != '\0' && line[i] != '#'; i++)
 | 
					 | 
				
			||||||
            ;
 | 
					 | 
				
			||||||
        line[i] = '\0';
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        char* tok = strtok(line, " ");
 | 
					 | 
				
			||||||
        char* val = strtok(NULL, " ");
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
#define PARSE_PARAM(p, f)                                                                \
 | 
					 | 
				
			||||||
    if (strncmp(tok, #p, sizeof(#p) / sizeof(#p[0]) - 1) == 0) {                         \
 | 
					 | 
				
			||||||
        param->p = f(val);                                                               \
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
#define PARSE_STRING(p) PARSE_PARAM(p, strdup)
 | 
					 | 
				
			||||||
#define PARSE_INT(p)    PARSE_PARAM(p, atoi)
 | 
					 | 
				
			||||||
#define PARSE_REAL(p)   PARSE_PARAM(p, atof)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        if (tok != NULL && val != NULL) {
 | 
					 | 
				
			||||||
            PARSE_REAL(xlength);
 | 
					 | 
				
			||||||
            PARSE_REAL(ylength);
 | 
					 | 
				
			||||||
            PARSE_INT(imax);
 | 
					 | 
				
			||||||
            PARSE_INT(jmax);
 | 
					 | 
				
			||||||
            PARSE_INT(itermax);
 | 
					 | 
				
			||||||
            PARSE_REAL(eps);
 | 
					 | 
				
			||||||
            PARSE_REAL(omg);
 | 
					 | 
				
			||||||
            PARSE_REAL(re);
 | 
					 | 
				
			||||||
            PARSE_REAL(tau);
 | 
					 | 
				
			||||||
            PARSE_REAL(gamma);
 | 
					 | 
				
			||||||
            PARSE_REAL(dt);
 | 
					 | 
				
			||||||
            PARSE_REAL(te);
 | 
					 | 
				
			||||||
            PARSE_REAL(gx);
 | 
					 | 
				
			||||||
            PARSE_REAL(gy);
 | 
					 | 
				
			||||||
            PARSE_STRING(name);
 | 
					 | 
				
			||||||
            PARSE_INT(bcLeft);
 | 
					 | 
				
			||||||
            PARSE_INT(bcRight);
 | 
					 | 
				
			||||||
            PARSE_INT(bcBottom);
 | 
					 | 
				
			||||||
            PARSE_INT(bcTop);
 | 
					 | 
				
			||||||
            PARSE_REAL(u_init);
 | 
					 | 
				
			||||||
            PARSE_REAL(v_init);
 | 
					 | 
				
			||||||
            PARSE_REAL(p_init);
 | 
					 | 
				
			||||||
        }
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    fclose(fp);
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
void printParameter(Parameter* param)
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
    printf("Parameters for %s\n", param->name);
 | 
					 | 
				
			||||||
    printf("Boundary conditions Left:%d Right:%d Bottom:%d Top:%d\n",
 | 
					 | 
				
			||||||
        param->bcLeft,
 | 
					 | 
				
			||||||
        param->bcRight,
 | 
					 | 
				
			||||||
        param->bcBottom,
 | 
					 | 
				
			||||||
        param->bcTop);
 | 
					 | 
				
			||||||
    printf("\tReynolds number: %.2f\n", param->re);
 | 
					 | 
				
			||||||
    printf("\tInit arrays: U:%.2f V:%.2f P:%.2f\n",
 | 
					 | 
				
			||||||
        param->u_init,
 | 
					 | 
				
			||||||
        param->v_init,
 | 
					 | 
				
			||||||
        param->p_init);
 | 
					 | 
				
			||||||
    printf("Geometry data:\n");
 | 
					 | 
				
			||||||
    printf("\tDomain box size (x, y): %.2f, %.2f\n", param->xlength, param->ylength);
 | 
					 | 
				
			||||||
    printf("\tCells (x, y): %d, %d\n", param->imax, param->jmax);
 | 
					 | 
				
			||||||
    printf("Timestep parameters:\n");
 | 
					 | 
				
			||||||
    printf("\tDefault stepsize: %.2f, Final time %.2f\n", param->dt, param->te);
 | 
					 | 
				
			||||||
    printf("\tTau factor: %.2f\n", param->tau);
 | 
					 | 
				
			||||||
    printf("Iterative solver parameters:\n");
 | 
					 | 
				
			||||||
    printf("\tMax iterations: %d\n", param->itermax);
 | 
					 | 
				
			||||||
    printf("\tepsilon (stopping tolerance) : %f\n", param->eps);
 | 
					 | 
				
			||||||
    printf("\tgamma (stopping tolerance) : %f\n", param->gamma);
 | 
					 | 
				
			||||||
    printf("\tomega (SOR relaxation): %f\n", param->omg);
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
@@ -1,26 +0,0 @@
 | 
				
			|||||||
/*
 | 
					 | 
				
			||||||
 * Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
 | 
					 | 
				
			||||||
 * All rights reserved. This file is part of nusif-solver.
 | 
					 | 
				
			||||||
 * Use of this source code is governed by a MIT style
 | 
					 | 
				
			||||||
 * license that can be found in the LICENSE file.
 | 
					 | 
				
			||||||
 */
 | 
					 | 
				
			||||||
#ifndef __PARAMETER_H_
 | 
					 | 
				
			||||||
#define __PARAMETER_H_
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
typedef struct {
 | 
					 | 
				
			||||||
    double xlength, ylength;
 | 
					 | 
				
			||||||
    int imax, jmax;
 | 
					 | 
				
			||||||
    int itermax;
 | 
					 | 
				
			||||||
    double eps, omg;
 | 
					 | 
				
			||||||
    double re, tau, gamma;
 | 
					 | 
				
			||||||
    double te, dt;
 | 
					 | 
				
			||||||
    double gx, gy;
 | 
					 | 
				
			||||||
    char* name;
 | 
					 | 
				
			||||||
    int bcLeft, bcRight, bcBottom, bcTop;
 | 
					 | 
				
			||||||
    double u_init, v_init, p_init;
 | 
					 | 
				
			||||||
} Parameter;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
void initParameter(Parameter*);
 | 
					 | 
				
			||||||
void readParameter(Parameter*, const char*);
 | 
					 | 
				
			||||||
void printParameter(Parameter*);
 | 
					 | 
				
			||||||
#endif
 | 
					 | 
				
			||||||
@@ -1,60 +0,0 @@
 | 
				
			|||||||
/*
 | 
					 | 
				
			||||||
 * Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
 | 
					 | 
				
			||||||
 * All rights reserved. This file is part of nusif-solver.
 | 
					 | 
				
			||||||
 * Use of this source code is governed by a MIT style
 | 
					 | 
				
			||||||
 * license that can be found in the LICENSE file.
 | 
					 | 
				
			||||||
 */
 | 
					 | 
				
			||||||
#include <math.h>
 | 
					 | 
				
			||||||
#include <mpi.h>
 | 
					 | 
				
			||||||
#include <stdio.h>
 | 
					 | 
				
			||||||
#include <stdlib.h>
 | 
					 | 
				
			||||||
#include <string.h>
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
#include "progress.h"
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
static double _end;
 | 
					 | 
				
			||||||
static int _current;
 | 
					 | 
				
			||||||
static int _rank = -1;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
void initProgress(double end)
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
    MPI_Comm_rank(MPI_COMM_WORLD, &_rank);
 | 
					 | 
				
			||||||
    _end     = end;
 | 
					 | 
				
			||||||
    _current = 0;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    if (_rank == 0) {
 | 
					 | 
				
			||||||
        printf("[          ]");
 | 
					 | 
				
			||||||
        fflush(stdout);
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
void printProgress(double current)
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
    if (_rank == 0) {
 | 
					 | 
				
			||||||
        int new = (int)rint((current / _end) * 10.0);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        if (new > _current) {
 | 
					 | 
				
			||||||
            char progress[11];
 | 
					 | 
				
			||||||
            _current    = new;
 | 
					 | 
				
			||||||
            progress[0] = 0;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
            for (int i = 0; i < 10; i++) {
 | 
					 | 
				
			||||||
                if (i < _current) {
 | 
					 | 
				
			||||||
                    sprintf(progress + strlen(progress), "#");
 | 
					 | 
				
			||||||
                } else {
 | 
					 | 
				
			||||||
                    sprintf(progress + strlen(progress), " ");
 | 
					 | 
				
			||||||
                }
 | 
					 | 
				
			||||||
            }
 | 
					 | 
				
			||||||
            printf("\r[%s]", progress);
 | 
					 | 
				
			||||||
        }
 | 
					 | 
				
			||||||
        fflush(stdout);
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
void stopProgress()
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
    if (_rank == 0) {
 | 
					 | 
				
			||||||
        printf("\n");
 | 
					 | 
				
			||||||
        fflush(stdout);
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
@@ -1,14 +0,0 @@
 | 
				
			|||||||
/*
 | 
					 | 
				
			||||||
 * Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
 | 
					 | 
				
			||||||
 * All rights reserved.
 | 
					 | 
				
			||||||
 * Use of this source code is governed by a MIT-style
 | 
					 | 
				
			||||||
 * license that can be found in the LICENSE file.
 | 
					 | 
				
			||||||
 */
 | 
					 | 
				
			||||||
#ifndef __PROGRESS_H_
 | 
					 | 
				
			||||||
#define __PROGRESS_H_
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
extern void initProgress(double);
 | 
					 | 
				
			||||||
extern void printProgress(double);
 | 
					 | 
				
			||||||
extern void stopProgress();
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
#endif
 | 
					 | 
				
			||||||
@@ -1,694 +0,0 @@
 | 
				
			|||||||
/*
 | 
					 | 
				
			||||||
 * Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
 | 
					 | 
				
			||||||
 * All rights reserved. This file is part of nusif-solver.
 | 
					 | 
				
			||||||
 * Use of this source code is governed by a MIT style
 | 
					 | 
				
			||||||
 * license that can be found in the LICENSE file.
 | 
					 | 
				
			||||||
 */
 | 
					 | 
				
			||||||
#include <float.h>
 | 
					 | 
				
			||||||
#include <math.h>
 | 
					 | 
				
			||||||
#include <mpi.h>
 | 
					 | 
				
			||||||
#include <stdio.h>
 | 
					 | 
				
			||||||
#include <stdlib.h>
 | 
					 | 
				
			||||||
#include <string.h>
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
#include "allocate.h"
 | 
					 | 
				
			||||||
#include "parameter.h"
 | 
					 | 
				
			||||||
#include "solver.h"
 | 
					 | 
				
			||||||
#include "util.h"
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
#define P(i, j)   p[(j) * (imax + 2) + (i)]
 | 
					 | 
				
			||||||
#define F(i, j)   f[(j) * (imax + 2) + (i)]
 | 
					 | 
				
			||||||
#define G(i, j)   g[(j) * (imax + 2) + (i)]
 | 
					 | 
				
			||||||
#define U(i, j)   u[(j) * (imax + 2) + (i)]
 | 
					 | 
				
			||||||
#define V(i, j)   v[(j) * (imax + 2) + (i)]
 | 
					 | 
				
			||||||
#define RHS(i, j) rhs[(j) * (imax + 2) + (i)]
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
static int sizeOfRank(int rank, int size, int N)
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
    return N / size + ((N % size > rank) ? 1 : 0);
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
static void print(Solver* solver, double* grid)
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
    int imax = solver->imax;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    for (int i = 0; i < solver->size; i++) {
 | 
					 | 
				
			||||||
        if (i == solver->rank) {
 | 
					 | 
				
			||||||
            printf("### RANK %d "
 | 
					 | 
				
			||||||
                   "#######################################################\n",
 | 
					 | 
				
			||||||
                solver->rank);
 | 
					 | 
				
			||||||
            for (int j = 0; j < solver->jmaxLocal + 2; j++) {
 | 
					 | 
				
			||||||
                printf("%02d: ", j);
 | 
					 | 
				
			||||||
                for (int i = 0; i < solver->imax + 2; i++) {
 | 
					 | 
				
			||||||
                    printf("%12.8f  ", grid[j * (imax + 2) + i]);
 | 
					 | 
				
			||||||
                }
 | 
					 | 
				
			||||||
                printf("\n");
 | 
					 | 
				
			||||||
            }
 | 
					 | 
				
			||||||
            fflush(stdout);
 | 
					 | 
				
			||||||
        }
 | 
					 | 
				
			||||||
        MPI_Barrier(MPI_COMM_WORLD);
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
static void exchange(Solver* solver, double* grid)
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
    MPI_Request requests[4] = { MPI_REQUEST_NULL,
 | 
					 | 
				
			||||||
        MPI_REQUEST_NULL,
 | 
					 | 
				
			||||||
        MPI_REQUEST_NULL,
 | 
					 | 
				
			||||||
        MPI_REQUEST_NULL };
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    /* exchange ghost cells with top neighbor */
 | 
					 | 
				
			||||||
    if (solver->rank + 1 < solver->size) {
 | 
					 | 
				
			||||||
        int top     = solver->rank + 1;
 | 
					 | 
				
			||||||
        double* src = grid + (solver->jmaxLocal) * (solver->imax + 2) + 1;
 | 
					 | 
				
			||||||
        double* dst = grid + (solver->jmaxLocal + 1) * (solver->imax + 2) + 1;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        MPI_Isend(src, solver->imax, MPI_DOUBLE, top, 1, MPI_COMM_WORLD, &requests[0]);
 | 
					 | 
				
			||||||
        MPI_Irecv(dst, solver->imax, MPI_DOUBLE, top, 2, MPI_COMM_WORLD, &requests[1]);
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    /* exchange ghost cells with bottom neighbor */
 | 
					 | 
				
			||||||
    if (solver->rank > 0) {
 | 
					 | 
				
			||||||
        int bottom  = solver->rank - 1;
 | 
					 | 
				
			||||||
        double* src = grid + (solver->imax + 2) + 1;
 | 
					 | 
				
			||||||
        double* dst = grid + 1;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        MPI_Isend(src, solver->imax, MPI_DOUBLE, bottom, 2, MPI_COMM_WORLD, &requests[2]);
 | 
					 | 
				
			||||||
        MPI_Irecv(dst, solver->imax, MPI_DOUBLE, bottom, 1, MPI_COMM_WORLD, &requests[3]);
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    MPI_Waitall(4, requests, MPI_STATUSES_IGNORE);
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
static void shift(Solver* solver)
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
    MPI_Request requests[2] = { MPI_REQUEST_NULL, MPI_REQUEST_NULL };
 | 
					 | 
				
			||||||
    double* g               = solver->g;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    /* shift G */
 | 
					 | 
				
			||||||
    /* receive ghost cells from bottom neighbor */
 | 
					 | 
				
			||||||
    if (solver->rank > 0) {
 | 
					 | 
				
			||||||
        int bottom = solver->rank - 1;
 | 
					 | 
				
			||||||
        MPI_Irecv(g + 1,
 | 
					 | 
				
			||||||
            solver->imax,
 | 
					 | 
				
			||||||
            MPI_DOUBLE,
 | 
					 | 
				
			||||||
            bottom,
 | 
					 | 
				
			||||||
            0,
 | 
					 | 
				
			||||||
            MPI_COMM_WORLD,
 | 
					 | 
				
			||||||
            &requests[0]);
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    if (solver->rank + 1 < solver->size) {
 | 
					 | 
				
			||||||
        int top     = solver->rank + 1;
 | 
					 | 
				
			||||||
        double* buf = g + (solver->jmaxLocal) * (solver->imax + 2) + 1;
 | 
					 | 
				
			||||||
        /* send ghost cells to top neighbor */
 | 
					 | 
				
			||||||
        MPI_Isend(buf, solver->imax, MPI_DOUBLE, top, 0, MPI_COMM_WORLD, &requests[1]);
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    MPI_Waitall(2, requests, MPI_STATUSES_IGNORE);
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
static void gatherArray(
 | 
					 | 
				
			||||||
    Solver* solver, int cnt, int* rcvCounts, int* displs, double* src, double* dst)
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
    double* sendbuffer = src + (solver->imax + 2);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    if (solver->rank == 0) {
 | 
					 | 
				
			||||||
        sendbuffer = src;
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    MPI_Gatherv(sendbuffer,
 | 
					 | 
				
			||||||
        cnt,
 | 
					 | 
				
			||||||
        MPI_DOUBLE,
 | 
					 | 
				
			||||||
        dst,
 | 
					 | 
				
			||||||
        rcvCounts,
 | 
					 | 
				
			||||||
        displs,
 | 
					 | 
				
			||||||
        MPI_DOUBLE,
 | 
					 | 
				
			||||||
        0,
 | 
					 | 
				
			||||||
        MPI_COMM_WORLD);
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
void collectResult(Solver* solver)
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
    double* p = NULL;
 | 
					 | 
				
			||||||
    double* u = NULL;
 | 
					 | 
				
			||||||
    double* v = NULL;
 | 
					 | 
				
			||||||
    int *rcvCounts, *displs;
 | 
					 | 
				
			||||||
    int cnt = solver->jmaxLocal * (solver->imax + 2);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    if (solver->rank == 0) {
 | 
					 | 
				
			||||||
        p = allocate(64, (solver->imax + 2) * (solver->jmax + 2) * sizeof(double));
 | 
					 | 
				
			||||||
        u = allocate(64, (solver->imax + 2) * (solver->jmax + 2) * sizeof(double));
 | 
					 | 
				
			||||||
        v = allocate(64, (solver->imax + 2) * (solver->jmax + 2) * sizeof(double));
 | 
					 | 
				
			||||||
        rcvCounts    = (int*)malloc(solver->size * sizeof(int));
 | 
					 | 
				
			||||||
        displs       = (int*)malloc(solver->size * sizeof(int));
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    if (solver->rank == 0 && solver->size == 1) {
 | 
					 | 
				
			||||||
        cnt = (solver->jmaxLocal + 2) * (solver->imax + 2);
 | 
					 | 
				
			||||||
    } else if (solver->rank == 0 || solver->rank == (solver->size - 1)) {
 | 
					 | 
				
			||||||
        cnt = (solver->jmaxLocal + 1) * (solver->imax + 2);
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    MPI_Gather(&cnt, 1, MPI_INTEGER, rcvCounts, 1, MPI_INTEGER, 0, MPI_COMM_WORLD);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    if (solver->rank == 0) {
 | 
					 | 
				
			||||||
        displs[0]    = 0;
 | 
					 | 
				
			||||||
        int cursor   = rcvCounts[0];
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        for (int i = 1; i < solver->size; i++) {
 | 
					 | 
				
			||||||
            displs[i]    = cursor;
 | 
					 | 
				
			||||||
            cursor += rcvCounts[i];
 | 
					 | 
				
			||||||
        }
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    gatherArray(solver, cnt, rcvCounts, displs, solver->p, p);
 | 
					 | 
				
			||||||
    gatherArray(solver, cnt, rcvCounts, displs, solver->u, u);
 | 
					 | 
				
			||||||
    gatherArray(solver, cnt, rcvCounts, displs, solver->v, v);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    if (solver->rank == 0) {
 | 
					 | 
				
			||||||
        writeResult(solver, p, u, v);
 | 
					 | 
				
			||||||
        free(p);
 | 
					 | 
				
			||||||
        free(u);
 | 
					 | 
				
			||||||
        free(v);
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
static void printConfig(Solver* solver)
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
    if (solver->rank == 0) {
 | 
					 | 
				
			||||||
        printf("Parameters for #%s#\n", solver->problem);
 | 
					 | 
				
			||||||
        printf("Boundary conditions Left:%d Right:%d Bottom:%d Top:%d\n",
 | 
					 | 
				
			||||||
            solver->bcLeft,
 | 
					 | 
				
			||||||
            solver->bcRight,
 | 
					 | 
				
			||||||
            solver->bcBottom,
 | 
					 | 
				
			||||||
            solver->bcTop);
 | 
					 | 
				
			||||||
        printf("\tReynolds number: %.2f\n", solver->re);
 | 
					 | 
				
			||||||
        printf("\tGx Gy: %.2f %.2f\n", solver->gx, solver->gy);
 | 
					 | 
				
			||||||
        printf("Geometry data:\n");
 | 
					 | 
				
			||||||
        printf("\tDomain box size (x, y): %.2f, %.2f\n",
 | 
					 | 
				
			||||||
            solver->xlength,
 | 
					 | 
				
			||||||
            solver->ylength);
 | 
					 | 
				
			||||||
        printf("\tCells (x, y): %d, %d\n", solver->imax, solver->jmax);
 | 
					 | 
				
			||||||
        printf("Timestep parameters:\n");
 | 
					 | 
				
			||||||
        printf("\tDefault stepsize: %.2f, Final time %.2f\n", solver->dt, solver->te);
 | 
					 | 
				
			||||||
        printf("\tdt bound: %.6f\n", solver->dtBound);
 | 
					 | 
				
			||||||
        printf("\tTau factor: %.2f\n", solver->tau);
 | 
					 | 
				
			||||||
        printf("Iterative solver parameters:\n");
 | 
					 | 
				
			||||||
        printf("\tMax iterations: %d\n", solver->itermax);
 | 
					 | 
				
			||||||
        printf("\tepsilon (stopping tolerance) : %f\n", solver->eps);
 | 
					 | 
				
			||||||
        printf("\tgamma factor: %f\n", solver->gamma);
 | 
					 | 
				
			||||||
        printf("\tomega (SOR relaxation): %f\n", solver->omega);
 | 
					 | 
				
			||||||
        printf("Communication parameters:\n");
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
    for (int i = 0; i < solver->size; i++) {
 | 
					 | 
				
			||||||
        if (i == solver->rank) {
 | 
					 | 
				
			||||||
            printf("\tRank %d of %d\n", solver->rank, solver->size);
 | 
					 | 
				
			||||||
            printf("\tLocal domain size: %dx%d\n", solver->imax, solver->jmaxLocal);
 | 
					 | 
				
			||||||
            fflush(stdout);
 | 
					 | 
				
			||||||
        }
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
void initSolver(Solver* solver, Parameter* params)
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
    MPI_Comm_rank(MPI_COMM_WORLD, &(solver->rank));
 | 
					 | 
				
			||||||
    MPI_Comm_size(MPI_COMM_WORLD, &(solver->size));
 | 
					 | 
				
			||||||
    solver->problem   = params->name;
 | 
					 | 
				
			||||||
    solver->bcLeft    = params->bcLeft;
 | 
					 | 
				
			||||||
    solver->bcRight   = params->bcRight;
 | 
					 | 
				
			||||||
    solver->bcBottom  = params->bcBottom;
 | 
					 | 
				
			||||||
    solver->bcTop     = params->bcTop;
 | 
					 | 
				
			||||||
    solver->imax      = params->imax;
 | 
					 | 
				
			||||||
    solver->jmax      = params->jmax;
 | 
					 | 
				
			||||||
    solver->jmaxLocal = sizeOfRank(solver->rank, solver->size, solver->jmax);
 | 
					 | 
				
			||||||
    solver->xlength   = params->xlength;
 | 
					 | 
				
			||||||
    solver->ylength   = params->ylength;
 | 
					 | 
				
			||||||
    solver->dx        = params->xlength / params->imax;
 | 
					 | 
				
			||||||
    solver->dy        = params->ylength / params->jmax;
 | 
					 | 
				
			||||||
    solver->eps       = params->eps;
 | 
					 | 
				
			||||||
    solver->omega     = params->omg;
 | 
					 | 
				
			||||||
    solver->itermax   = params->itermax;
 | 
					 | 
				
			||||||
    solver->re        = params->re;
 | 
					 | 
				
			||||||
    solver->gx        = params->gx;
 | 
					 | 
				
			||||||
    solver->gy        = params->gy;
 | 
					 | 
				
			||||||
    solver->dt        = params->dt;
 | 
					 | 
				
			||||||
    solver->te        = params->te;
 | 
					 | 
				
			||||||
    solver->tau       = params->tau;
 | 
					 | 
				
			||||||
    solver->gamma     = params->gamma;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    int imax        = solver->imax;
 | 
					 | 
				
			||||||
    int jmaxLocal   = solver->jmaxLocal;
 | 
					 | 
				
			||||||
    size_t bytesize = (imax + 2) * (jmaxLocal + 2) * sizeof(double);
 | 
					 | 
				
			||||||
    solver->u       = allocate(64, bytesize);
 | 
					 | 
				
			||||||
    solver->v       = allocate(64, bytesize);
 | 
					 | 
				
			||||||
    solver->p       = allocate(64, bytesize);
 | 
					 | 
				
			||||||
    solver->rhs     = allocate(64, bytesize);
 | 
					 | 
				
			||||||
    solver->f       = allocate(64, bytesize);
 | 
					 | 
				
			||||||
    solver->g       = allocate(64, bytesize);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    for (int i = 0; i < (imax + 2) * (jmaxLocal + 2); i++) {
 | 
					 | 
				
			||||||
        solver->u[i]   = params->u_init;
 | 
					 | 
				
			||||||
        solver->v[i]   = params->v_init;
 | 
					 | 
				
			||||||
        solver->p[i]   = params->p_init;
 | 
					 | 
				
			||||||
        solver->rhs[i] = 0.0;
 | 
					 | 
				
			||||||
        solver->f[i]   = 0.0;
 | 
					 | 
				
			||||||
        solver->g[i]   = 0.0;
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    double dx          = solver->dx;
 | 
					 | 
				
			||||||
    double dy          = solver->dy;
 | 
					 | 
				
			||||||
    double invSquareSum = 1.0 / (dx * dx) + 1.0 / (dy * dy);
 | 
					 | 
				
			||||||
    solver->dtBound     = 0.5 * solver->re * 1.0 / invSquareSum;
 | 
					 | 
				
			||||||
#ifdef VERBOSE
 | 
					 | 
				
			||||||
    printConfig(solver);
 | 
					 | 
				
			||||||
#endif
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
void computeRHS(Solver* solver)
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
    int imax      = solver->imax;
 | 
					 | 
				
			||||||
    int jmaxLocal = solver->jmaxLocal;
 | 
					 | 
				
			||||||
    double idx    = 1.0 / solver->dx;
 | 
					 | 
				
			||||||
    double idy    = 1.0 / solver->dy;
 | 
					 | 
				
			||||||
    double idt    = 1.0 / solver->dt;
 | 
					 | 
				
			||||||
    double* rhs   = solver->rhs;
 | 
					 | 
				
			||||||
    double* f     = solver->f;
 | 
					 | 
				
			||||||
    double* g     = solver->g;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    shift(solver);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    for (int j = 1; j < jmaxLocal + 1; j++) {
 | 
					 | 
				
			||||||
        for (int i = 1; i < imax + 1; i++) {
 | 
					 | 
				
			||||||
            RHS(i, j) = ((F(i, j) - F(i - 1, j)) * idx + (G(i, j) - G(i, j - 1)) * idy) *
 | 
					 | 
				
			||||||
                        idt;
 | 
					 | 
				
			||||||
        }
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
void solve(Solver* solver)
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
    int imax      = solver->imax;
 | 
					 | 
				
			||||||
    int jmax      = solver->jmax;
 | 
					 | 
				
			||||||
    int jmaxLocal = solver->jmaxLocal;
 | 
					 | 
				
			||||||
    double eps    = solver->eps;
 | 
					 | 
				
			||||||
    int itermax   = solver->itermax;
 | 
					 | 
				
			||||||
    double dx2    = solver->dx * solver->dx;
 | 
					 | 
				
			||||||
    double dy2    = solver->dy * solver->dy;
 | 
					 | 
				
			||||||
    double idx2   = 1.0 / dx2;
 | 
					 | 
				
			||||||
    double idy2   = 1.0 / dy2;
 | 
					 | 
				
			||||||
    double factor = solver->omega * 0.5 * (dx2 * dy2) / (dx2 + dy2);
 | 
					 | 
				
			||||||
    double* p     = solver->p;
 | 
					 | 
				
			||||||
    double* rhs   = solver->rhs;
 | 
					 | 
				
			||||||
    double epssq  = eps * eps;
 | 
					 | 
				
			||||||
    int it        = 0;
 | 
					 | 
				
			||||||
    double res    = 1.0;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    while ((res >= epssq) && (it < itermax)) {
 | 
					 | 
				
			||||||
        res = 0.0;
 | 
					 | 
				
			||||||
        exchange(solver, p);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        for (int j = 1; j < jmaxLocal + 1; j++) {
 | 
					 | 
				
			||||||
            for (int i = 1; i < imax + 1; i++) {
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
                double r = RHS(i, j) -
 | 
					 | 
				
			||||||
                           ((P(i + 1, j) - 2.0 * P(i, j) + P(i - 1, j)) * idx2 +
 | 
					 | 
				
			||||||
                               (P(i, j + 1) - 2.0 * P(i, j) + P(i, j - 1)) * idy2);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
                P(i, j) -= (factor * r);
 | 
					 | 
				
			||||||
                res += (r * r);
 | 
					 | 
				
			||||||
            }
 | 
					 | 
				
			||||||
        }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        if (solver->rank == 0) {
 | 
					 | 
				
			||||||
            for (int i = 1; i < imax + 1; i++) {
 | 
					 | 
				
			||||||
                P(i, 0) = P(i, 1);
 | 
					 | 
				
			||||||
            }
 | 
					 | 
				
			||||||
        }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        if (solver->rank == (solver->size - 1)) {
 | 
					 | 
				
			||||||
            for (int i = 1; i < imax + 1; i++) {
 | 
					 | 
				
			||||||
                P(i, jmaxLocal + 1) = P(i, jmaxLocal);
 | 
					 | 
				
			||||||
            }
 | 
					 | 
				
			||||||
        }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        for (int j = 1; j < jmaxLocal + 1; j++) {
 | 
					 | 
				
			||||||
            P(0, j)        = P(1, j);
 | 
					 | 
				
			||||||
            P(imax + 1, j) = P(imax, j);
 | 
					 | 
				
			||||||
        }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        MPI_Allreduce(MPI_IN_PLACE, &res, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
 | 
					 | 
				
			||||||
        res = res / (double)(imax * jmax);
 | 
					 | 
				
			||||||
#ifdef DEBUG
 | 
					 | 
				
			||||||
        if (solver->rank == 0) {
 | 
					 | 
				
			||||||
            printf("%d Residuum: %e\n", it, res);
 | 
					 | 
				
			||||||
        }
 | 
					 | 
				
			||||||
#endif
 | 
					 | 
				
			||||||
        it++;
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
#ifdef VERBOSE
 | 
					 | 
				
			||||||
    if (solver->rank == 0) {
 | 
					 | 
				
			||||||
        printf("Solver took %d iterations to reach %f\n", it, sqrt(res));
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
#endif
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
static double maxElement(Solver* solver, double* m)
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
    int size      = (solver->imax + 2) * (solver->jmaxLocal + 2);
 | 
					 | 
				
			||||||
    double maxval = DBL_MIN;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    for (int i = 0; i < size; i++) {
 | 
					 | 
				
			||||||
        maxval = MAX(maxval, fabs(m[i]));
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    MPI_Allreduce(MPI_IN_PLACE, &maxval, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD);
 | 
					 | 
				
			||||||
    return maxval;
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
void normalizePressure(Solver* solver)
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
    int size    = (solver->imax + 2) * (solver->jmaxLocal + 2);
 | 
					 | 
				
			||||||
    double* p   = solver->p;
 | 
					 | 
				
			||||||
    double avgP = 0.0;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    for (int i = 0; i < size; i++) {
 | 
					 | 
				
			||||||
        avgP += p[i];
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
    MPI_Allreduce(MPI_IN_PLACE, &avgP, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
 | 
					 | 
				
			||||||
    avgP /= (solver->imax + 2) * (solver->jmax + 2);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    for (int i = 0; i < size; i++) {
 | 
					 | 
				
			||||||
        p[i] = p[i] - avgP;
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
void computeTimestep(Solver* solver)
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
    double dt   = solver->dtBound;
 | 
					 | 
				
			||||||
    double dx   = solver->dx;
 | 
					 | 
				
			||||||
    double dy   = solver->dy;
 | 
					 | 
				
			||||||
    double umax = maxElement(solver, solver->u);
 | 
					 | 
				
			||||||
    double vmax = maxElement(solver, solver->v);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    if (umax > 0) {
 | 
					 | 
				
			||||||
        dt = (dt > dx / umax) ? dx / umax : dt;
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
    if (vmax > 0) {
 | 
					 | 
				
			||||||
        dt = (dt > dy / vmax) ? dy / vmax : dt;
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    solver->dt = dt * solver->tau;
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
void setBoundaryConditions(Solver* solver)
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
    int imax      = solver->imax;
 | 
					 | 
				
			||||||
    int jmaxLocal = solver->jmaxLocal;
 | 
					 | 
				
			||||||
    double* u     = solver->u;
 | 
					 | 
				
			||||||
    double* v     = solver->v;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    // Left boundary
 | 
					 | 
				
			||||||
    switch (solver->bcLeft) {
 | 
					 | 
				
			||||||
    case NOSLIP:
 | 
					 | 
				
			||||||
        for (int j = 1; j < jmaxLocal + 1; j++) {
 | 
					 | 
				
			||||||
            U(0, j) = 0.0;
 | 
					 | 
				
			||||||
            V(0, j) = -V(1, j);
 | 
					 | 
				
			||||||
        }
 | 
					 | 
				
			||||||
        break;
 | 
					 | 
				
			||||||
    case SLIP:
 | 
					 | 
				
			||||||
        for (int j = 1; j < jmaxLocal + 1; j++) {
 | 
					 | 
				
			||||||
            U(0, j) = 0.0;
 | 
					 | 
				
			||||||
            V(0, j) = V(1, j);
 | 
					 | 
				
			||||||
        }
 | 
					 | 
				
			||||||
        break;
 | 
					 | 
				
			||||||
    case OUTFLOW:
 | 
					 | 
				
			||||||
        for (int j = 1; j < jmaxLocal + 1; j++) {
 | 
					 | 
				
			||||||
            U(0, j) = U(1, j);
 | 
					 | 
				
			||||||
            V(0, j) = V(1, j);
 | 
					 | 
				
			||||||
        }
 | 
					 | 
				
			||||||
        break;
 | 
					 | 
				
			||||||
    case PERIODIC:
 | 
					 | 
				
			||||||
        break;
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    // Right boundary
 | 
					 | 
				
			||||||
    switch (solver->bcRight) {
 | 
					 | 
				
			||||||
    case NOSLIP:
 | 
					 | 
				
			||||||
        for (int j = 1; j < jmaxLocal + 1; j++) {
 | 
					 | 
				
			||||||
            U(imax, j)     = 0.0;
 | 
					 | 
				
			||||||
            V(imax + 1, j) = -V(imax, j);
 | 
					 | 
				
			||||||
        }
 | 
					 | 
				
			||||||
        break;
 | 
					 | 
				
			||||||
    case SLIP:
 | 
					 | 
				
			||||||
        for (int j = 1; j < jmaxLocal + 1; j++) {
 | 
					 | 
				
			||||||
            U(imax, j)     = 0.0;
 | 
					 | 
				
			||||||
            V(imax + 1, j) = V(imax, j);
 | 
					 | 
				
			||||||
        }
 | 
					 | 
				
			||||||
        break;
 | 
					 | 
				
			||||||
    case OUTFLOW:
 | 
					 | 
				
			||||||
        for (int j = 1; j < jmaxLocal + 1; j++) {
 | 
					 | 
				
			||||||
            U(imax, j)     = U(imax - 1, j);
 | 
					 | 
				
			||||||
            V(imax + 1, j) = V(imax, j);
 | 
					 | 
				
			||||||
        }
 | 
					 | 
				
			||||||
        break;
 | 
					 | 
				
			||||||
    case PERIODIC:
 | 
					 | 
				
			||||||
        break;
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    // Bottom boundary
 | 
					 | 
				
			||||||
    if (solver->rank == 0) {
 | 
					 | 
				
			||||||
        switch (solver->bcBottom) {
 | 
					 | 
				
			||||||
        case NOSLIP:
 | 
					 | 
				
			||||||
            for (int i = 1; i < imax + 1; i++) {
 | 
					 | 
				
			||||||
                V(i, 0) = 0.0;
 | 
					 | 
				
			||||||
                U(i, 0) = -U(i, 1);
 | 
					 | 
				
			||||||
            }
 | 
					 | 
				
			||||||
            break;
 | 
					 | 
				
			||||||
        case SLIP:
 | 
					 | 
				
			||||||
            for (int i = 1; i < imax + 1; i++) {
 | 
					 | 
				
			||||||
                V(i, 0) = 0.0;
 | 
					 | 
				
			||||||
                U(i, 0) = U(i, 1);
 | 
					 | 
				
			||||||
            }
 | 
					 | 
				
			||||||
            break;
 | 
					 | 
				
			||||||
        case OUTFLOW:
 | 
					 | 
				
			||||||
            for (int i = 1; i < imax + 1; i++) {
 | 
					 | 
				
			||||||
                U(i, 0) = U(i, 1);
 | 
					 | 
				
			||||||
                V(i, 0) = V(i, 1);
 | 
					 | 
				
			||||||
            }
 | 
					 | 
				
			||||||
            break;
 | 
					 | 
				
			||||||
        case PERIODIC:
 | 
					 | 
				
			||||||
            break;
 | 
					 | 
				
			||||||
        }
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    // Top boundary
 | 
					 | 
				
			||||||
    if (solver->rank == (solver->size - 1)) {
 | 
					 | 
				
			||||||
        switch (solver->bcTop) {
 | 
					 | 
				
			||||||
        case NOSLIP:
 | 
					 | 
				
			||||||
            for (int i = 1; i < imax + 1; i++) {
 | 
					 | 
				
			||||||
                V(i, jmaxLocal)     = 0.0;
 | 
					 | 
				
			||||||
                U(i, jmaxLocal + 1) = -U(i, jmaxLocal);
 | 
					 | 
				
			||||||
            }
 | 
					 | 
				
			||||||
            break;
 | 
					 | 
				
			||||||
        case SLIP:
 | 
					 | 
				
			||||||
            for (int i = 1; i < imax + 1; i++) {
 | 
					 | 
				
			||||||
                V(i, jmaxLocal)     = 0.0;
 | 
					 | 
				
			||||||
                U(i, jmaxLocal + 1) = U(i, jmaxLocal);
 | 
					 | 
				
			||||||
            }
 | 
					 | 
				
			||||||
            break;
 | 
					 | 
				
			||||||
        case OUTFLOW:
 | 
					 | 
				
			||||||
            for (int i = 1; i < imax + 1; i++) {
 | 
					 | 
				
			||||||
                U(i, jmaxLocal + 1) = U(i, jmaxLocal);
 | 
					 | 
				
			||||||
                V(i, jmaxLocal)     = V(i, jmaxLocal - 1);
 | 
					 | 
				
			||||||
            }
 | 
					 | 
				
			||||||
            break;
 | 
					 | 
				
			||||||
        case PERIODIC:
 | 
					 | 
				
			||||||
            break;
 | 
					 | 
				
			||||||
        }
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
void setSpecialBoundaryCondition(Solver* solver)
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
    int imax      = solver->imax;
 | 
					 | 
				
			||||||
    int jmaxLocal = solver->jmaxLocal;
 | 
					 | 
				
			||||||
    double* u     = solver->u;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    if (strcmp(solver->problem, "dcavity") == 0) {
 | 
					 | 
				
			||||||
        if (solver->rank == (solver->size - 1)) {
 | 
					 | 
				
			||||||
            for (int i = 1; i < imax; i++) {
 | 
					 | 
				
			||||||
                U(i, jmaxLocal + 1) = 2.0 - U(i, jmaxLocal);
 | 
					 | 
				
			||||||
            }
 | 
					 | 
				
			||||||
        }
 | 
					 | 
				
			||||||
    } else if (strcmp(solver->problem, "canal") == 0) {
 | 
					 | 
				
			||||||
        double ylength = solver->ylength;
 | 
					 | 
				
			||||||
        double dy      = solver->dy;
 | 
					 | 
				
			||||||
        int rest       = solver->jmax % solver->size;
 | 
					 | 
				
			||||||
        int yc = solver->rank * (solver->jmax / solver->size) + MIN(rest, solver->rank);
 | 
					 | 
				
			||||||
        double ys = dy * (yc + 0.5);
 | 
					 | 
				
			||||||
        double y;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        /* printf("RANK %d yc: %d ys: %f\n", solver->rank, yc, ys); */
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        for (int j = 1; j < jmaxLocal + 1; j++) {
 | 
					 | 
				
			||||||
            y       = ys + dy * (j - 0.5);
 | 
					 | 
				
			||||||
            U(0, j) = y * (ylength - y) * 4.0 / (ylength * ylength);
 | 
					 | 
				
			||||||
        }
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
    /* print(solver, solver->u); */
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
void computeFG(Solver* solver)
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
    double* u        = solver->u;
 | 
					 | 
				
			||||||
    double* v        = solver->v;
 | 
					 | 
				
			||||||
    double* f        = solver->f;
 | 
					 | 
				
			||||||
    double* g        = solver->g;
 | 
					 | 
				
			||||||
    int imax         = solver->imax;
 | 
					 | 
				
			||||||
    int jmaxLocal    = solver->jmaxLocal;
 | 
					 | 
				
			||||||
    double gx        = solver->gx;
 | 
					 | 
				
			||||||
    double gy        = solver->gy;
 | 
					 | 
				
			||||||
    double gamma     = solver->gamma;
 | 
					 | 
				
			||||||
    double dt        = solver->dt;
 | 
					 | 
				
			||||||
    double inverseRe = 1.0 / solver->re;
 | 
					 | 
				
			||||||
    double inverseDx = 1.0 / solver->dx;
 | 
					 | 
				
			||||||
    double inverseDy = 1.0 / solver->dy;
 | 
					 | 
				
			||||||
    double du2dx, dv2dy, duvdx, duvdy;
 | 
					 | 
				
			||||||
    double du2dx2, du2dy2, dv2dx2, dv2dy2;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    exchange(solver, u);
 | 
					 | 
				
			||||||
    exchange(solver, v);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    for (int j = 1; j < jmaxLocal + 1; j++) {
 | 
					 | 
				
			||||||
        for (int i = 1; i < imax + 1; i++) {
 | 
					 | 
				
			||||||
            du2dx = inverseDx * 0.25 *
 | 
					 | 
				
			||||||
                        ((U(i, j) + U(i + 1, j)) * (U(i, j) + U(i + 1, j)) -
 | 
					 | 
				
			||||||
                            (U(i, j) + U(i - 1, j)) * (U(i, j) + U(i - 1, j))) +
 | 
					 | 
				
			||||||
                    gamma * inverseDx * 0.25 *
 | 
					 | 
				
			||||||
                        (fabs(U(i, j) + U(i + 1, j)) * (U(i, j) - U(i + 1, j)) +
 | 
					 | 
				
			||||||
                            fabs(U(i, j) + U(i - 1, j)) * (U(i, j) - U(i - 1, j)));
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
            duvdy = inverseDy * 0.25 *
 | 
					 | 
				
			||||||
                        ((V(i, j) + V(i + 1, j)) * (U(i, j) + U(i, j + 1)) -
 | 
					 | 
				
			||||||
                            (V(i, j - 1) + V(i + 1, j - 1)) * (U(i, j) + U(i, j - 1))) +
 | 
					 | 
				
			||||||
                    gamma * inverseDy * 0.25 *
 | 
					 | 
				
			||||||
                        (fabs(V(i, j) + V(i + 1, j)) * (U(i, j) - U(i, j + 1)) +
 | 
					 | 
				
			||||||
                            fabs(V(i, j - 1) + V(i + 1, j - 1)) *
 | 
					 | 
				
			||||||
                                (U(i, j) - U(i, j - 1)));
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
            du2dx2  = inverseDx * inverseDx * (U(i + 1, j) - 2.0 * U(i, j) + U(i - 1, j));
 | 
					 | 
				
			||||||
            du2dy2  = inverseDy * inverseDy * (U(i, j + 1) - 2.0 * U(i, j) + U(i, j - 1));
 | 
					 | 
				
			||||||
            F(i, j) = U(i, j) + dt * (inverseRe * (du2dx2 + du2dy2) - du2dx - duvdy + gx);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
            duvdx = inverseDx * 0.25 *
 | 
					 | 
				
			||||||
                        ((U(i, j) + U(i, j + 1)) * (V(i, j) + V(i + 1, j)) -
 | 
					 | 
				
			||||||
                            (U(i - 1, j) + U(i - 1, j + 1)) * (V(i, j) + V(i - 1, j))) +
 | 
					 | 
				
			||||||
                    gamma * inverseDx * 0.25 *
 | 
					 | 
				
			||||||
                        (fabs(U(i, j) + U(i, j + 1)) * (V(i, j) - V(i + 1, j)) +
 | 
					 | 
				
			||||||
                            fabs(U(i - 1, j) + U(i - 1, j + 1)) *
 | 
					 | 
				
			||||||
                                (V(i, j) - V(i - 1, j)));
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
            dv2dy = inverseDy * 0.25 *
 | 
					 | 
				
			||||||
                        ((V(i, j) + V(i, j + 1)) * (V(i, j) + V(i, j + 1)) -
 | 
					 | 
				
			||||||
                            (V(i, j) + V(i, j - 1)) * (V(i, j) + V(i, j - 1))) +
 | 
					 | 
				
			||||||
                    gamma * inverseDy * 0.25 *
 | 
					 | 
				
			||||||
                        (fabs(V(i, j) + V(i, j + 1)) * (V(i, j) - V(i, j + 1)) +
 | 
					 | 
				
			||||||
                            fabs(V(i, j) + V(i, j - 1)) * (V(i, j) - V(i, j - 1)));
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
            dv2dx2  = inverseDx * inverseDx * (V(i + 1, j) - 2.0 * V(i, j) + V(i - 1, j));
 | 
					 | 
				
			||||||
            dv2dy2  = inverseDy * inverseDy * (V(i, j + 1) - 2.0 * V(i, j) + V(i, j - 1));
 | 
					 | 
				
			||||||
            G(i, j) = V(i, j) + dt * (inverseRe * (dv2dx2 + dv2dy2) - duvdx - dv2dy + gy);
 | 
					 | 
				
			||||||
        }
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    /* ----------------------------- boundary of F ---------------------------
 | 
					 | 
				
			||||||
     */
 | 
					 | 
				
			||||||
    for (int j = 1; j < jmaxLocal + 1; j++) {
 | 
					 | 
				
			||||||
        F(0, j)    = U(0, j);
 | 
					 | 
				
			||||||
        F(imax, j) = U(imax, j);
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    /* ----------------------------- boundary of G ---------------------------
 | 
					 | 
				
			||||||
     */
 | 
					 | 
				
			||||||
    if (solver->rank == 0) {
 | 
					 | 
				
			||||||
        for (int i = 1; i < imax + 1; i++) {
 | 
					 | 
				
			||||||
            G(i, 0) = V(i, 0);
 | 
					 | 
				
			||||||
        }
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    if (solver->rank == (solver->size - 1)) {
 | 
					 | 
				
			||||||
        for (int i = 1; i < imax + 1; i++) {
 | 
					 | 
				
			||||||
            G(i, jmaxLocal) = V(i, jmaxLocal);
 | 
					 | 
				
			||||||
        }
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
void adaptUV(Solver* solver)
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
    int imax       = solver->imax;
 | 
					 | 
				
			||||||
    int jmaxLocal  = solver->jmaxLocal;
 | 
					 | 
				
			||||||
    double* p      = solver->p;
 | 
					 | 
				
			||||||
    double* u      = solver->u;
 | 
					 | 
				
			||||||
    double* v      = solver->v;
 | 
					 | 
				
			||||||
    double* f      = solver->f;
 | 
					 | 
				
			||||||
    double* g      = solver->g;
 | 
					 | 
				
			||||||
    double factorX = solver->dt / solver->dx;
 | 
					 | 
				
			||||||
    double factorY = solver->dt / solver->dy;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    for (int j = 1; j < jmaxLocal + 1; j++) {
 | 
					 | 
				
			||||||
        for (int i = 1; i < imax + 1; i++) {
 | 
					 | 
				
			||||||
            U(i, j) = F(i, j) - (P(i + 1, j) - P(i, j)) * factorX;
 | 
					 | 
				
			||||||
            V(i, j) = G(i, j) - (P(i, j + 1) - P(i, j)) * factorY;
 | 
					 | 
				
			||||||
        }
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
void writeResult(Solver* solver, double* p, double* u, double* v)
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
    int imax  = solver->imax;
 | 
					 | 
				
			||||||
    int jmax  = solver->jmax;
 | 
					 | 
				
			||||||
    double dx = solver->dx;
 | 
					 | 
				
			||||||
    double dy = solver->dy;
 | 
					 | 
				
			||||||
    double x = 0.0, y = 0.0;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    FILE* fp;
 | 
					 | 
				
			||||||
    fp = fopen("pressure.dat", "w");
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    if (fp == NULL) {
 | 
					 | 
				
			||||||
        printf("Error!\n");
 | 
					 | 
				
			||||||
        exit(EXIT_FAILURE);
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    for (int j = 1; j < jmax + 1; j++) {
 | 
					 | 
				
			||||||
        y = (double)(j - 0.5) * dy;
 | 
					 | 
				
			||||||
        for (int i = 1; i < imax + 1; i++) {
 | 
					 | 
				
			||||||
            x = (double)(i - 0.5) * dx;
 | 
					 | 
				
			||||||
            fprintf(fp, "%.2f %.2f %f\n", x, y, P(i, j));
 | 
					 | 
				
			||||||
        }
 | 
					 | 
				
			||||||
        fprintf(fp, "\n");
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    fclose(fp);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    fp = fopen("velocity.dat", "w");
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    if (fp == NULL) {
 | 
					 | 
				
			||||||
        printf("Error!\n");
 | 
					 | 
				
			||||||
        exit(EXIT_FAILURE);
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    for (int j = 1; j < jmax + 1; j++) {
 | 
					 | 
				
			||||||
        y = dy * (j - 0.5);
 | 
					 | 
				
			||||||
        for (int i = 1; i < imax + 1; i++) {
 | 
					 | 
				
			||||||
            x            = dx * (i - 0.5);
 | 
					 | 
				
			||||||
            double vu  = (U(i, j) + U(i - 1, j)) / 2.0;
 | 
					 | 
				
			||||||
            double vv  = (V(i, j) + V(i, j - 1)) / 2.0;
 | 
					 | 
				
			||||||
            double len = sqrt((vu * vu) + (vv * vv));
 | 
					 | 
				
			||||||
            fprintf(fp, "%.2f %.2f %f %f %f\n", x, y, vu, vv, len);
 | 
					 | 
				
			||||||
        }
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    fclose(fp);
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
@@ -1,49 +0,0 @@
 | 
				
			|||||||
/*
 | 
					 | 
				
			||||||
 * Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
 | 
					 | 
				
			||||||
 * All rights reserved. This file is part of nusif-solver.
 | 
					 | 
				
			||||||
 * Use of this source code is governed by a MIT style
 | 
					 | 
				
			||||||
 * license that can be found in the LICENSE file.
 | 
					 | 
				
			||||||
 */
 | 
					 | 
				
			||||||
#ifndef __SOLVER_H_
 | 
					 | 
				
			||||||
#define __SOLVER_H_
 | 
					 | 
				
			||||||
#include "parameter.h"
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
enum BC { NOSLIP = 1, SLIP, OUTFLOW, PERIODIC };
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
typedef struct {
 | 
					 | 
				
			||||||
    /* geometry and grid information */
 | 
					 | 
				
			||||||
    double dx, dy;
 | 
					 | 
				
			||||||
    int imax, jmax;
 | 
					 | 
				
			||||||
    int jmaxLocal;
 | 
					 | 
				
			||||||
    double xlength, ylength;
 | 
					 | 
				
			||||||
    /* arrays */
 | 
					 | 
				
			||||||
    double *p, *rhs;
 | 
					 | 
				
			||||||
    double *f, *g;
 | 
					 | 
				
			||||||
    double *u, *v;
 | 
					 | 
				
			||||||
    /* parameters */
 | 
					 | 
				
			||||||
    double eps, omega;
 | 
					 | 
				
			||||||
    double re, tau, gamma;
 | 
					 | 
				
			||||||
    double gx, gy;
 | 
					 | 
				
			||||||
    /* time stepping */
 | 
					 | 
				
			||||||
    int itermax;
 | 
					 | 
				
			||||||
    double dt, te;
 | 
					 | 
				
			||||||
    double dtBound;
 | 
					 | 
				
			||||||
    char* problem;
 | 
					 | 
				
			||||||
    int bcLeft, bcRight, bcBottom, bcTop;
 | 
					 | 
				
			||||||
    /* mpi */
 | 
					 | 
				
			||||||
    int rank;
 | 
					 | 
				
			||||||
    int size;
 | 
					 | 
				
			||||||
} Solver;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
void initSolver(Solver*, Parameter*);
 | 
					 | 
				
			||||||
void computeRHS(Solver*);
 | 
					 | 
				
			||||||
void solve(Solver*);
 | 
					 | 
				
			||||||
void normalizePressure(Solver*);
 | 
					 | 
				
			||||||
void computeTimestep(Solver*);
 | 
					 | 
				
			||||||
void setBoundaryConditions(Solver*);
 | 
					 | 
				
			||||||
void setSpecialBoundaryCondition(Solver*);
 | 
					 | 
				
			||||||
void computeFG(Solver*);
 | 
					 | 
				
			||||||
void adaptUV(Solver*);
 | 
					 | 
				
			||||||
void collectResult(Solver*);
 | 
					 | 
				
			||||||
void writeResult(Solver*, double*, double*, double*);
 | 
					 | 
				
			||||||
#endif
 | 
					 | 
				
			||||||
@@ -1,24 +0,0 @@
 | 
				
			|||||||
/*
 | 
					 | 
				
			||||||
 * Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
 | 
					 | 
				
			||||||
 * All rights reserved.
 | 
					 | 
				
			||||||
 * Use of this source code is governed by a MIT-style
 | 
					 | 
				
			||||||
 * license that can be found in the LICENSE file.
 | 
					 | 
				
			||||||
 */
 | 
					 | 
				
			||||||
#include <stdlib.h>
 | 
					 | 
				
			||||||
#include <time.h>
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
double getTimeStamp()
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
    struct timespec ts;
 | 
					 | 
				
			||||||
    clock_gettime(CLOCK_MONOTONIC, &ts);
 | 
					 | 
				
			||||||
    return (double)ts.tv_sec + (double)ts.tv_nsec * 1.e-9;
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
double getTimeResolution()
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
    struct timespec ts;
 | 
					 | 
				
			||||||
    clock_getres(CLOCK_MONOTONIC, &ts);
 | 
					 | 
				
			||||||
    return (double)ts.tv_sec + (double)ts.tv_nsec * 1.e-9;
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
double getTimeStamp_() { return getTimeStamp(); }
 | 
					 | 
				
			||||||
@@ -1,14 +0,0 @@
 | 
				
			|||||||
/*
 | 
					 | 
				
			||||||
 * Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
 | 
					 | 
				
			||||||
 * All rights reserved.
 | 
					 | 
				
			||||||
 * Use of this source code is governed by a MIT-style
 | 
					 | 
				
			||||||
 * license that can be found in the LICENSE file.
 | 
					 | 
				
			||||||
 */
 | 
					 | 
				
			||||||
#ifndef __TIMING_H_
 | 
					 | 
				
			||||||
#define __TIMING_H_
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
extern double getTimeStamp();
 | 
					 | 
				
			||||||
extern double getTimeResolution();
 | 
					 | 
				
			||||||
extern double getTimeStamp_();
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
#endif // __TIMING_H_
 | 
					 | 
				
			||||||
@@ -1,23 +0,0 @@
 | 
				
			|||||||
/*
 | 
					 | 
				
			||||||
 * Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
 | 
					 | 
				
			||||||
 * All rights reserved.
 | 
					 | 
				
			||||||
 * Use of this source code is governed by a MIT-style
 | 
					 | 
				
			||||||
 * license that can be found in the LICENSE file.
 | 
					 | 
				
			||||||
 */
 | 
					 | 
				
			||||||
#ifndef __UTIL_H_
 | 
					 | 
				
			||||||
#define __UTIL_H_
 | 
					 | 
				
			||||||
#define HLINE                                                                            \
 | 
					 | 
				
			||||||
    "------------------------------------------------------------------------"           \
 | 
					 | 
				
			||||||
    "----\n"
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
#ifndef MIN
 | 
					 | 
				
			||||||
#define MIN(x, y) ((x) < (y) ? (x) : (y))
 | 
					 | 
				
			||||||
#endif
 | 
					 | 
				
			||||||
#ifndef MAX
 | 
					 | 
				
			||||||
#define MAX(x, y) ((x) > (y) ? (x) : (y))
 | 
					 | 
				
			||||||
#endif
 | 
					 | 
				
			||||||
#ifndef ABS
 | 
					 | 
				
			||||||
#define ABS(a) ((a) >= 0 ? (a) : -(a))
 | 
					 | 
				
			||||||
#endif
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
#endif // __UTIL_H_
 | 
					 | 
				
			||||||
@@ -1,7 +0,0 @@
 | 
				
			|||||||
set terminal png size 1024,768 enhanced font ,12
 | 
					 | 
				
			||||||
set output 'p.png'
 | 
					 | 
				
			||||||
set datafile separator whitespace
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
set grid
 | 
					 | 
				
			||||||
set hidden3d
 | 
					 | 
				
			||||||
splot 'pressure.dat' using 1:2:3 with lines
 | 
					 | 
				
			||||||
@@ -1,5 +0,0 @@
 | 
				
			|||||||
set terminal png size 1800,768 enhanced font ,12
 | 
					 | 
				
			||||||
set output 'velocity.png'
 | 
					 | 
				
			||||||
set datafile separator whitespace
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
plot 'velocity.dat' using 1:2:3:4:5 with vectors filled head size 0.01,20,60 lc palette
 | 
					 | 
				
			||||||
@@ -1,71 +0,0 @@
 | 
				
			|||||||
#=======================================================================================
 | 
					 | 
				
			||||||
# Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
 | 
					 | 
				
			||||||
# All rights reserved.
 | 
					 | 
				
			||||||
# Use of this source code is governed by a MIT-style
 | 
					 | 
				
			||||||
# license that can be found in the LICENSE file.
 | 
					 | 
				
			||||||
#=======================================================================================
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
#CONFIGURE BUILD SYSTEM
 | 
					 | 
				
			||||||
TARGET	   = exe-$(TAG)
 | 
					 | 
				
			||||||
BUILD_DIR  = ./$(TAG)
 | 
					 | 
				
			||||||
SRC_DIR    = ./src
 | 
					 | 
				
			||||||
MAKE_DIR   = ./
 | 
					 | 
				
			||||||
Q         ?= @
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
#DO NOT EDIT BELOW
 | 
					 | 
				
			||||||
include $(MAKE_DIR)/config.mk
 | 
					 | 
				
			||||||
include $(MAKE_DIR)/include_$(TAG).mk
 | 
					 | 
				
			||||||
INCLUDES  += -I$(SRC_DIR) -I$(BUILD_DIR)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
VPATH     = $(SRC_DIR)
 | 
					 | 
				
			||||||
SRC       = $(wildcard $(SRC_DIR)/*.c)
 | 
					 | 
				
			||||||
ASM       = $(patsubst $(SRC_DIR)/%.c, $(BUILD_DIR)/%.s, $(SRC))
 | 
					 | 
				
			||||||
OBJ       = $(patsubst $(SRC_DIR)/%.c, $(BUILD_DIR)/%.o, $(SRC))
 | 
					 | 
				
			||||||
SOURCES   = $(SRC) $(wildcard $(SRC_DIR)/*.h)
 | 
					 | 
				
			||||||
CPPFLAGS := $(CPPFLAGS) $(DEFINES) $(OPTIONS) $(INCLUDES)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
${TARGET}: $(BUILD_DIR) $(OBJ)
 | 
					 | 
				
			||||||
	$(info ===>  LINKING  $(TARGET))
 | 
					 | 
				
			||||||
	$(Q)${LINKER} ${LFLAGS} -o $(TARGET) $(OBJ) $(LIBS)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
$(BUILD_DIR)/%.o:  %.c $(MAKE_DIR)/include_$(TAG).mk $(MAKE_DIR)/config.mk
 | 
					 | 
				
			||||||
	$(info ===>  COMPILE  $@)
 | 
					 | 
				
			||||||
	$(CC) -c $(CPPFLAGS) $(CFLAGS) $< -o $@
 | 
					 | 
				
			||||||
	$(Q)$(GCC) $(CPPFLAGS) -MT $(@:.d=.o) -MM  $< > $(BUILD_DIR)/$*.d
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
$(BUILD_DIR)/%.s:  %.c
 | 
					 | 
				
			||||||
	$(info ===>  GENERATE ASM  $@)
 | 
					 | 
				
			||||||
	$(CC) -S $(CPPFLAGS) $(CFLAGS) $< -o $@
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
.PHONY: clean distclean tags info asm format
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
clean:
 | 
					 | 
				
			||||||
	$(info ===>  CLEAN)
 | 
					 | 
				
			||||||
	@rm -rf $(BUILD_DIR)
 | 
					 | 
				
			||||||
	@rm -f tags
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
distclean: clean
 | 
					 | 
				
			||||||
	$(info ===>  DIST CLEAN)
 | 
					 | 
				
			||||||
	@rm -f $(TARGET)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
info:
 | 
					 | 
				
			||||||
	$(info $(CFLAGS))
 | 
					 | 
				
			||||||
	$(Q)$(CC) $(VERSION)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
asm:  $(BUILD_DIR) $(ASM)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
tags:
 | 
					 | 
				
			||||||
	$(info ===>  GENERATE TAGS)
 | 
					 | 
				
			||||||
	$(Q)ctags -R
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
format:
 | 
					 | 
				
			||||||
	@for src in $(SOURCES) ; do \
 | 
					 | 
				
			||||||
		echo "Formatting $$src" ; \
 | 
					 | 
				
			||||||
		clang-format -i $$src ; \
 | 
					 | 
				
			||||||
	done
 | 
					 | 
				
			||||||
	@echo "Done"
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
$(BUILD_DIR):
 | 
					 | 
				
			||||||
	@mkdir $(BUILD_DIR)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
-include $(OBJ:.o=.d)
 | 
					 | 
				
			||||||
@@ -1,48 +0,0 @@
 | 
				
			|||||||
# C source skeleton
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
## Build
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
1. Configure the toolchain and additional options in `config.mk`:
 | 
					 | 
				
			||||||
```
 | 
					 | 
				
			||||||
# Supported: GCC, CLANG, ICC
 | 
					 | 
				
			||||||
TAG ?= GCC
 | 
					 | 
				
			||||||
ENABLE_OPENMP ?= false
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
OPTIONS +=  -DARRAY_ALIGNMENT=64
 | 
					 | 
				
			||||||
#OPTIONS +=  -DVERBOSE_AFFINITY
 | 
					 | 
				
			||||||
#OPTIONS +=  -DVERBOSE_DATASIZE
 | 
					 | 
				
			||||||
#OPTIONS +=  -DVERBOSE_TIMER
 | 
					 | 
				
			||||||
```
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
The verbosity options enable detailed output about affinity settings, allocation sizes and timer resolution.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
2. Build with:
 | 
					 | 
				
			||||||
```
 | 
					 | 
				
			||||||
make
 | 
					 | 
				
			||||||
```
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
You can build multiple toolchains in the same directory, but notice that the Makefile is only acting on the one currently set.
 | 
					 | 
				
			||||||
Intermediate build results are located in the `<TOOLCHAIN>` directory.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
To output the executed commands use:
 | 
					 | 
				
			||||||
```
 | 
					 | 
				
			||||||
make Q=
 | 
					 | 
				
			||||||
```
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
3. Clean up with:
 | 
					 | 
				
			||||||
```
 | 
					 | 
				
			||||||
make clean
 | 
					 | 
				
			||||||
```
 | 
					 | 
				
			||||||
to clean intermediate build results.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
```
 | 
					 | 
				
			||||||
make distclean
 | 
					 | 
				
			||||||
```
 | 
					 | 
				
			||||||
to clean intermediate build results and binary.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
4. (Optional) Generate assembler:
 | 
					 | 
				
			||||||
```
 | 
					 | 
				
			||||||
make asm
 | 
					 | 
				
			||||||
```
 | 
					 | 
				
			||||||
The assembler files will also be located in the `<TOOLCHAIN>` directory.
 | 
					 | 
				
			||||||
@@ -1,46 +0,0 @@
 | 
				
			|||||||
#==============================================================================
 | 
					 | 
				
			||||||
#                            Laminar Canal Flow
 | 
					 | 
				
			||||||
#==============================================================================
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
# Problem specific Data:
 | 
					 | 
				
			||||||
# ---------------------
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
name canal             # name of flow setup
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
bcN     1              #  flags for boundary conditions
 | 
					 | 
				
			||||||
bcE     3              #  1 = no-slip      3 = outflow
 | 
					 | 
				
			||||||
bcS     1              #  2 = free-slip    4 = periodic
 | 
					 | 
				
			||||||
bcW     3              #
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
gx     0.0      # Body forces (e.g. gravity)
 | 
					 | 
				
			||||||
gy     0.0      #
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
re            100.0	   # Reynolds number
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
u_init        1.0      # initial value for velocity in x-direction
 | 
					 | 
				
			||||||
v_init        0.0      # initial value for velocity in y-direction
 | 
					 | 
				
			||||||
p_init        0.0      # initial value for pressure
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
# Geometry Data:
 | 
					 | 
				
			||||||
# -------------
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
xlength       30.0     # domain size in x-direction
 | 
					 | 
				
			||||||
ylength       4.0	   # domain size in y-direction
 | 
					 | 
				
			||||||
imax          200      # number of interior cells in x-direction
 | 
					 | 
				
			||||||
jmax          50	   # number of interior cells in y-direction
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
# Time Data:
 | 
					 | 
				
			||||||
# ---------
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
te       100.0   # final time
 | 
					 | 
				
			||||||
dt       0.02    # time stepsize
 | 
					 | 
				
			||||||
tau      0.5     # safety factor for time stepsize control (<0 constant delt)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
# Pressure Iteration Data:
 | 
					 | 
				
			||||||
# -----------------------
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
itermax       500       # maximal number of pressure iteration in one time step
 | 
					 | 
				
			||||||
eps           0.00001   # stopping tolerance for pressure iteration
 | 
					 | 
				
			||||||
omg           1.8       # relaxation parameter for SOR iteration
 | 
					 | 
				
			||||||
gamma         0.9       # upwind differencing factor gamma
 | 
					 | 
				
			||||||
#===============================================================================
 | 
					 | 
				
			||||||
@@ -1,10 +0,0 @@
 | 
				
			|||||||
# Supported: GCC, CLANG, ICC
 | 
					 | 
				
			||||||
TAG ?= CLANG
 | 
					 | 
				
			||||||
ENABLE_OPENMP ?= false
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
#Feature options
 | 
					 | 
				
			||||||
OPTIONS +=  -DARRAY_ALIGNMENT=64
 | 
					 | 
				
			||||||
# OPTIONS +=  -DVERBOSE
 | 
					 | 
				
			||||||
#OPTIONS +=  -DVERBOSE_AFFINITY
 | 
					 | 
				
			||||||
#OPTIONS +=  -DVERBOSE_DATASIZE
 | 
					 | 
				
			||||||
#OPTIONS +=  -DVERBOSE_TIMER
 | 
					 | 
				
			||||||
@@ -1,46 +0,0 @@
 | 
				
			|||||||
#==============================================================================
 | 
					 | 
				
			||||||
#                              Driven Cavity
 | 
					 | 
				
			||||||
#==============================================================================
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
# Problem specific Data:
 | 
					 | 
				
			||||||
# ---------------------
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
name dcavity        # name of flow setup
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
bcN    1			#  flags for boundary conditions
 | 
					 | 
				
			||||||
bcE    1			#  1 = no-slip      3 = outflow
 | 
					 | 
				
			||||||
bcS    1			#  2 = free-slip    4 = periodic
 | 
					 | 
				
			||||||
bcW    1			#
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
gx    0.0			# Body forces (e.g. gravity)
 | 
					 | 
				
			||||||
gy    0.0			#
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
re    1000.0		    # Reynolds number
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
u_init    0.0		# initial value for velocity in x-direction
 | 
					 | 
				
			||||||
v_init    0.0		# initial value for velocity in y-direction
 | 
					 | 
				
			||||||
p_init    0.0		# initial value for pressure
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
# Geometry Data:
 | 
					 | 
				
			||||||
# -------------
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
xlength    1.0		# domain size in x-direction
 | 
					 | 
				
			||||||
ylength    1.0		# domain size in y-direction
 | 
					 | 
				
			||||||
imax       100		# number of interior cells in x-direction
 | 
					 | 
				
			||||||
jmax       100		# number of interior cells in y-direction
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
# Time Data:
 | 
					 | 
				
			||||||
# ---------
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
te      10.0		# final time
 | 
					 | 
				
			||||||
dt     0.02	    # time stepsize
 | 
					 | 
				
			||||||
tau     0.5		# safety factor for time stepsize control (<0 constant delt)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
# Pressure Iteration Data:
 | 
					 | 
				
			||||||
# -----------------------
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
itermax  1000		# maximal number of pressure iteration in one time step
 | 
					 | 
				
			||||||
eps      0.001		# stopping tolerance for pressure iteration
 | 
					 | 
				
			||||||
omg      1.7		# relaxation parameter for SOR iteration
 | 
					 | 
				
			||||||
gamma    0.9		# upwind differencing factor gamma
 | 
					 | 
				
			||||||
#===============================================================================
 | 
					 | 
				
			||||||
@@ -1,16 +0,0 @@
 | 
				
			|||||||
CC   = mpicc
 | 
					 | 
				
			||||||
GCC  = cc
 | 
					 | 
				
			||||||
LINKER = $(CC)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
ifeq ($(ENABLE_OPENMP),true)
 | 
					 | 
				
			||||||
OPENMP   = -fopenmp
 | 
					 | 
				
			||||||
#OPENMP   = -Xpreprocessor -fopenmp #required on Macos with homebrew libomp
 | 
					 | 
				
			||||||
LIBS     = # -lomp
 | 
					 | 
				
			||||||
endif
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
VERSION  = --version
 | 
					 | 
				
			||||||
CFLAGS   = -Ofast -std=c99 $(OPENMP)
 | 
					 | 
				
			||||||
#CFLAGS   = -Ofast -fnt-store=aggressive  -std=c99 $(OPENMP) #AMD CLANG
 | 
					 | 
				
			||||||
LFLAGS   = $(OPENMP)
 | 
					 | 
				
			||||||
DEFINES  = -D_GNU_SOURCE# -DDEBUG
 | 
					 | 
				
			||||||
INCLUDES = -I/usr/local/include
 | 
					 | 
				
			||||||
@@ -1,14 +0,0 @@
 | 
				
			|||||||
CC   = gcc
 | 
					 | 
				
			||||||
GCC  = gcc
 | 
					 | 
				
			||||||
LINKER = $(CC)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
ifeq ($(ENABLE_OPENMP),true)
 | 
					 | 
				
			||||||
OPENMP   = -fopenmp
 | 
					 | 
				
			||||||
endif
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
VERSION  = --version
 | 
					 | 
				
			||||||
CFLAGS   = -Ofast -ffreestanding -std=c99 $(OPENMP)
 | 
					 | 
				
			||||||
LFLAGS   = $(OPENMP)
 | 
					 | 
				
			||||||
DEFINES  = -D_GNU_SOURCE
 | 
					 | 
				
			||||||
INCLUDES =
 | 
					 | 
				
			||||||
LIBS     =
 | 
					 | 
				
			||||||
@@ -1,14 +0,0 @@
 | 
				
			|||||||
CC   = mpiicc
 | 
					 | 
				
			||||||
GCC  = gcc
 | 
					 | 
				
			||||||
LINKER = $(CC)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
ifeq ($(ENABLE_OPENMP),true)
 | 
					 | 
				
			||||||
OPENMP   = -qopenmp
 | 
					 | 
				
			||||||
endif
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
VERSION  = --version
 | 
					 | 
				
			||||||
CFLAGS   =  -O3 -xHost -qopt-zmm-usage=high -std=c99 $(OPENMP)
 | 
					 | 
				
			||||||
LFLAGS   = $(OPENMP)
 | 
					 | 
				
			||||||
DEFINES  = -D_GNU_SOURCE
 | 
					 | 
				
			||||||
INCLUDES =
 | 
					 | 
				
			||||||
LIBS     =
 | 
					 | 
				
			||||||
@@ -1,61 +0,0 @@
 | 
				
			|||||||
/*
 | 
					 | 
				
			||||||
 * Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
 | 
					 | 
				
			||||||
 * All rights reserved.
 | 
					 | 
				
			||||||
 * Use of this source code is governed by a MIT-style
 | 
					 | 
				
			||||||
 * license that can be found in the LICENSE file.
 | 
					 | 
				
			||||||
 */
 | 
					 | 
				
			||||||
#ifdef __linux__
 | 
					 | 
				
			||||||
#ifdef _OPENMP
 | 
					 | 
				
			||||||
#include <pthread.h>
 | 
					 | 
				
			||||||
#include <sched.h>
 | 
					 | 
				
			||||||
#include <stdio.h>
 | 
					 | 
				
			||||||
#include <stdlib.h>
 | 
					 | 
				
			||||||
#include <sys/syscall.h>
 | 
					 | 
				
			||||||
#include <sys/types.h>
 | 
					 | 
				
			||||||
#include <unistd.h>
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
#define MAX_NUM_THREADS 128
 | 
					 | 
				
			||||||
#define gettid()        syscall(SYS_gettid)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
static int getProcessorID(cpu_set_t* cpu_set)
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
    int processorId;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    for (processorId = 0; processorId < MAX_NUM_THREADS; processorId++) {
 | 
					 | 
				
			||||||
        if (CPU_ISSET(processorId, cpu_set)) {
 | 
					 | 
				
			||||||
            break;
 | 
					 | 
				
			||||||
        }
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
    return processorId;
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
int affinity_getProcessorId()
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
    cpu_set_t cpu_set;
 | 
					 | 
				
			||||||
    CPU_ZERO(&cpu_set);
 | 
					 | 
				
			||||||
    sched_getaffinity(gettid(), sizeof(cpu_set_t), &cpu_set);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    return getProcessorID(&cpu_set);
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
void affinity_pinThread(int processorId)
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
    cpu_set_t cpuset;
 | 
					 | 
				
			||||||
    pthread_t thread;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    thread = pthread_self();
 | 
					 | 
				
			||||||
    CPU_ZERO(&cpuset);
 | 
					 | 
				
			||||||
    CPU_SET(processorId, &cpuset);
 | 
					 | 
				
			||||||
    pthread_setaffinity_np(thread, sizeof(cpu_set_t), &cpuset);
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
void affinity_pinProcess(int processorId)
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
    cpu_set_t cpuset;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    CPU_ZERO(&cpuset);
 | 
					 | 
				
			||||||
    CPU_SET(processorId, &cpuset);
 | 
					 | 
				
			||||||
    sched_setaffinity(0, sizeof(cpu_set_t), &cpuset);
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
#endif /*_OPENMP*/
 | 
					 | 
				
			||||||
#endif /*__linux__*/
 | 
					 | 
				
			||||||
@@ -1,14 +0,0 @@
 | 
				
			|||||||
/*
 | 
					 | 
				
			||||||
 * Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
 | 
					 | 
				
			||||||
 * All rights reserved.
 | 
					 | 
				
			||||||
 * Use of this source code is governed by a MIT-style
 | 
					 | 
				
			||||||
 * license that can be found in the LICENSE file.
 | 
					 | 
				
			||||||
 */
 | 
					 | 
				
			||||||
#ifndef AFFINITY_H
 | 
					 | 
				
			||||||
#define AFFINITY_H
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
extern int affinity_getProcessorId();
 | 
					 | 
				
			||||||
extern void affinity_pinProcess(int);
 | 
					 | 
				
			||||||
extern void affinity_pinThread(int);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
#endif /*AFFINITY_H*/
 | 
					 | 
				
			||||||
@@ -1,35 +0,0 @@
 | 
				
			|||||||
/*
 | 
					 | 
				
			||||||
 * Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
 | 
					 | 
				
			||||||
 * All rights reserved.
 | 
					 | 
				
			||||||
 * Use of this source code is governed by a MIT-style
 | 
					 | 
				
			||||||
 * license that can be found in the LICENSE file.
 | 
					 | 
				
			||||||
 */
 | 
					 | 
				
			||||||
#include <errno.h>
 | 
					 | 
				
			||||||
#include <stdio.h>
 | 
					 | 
				
			||||||
#include <stdlib.h>
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
void* allocate(int alignment, size_t bytesize)
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
    int errorCode;
 | 
					 | 
				
			||||||
    void* ptr;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    errorCode = posix_memalign(&ptr, alignment, bytesize);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    if (errorCode) {
 | 
					 | 
				
			||||||
        if (errorCode == EINVAL) {
 | 
					 | 
				
			||||||
            fprintf(stderr, "Error: Alignment parameter is not a power of two\n");
 | 
					 | 
				
			||||||
            exit(EXIT_FAILURE);
 | 
					 | 
				
			||||||
        }
 | 
					 | 
				
			||||||
        if (errorCode == ENOMEM) {
 | 
					 | 
				
			||||||
            fprintf(stderr, "Error: Insufficient memory to fulfill the request\n");
 | 
					 | 
				
			||||||
            exit(EXIT_FAILURE);
 | 
					 | 
				
			||||||
        }
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    if (ptr == NULL) {
 | 
					 | 
				
			||||||
        fprintf(stderr, "Error: posix_memalign failed!\n");
 | 
					 | 
				
			||||||
        exit(EXIT_FAILURE);
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    return ptr;
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
@@ -1,13 +0,0 @@
 | 
				
			|||||||
/*
 | 
					 | 
				
			||||||
 * Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
 | 
					 | 
				
			||||||
 * All rights reserved.
 | 
					 | 
				
			||||||
 * Use of this source code is governed by a MIT-style
 | 
					 | 
				
			||||||
 * license that can be found in the LICENSE file.
 | 
					 | 
				
			||||||
 */
 | 
					 | 
				
			||||||
#ifndef __ALLOCATE_H_
 | 
					 | 
				
			||||||
#define __ALLOCATE_H_
 | 
					 | 
				
			||||||
#include <stdlib.h>
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
extern void* allocate(int alignment, size_t bytesize);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
#endif
 | 
					 | 
				
			||||||
@@ -1,54 +0,0 @@
 | 
				
			|||||||
/*
 | 
					 | 
				
			||||||
 * =======================================================================================
 | 
					 | 
				
			||||||
 *
 | 
					 | 
				
			||||||
 *      Author:   Jan Eitzinger (je), jan.eitzinger@fau.de
 | 
					 | 
				
			||||||
 *      Copyright (c) 2020 RRZE, University Erlangen-Nuremberg
 | 
					 | 
				
			||||||
 *
 | 
					 | 
				
			||||||
 *      Permission is hereby granted, free of charge, to any person obtaining a copy
 | 
					 | 
				
			||||||
 *      of this software and associated documentation files (the "Software"), to deal
 | 
					 | 
				
			||||||
 *      in the Software without restriction, including without limitation the rights
 | 
					 | 
				
			||||||
 *      to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 | 
					 | 
				
			||||||
 *      copies of the Software, and to permit persons to whom the Software is
 | 
					 | 
				
			||||||
 *      furnished to do so, subject to the following conditions:
 | 
					 | 
				
			||||||
 *
 | 
					 | 
				
			||||||
 *      The above copyright notice and this permission notice shall be included in all
 | 
					 | 
				
			||||||
 *      copies or substantial portions of the Software.
 | 
					 | 
				
			||||||
 *
 | 
					 | 
				
			||||||
 *      THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 | 
					 | 
				
			||||||
 *      IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 | 
					 | 
				
			||||||
 *      FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 | 
					 | 
				
			||||||
 *      AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 | 
					 | 
				
			||||||
 *      LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 | 
					 | 
				
			||||||
 *      OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 | 
					 | 
				
			||||||
 *      SOFTWARE.
 | 
					 | 
				
			||||||
 *
 | 
					 | 
				
			||||||
 * =======================================================================================
 | 
					 | 
				
			||||||
 */
 | 
					 | 
				
			||||||
#ifndef LIKWID_MARKERS_H
 | 
					 | 
				
			||||||
#define LIKWID_MARKERS_H
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
#ifdef LIKWID_PERFMON
 | 
					 | 
				
			||||||
#include <likwid.h>
 | 
					 | 
				
			||||||
#define LIKWID_MARKER_INIT                likwid_markerInit()
 | 
					 | 
				
			||||||
#define LIKWID_MARKER_THREADINIT          likwid_markerThreadInit()
 | 
					 | 
				
			||||||
#define LIKWID_MARKER_SWITCH              likwid_markerNextGroup()
 | 
					 | 
				
			||||||
#define LIKWID_MARKER_REGISTER(regionTag) likwid_markerRegisterRegion(regionTag)
 | 
					 | 
				
			||||||
#define LIKWID_MARKER_START(regionTag)    likwid_markerStartRegion(regionTag)
 | 
					 | 
				
			||||||
#define LIKWID_MARKER_STOP(regionTag)     likwid_markerStopRegion(regionTag)
 | 
					 | 
				
			||||||
#define LIKWID_MARKER_CLOSE               likwid_markerClose()
 | 
					 | 
				
			||||||
#define LIKWID_MARKER_RESET(regionTag)    likwid_markerResetRegion(regionTag)
 | 
					 | 
				
			||||||
#define LIKWID_MARKER_GET(regionTag, nevents, events, time, count)                       \
 | 
					 | 
				
			||||||
    likwid_markerGetRegion(regionTag, nevents, events, time, count)
 | 
					 | 
				
			||||||
#else /* LIKWID_PERFMON */
 | 
					 | 
				
			||||||
#define LIKWID_MARKER_INIT
 | 
					 | 
				
			||||||
#define LIKWID_MARKER_THREADINIT
 | 
					 | 
				
			||||||
#define LIKWID_MARKER_SWITCH
 | 
					 | 
				
			||||||
#define LIKWID_MARKER_REGISTER(regionTag)
 | 
					 | 
				
			||||||
#define LIKWID_MARKER_START(regionTag)
 | 
					 | 
				
			||||||
#define LIKWID_MARKER_STOP(regionTag)
 | 
					 | 
				
			||||||
#define LIKWID_MARKER_CLOSE
 | 
					 | 
				
			||||||
#define LIKWID_MARKER_GET(regionTag, nevents, events, time, count)
 | 
					 | 
				
			||||||
#define LIKWID_MARKER_RESET(regionTag)
 | 
					 | 
				
			||||||
#endif /* LIKWID_PERFMON */
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
#endif /*LIKWID_MARKERS_H*/
 | 
					 | 
				
			||||||
@@ -1,80 +0,0 @@
 | 
				
			|||||||
/*
 | 
					 | 
				
			||||||
 * Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
 | 
					 | 
				
			||||||
 * All rights reserved.
 | 
					 | 
				
			||||||
 * Use of this source code is governed by a MIT-style
 | 
					 | 
				
			||||||
 * license that can be found in the LICENSE file.
 | 
					 | 
				
			||||||
 */
 | 
					 | 
				
			||||||
#include <float.h>
 | 
					 | 
				
			||||||
#include <limits.h>
 | 
					 | 
				
			||||||
#include <mpi.h>
 | 
					 | 
				
			||||||
#include <stdio.h>
 | 
					 | 
				
			||||||
#include <stdlib.h>
 | 
					 | 
				
			||||||
#include <unistd.h>
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
#include "parameter.h"
 | 
					 | 
				
			||||||
#include "progress.h"
 | 
					 | 
				
			||||||
#include "solver.h"
 | 
					 | 
				
			||||||
#include "timing.h"
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
int main(int argc, char** argv)
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
    int rank;
 | 
					 | 
				
			||||||
    double S, E;
 | 
					 | 
				
			||||||
    Parameter params;
 | 
					 | 
				
			||||||
    Solver solver;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    MPI_Init(&argc, &argv);
 | 
					 | 
				
			||||||
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
 | 
					 | 
				
			||||||
    initParameter(¶ms);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    if (argc != 2) {
 | 
					 | 
				
			||||||
        printf("Usage: %s <configFile>\n", argv[0]);
 | 
					 | 
				
			||||||
        exit(EXIT_SUCCESS);
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    readParameter(¶ms, argv[1]);
 | 
					 | 
				
			||||||
    if (rank == 0) {
 | 
					 | 
				
			||||||
        printParameter(¶ms);
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
    initSolver(&solver, ¶ms);
 | 
					 | 
				
			||||||
    /* debugExchange(&solver); */
 | 
					 | 
				
			||||||
    /* debugBC(&solver); */
 | 
					 | 
				
			||||||
    /* exit(EXIT_SUCCESS); */
 | 
					 | 
				
			||||||
    initProgress(solver.te);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    double tau = solver.tau;
 | 
					 | 
				
			||||||
    double te  = solver.te;
 | 
					 | 
				
			||||||
    double t   = 0.0;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    S = getTimeStamp();
 | 
					 | 
				
			||||||
    while (t <= te) {
 | 
					 | 
				
			||||||
        if (tau > 0.0) {
 | 
					 | 
				
			||||||
            computeTimestep(&solver);
 | 
					 | 
				
			||||||
        }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        setBoundaryConditions(&solver);
 | 
					 | 
				
			||||||
        setSpecialBoundaryCondition(&solver);
 | 
					 | 
				
			||||||
        computeFG(&solver);
 | 
					 | 
				
			||||||
        computeRHS(&solver);
 | 
					 | 
				
			||||||
        solve(&solver);
 | 
					 | 
				
			||||||
        adaptUV(&solver);
 | 
					 | 
				
			||||||
        t += solver.dt;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
#ifdef VERBOSE
 | 
					 | 
				
			||||||
        if (rank == 0) {
 | 
					 | 
				
			||||||
            printf("TIME %f , TIMESTEP %f\n", t, solver.dt);
 | 
					 | 
				
			||||||
        }
 | 
					 | 
				
			||||||
#else
 | 
					 | 
				
			||||||
        printProgress(t);
 | 
					 | 
				
			||||||
#endif
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
    E = getTimeStamp();
 | 
					 | 
				
			||||||
    stopProgress();
 | 
					 | 
				
			||||||
    if (rank == 0) {
 | 
					 | 
				
			||||||
        printf("Solution took %.2fs\n", E - S);
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
    collectResult(&solver);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    MPI_Finalize();
 | 
					 | 
				
			||||||
    return EXIT_SUCCESS;
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
@@ -1,108 +0,0 @@
 | 
				
			|||||||
/*
 | 
					 | 
				
			||||||
 * Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
 | 
					 | 
				
			||||||
 * All rights reserved. This file is part of nusif-solver.
 | 
					 | 
				
			||||||
 * Use of this source code is governed by a MIT style
 | 
					 | 
				
			||||||
 * license that can be found in the LICENSE file.
 | 
					 | 
				
			||||||
 */
 | 
					 | 
				
			||||||
#include <stdio.h>
 | 
					 | 
				
			||||||
#include <stdlib.h>
 | 
					 | 
				
			||||||
#include <string.h>
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
#include "parameter.h"
 | 
					 | 
				
			||||||
#include "util.h"
 | 
					 | 
				
			||||||
#define MAXLINE 4096
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
void initParameter(Parameter* param)
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
    param->xlength = 1.0;
 | 
					 | 
				
			||||||
    param->ylength = 1.0;
 | 
					 | 
				
			||||||
    param->imax    = 100;
 | 
					 | 
				
			||||||
    param->jmax    = 100;
 | 
					 | 
				
			||||||
    param->itermax = 1000;
 | 
					 | 
				
			||||||
    param->eps     = 0.0001;
 | 
					 | 
				
			||||||
    param->omg     = 1.8;
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
void readParameter(Parameter* param, const char* filename)
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
    FILE* fp = fopen(filename, "r");
 | 
					 | 
				
			||||||
    char line[MAXLINE];
 | 
					 | 
				
			||||||
    int i;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    if (!fp) {
 | 
					 | 
				
			||||||
        fprintf(stderr, "Could not open parameter file: %s\n", filename);
 | 
					 | 
				
			||||||
        exit(EXIT_FAILURE);
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    while (!feof(fp)) {
 | 
					 | 
				
			||||||
        line[0] = '\0';
 | 
					 | 
				
			||||||
        fgets(line, MAXLINE, fp);
 | 
					 | 
				
			||||||
        for (i = 0; line[i] != '\0' && line[i] != '#'; i++)
 | 
					 | 
				
			||||||
            ;
 | 
					 | 
				
			||||||
        line[i] = '\0';
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        char* tok = strtok(line, " ");
 | 
					 | 
				
			||||||
        char* val = strtok(NULL, " ");
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
#define PARSE_PARAM(p, f)                                                                \
 | 
					 | 
				
			||||||
    if (strncmp(tok, #p, sizeof(#p) / sizeof(#p[0]) - 1) == 0) {                         \
 | 
					 | 
				
			||||||
        param->p = f(val);                                                               \
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
#define PARSE_STRING(p) PARSE_PARAM(p, strdup)
 | 
					 | 
				
			||||||
#define PARSE_INT(p)    PARSE_PARAM(p, atoi)
 | 
					 | 
				
			||||||
#define PARSE_REAL(p)   PARSE_PARAM(p, atof)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        if (tok != NULL && val != NULL) {
 | 
					 | 
				
			||||||
            PARSE_REAL(xlength);
 | 
					 | 
				
			||||||
            PARSE_REAL(ylength);
 | 
					 | 
				
			||||||
            PARSE_INT(imax);
 | 
					 | 
				
			||||||
            PARSE_INT(jmax);
 | 
					 | 
				
			||||||
            PARSE_INT(itermax);
 | 
					 | 
				
			||||||
            PARSE_REAL(eps);
 | 
					 | 
				
			||||||
            PARSE_REAL(omg);
 | 
					 | 
				
			||||||
            PARSE_REAL(re);
 | 
					 | 
				
			||||||
            PARSE_REAL(tau);
 | 
					 | 
				
			||||||
            PARSE_REAL(gamma);
 | 
					 | 
				
			||||||
            PARSE_REAL(dt);
 | 
					 | 
				
			||||||
            PARSE_REAL(te);
 | 
					 | 
				
			||||||
            PARSE_REAL(gx);
 | 
					 | 
				
			||||||
            PARSE_REAL(gy);
 | 
					 | 
				
			||||||
            PARSE_STRING(name);
 | 
					 | 
				
			||||||
            PARSE_INT(bcN);
 | 
					 | 
				
			||||||
            PARSE_INT(bcS);
 | 
					 | 
				
			||||||
            PARSE_INT(bcE);
 | 
					 | 
				
			||||||
            PARSE_INT(bcW);
 | 
					 | 
				
			||||||
            PARSE_REAL(u_init);
 | 
					 | 
				
			||||||
            PARSE_REAL(v_init);
 | 
					 | 
				
			||||||
            PARSE_REAL(p_init);
 | 
					 | 
				
			||||||
        }
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    fclose(fp);
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
void printParameter(Parameter* param)
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
    printf("Parameters for %s\n", param->name);
 | 
					 | 
				
			||||||
    printf("Boundary conditions N:%d E:%d S:%d W:%d\n",
 | 
					 | 
				
			||||||
        param->bcN,
 | 
					 | 
				
			||||||
        param->bcE,
 | 
					 | 
				
			||||||
        param->bcS,
 | 
					 | 
				
			||||||
        param->bcW);
 | 
					 | 
				
			||||||
    printf("\tReynolds number: %.2f\n", param->re);
 | 
					 | 
				
			||||||
    printf("\tInit arrays: U:%.2f V:%.2f P:%.2f\n",
 | 
					 | 
				
			||||||
        param->u_init,
 | 
					 | 
				
			||||||
        param->v_init,
 | 
					 | 
				
			||||||
        param->p_init);
 | 
					 | 
				
			||||||
    printf("Geometry data:\n");
 | 
					 | 
				
			||||||
    printf("\tDomain box size (x, y): %.2f, %.2f\n", param->xlength, param->ylength);
 | 
					 | 
				
			||||||
    printf("\tCells (x, y): %d, %d\n", param->imax, param->jmax);
 | 
					 | 
				
			||||||
    printf("Timestep parameters:\n");
 | 
					 | 
				
			||||||
    printf("\tDefault stepsize: %.2f, Final time %.2f\n", param->dt, param->te);
 | 
					 | 
				
			||||||
    printf("\tTau factor: %.2f\n", param->tau);
 | 
					 | 
				
			||||||
    printf("Iterative solver parameters:\n");
 | 
					 | 
				
			||||||
    printf("\tMax iterations: %d\n", param->itermax);
 | 
					 | 
				
			||||||
    printf("\tepsilon (stopping tolerance) : %f\n", param->eps);
 | 
					 | 
				
			||||||
    printf("\tgamma (stopping tolerance) : %f\n", param->gamma);
 | 
					 | 
				
			||||||
    printf("\tomega (SOR relaxation): %f\n", param->omg);
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
@@ -1,26 +0,0 @@
 | 
				
			|||||||
/*
 | 
					 | 
				
			||||||
 * Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
 | 
					 | 
				
			||||||
 * All rights reserved. This file is part of nusif-solver.
 | 
					 | 
				
			||||||
 * Use of this source code is governed by a MIT style
 | 
					 | 
				
			||||||
 * license that can be found in the LICENSE file.
 | 
					 | 
				
			||||||
 */
 | 
					 | 
				
			||||||
#ifndef __PARAMETER_H_
 | 
					 | 
				
			||||||
#define __PARAMETER_H_
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
typedef struct {
 | 
					 | 
				
			||||||
    double xlength, ylength;
 | 
					 | 
				
			||||||
    int imax, jmax;
 | 
					 | 
				
			||||||
    int itermax;
 | 
					 | 
				
			||||||
    double eps, omg;
 | 
					 | 
				
			||||||
    double re, tau, gamma;
 | 
					 | 
				
			||||||
    double te, dt;
 | 
					 | 
				
			||||||
    double gx, gy;
 | 
					 | 
				
			||||||
    char* name;
 | 
					 | 
				
			||||||
    int bcN, bcS, bcE, bcW;
 | 
					 | 
				
			||||||
    double u_init, v_init, p_init;
 | 
					 | 
				
			||||||
} Parameter;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
void initParameter(Parameter*);
 | 
					 | 
				
			||||||
void readParameter(Parameter*, const char*);
 | 
					 | 
				
			||||||
void printParameter(Parameter*);
 | 
					 | 
				
			||||||
#endif
 | 
					 | 
				
			||||||
@@ -1,60 +0,0 @@
 | 
				
			|||||||
/*
 | 
					 | 
				
			||||||
 * Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
 | 
					 | 
				
			||||||
 * All rights reserved. This file is part of nusif-solver.
 | 
					 | 
				
			||||||
 * Use of this source code is governed by a MIT style
 | 
					 | 
				
			||||||
 * license that can be found in the LICENSE file.
 | 
					 | 
				
			||||||
 */
 | 
					 | 
				
			||||||
#include <math.h>
 | 
					 | 
				
			||||||
#include <mpi.h>
 | 
					 | 
				
			||||||
#include <stdio.h>
 | 
					 | 
				
			||||||
#include <stdlib.h>
 | 
					 | 
				
			||||||
#include <string.h>
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
#include "progress.h"
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
static double _end;
 | 
					 | 
				
			||||||
static int _current;
 | 
					 | 
				
			||||||
static int _rank = -1;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
void initProgress(double end)
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
    MPI_Comm_rank(MPI_COMM_WORLD, &_rank);
 | 
					 | 
				
			||||||
    _end     = end;
 | 
					 | 
				
			||||||
    _current = 0;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    if (_rank == 0) {
 | 
					 | 
				
			||||||
        printf("[          ]");
 | 
					 | 
				
			||||||
        fflush(stdout);
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
void printProgress(double current)
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
    if (_rank == 0) {
 | 
					 | 
				
			||||||
        int new = (int)rint((current / _end) * 10.0);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        if (new > _current) {
 | 
					 | 
				
			||||||
            char progress[11];
 | 
					 | 
				
			||||||
            _current    = new;
 | 
					 | 
				
			||||||
            progress[0] = 0;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
            for (int i = 0; i < 10; i++) {
 | 
					 | 
				
			||||||
                if (i < _current) {
 | 
					 | 
				
			||||||
                    sprintf(progress + strlen(progress), "#");
 | 
					 | 
				
			||||||
                } else {
 | 
					 | 
				
			||||||
                    sprintf(progress + strlen(progress), " ");
 | 
					 | 
				
			||||||
                }
 | 
					 | 
				
			||||||
            }
 | 
					 | 
				
			||||||
            printf("\r[%s]", progress);
 | 
					 | 
				
			||||||
        }
 | 
					 | 
				
			||||||
        fflush(stdout);
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
void stopProgress()
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
    if (_rank == 0) {
 | 
					 | 
				
			||||||
        printf("\n");
 | 
					 | 
				
			||||||
        fflush(stdout);
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
@@ -1,14 +0,0 @@
 | 
				
			|||||||
/*
 | 
					 | 
				
			||||||
 * Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
 | 
					 | 
				
			||||||
 * All rights reserved.
 | 
					 | 
				
			||||||
 * Use of this source code is governed by a MIT-style
 | 
					 | 
				
			||||||
 * license that can be found in the LICENSE file.
 | 
					 | 
				
			||||||
 */
 | 
					 | 
				
			||||||
#ifndef __PROGRESS_H_
 | 
					 | 
				
			||||||
#define __PROGRESS_H_
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
extern void initProgress(double);
 | 
					 | 
				
			||||||
extern void printProgress(double);
 | 
					 | 
				
			||||||
extern void stopProgress();
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
#endif
 | 
					 | 
				
			||||||
@@ -1,900 +0,0 @@
 | 
				
			|||||||
/*
 | 
					 | 
				
			||||||
 * Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
 | 
					 | 
				
			||||||
 * All rights reserved. This file is part of nusif-solver.
 | 
					 | 
				
			||||||
 * Use of this source code is governed by a MIT style
 | 
					 | 
				
			||||||
 * license that can be found in the LICENSE file.
 | 
					 | 
				
			||||||
 */
 | 
					 | 
				
			||||||
#include <float.h>
 | 
					 | 
				
			||||||
#include <math.h>
 | 
					 | 
				
			||||||
#include <mpi.h>
 | 
					 | 
				
			||||||
#include <stdio.h>
 | 
					 | 
				
			||||||
#include <stdlib.h>
 | 
					 | 
				
			||||||
#include <string.h>
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
#include "allocate.h"
 | 
					 | 
				
			||||||
#include "parameter.h"
 | 
					 | 
				
			||||||
#include "solver.h"
 | 
					 | 
				
			||||||
#include "util.h"
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
#define P(i, j)   p[(j) * (imaxLocal + 2) + (i)]
 | 
					 | 
				
			||||||
#define F(i, j)   f[(j) * (imaxLocal + 2) + (i)]
 | 
					 | 
				
			||||||
#define G(i, j)   g[(j) * (imaxLocal + 2) + (i)]
 | 
					 | 
				
			||||||
#define U(i, j)   u[(j) * (imaxLocal + 2) + (i)]
 | 
					 | 
				
			||||||
#define V(i, j)   v[(j) * (imaxLocal + 2) + (i)]
 | 
					 | 
				
			||||||
#define RHS(i, j) rhs[(j) * (imaxLocal + 2) + (i)]
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
#define NDIMS 2
 | 
					 | 
				
			||||||
#define IDIM  0
 | 
					 | 
				
			||||||
#define JDIM  1
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
static int sizeOfRank(int rank, int size, int N)
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
    return N / size + ((N % size > rank) ? 1 : 0);
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
void print(Solver* solver, double* grid)
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
    int imaxLocal = solver->imaxLocal;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    for (int i = 0; i < solver->size; i++) {
 | 
					 | 
				
			||||||
        if (i == solver->rank) {
 | 
					 | 
				
			||||||
            printf(
 | 
					 | 
				
			||||||
                "### RANK %d #######################################################\n",
 | 
					 | 
				
			||||||
                solver->rank);
 | 
					 | 
				
			||||||
            for (int j = 0; j < solver->jmaxLocal + 2; j++) {
 | 
					 | 
				
			||||||
                printf("%02d: ", j);
 | 
					 | 
				
			||||||
                for (int i = 0; i < solver->imaxLocal + 2; i++) {
 | 
					 | 
				
			||||||
                    printf("%12.8f  ", grid[j * (imaxLocal + 2) + i]);
 | 
					 | 
				
			||||||
                }
 | 
					 | 
				
			||||||
                printf("\n");
 | 
					 | 
				
			||||||
            }
 | 
					 | 
				
			||||||
            fflush(stdout);
 | 
					 | 
				
			||||||
        }
 | 
					 | 
				
			||||||
        MPI_Barrier(MPI_COMM_WORLD);
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
static void exchange(Solver* solver, double* grid)
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
    double* buf[8];
 | 
					 | 
				
			||||||
    MPI_Request requests[8];
 | 
					 | 
				
			||||||
    for (int i = 0; i < 8; i++)
 | 
					 | 
				
			||||||
        requests[i] = MPI_REQUEST_NULL;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    buf[0] = grid + 1;                                                     // recv bottom
 | 
					 | 
				
			||||||
    buf[1] = grid + (solver->imaxLocal + 2) + 1;                           // send bottom
 | 
					 | 
				
			||||||
    buf[2] = grid + (solver->jmaxLocal + 1) * (solver->imaxLocal + 2) + 1; // recv top
 | 
					 | 
				
			||||||
    buf[3] = grid + (solver->jmaxLocal) * (solver->imaxLocal + 2) + 1;     // send top
 | 
					 | 
				
			||||||
    buf[4] = grid + (solver->imaxLocal + 2);                               // recv left
 | 
					 | 
				
			||||||
    buf[5] = grid + (solver->imaxLocal + 2) + 1;                           // send left
 | 
					 | 
				
			||||||
    buf[6] = grid + (solver->imaxLocal + 2) + (solver->imaxLocal + 1);     // recv right
 | 
					 | 
				
			||||||
    buf[7] = grid + (solver->imaxLocal + 2) + (solver->imaxLocal);         // send right
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    for (int i = 0; i < 2; i++) {
 | 
					 | 
				
			||||||
        int tag = 0;
 | 
					 | 
				
			||||||
        if (solver->jNeighbours[i] != MPI_PROC_NULL) {
 | 
					 | 
				
			||||||
            tag = solver->jNeighbours[i];
 | 
					 | 
				
			||||||
        }
 | 
					 | 
				
			||||||
        /* exchange ghost cells with bottom/top neighbor */
 | 
					 | 
				
			||||||
        MPI_Irecv(buf[i * 2],
 | 
					 | 
				
			||||||
            1,
 | 
					 | 
				
			||||||
            solver->jBufferType,
 | 
					 | 
				
			||||||
            solver->jNeighbours[i],
 | 
					 | 
				
			||||||
            tag,
 | 
					 | 
				
			||||||
            solver->comm,
 | 
					 | 
				
			||||||
            &requests[i * 2]);
 | 
					 | 
				
			||||||
        MPI_Isend(buf[(i * 2) + 1],
 | 
					 | 
				
			||||||
            1,
 | 
					 | 
				
			||||||
            solver->jBufferType,
 | 
					 | 
				
			||||||
            solver->jNeighbours[i],
 | 
					 | 
				
			||||||
            solver->rank,
 | 
					 | 
				
			||||||
            solver->comm,
 | 
					 | 
				
			||||||
            &requests[i * 2 + 1]);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        tag = 0;
 | 
					 | 
				
			||||||
        if (solver->iNeighbours[i] != MPI_PROC_NULL) {
 | 
					 | 
				
			||||||
            tag = solver->iNeighbours[i];
 | 
					 | 
				
			||||||
        }
 | 
					 | 
				
			||||||
        /* exchange ghost cells with left/right neighbor */
 | 
					 | 
				
			||||||
        MPI_Irecv(buf[i * 2 + 4],
 | 
					 | 
				
			||||||
            1,
 | 
					 | 
				
			||||||
            solver->iBufferType,
 | 
					 | 
				
			||||||
            solver->iNeighbours[i],
 | 
					 | 
				
			||||||
            tag,
 | 
					 | 
				
			||||||
            solver->comm,
 | 
					 | 
				
			||||||
            &requests[i * 2 + 4]);
 | 
					 | 
				
			||||||
        MPI_Isend(buf[i * 2 + 5],
 | 
					 | 
				
			||||||
            1,
 | 
					 | 
				
			||||||
            solver->iBufferType,
 | 
					 | 
				
			||||||
            solver->iNeighbours[i],
 | 
					 | 
				
			||||||
            solver->rank,
 | 
					 | 
				
			||||||
            solver->comm,
 | 
					 | 
				
			||||||
            &requests[(i * 2) + 5]);
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    MPI_Waitall(8, requests, MPI_STATUSES_IGNORE);
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
static void shift(Solver* solver)
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
    MPI_Request requests[4] = { MPI_REQUEST_NULL,
 | 
					 | 
				
			||||||
        MPI_REQUEST_NULL,
 | 
					 | 
				
			||||||
        MPI_REQUEST_NULL,
 | 
					 | 
				
			||||||
        MPI_REQUEST_NULL };
 | 
					 | 
				
			||||||
    double* f               = solver->f;
 | 
					 | 
				
			||||||
    double* g               = solver->g;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    /* shift G */
 | 
					 | 
				
			||||||
    double* buf = g + 1;
 | 
					 | 
				
			||||||
    /* receive ghost cells from bottom neighbor */
 | 
					 | 
				
			||||||
    MPI_Irecv(buf,
 | 
					 | 
				
			||||||
        1,
 | 
					 | 
				
			||||||
        solver->jBufferType,
 | 
					 | 
				
			||||||
        solver->jNeighbours[0],
 | 
					 | 
				
			||||||
        0,
 | 
					 | 
				
			||||||
        solver->comm,
 | 
					 | 
				
			||||||
        &requests[0]);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    buf = g + (solver->jmaxLocal) * (solver->imaxLocal + 2) + 1;
 | 
					 | 
				
			||||||
    /* send ghost cells to top neighbor */
 | 
					 | 
				
			||||||
    MPI_Isend(buf,
 | 
					 | 
				
			||||||
        1,
 | 
					 | 
				
			||||||
        solver->jBufferType,
 | 
					 | 
				
			||||||
        solver->jNeighbours[1],
 | 
					 | 
				
			||||||
        0,
 | 
					 | 
				
			||||||
        solver->comm,
 | 
					 | 
				
			||||||
        &requests[1]);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    /* shift F */
 | 
					 | 
				
			||||||
    buf = f + (solver->imaxLocal + 2);
 | 
					 | 
				
			||||||
    /* receive ghost cells from left neighbor */
 | 
					 | 
				
			||||||
    MPI_Irecv(buf,
 | 
					 | 
				
			||||||
        1,
 | 
					 | 
				
			||||||
        solver->iBufferType,
 | 
					 | 
				
			||||||
        solver->iNeighbours[0],
 | 
					 | 
				
			||||||
        1,
 | 
					 | 
				
			||||||
        solver->comm,
 | 
					 | 
				
			||||||
        &requests[2]);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    buf = f + (solver->imaxLocal + 2) + (solver->imaxLocal);
 | 
					 | 
				
			||||||
    /* send ghost cells to right neighbor */
 | 
					 | 
				
			||||||
    MPI_Isend(buf,
 | 
					 | 
				
			||||||
        1,
 | 
					 | 
				
			||||||
        solver->iBufferType,
 | 
					 | 
				
			||||||
        solver->iNeighbours[1],
 | 
					 | 
				
			||||||
        1,
 | 
					 | 
				
			||||||
        solver->comm,
 | 
					 | 
				
			||||||
        &requests[3]);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    MPI_Waitall(4, requests, MPI_STATUSES_IGNORE);
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
void debugExchange(Solver* solver)
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
    int imaxLocal = solver->imaxLocal;
 | 
					 | 
				
			||||||
    int jmaxLocal = solver->jmaxLocal;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    for (int j = 0; j < jmaxLocal + 2; j++) {
 | 
					 | 
				
			||||||
        for (int i = 0; i < solver->imaxLocal + 2; i++) {
 | 
					 | 
				
			||||||
            solver->p[j * (imaxLocal + 2) + i] = solver->rank + 0.01 * i + 0.0001 * j;
 | 
					 | 
				
			||||||
        }
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
    collectResult(solver);
 | 
					 | 
				
			||||||
    /* print(solver, solver->p); */
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
void debugBC(Solver* solver)
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
    int imaxLocal = solver->imaxLocal;
 | 
					 | 
				
			||||||
    int jmaxLocal = solver->jmaxLocal;
 | 
					 | 
				
			||||||
    double* v     = solver->v;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    // Northern boundary
 | 
					 | 
				
			||||||
    if (solver->coords[JDIM] == (solver->dims[JDIM] - 1)) { // set top bc
 | 
					 | 
				
			||||||
        for (int i = 1; i < imaxLocal + 1; i++) {
 | 
					 | 
				
			||||||
            V(i, jmaxLocal + 1) = 10.0 + solver->rank;
 | 
					 | 
				
			||||||
        }
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    // Eastern boundary
 | 
					 | 
				
			||||||
    if (solver->coords[IDIM] == (solver->dims[IDIM] - 1)) { // set right bc
 | 
					 | 
				
			||||||
        for (int j = 1; j < jmaxLocal + 1; j++) {
 | 
					 | 
				
			||||||
            V(imaxLocal + 1, j) = 20.0 + solver->rank;
 | 
					 | 
				
			||||||
        }
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    // Southern boundary
 | 
					 | 
				
			||||||
    if (solver->coords[JDIM] == 0) { // set bottom bc
 | 
					 | 
				
			||||||
        for (int i = 1; i < imaxLocal + 1; i++) {
 | 
					 | 
				
			||||||
            V(i, 0) = 30.0 + solver->rank;
 | 
					 | 
				
			||||||
        }
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    // Western boundary
 | 
					 | 
				
			||||||
    if (solver->coords[IDIM] == 0) { // set left bc
 | 
					 | 
				
			||||||
        for (int j = 1; j < jmaxLocal + 1; j++) {
 | 
					 | 
				
			||||||
            V(0, j) = 40.0 + solver->rank;
 | 
					 | 
				
			||||||
        }
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
    print(solver, solver->v);
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
static void assembleResult(Solver* solver,
 | 
					 | 
				
			||||||
    double* src,
 | 
					 | 
				
			||||||
    double* dst,
 | 
					 | 
				
			||||||
    int imaxLocal[],
 | 
					 | 
				
			||||||
    int jmaxLocal[],
 | 
					 | 
				
			||||||
    int offset[])
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
    MPI_Request* requests;
 | 
					 | 
				
			||||||
    int numRequests = 1;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    if (solver->rank == 0) {
 | 
					 | 
				
			||||||
        numRequests = solver->size + 1;
 | 
					 | 
				
			||||||
    } else {
 | 
					 | 
				
			||||||
        numRequests = 1;
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    requests = (MPI_Request*)malloc(numRequests * sizeof(MPI_Request));
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    /* all ranks send their bulk array */
 | 
					 | 
				
			||||||
    MPI_Datatype bulkType;
 | 
					 | 
				
			||||||
    const int ndims     = 2;
 | 
					 | 
				
			||||||
    int oldSizes[ndims] = { solver->jmaxLocal + 2, solver->imaxLocal + 2 };
 | 
					 | 
				
			||||||
    int newSizes[ndims] = { solver->jmaxLocal, solver->imaxLocal };
 | 
					 | 
				
			||||||
    int starts[ndims]   = { 1, 1 };
 | 
					 | 
				
			||||||
    MPI_Type_create_subarray(2,
 | 
					 | 
				
			||||||
        oldSizes,
 | 
					 | 
				
			||||||
        newSizes,
 | 
					 | 
				
			||||||
        starts,
 | 
					 | 
				
			||||||
        MPI_ORDER_C,
 | 
					 | 
				
			||||||
        MPI_DOUBLE,
 | 
					 | 
				
			||||||
        &bulkType);
 | 
					 | 
				
			||||||
    MPI_Type_commit(&bulkType);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    MPI_Isend(src, 1, bulkType, 0, 0, solver->comm, &requests[0]);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    /* rank 0 assembles the subdomains */
 | 
					 | 
				
			||||||
    if (solver->rank == 0) {
 | 
					 | 
				
			||||||
        for (int i = 0; i < solver->size; i++) {
 | 
					 | 
				
			||||||
            MPI_Datatype domainType;
 | 
					 | 
				
			||||||
            MPI_Type_vector(jmaxLocal[i],
 | 
					 | 
				
			||||||
                imaxLocal[i],
 | 
					 | 
				
			||||||
                solver->imax,
 | 
					 | 
				
			||||||
                MPI_DOUBLE,
 | 
					 | 
				
			||||||
                &domainType);
 | 
					 | 
				
			||||||
            MPI_Type_commit(&domainType);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
            MPI_Irecv(dst + offset[i],
 | 
					 | 
				
			||||||
                1,
 | 
					 | 
				
			||||||
                domainType,
 | 
					 | 
				
			||||||
                i,
 | 
					 | 
				
			||||||
                0,
 | 
					 | 
				
			||||||
                solver->comm,
 | 
					 | 
				
			||||||
                &requests[i + 1]);
 | 
					 | 
				
			||||||
        }
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    MPI_Waitall(numRequests, requests, MPI_STATUSES_IGNORE);
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
static int sum(int* sizes, int position)
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
    int sum = 0;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    for (int i = 0; i < position; i++) {
 | 
					 | 
				
			||||||
        sum += sizes[i];
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    return sum;
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
void collectResult(Solver* solver)
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
    double* Pall = NULL;
 | 
					 | 
				
			||||||
    double* Uall = NULL;
 | 
					 | 
				
			||||||
    double* Vall = NULL;
 | 
					 | 
				
			||||||
    int offset[solver->size];
 | 
					 | 
				
			||||||
    int imaxLocal[solver->size];
 | 
					 | 
				
			||||||
    int jmaxLocal[solver->size];
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    MPI_Gather(&solver->imaxLocal, 1, MPI_INT, imaxLocal, 1, MPI_INT, 0, MPI_COMM_WORLD);
 | 
					 | 
				
			||||||
    MPI_Gather(&solver->jmaxLocal, 1, MPI_INT, jmaxLocal, 1, MPI_INT, 0, MPI_COMM_WORLD);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    if (solver->rank == 0) {
 | 
					 | 
				
			||||||
        Pall = allocate(64, (solver->imax) * (solver->jmax) * sizeof(double));
 | 
					 | 
				
			||||||
        Uall = allocate(64, (solver->imax) * (solver->jmax) * sizeof(double));
 | 
					 | 
				
			||||||
        Vall = allocate(64, (solver->imax) * (solver->jmax) * sizeof(double));
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        for (int i = 0; i < solver->size; i++) {
 | 
					 | 
				
			||||||
            int coords[2];
 | 
					 | 
				
			||||||
            MPI_Cart_coords(solver->comm, i, 2, coords);
 | 
					 | 
				
			||||||
            int ioffset = sum(imaxLocal, coords[0]);
 | 
					 | 
				
			||||||
            int joffset = sum(jmaxLocal, coords[1]);
 | 
					 | 
				
			||||||
            offset[i]   = (joffset * solver->imax) + ioffset;
 | 
					 | 
				
			||||||
            printf("Rank: %d, Coords(i,j): %d %d, Size(i,j): %d %d, Offset(i,j): %d %d\n",
 | 
					 | 
				
			||||||
                i,
 | 
					 | 
				
			||||||
                coords[0],
 | 
					 | 
				
			||||||
                coords[1],
 | 
					 | 
				
			||||||
                imaxLocal[i],
 | 
					 | 
				
			||||||
                jmaxLocal[i],
 | 
					 | 
				
			||||||
                ioffset,
 | 
					 | 
				
			||||||
                joffset);
 | 
					 | 
				
			||||||
        }
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    /* collect P */
 | 
					 | 
				
			||||||
    assembleResult(solver, solver->p, Pall, imaxLocal, jmaxLocal, offset);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    /* collect U */
 | 
					 | 
				
			||||||
    assembleResult(solver, solver->u, Uall, imaxLocal, jmaxLocal, offset);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    /* collect V */
 | 
					 | 
				
			||||||
    assembleResult(solver, solver->v, Vall, imaxLocal, jmaxLocal, offset);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    /* write to disk */
 | 
					 | 
				
			||||||
    if (solver->rank == 0) writeResult(solver, Pall, Uall, Vall);
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
static void printConfig(Solver* solver)
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
    if (solver->rank == 0) {
 | 
					 | 
				
			||||||
        printf("Parameters for #%s#\n", solver->problem);
 | 
					 | 
				
			||||||
        printf("Boundary conditions N:%d E:%d S:%d W:%d\n",
 | 
					 | 
				
			||||||
            solver->bcN,
 | 
					 | 
				
			||||||
            solver->bcE,
 | 
					 | 
				
			||||||
            solver->bcS,
 | 
					 | 
				
			||||||
            solver->bcW);
 | 
					 | 
				
			||||||
        printf("\tReynolds number: %.2f\n", solver->re);
 | 
					 | 
				
			||||||
        printf("\tGx Gy: %.2f %.2f\n", solver->gx, solver->gy);
 | 
					 | 
				
			||||||
        printf("Geometry data:\n");
 | 
					 | 
				
			||||||
        printf("\tDomain box size (x, y): %.2f, %.2f\n",
 | 
					 | 
				
			||||||
            solver->xlength,
 | 
					 | 
				
			||||||
            solver->ylength);
 | 
					 | 
				
			||||||
        printf("\tCells (x, y): %d, %d\n", solver->imax, solver->jmax);
 | 
					 | 
				
			||||||
        printf("Timestep parameters:\n");
 | 
					 | 
				
			||||||
        printf("\tDefault stepsize: %.2f, Final time %.2f\n", solver->dt, solver->te);
 | 
					 | 
				
			||||||
        printf("\tdt bound: %.6f\n", solver->dtBound);
 | 
					 | 
				
			||||||
        printf("\tTau factor: %.2f\n", solver->tau);
 | 
					 | 
				
			||||||
        printf("Iterative solver parameters:\n");
 | 
					 | 
				
			||||||
        printf("\tMax iterations: %d\n", solver->itermax);
 | 
					 | 
				
			||||||
        printf("\tepsilon (stopping tolerance) : %f\n", solver->eps);
 | 
					 | 
				
			||||||
        printf("\tgamma factor: %f\n", solver->gamma);
 | 
					 | 
				
			||||||
        printf("\tomega (SOR relaxation): %f\n", solver->omega);
 | 
					 | 
				
			||||||
        printf("Communication parameters:\n");
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
    for (int i = 0; i < solver->size; i++) {
 | 
					 | 
				
			||||||
        if (i == solver->rank) {
 | 
					 | 
				
			||||||
            printf("\tRank %d of %d\n", solver->rank, solver->size);
 | 
					 | 
				
			||||||
            printf("\tNeighbours (b, t, l, r): %d, %d, %d, %d\n",
 | 
					 | 
				
			||||||
                solver->jNeighbours[0],
 | 
					 | 
				
			||||||
                solver->jNeighbours[1],
 | 
					 | 
				
			||||||
                solver->iNeighbours[0],
 | 
					 | 
				
			||||||
                solver->iNeighbours[1]);
 | 
					 | 
				
			||||||
            printf("\tCoordinates %d,%d\n", solver->coords[0], solver->coords[1]);
 | 
					 | 
				
			||||||
            printf("\tLocal domain size: %dx%d\n", solver->imaxLocal, solver->jmaxLocal);
 | 
					 | 
				
			||||||
            fflush(stdout);
 | 
					 | 
				
			||||||
        }
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
void initSolver(Solver* solver, Parameter* params)
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
    solver->problem = params->name;
 | 
					 | 
				
			||||||
    solver->bcN     = params->bcN;
 | 
					 | 
				
			||||||
    solver->bcS     = params->bcS;
 | 
					 | 
				
			||||||
    solver->bcW     = params->bcW;
 | 
					 | 
				
			||||||
    solver->bcE     = params->bcE;
 | 
					 | 
				
			||||||
    solver->imax    = params->imax;
 | 
					 | 
				
			||||||
    solver->jmax    = params->jmax;
 | 
					 | 
				
			||||||
    solver->xlength = params->xlength;
 | 
					 | 
				
			||||||
    solver->ylength = params->ylength;
 | 
					 | 
				
			||||||
    solver->dx      = params->xlength / params->imax;
 | 
					 | 
				
			||||||
    solver->dy      = params->ylength / params->jmax;
 | 
					 | 
				
			||||||
    solver->eps     = params->eps;
 | 
					 | 
				
			||||||
    solver->omega   = params->omg;
 | 
					 | 
				
			||||||
    solver->itermax = params->itermax;
 | 
					 | 
				
			||||||
    solver->re      = params->re;
 | 
					 | 
				
			||||||
    solver->gx      = params->gx;
 | 
					 | 
				
			||||||
    solver->gy      = params->gy;
 | 
					 | 
				
			||||||
    solver->dt      = params->dt;
 | 
					 | 
				
			||||||
    solver->te      = params->te;
 | 
					 | 
				
			||||||
    solver->tau     = params->tau;
 | 
					 | 
				
			||||||
    solver->gamma   = params->gamma;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    /* setup communication */
 | 
					 | 
				
			||||||
    MPI_Comm_rank(MPI_COMM_WORLD, &(solver->rank));
 | 
					 | 
				
			||||||
    MPI_Comm_size(MPI_COMM_WORLD, &(solver->size));
 | 
					 | 
				
			||||||
    int dims[NDIMS]    = { 0, 0 };
 | 
					 | 
				
			||||||
    int periods[NDIMS] = { 0, 0 };
 | 
					 | 
				
			||||||
    MPI_Dims_create(solver->size, NDIMS, dims);
 | 
					 | 
				
			||||||
    MPI_Cart_create(MPI_COMM_WORLD, NDIMS, dims, periods, 0, &solver->comm);
 | 
					 | 
				
			||||||
    MPI_Cart_shift(solver->comm,
 | 
					 | 
				
			||||||
        IDIM,
 | 
					 | 
				
			||||||
        1,
 | 
					 | 
				
			||||||
        &solver->iNeighbours[0],
 | 
					 | 
				
			||||||
        &solver->iNeighbours[1]);
 | 
					 | 
				
			||||||
    MPI_Cart_shift(solver->comm,
 | 
					 | 
				
			||||||
        JDIM,
 | 
					 | 
				
			||||||
        1,
 | 
					 | 
				
			||||||
        &solver->jNeighbours[0],
 | 
					 | 
				
			||||||
        &solver->jNeighbours[1]);
 | 
					 | 
				
			||||||
    MPI_Cart_get(solver->comm, NDIMS, solver->dims, periods, solver->coords);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    solver->imaxLocal = sizeOfRank(solver->rank, dims[IDIM], solver->imax);
 | 
					 | 
				
			||||||
    solver->jmaxLocal = sizeOfRank(solver->rank, dims[JDIM], solver->jmax);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    MPI_Type_contiguous(solver->imaxLocal, MPI_DOUBLE, &solver->jBufferType);
 | 
					 | 
				
			||||||
    MPI_Type_commit(&solver->jBufferType);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    MPI_Type_vector(solver->jmaxLocal,
 | 
					 | 
				
			||||||
        1,
 | 
					 | 
				
			||||||
        solver->imaxLocal + 2,
 | 
					 | 
				
			||||||
        MPI_DOUBLE,
 | 
					 | 
				
			||||||
        &solver->iBufferType);
 | 
					 | 
				
			||||||
    MPI_Type_commit(&solver->iBufferType);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    /* allocate arrays */
 | 
					 | 
				
			||||||
    int imaxLocal   = solver->imaxLocal;
 | 
					 | 
				
			||||||
    int jmaxLocal   = solver->jmaxLocal;
 | 
					 | 
				
			||||||
    size_t bytesize = (imaxLocal + 2) * (jmaxLocal + 2) * sizeof(double);
 | 
					 | 
				
			||||||
    solver->u       = allocate(64, bytesize);
 | 
					 | 
				
			||||||
    solver->v       = allocate(64, bytesize);
 | 
					 | 
				
			||||||
    solver->p       = allocate(64, bytesize);
 | 
					 | 
				
			||||||
    solver->rhs     = allocate(64, bytesize);
 | 
					 | 
				
			||||||
    solver->f       = allocate(64, bytesize);
 | 
					 | 
				
			||||||
    solver->g       = allocate(64, bytesize);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    for (int i = 0; i < (imaxLocal + 2) * (jmaxLocal + 2); i++) {
 | 
					 | 
				
			||||||
        solver->u[i]   = params->u_init;
 | 
					 | 
				
			||||||
        solver->v[i]   = params->v_init;
 | 
					 | 
				
			||||||
        solver->p[i]   = params->p_init;
 | 
					 | 
				
			||||||
        solver->rhs[i] = 0.0;
 | 
					 | 
				
			||||||
        solver->f[i]   = 0.0;
 | 
					 | 
				
			||||||
        solver->g[i]   = 0.0;
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    double dx          = solver->dx;
 | 
					 | 
				
			||||||
    double dy          = solver->dy;
 | 
					 | 
				
			||||||
    double inv_sqr_sum = 1.0 / (dx * dx) + 1.0 / (dy * dy);
 | 
					 | 
				
			||||||
    solver->dtBound    = 0.5 * solver->re * 1.0 / inv_sqr_sum;
 | 
					 | 
				
			||||||
#ifdef VERBOSE
 | 
					 | 
				
			||||||
    printConfig(solver);
 | 
					 | 
				
			||||||
#endif
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
void computeRHS(Solver* solver)
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
    int imaxLocal = solver->imaxLocal;
 | 
					 | 
				
			||||||
    int jmaxLocal = solver->jmaxLocal;
 | 
					 | 
				
			||||||
    double idx    = 1.0 / solver->dx;
 | 
					 | 
				
			||||||
    double idy    = 1.0 / solver->dy;
 | 
					 | 
				
			||||||
    double idt    = 1.0 / solver->dt;
 | 
					 | 
				
			||||||
    double* rhs   = solver->rhs;
 | 
					 | 
				
			||||||
    double* f     = solver->f;
 | 
					 | 
				
			||||||
    double* g     = solver->g;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    shift(solver);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    for (int j = 1; j < jmaxLocal + 1; j++) {
 | 
					 | 
				
			||||||
        for (int i = 1; i < imaxLocal + 1; i++) {
 | 
					 | 
				
			||||||
            RHS(i, j) = ((F(i, j) - F(i - 1, j)) * idx + (G(i, j) - G(i, j - 1)) * idy) *
 | 
					 | 
				
			||||||
                        idt;
 | 
					 | 
				
			||||||
        }
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
int solve(Solver* solver)
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
    int imax      = solver->imax;
 | 
					 | 
				
			||||||
    int jmax      = solver->jmax;
 | 
					 | 
				
			||||||
    int imaxLocal = solver->imaxLocal;
 | 
					 | 
				
			||||||
    int jmaxLocal = solver->jmaxLocal;
 | 
					 | 
				
			||||||
    double eps    = solver->eps;
 | 
					 | 
				
			||||||
    int itermax   = solver->itermax;
 | 
					 | 
				
			||||||
    double dx2    = solver->dx * solver->dx;
 | 
					 | 
				
			||||||
    double dy2    = solver->dy * solver->dy;
 | 
					 | 
				
			||||||
    double idx2   = 1.0 / dx2;
 | 
					 | 
				
			||||||
    double idy2   = 1.0 / dy2;
 | 
					 | 
				
			||||||
    double factor = solver->omega * 0.5 * (dx2 * dy2) / (dx2 + dy2);
 | 
					 | 
				
			||||||
    double* p     = solver->p;
 | 
					 | 
				
			||||||
    double* rhs   = solver->rhs;
 | 
					 | 
				
			||||||
    double epssq  = eps * eps;
 | 
					 | 
				
			||||||
    int it        = 0;
 | 
					 | 
				
			||||||
    double res    = 1.0;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    while ((res >= epssq) && (it < itermax)) {
 | 
					 | 
				
			||||||
        res = 0.0;
 | 
					 | 
				
			||||||
        exchange(solver, p);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        for (int j = 1; j < jmaxLocal + 1; j++) {
 | 
					 | 
				
			||||||
            for (int i = 1; i < imaxLocal + 1; i++) {
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
                double r = RHS(i, j) -
 | 
					 | 
				
			||||||
                           ((P(i + 1, j) - 2.0 * P(i, j) + P(i - 1, j)) * idx2 +
 | 
					 | 
				
			||||||
                               (P(i, j + 1) - 2.0 * P(i, j) + P(i, j - 1)) * idy2);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
                P(i, j) -= (factor * r);
 | 
					 | 
				
			||||||
                res += (r * r);
 | 
					 | 
				
			||||||
            }
 | 
					 | 
				
			||||||
        }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        if (solver->coords[JDIM] == 0) { // set bottom bc
 | 
					 | 
				
			||||||
            for (int i = 1; i < imaxLocal + 1; i++) {
 | 
					 | 
				
			||||||
                P(i, 0) = P(i, 1);
 | 
					 | 
				
			||||||
            }
 | 
					 | 
				
			||||||
        }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        if (solver->coords[JDIM] == (solver->dims[JDIM] - 1)) { // set top bc
 | 
					 | 
				
			||||||
            for (int i = 1; i < imaxLocal + 1; i++) {
 | 
					 | 
				
			||||||
                P(i, jmaxLocal + 1) = P(i, jmaxLocal);
 | 
					 | 
				
			||||||
            }
 | 
					 | 
				
			||||||
        }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        if (solver->coords[IDIM] == 0) { // set left bc
 | 
					 | 
				
			||||||
            for (int j = 1; j < jmaxLocal + 1; j++) {
 | 
					 | 
				
			||||||
                P(0, j) = P(1, j);
 | 
					 | 
				
			||||||
            }
 | 
					 | 
				
			||||||
        }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        if (solver->coords[IDIM] == (solver->dims[IDIM] - 1)) { // set right bc
 | 
					 | 
				
			||||||
            for (int j = 1; j < jmaxLocal + 1; j++) {
 | 
					 | 
				
			||||||
                P(imaxLocal + 1, j) = P(imaxLocal, j);
 | 
					 | 
				
			||||||
            }
 | 
					 | 
				
			||||||
        }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        MPI_Allreduce(MPI_IN_PLACE, &res, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
 | 
					 | 
				
			||||||
        res = res / (double)(imax * jmax);
 | 
					 | 
				
			||||||
#ifdef DEBUG
 | 
					 | 
				
			||||||
        if (solver->rank == 0) {
 | 
					 | 
				
			||||||
            printf("%d Residuum: %e\n", it, res);
 | 
					 | 
				
			||||||
        }
 | 
					 | 
				
			||||||
#endif
 | 
					 | 
				
			||||||
        it++;
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
#ifdef VERBOSE
 | 
					 | 
				
			||||||
    if (solver->rank == 0) {
 | 
					 | 
				
			||||||
        printf("Solver took %d iterations to reach %f\n", it, sqrt(res));
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
#endif
 | 
					 | 
				
			||||||
    if (res < eps) {
 | 
					 | 
				
			||||||
        return 0;
 | 
					 | 
				
			||||||
    } else {
 | 
					 | 
				
			||||||
        return 1;
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
static double maxElement(Solver* solver, double* m)
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
    int size      = (solver->imaxLocal + 2) * (solver->jmaxLocal + 2);
 | 
					 | 
				
			||||||
    double maxval = DBL_MIN;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    for (int i = 0; i < size; i++) {
 | 
					 | 
				
			||||||
        maxval = MAX(maxval, fabs(m[i]));
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    MPI_Allreduce(MPI_IN_PLACE, &maxval, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD);
 | 
					 | 
				
			||||||
    return maxval;
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
void computeTimestep(Solver* solver)
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
    double dt   = solver->dtBound;
 | 
					 | 
				
			||||||
    double dx   = solver->dx;
 | 
					 | 
				
			||||||
    double dy   = solver->dy;
 | 
					 | 
				
			||||||
    double umax = maxElement(solver, solver->u);
 | 
					 | 
				
			||||||
    double vmax = maxElement(solver, solver->v);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    if (umax > 0) {
 | 
					 | 
				
			||||||
        dt = (dt > dx / umax) ? dx / umax : dt;
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
    if (vmax > 0) {
 | 
					 | 
				
			||||||
        dt = (dt > dy / vmax) ? dy / vmax : dt;
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    solver->dt = dt * solver->tau;
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
void setBoundaryConditions(Solver* solver)
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
    int imaxLocal = solver->imaxLocal;
 | 
					 | 
				
			||||||
    int jmaxLocal = solver->jmaxLocal;
 | 
					 | 
				
			||||||
    double* u     = solver->u;
 | 
					 | 
				
			||||||
    double* v     = solver->v;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    // Northern boundary
 | 
					 | 
				
			||||||
    if (solver->coords[JDIM] == (solver->dims[JDIM] - 1)) { // set top bc
 | 
					 | 
				
			||||||
        switch (solver->bcN) {
 | 
					 | 
				
			||||||
        case NOSLIP:
 | 
					 | 
				
			||||||
            for (int i = 1; i < imaxLocal + 1; i++) {
 | 
					 | 
				
			||||||
                V(i, jmaxLocal)     = 0.0;
 | 
					 | 
				
			||||||
                U(i, jmaxLocal + 1) = -U(i, jmaxLocal);
 | 
					 | 
				
			||||||
            }
 | 
					 | 
				
			||||||
            break;
 | 
					 | 
				
			||||||
        case SLIP:
 | 
					 | 
				
			||||||
            for (int i = 1; i < imaxLocal + 1; i++) {
 | 
					 | 
				
			||||||
                V(i, jmaxLocal)     = 0.0;
 | 
					 | 
				
			||||||
                U(i, jmaxLocal + 1) = U(i, jmaxLocal);
 | 
					 | 
				
			||||||
            }
 | 
					 | 
				
			||||||
            break;
 | 
					 | 
				
			||||||
        case OUTFLOW:
 | 
					 | 
				
			||||||
            for (int i = 1; i < imaxLocal + 1; i++) {
 | 
					 | 
				
			||||||
                U(i, jmaxLocal + 1) = U(i, jmaxLocal);
 | 
					 | 
				
			||||||
                V(i, jmaxLocal)     = V(i, jmaxLocal - 1);
 | 
					 | 
				
			||||||
            }
 | 
					 | 
				
			||||||
            break;
 | 
					 | 
				
			||||||
        case PERIODIC:
 | 
					 | 
				
			||||||
            break;
 | 
					 | 
				
			||||||
        }
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    // Southern boundary
 | 
					 | 
				
			||||||
    if (solver->coords[JDIM] == 0) { // set bottom bc
 | 
					 | 
				
			||||||
        switch (solver->bcS) {
 | 
					 | 
				
			||||||
        case NOSLIP:
 | 
					 | 
				
			||||||
            for (int i = 1; i < imaxLocal + 1; i++) {
 | 
					 | 
				
			||||||
                V(i, 0) = 0.0;
 | 
					 | 
				
			||||||
                U(i, 0) = -U(i, 1);
 | 
					 | 
				
			||||||
            }
 | 
					 | 
				
			||||||
            break;
 | 
					 | 
				
			||||||
        case SLIP:
 | 
					 | 
				
			||||||
            for (int i = 1; i < imaxLocal + 1; i++) {
 | 
					 | 
				
			||||||
                V(i, 0) = 0.0;
 | 
					 | 
				
			||||||
                U(i, 0) = U(i, 1);
 | 
					 | 
				
			||||||
            }
 | 
					 | 
				
			||||||
            break;
 | 
					 | 
				
			||||||
        case OUTFLOW:
 | 
					 | 
				
			||||||
            for (int i = 1; i < imaxLocal + 1; i++) {
 | 
					 | 
				
			||||||
                U(i, 0) = U(i, 1);
 | 
					 | 
				
			||||||
                V(i, 0) = V(i, 1);
 | 
					 | 
				
			||||||
            }
 | 
					 | 
				
			||||||
            break;
 | 
					 | 
				
			||||||
        case PERIODIC:
 | 
					 | 
				
			||||||
            break;
 | 
					 | 
				
			||||||
        }
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    // Eastern boundary
 | 
					 | 
				
			||||||
    if (solver->coords[IDIM] == (solver->dims[IDIM] - 1)) { // set right bc
 | 
					 | 
				
			||||||
        switch (solver->bcE) {
 | 
					 | 
				
			||||||
        case NOSLIP:
 | 
					 | 
				
			||||||
            for (int j = 1; j < jmaxLocal + 1; j++) {
 | 
					 | 
				
			||||||
                U(imaxLocal, j)     = 0.0;
 | 
					 | 
				
			||||||
                V(imaxLocal + 1, j) = -V(imaxLocal, j);
 | 
					 | 
				
			||||||
            }
 | 
					 | 
				
			||||||
            break;
 | 
					 | 
				
			||||||
        case SLIP:
 | 
					 | 
				
			||||||
            for (int j = 1; j < jmaxLocal + 1; j++) {
 | 
					 | 
				
			||||||
                U(imaxLocal, j)     = 0.0;
 | 
					 | 
				
			||||||
                V(imaxLocal + 1, j) = V(imaxLocal, j);
 | 
					 | 
				
			||||||
            }
 | 
					 | 
				
			||||||
            break;
 | 
					 | 
				
			||||||
        case OUTFLOW:
 | 
					 | 
				
			||||||
            for (int j = 1; j < jmaxLocal + 1; j++) {
 | 
					 | 
				
			||||||
                U(imaxLocal, j)     = U(imaxLocal - 1, j);
 | 
					 | 
				
			||||||
                V(imaxLocal + 1, j) = V(imaxLocal, j);
 | 
					 | 
				
			||||||
            }
 | 
					 | 
				
			||||||
            break;
 | 
					 | 
				
			||||||
        case PERIODIC:
 | 
					 | 
				
			||||||
            break;
 | 
					 | 
				
			||||||
        }
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    // Western boundary
 | 
					 | 
				
			||||||
    if (solver->coords[IDIM] == 0) { // set left bc
 | 
					 | 
				
			||||||
        switch (solver->bcW) {
 | 
					 | 
				
			||||||
        case NOSLIP:
 | 
					 | 
				
			||||||
            for (int j = 1; j < jmaxLocal + 1; j++) {
 | 
					 | 
				
			||||||
                U(0, j) = 0.0;
 | 
					 | 
				
			||||||
                V(0, j) = -V(1, j);
 | 
					 | 
				
			||||||
            }
 | 
					 | 
				
			||||||
            break;
 | 
					 | 
				
			||||||
        case SLIP:
 | 
					 | 
				
			||||||
            for (int j = 1; j < jmaxLocal + 1; j++) {
 | 
					 | 
				
			||||||
                U(0, j) = 0.0;
 | 
					 | 
				
			||||||
                V(0, j) = V(1, j);
 | 
					 | 
				
			||||||
            }
 | 
					 | 
				
			||||||
            break;
 | 
					 | 
				
			||||||
        case OUTFLOW:
 | 
					 | 
				
			||||||
            for (int j = 1; j < jmaxLocal + 1; j++) {
 | 
					 | 
				
			||||||
                U(0, j) = U(1, j);
 | 
					 | 
				
			||||||
                V(0, j) = V(1, j);
 | 
					 | 
				
			||||||
            }
 | 
					 | 
				
			||||||
            break;
 | 
					 | 
				
			||||||
        case PERIODIC:
 | 
					 | 
				
			||||||
            break;
 | 
					 | 
				
			||||||
        }
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
void setSpecialBoundaryCondition(Solver* solver)
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
    int imaxLocal = solver->imaxLocal;
 | 
					 | 
				
			||||||
    int jmaxLocal = solver->jmaxLocal;
 | 
					 | 
				
			||||||
    double* u     = solver->u;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    if (strcmp(solver->problem, "dcavity") == 0) {
 | 
					 | 
				
			||||||
        if (solver->coords[JDIM] == (solver->dims[JDIM] - 1)) { // set top bc
 | 
					 | 
				
			||||||
            for (int i = 1; i < imaxLocal + 1; i++) {
 | 
					 | 
				
			||||||
                U(i, jmaxLocal + 1) = 2.0 - U(i, jmaxLocal);
 | 
					 | 
				
			||||||
            }
 | 
					 | 
				
			||||||
        }
 | 
					 | 
				
			||||||
    } else if (strcmp(solver->problem, "canal") == 0) {
 | 
					 | 
				
			||||||
        if (solver->coords[IDIM] == 0) { // set left bc
 | 
					 | 
				
			||||||
            double ylength = solver->ylength;
 | 
					 | 
				
			||||||
            double dy      = solver->dy;
 | 
					 | 
				
			||||||
            int rest       = solver->jmax % solver->size;
 | 
					 | 
				
			||||||
            int yc         = solver->rank * (solver->jmax / solver->size) +
 | 
					 | 
				
			||||||
                     MIN(rest, solver->rank);
 | 
					 | 
				
			||||||
            double ys = dy * (yc + 0.5);
 | 
					 | 
				
			||||||
            double y;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
            /* printf("RANK %d yc: %d ys: %f\n", solver->rank, yc, ys); */
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
            for (int j = 1; j < jmaxLocal + 1; j++) {
 | 
					 | 
				
			||||||
                y       = ys + dy * (j - 0.5);
 | 
					 | 
				
			||||||
                U(0, j) = y * (ylength - y) * 4.0 / (ylength * ylength);
 | 
					 | 
				
			||||||
            }
 | 
					 | 
				
			||||||
        }
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
    /* print(solver, solver->u); */
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
void computeFG(Solver* solver)
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
    double* u        = solver->u;
 | 
					 | 
				
			||||||
    double* v        = solver->v;
 | 
					 | 
				
			||||||
    double* f        = solver->f;
 | 
					 | 
				
			||||||
    double* g        = solver->g;
 | 
					 | 
				
			||||||
    int imaxLocal    = solver->imaxLocal;
 | 
					 | 
				
			||||||
    int jmaxLocal    = solver->jmaxLocal;
 | 
					 | 
				
			||||||
    double gx        = solver->gx;
 | 
					 | 
				
			||||||
    double gy        = solver->gy;
 | 
					 | 
				
			||||||
    double gamma     = solver->gamma;
 | 
					 | 
				
			||||||
    double dt        = solver->dt;
 | 
					 | 
				
			||||||
    double inverseRe = 1.0 / solver->re;
 | 
					 | 
				
			||||||
    double inverseDx = 1.0 / solver->dx;
 | 
					 | 
				
			||||||
    double inverseDy = 1.0 / solver->dy;
 | 
					 | 
				
			||||||
    double du2dx, dv2dy, duvdx, duvdy;
 | 
					 | 
				
			||||||
    double du2dx2, du2dy2, dv2dx2, dv2dy2;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    exchange(solver, u);
 | 
					 | 
				
			||||||
    exchange(solver, v);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    for (int j = 1; j < jmaxLocal + 1; j++) {
 | 
					 | 
				
			||||||
        for (int i = 1; i < imaxLocal + 1; i++) {
 | 
					 | 
				
			||||||
            du2dx = inverseDx * 0.25 *
 | 
					 | 
				
			||||||
                        ((U(i, j) + U(i + 1, j)) * (U(i, j) + U(i + 1, j)) -
 | 
					 | 
				
			||||||
                            (U(i, j) + U(i - 1, j)) * (U(i, j) + U(i - 1, j))) +
 | 
					 | 
				
			||||||
                    gamma * inverseDx * 0.25 *
 | 
					 | 
				
			||||||
                        (fabs(U(i, j) + U(i + 1, j)) * (U(i, j) - U(i + 1, j)) +
 | 
					 | 
				
			||||||
                            fabs(U(i, j) + U(i - 1, j)) * (U(i, j) - U(i - 1, j)));
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
            duvdy = inverseDy * 0.25 *
 | 
					 | 
				
			||||||
                        ((V(i, j) + V(i + 1, j)) * (U(i, j) + U(i, j + 1)) -
 | 
					 | 
				
			||||||
                            (V(i, j - 1) + V(i + 1, j - 1)) * (U(i, j) + U(i, j - 1))) +
 | 
					 | 
				
			||||||
                    gamma * inverseDy * 0.25 *
 | 
					 | 
				
			||||||
                        (fabs(V(i, j) + V(i + 1, j)) * (U(i, j) - U(i, j + 1)) +
 | 
					 | 
				
			||||||
                            fabs(V(i, j - 1) + V(i + 1, j - 1)) *
 | 
					 | 
				
			||||||
                                (U(i, j) - U(i, j - 1)));
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
            du2dx2  = inverseDx * inverseDx * (U(i + 1, j) - 2.0 * U(i, j) + U(i - 1, j));
 | 
					 | 
				
			||||||
            du2dy2  = inverseDy * inverseDy * (U(i, j + 1) - 2.0 * U(i, j) + U(i, j - 1));
 | 
					 | 
				
			||||||
            F(i, j) = U(i, j) + dt * (inverseRe * (du2dx2 + du2dy2) - du2dx - duvdy + gx);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
            duvdx = inverseDx * 0.25 *
 | 
					 | 
				
			||||||
                        ((U(i, j) + U(i, j + 1)) * (V(i, j) + V(i + 1, j)) -
 | 
					 | 
				
			||||||
                            (U(i - 1, j) + U(i - 1, j + 1)) * (V(i, j) + V(i - 1, j))) +
 | 
					 | 
				
			||||||
                    gamma * inverseDx * 0.25 *
 | 
					 | 
				
			||||||
                        (fabs(U(i, j) + U(i, j + 1)) * (V(i, j) - V(i + 1, j)) +
 | 
					 | 
				
			||||||
                            fabs(U(i - 1, j) + U(i - 1, j + 1)) *
 | 
					 | 
				
			||||||
                                (V(i, j) - V(i - 1, j)));
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
            dv2dy = inverseDy * 0.25 *
 | 
					 | 
				
			||||||
                        ((V(i, j) + V(i, j + 1)) * (V(i, j) + V(i, j + 1)) -
 | 
					 | 
				
			||||||
                            (V(i, j) + V(i, j - 1)) * (V(i, j) + V(i, j - 1))) +
 | 
					 | 
				
			||||||
                    gamma * inverseDy * 0.25 *
 | 
					 | 
				
			||||||
                        (fabs(V(i, j) + V(i, j + 1)) * (V(i, j) - V(i, j + 1)) +
 | 
					 | 
				
			||||||
                            fabs(V(i, j) + V(i, j - 1)) * (V(i, j) - V(i, j - 1)));
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
            dv2dx2  = inverseDx * inverseDx * (V(i + 1, j) - 2.0 * V(i, j) + V(i - 1, j));
 | 
					 | 
				
			||||||
            dv2dy2  = inverseDy * inverseDy * (V(i, j + 1) - 2.0 * V(i, j) + V(i, j - 1));
 | 
					 | 
				
			||||||
            G(i, j) = V(i, j) + dt * (inverseRe * (dv2dx2 + dv2dy2) - duvdx - dv2dy + gy);
 | 
					 | 
				
			||||||
        }
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    /* ----------------------------- boundary of F --------------------------- */
 | 
					 | 
				
			||||||
    if (solver->coords[IDIM] == 0) { // set left bc
 | 
					 | 
				
			||||||
        for (int j = 1; j < jmaxLocal + 1; j++) {
 | 
					 | 
				
			||||||
            F(0, j) = U(0, j);
 | 
					 | 
				
			||||||
        }
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    if (solver->coords[IDIM] == (solver->dims[IDIM] - 1)) { // set right bc
 | 
					 | 
				
			||||||
        for (int j = 1; j < jmaxLocal + 1; j++) {
 | 
					 | 
				
			||||||
            F(imaxLocal, j) = U(imaxLocal, j);
 | 
					 | 
				
			||||||
        }
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    /* ----------------------------- boundary of G --------------------------- */
 | 
					 | 
				
			||||||
    if (solver->coords[JDIM] == 0) { // set bottom bc
 | 
					 | 
				
			||||||
        for (int i = 1; i < imaxLocal + 1; i++) {
 | 
					 | 
				
			||||||
            G(i, 0) = V(i, 0);
 | 
					 | 
				
			||||||
        }
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    if (solver->coords[JDIM] == (solver->dims[JDIM] - 1)) { // set top bc
 | 
					 | 
				
			||||||
        for (int i = 1; i < imaxLocal + 1; i++) {
 | 
					 | 
				
			||||||
            G(i, jmaxLocal) = V(i, jmaxLocal);
 | 
					 | 
				
			||||||
        }
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
void adaptUV(Solver* solver)
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
    int imaxLocal  = solver->imaxLocal;
 | 
					 | 
				
			||||||
    int jmaxLocal  = solver->jmaxLocal;
 | 
					 | 
				
			||||||
    double* p      = solver->p;
 | 
					 | 
				
			||||||
    double* u      = solver->u;
 | 
					 | 
				
			||||||
    double* v      = solver->v;
 | 
					 | 
				
			||||||
    double* f      = solver->f;
 | 
					 | 
				
			||||||
    double* g      = solver->g;
 | 
					 | 
				
			||||||
    double factorX = solver->dt / solver->dx;
 | 
					 | 
				
			||||||
    double factorY = solver->dt / solver->dy;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    for (int j = 1; j < jmaxLocal + 1; j++) {
 | 
					 | 
				
			||||||
        for (int i = 1; i < imaxLocal + 1; i++) {
 | 
					 | 
				
			||||||
            U(i, j) = F(i, j) - (P(i + 1, j) - P(i, j)) * factorX;
 | 
					 | 
				
			||||||
            V(i, j) = G(i, j) - (P(i, j + 1) - P(i, j)) * factorY;
 | 
					 | 
				
			||||||
        }
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
void writeResult(Solver* solver, double* p, double* u, double* v)
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
    int imax  = solver->imax;
 | 
					 | 
				
			||||||
    int jmax  = solver->jmax;
 | 
					 | 
				
			||||||
    double dx = solver->dx;
 | 
					 | 
				
			||||||
    double dy = solver->dy;
 | 
					 | 
				
			||||||
    double x = 0.0, y = 0.0;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    FILE* fp;
 | 
					 | 
				
			||||||
    fp = fopen("pressure.dat", "w");
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    if (fp == NULL) {
 | 
					 | 
				
			||||||
        printf("Error!\n");
 | 
					 | 
				
			||||||
        exit(EXIT_FAILURE);
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    for (int j = 1; j < jmax; j++) {
 | 
					 | 
				
			||||||
        y = (double)(j - 0.5) * dy;
 | 
					 | 
				
			||||||
        for (int i = 1; i < imax; i++) {
 | 
					 | 
				
			||||||
            x = (double)(i - 0.5) * dx;
 | 
					 | 
				
			||||||
            fprintf(fp, "%.2f %.2f %f\n", x, y, p[j * (imax) + i]);
 | 
					 | 
				
			||||||
        }
 | 
					 | 
				
			||||||
        fprintf(fp, "\n");
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    fclose(fp);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    fp = fopen("velocity.dat", "w");
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    if (fp == NULL) {
 | 
					 | 
				
			||||||
        printf("Error!\n");
 | 
					 | 
				
			||||||
        exit(EXIT_FAILURE);
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    for (int j = 1; j < jmax; j++) {
 | 
					 | 
				
			||||||
        y = dy * (j - 0.5);
 | 
					 | 
				
			||||||
        for (int i = 1; i < imax; i++) {
 | 
					 | 
				
			||||||
            x            = dx * (i - 0.5);
 | 
					 | 
				
			||||||
            double vel_u = (u[j * (imax) + i] + u[j * (imax) + (i - 1)]) / 2.0;
 | 
					 | 
				
			||||||
            double vel_v = (v[j * (imax) + i] + v[(j - 1) * (imax) + i]) / 2.0;
 | 
					 | 
				
			||||||
            double len   = sqrt((vel_u * vel_u) + (vel_v * vel_v));
 | 
					 | 
				
			||||||
            fprintf(fp, "%.2f %.2f %f %f %f\n", x, y, vel_u, vel_v, len);
 | 
					 | 
				
			||||||
        }
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    fclose(fp);
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
@@ -1,56 +0,0 @@
 | 
				
			|||||||
/*
 | 
					 | 
				
			||||||
 * Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
 | 
					 | 
				
			||||||
 * All rights reserved. This file is part of nusif-solver.
 | 
					 | 
				
			||||||
 * Use of this source code is governed by a MIT style
 | 
					 | 
				
			||||||
 * license that can be found in the LICENSE file.
 | 
					 | 
				
			||||||
 */
 | 
					 | 
				
			||||||
#ifndef __SOLVER_H_
 | 
					 | 
				
			||||||
#define __SOLVER_H_
 | 
					 | 
				
			||||||
#include "parameter.h"
 | 
					 | 
				
			||||||
#include <mpi.h>
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
enum BC { NOSLIP = 1, SLIP, OUTFLOW, PERIODIC };
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
typedef struct {
 | 
					 | 
				
			||||||
    /* geometry and grid information */
 | 
					 | 
				
			||||||
    double dx, dy;
 | 
					 | 
				
			||||||
    int imax, jmax;
 | 
					 | 
				
			||||||
    double xlength, ylength;
 | 
					 | 
				
			||||||
    /* arrays */
 | 
					 | 
				
			||||||
    double *p, *rhs;
 | 
					 | 
				
			||||||
    double *f, *g;
 | 
					 | 
				
			||||||
    double *u, *v;
 | 
					 | 
				
			||||||
    /* parameters */
 | 
					 | 
				
			||||||
    double eps, omega;
 | 
					 | 
				
			||||||
    double re, tau, gamma;
 | 
					 | 
				
			||||||
    double gx, gy;
 | 
					 | 
				
			||||||
    /* time stepping */
 | 
					 | 
				
			||||||
    int itermax;
 | 
					 | 
				
			||||||
    double dt, te;
 | 
					 | 
				
			||||||
    double dtBound;
 | 
					 | 
				
			||||||
    char* problem;
 | 
					 | 
				
			||||||
    int bcN, bcS, bcW, bcE;
 | 
					 | 
				
			||||||
    /* mpi */
 | 
					 | 
				
			||||||
    int rank;
 | 
					 | 
				
			||||||
    int size;
 | 
					 | 
				
			||||||
    MPI_Comm comm;
 | 
					 | 
				
			||||||
    MPI_Datatype iBufferType, jBufferType;
 | 
					 | 
				
			||||||
    int iNeighbours[2], jNeighbours[2];
 | 
					 | 
				
			||||||
    int coords[2], dims[2];
 | 
					 | 
				
			||||||
    int imaxLocal, jmaxLocal;
 | 
					 | 
				
			||||||
} Solver;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
void initSolver(Solver*, Parameter*);
 | 
					 | 
				
			||||||
void computeRHS(Solver*);
 | 
					 | 
				
			||||||
int solve(Solver*);
 | 
					 | 
				
			||||||
void computeTimestep(Solver*);
 | 
					 | 
				
			||||||
void setBoundaryConditions(Solver*);
 | 
					 | 
				
			||||||
void setSpecialBoundaryCondition(Solver*);
 | 
					 | 
				
			||||||
void computeFG(Solver*);
 | 
					 | 
				
			||||||
void adaptUV(Solver*);
 | 
					 | 
				
			||||||
void collectResult(Solver*);
 | 
					 | 
				
			||||||
void writeResult(Solver*, double*, double*, double*);
 | 
					 | 
				
			||||||
void debugExchange(Solver*);
 | 
					 | 
				
			||||||
void debugBC(Solver*);
 | 
					 | 
				
			||||||
void print(Solver*, double*);
 | 
					 | 
				
			||||||
#endif
 | 
					 | 
				
			||||||
@@ -1,24 +0,0 @@
 | 
				
			|||||||
/*
 | 
					 | 
				
			||||||
 * Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
 | 
					 | 
				
			||||||
 * All rights reserved.
 | 
					 | 
				
			||||||
 * Use of this source code is governed by a MIT-style
 | 
					 | 
				
			||||||
 * license that can be found in the LICENSE file.
 | 
					 | 
				
			||||||
 */
 | 
					 | 
				
			||||||
#include <stdlib.h>
 | 
					 | 
				
			||||||
#include <time.h>
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
double getTimeStamp()
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
    struct timespec ts;
 | 
					 | 
				
			||||||
    clock_gettime(CLOCK_MONOTONIC, &ts);
 | 
					 | 
				
			||||||
    return (double)ts.tv_sec + (double)ts.tv_nsec * 1.e-9;
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
double getTimeResolution()
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
    struct timespec ts;
 | 
					 | 
				
			||||||
    clock_getres(CLOCK_MONOTONIC, &ts);
 | 
					 | 
				
			||||||
    return (double)ts.tv_sec + (double)ts.tv_nsec * 1.e-9;
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
double getTimeStamp_() { return getTimeStamp(); }
 | 
					 | 
				
			||||||
@@ -1,14 +0,0 @@
 | 
				
			|||||||
/*
 | 
					 | 
				
			||||||
 * Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
 | 
					 | 
				
			||||||
 * All rights reserved.
 | 
					 | 
				
			||||||
 * Use of this source code is governed by a MIT-style
 | 
					 | 
				
			||||||
 * license that can be found in the LICENSE file.
 | 
					 | 
				
			||||||
 */
 | 
					 | 
				
			||||||
#ifndef __TIMING_H_
 | 
					 | 
				
			||||||
#define __TIMING_H_
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
extern double getTimeStamp();
 | 
					 | 
				
			||||||
extern double getTimeResolution();
 | 
					 | 
				
			||||||
extern double getTimeStamp_();
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
#endif // __TIMING_H_
 | 
					 | 
				
			||||||
@@ -1,22 +0,0 @@
 | 
				
			|||||||
/*
 | 
					 | 
				
			||||||
 * Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
 | 
					 | 
				
			||||||
 * All rights reserved.
 | 
					 | 
				
			||||||
 * Use of this source code is governed by a MIT-style
 | 
					 | 
				
			||||||
 * license that can be found in the LICENSE file.
 | 
					 | 
				
			||||||
 */
 | 
					 | 
				
			||||||
#ifndef __UTIL_H_
 | 
					 | 
				
			||||||
#define __UTIL_H_
 | 
					 | 
				
			||||||
#define HLINE                                                                            \
 | 
					 | 
				
			||||||
    "----------------------------------------------------------------------------\n"
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
#ifndef MIN
 | 
					 | 
				
			||||||
#define MIN(x, y) ((x) < (y) ? (x) : (y))
 | 
					 | 
				
			||||||
#endif
 | 
					 | 
				
			||||||
#ifndef MAX
 | 
					 | 
				
			||||||
#define MAX(x, y) ((x) > (y) ? (x) : (y))
 | 
					 | 
				
			||||||
#endif
 | 
					 | 
				
			||||||
#ifndef ABS
 | 
					 | 
				
			||||||
#define ABS(a) ((a) >= 0 ? (a) : -(a))
 | 
					 | 
				
			||||||
#endif
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
#endif // __UTIL_H_
 | 
					 | 
				
			||||||
@@ -1,7 +0,0 @@
 | 
				
			|||||||
set terminal png size 1024,768 enhanced font ,12
 | 
					 | 
				
			||||||
set output 'p.png'
 | 
					 | 
				
			||||||
set datafile separator whitespace
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
set grid
 | 
					 | 
				
			||||||
set hidden3d
 | 
					 | 
				
			||||||
splot 'pressure.dat' using 1:2:3 with lines
 | 
					 | 
				
			||||||
@@ -1,5 +0,0 @@
 | 
				
			|||||||
set terminal png size 1800,768 enhanced font ,12
 | 
					 | 
				
			||||||
set output 'velocity.png'
 | 
					 | 
				
			||||||
set datafile separator whitespace
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
plot 'velocity.dat' using 1:2:3:4:5 with vectors filled head size 0.01,20,60 lc palette
 | 
					 | 
				
			||||||
@@ -1,71 +0,0 @@
 | 
				
			|||||||
#=======================================================================================
 | 
					 | 
				
			||||||
# Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
 | 
					 | 
				
			||||||
# All rights reserved.
 | 
					 | 
				
			||||||
# Use of this source code is governed by a MIT-style
 | 
					 | 
				
			||||||
# license that can be found in the LICENSE file.
 | 
					 | 
				
			||||||
#=======================================================================================
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
#CONFIGURE BUILD SYSTEM
 | 
					 | 
				
			||||||
TARGET	   = exe-$(TAG)
 | 
					 | 
				
			||||||
BUILD_DIR  = ./$(TAG)
 | 
					 | 
				
			||||||
SRC_DIR    = ./src
 | 
					 | 
				
			||||||
MAKE_DIR   = ./
 | 
					 | 
				
			||||||
Q         ?= @
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
#DO NOT EDIT BELOW
 | 
					 | 
				
			||||||
include $(MAKE_DIR)/config.mk
 | 
					 | 
				
			||||||
include $(MAKE_DIR)/include_$(TAG).mk
 | 
					 | 
				
			||||||
INCLUDES  += -I$(SRC_DIR) -I$(BUILD_DIR)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
VPATH     = $(SRC_DIR)
 | 
					 | 
				
			||||||
SRC       = $(wildcard $(SRC_DIR)/*.c)
 | 
					 | 
				
			||||||
ASM       = $(patsubst $(SRC_DIR)/%.c, $(BUILD_DIR)/%.s, $(SRC))
 | 
					 | 
				
			||||||
OBJ       = $(patsubst $(SRC_DIR)/%.c, $(BUILD_DIR)/%.o, $(SRC))
 | 
					 | 
				
			||||||
SOURCES   = $(SRC) $(wildcard $(SRC_DIR)/*.h)
 | 
					 | 
				
			||||||
CPPFLAGS := $(CPPFLAGS) $(DEFINES) $(OPTIONS) $(INCLUDES)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
${TARGET}: $(BUILD_DIR) $(OBJ)
 | 
					 | 
				
			||||||
	$(info ===>  LINKING  $(TARGET))
 | 
					 | 
				
			||||||
	$(Q)${LINKER} ${LFLAGS} -o $(TARGET) $(OBJ) $(LIBS)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
$(BUILD_DIR)/%.o:  %.c $(MAKE_DIR)/include_$(TAG).mk $(MAKE_DIR)/config.mk
 | 
					 | 
				
			||||||
	$(info ===>  COMPILE  $@)
 | 
					 | 
				
			||||||
	$(CC) -c $(CPPFLAGS) $(CFLAGS) $< -o $@
 | 
					 | 
				
			||||||
	$(Q)$(GCC) $(CPPFLAGS) -MT $(@:.d=.o) -MM  $< > $(BUILD_DIR)/$*.d
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
$(BUILD_DIR)/%.s:  %.c
 | 
					 | 
				
			||||||
	$(info ===>  GENERATE ASM  $@)
 | 
					 | 
				
			||||||
	$(CC) -S $(CPPFLAGS) $(CFLAGS) $< -o $@
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
.PHONY: clean distclean tags info asm format
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
clean:
 | 
					 | 
				
			||||||
	$(info ===>  CLEAN)
 | 
					 | 
				
			||||||
	@rm -rf $(BUILD_DIR)
 | 
					 | 
				
			||||||
	@rm -f tags
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
distclean: clean
 | 
					 | 
				
			||||||
	$(info ===>  DIST CLEAN)
 | 
					 | 
				
			||||||
	@rm -f $(TARGET)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
info:
 | 
					 | 
				
			||||||
	$(info $(CFLAGS))
 | 
					 | 
				
			||||||
	$(Q)$(CC) $(VERSION)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
asm:  $(BUILD_DIR) $(ASM)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
tags:
 | 
					 | 
				
			||||||
	$(info ===>  GENERATE TAGS)
 | 
					 | 
				
			||||||
	$(Q)ctags -R
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
format:
 | 
					 | 
				
			||||||
	@for src in $(SOURCES) ; do \
 | 
					 | 
				
			||||||
		echo "Formatting $$src" ; \
 | 
					 | 
				
			||||||
		clang-format -i $$src ; \
 | 
					 | 
				
			||||||
	done
 | 
					 | 
				
			||||||
	@echo "Done"
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
$(BUILD_DIR):
 | 
					 | 
				
			||||||
	@mkdir $(BUILD_DIR)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
-include $(OBJ:.o=.d)
 | 
					 | 
				
			||||||
@@ -1,48 +0,0 @@
 | 
				
			|||||||
# C source skeleton
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
## Build
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
1. Configure the toolchain and additional options in `config.mk`:
 | 
					 | 
				
			||||||
```
 | 
					 | 
				
			||||||
# Supported: GCC, CLANG, ICC
 | 
					 | 
				
			||||||
TAG ?= GCC
 | 
					 | 
				
			||||||
ENABLE_OPENMP ?= false
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
OPTIONS +=  -DARRAY_ALIGNMENT=64
 | 
					 | 
				
			||||||
#OPTIONS +=  -DVERBOSE_AFFINITY
 | 
					 | 
				
			||||||
#OPTIONS +=  -DVERBOSE_DATASIZE
 | 
					 | 
				
			||||||
#OPTIONS +=  -DVERBOSE_TIMER
 | 
					 | 
				
			||||||
```
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
The verbosity options enable detailed output about affinity settings, allocation sizes and timer resolution.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
2. Build with:
 | 
					 | 
				
			||||||
```
 | 
					 | 
				
			||||||
make
 | 
					 | 
				
			||||||
```
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
You can build multiple toolchains in the same directory, but notice that the Makefile is only acting on the one currently set.
 | 
					 | 
				
			||||||
Intermediate build results are located in the `<TOOLCHAIN>` directory.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
To output the executed commands use:
 | 
					 | 
				
			||||||
```
 | 
					 | 
				
			||||||
make Q=
 | 
					 | 
				
			||||||
```
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
3. Clean up with:
 | 
					 | 
				
			||||||
```
 | 
					 | 
				
			||||||
make clean
 | 
					 | 
				
			||||||
```
 | 
					 | 
				
			||||||
to clean intermediate build results.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
```
 | 
					 | 
				
			||||||
make distclean
 | 
					 | 
				
			||||||
```
 | 
					 | 
				
			||||||
to clean intermediate build results and binary.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
4. (Optional) Generate assembler:
 | 
					 | 
				
			||||||
```
 | 
					 | 
				
			||||||
make asm
 | 
					 | 
				
			||||||
```
 | 
					 | 
				
			||||||
The assembler files will also be located in the `<TOOLCHAIN>` directory.
 | 
					 | 
				
			||||||
@@ -1,46 +0,0 @@
 | 
				
			|||||||
#==============================================================================
 | 
					 | 
				
			||||||
#                            Laminar Canal Flow
 | 
					 | 
				
			||||||
#==============================================================================
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
# Problem specific Data:
 | 
					 | 
				
			||||||
# ---------------------
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
name canal             # name of flow setup
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
bcN     1              #  flags for boundary conditions
 | 
					 | 
				
			||||||
bcE     3              #  1 = no-slip      3 = outflow
 | 
					 | 
				
			||||||
bcS     1              #  2 = free-slip    4 = periodic
 | 
					 | 
				
			||||||
bcW     3              #
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
gx     0.0      # Body forces (e.g. gravity)
 | 
					 | 
				
			||||||
gy     0.0      #
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
re            100.0	   # Reynolds number
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
u_init        1.0      # initial value for velocity in x-direction
 | 
					 | 
				
			||||||
v_init        0.0      # initial value for velocity in y-direction
 | 
					 | 
				
			||||||
p_init        0.0      # initial value for pressure
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
# Geometry Data:
 | 
					 | 
				
			||||||
# -------------
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
xlength       30.0     # domain size in x-direction
 | 
					 | 
				
			||||||
ylength       4.0	   # domain size in y-direction
 | 
					 | 
				
			||||||
imax          200      # number of interior cells in x-direction
 | 
					 | 
				
			||||||
jmax          50	   # number of interior cells in y-direction
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
# Time Data:
 | 
					 | 
				
			||||||
# ---------
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
te       100.0   # final time
 | 
					 | 
				
			||||||
dt       0.02    # time stepsize
 | 
					 | 
				
			||||||
tau      0.5     # safety factor for time stepsize control (<0 constant delt)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
# Pressure Iteration Data:
 | 
					 | 
				
			||||||
# -----------------------
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
itermax       500       # maximal number of pressure iteration in one time step
 | 
					 | 
				
			||||||
eps           0.00001   # stopping tolerance for pressure iteration
 | 
					 | 
				
			||||||
omg           1.8       # relaxation parameter for SOR iteration
 | 
					 | 
				
			||||||
gamma         0.9       # upwind differencing factor gamma
 | 
					 | 
				
			||||||
#===============================================================================
 | 
					 | 
				
			||||||
@@ -1,10 +0,0 @@
 | 
				
			|||||||
# Supported: GCC, CLANG, ICC
 | 
					 | 
				
			||||||
TAG ?= CLANG
 | 
					 | 
				
			||||||
ENABLE_OPENMP ?= false
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
#Feature options
 | 
					 | 
				
			||||||
OPTIONS +=  -DARRAY_ALIGNMENT=64
 | 
					 | 
				
			||||||
OPTIONS +=  -DVERBOSE
 | 
					 | 
				
			||||||
#OPTIONS +=  -DVERBOSE_AFFINITY
 | 
					 | 
				
			||||||
#OPTIONS +=  -DVERBOSE_DATASIZE
 | 
					 | 
				
			||||||
#OPTIONS +=  -DVERBOSE_TIMER
 | 
					 | 
				
			||||||
@@ -1,46 +0,0 @@
 | 
				
			|||||||
#==============================================================================
 | 
					 | 
				
			||||||
#                              Driven Cavity
 | 
					 | 
				
			||||||
#==============================================================================
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
# Problem specific Data:
 | 
					 | 
				
			||||||
# ---------------------
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
name dcavity        # name of flow setup
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
bcTop      1			#  flags for boundary conditions
 | 
					 | 
				
			||||||
bcBottom   1			#  1 = no-slip      3 = outflow
 | 
					 | 
				
			||||||
bcLeft     1			#  2 = free-slip    4 = periodic
 | 
					 | 
				
			||||||
bcRight    1			#
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
gx    0.0			# Body forces (e.g. gravity)
 | 
					 | 
				
			||||||
gy    0.0			#
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
re    10.0		    # Reynolds number
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
u_init    0.0		# initial value for velocity in x-direction
 | 
					 | 
				
			||||||
v_init    0.0		# initial value for velocity in y-direction
 | 
					 | 
				
			||||||
p_init    0.0		# initial value for pressure
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
# Geometry Data:
 | 
					 | 
				
			||||||
# -------------
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
xlength    1.0		# domain size in x-direction
 | 
					 | 
				
			||||||
ylength    1.0		# domain size in y-direction
 | 
					 | 
				
			||||||
imax       100		# number of interior cells in x-direction
 | 
					 | 
				
			||||||
jmax       100		# number of interior cells in y-direction
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
# Time Data:
 | 
					 | 
				
			||||||
# ---------
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
te      5.0		# final time
 | 
					 | 
				
			||||||
dt     0.02	    # time stepsize
 | 
					 | 
				
			||||||
tau     0.5		# safety factor for time stepsize control (<0 constant delt)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
# Pressure Iteration Data:
 | 
					 | 
				
			||||||
# -----------------------
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
itermax  1000		# maximal number of pressure iteration in one time step
 | 
					 | 
				
			||||||
eps      0.001		# stopping tolerance for pressure iteration
 | 
					 | 
				
			||||||
omg      1.7		# relaxation parameter for SOR iteration
 | 
					 | 
				
			||||||
gamma    0.9		# upwind differencing factor gamma
 | 
					 | 
				
			||||||
#===============================================================================
 | 
					 | 
				
			||||||
@@ -1,16 +0,0 @@
 | 
				
			|||||||
CC   = mpicc
 | 
					 | 
				
			||||||
GCC  = cc
 | 
					 | 
				
			||||||
LINKER = $(CC)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
ifeq ($(ENABLE_OPENMP),true)
 | 
					 | 
				
			||||||
OPENMP   = -fopenmp
 | 
					 | 
				
			||||||
#OPENMP   = -Xpreprocessor -fopenmp #required on Macos with homebrew libomp
 | 
					 | 
				
			||||||
LIBS     = # -lomp
 | 
					 | 
				
			||||||
endif
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
VERSION  = --version
 | 
					 | 
				
			||||||
CFLAGS   = -Ofast -std=c99 $(OPENMP)
 | 
					 | 
				
			||||||
#CFLAGS   = -Ofast -fnt-store=aggressive  -std=c99 $(OPENMP) #AMD CLANG
 | 
					 | 
				
			||||||
LFLAGS   = $(OPENMP)
 | 
					 | 
				
			||||||
DEFINES  = -D_GNU_SOURCE# -DDEBUG
 | 
					 | 
				
			||||||
INCLUDES = -I/usr/local/include
 | 
					 | 
				
			||||||
@@ -1,14 +0,0 @@
 | 
				
			|||||||
CC   = gcc
 | 
					 | 
				
			||||||
GCC  = gcc
 | 
					 | 
				
			||||||
LINKER = $(CC)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
ifeq ($(ENABLE_OPENMP),true)
 | 
					 | 
				
			||||||
OPENMP   = -fopenmp
 | 
					 | 
				
			||||||
endif
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
VERSION  = --version
 | 
					 | 
				
			||||||
CFLAGS   = -Ofast -ffreestanding -std=c99 $(OPENMP)
 | 
					 | 
				
			||||||
LFLAGS   = $(OPENMP)
 | 
					 | 
				
			||||||
DEFINES  = -D_GNU_SOURCE
 | 
					 | 
				
			||||||
INCLUDES =
 | 
					 | 
				
			||||||
LIBS     =
 | 
					 | 
				
			||||||
@@ -1,14 +0,0 @@
 | 
				
			|||||||
CC   = mpiicc
 | 
					 | 
				
			||||||
GCC  = gcc
 | 
					 | 
				
			||||||
LINKER = $(CC)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
ifeq ($(ENABLE_OPENMP),true)
 | 
					 | 
				
			||||||
OPENMP   = -qopenmp
 | 
					 | 
				
			||||||
endif
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
VERSION  = --version
 | 
					 | 
				
			||||||
CFLAGS   =  -O3 -xHost -qopt-zmm-usage=high -std=c99 $(OPENMP)
 | 
					 | 
				
			||||||
LFLAGS   = $(OPENMP)
 | 
					 | 
				
			||||||
DEFINES  = -D_GNU_SOURCE
 | 
					 | 
				
			||||||
INCLUDES =
 | 
					 | 
				
			||||||
LIBS     =
 | 
					 | 
				
			||||||
@@ -1,61 +0,0 @@
 | 
				
			|||||||
/*
 | 
					 | 
				
			||||||
 * Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
 | 
					 | 
				
			||||||
 * All rights reserved.
 | 
					 | 
				
			||||||
 * Use of this source code is governed by a MIT-style
 | 
					 | 
				
			||||||
 * license that can be found in the LICENSE file.
 | 
					 | 
				
			||||||
 */
 | 
					 | 
				
			||||||
#ifdef __linux__
 | 
					 | 
				
			||||||
#ifdef _OPENMP
 | 
					 | 
				
			||||||
#include <pthread.h>
 | 
					 | 
				
			||||||
#include <sched.h>
 | 
					 | 
				
			||||||
#include <stdio.h>
 | 
					 | 
				
			||||||
#include <stdlib.h>
 | 
					 | 
				
			||||||
#include <sys/syscall.h>
 | 
					 | 
				
			||||||
#include <sys/types.h>
 | 
					 | 
				
			||||||
#include <unistd.h>
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
#define MAX_NUM_THREADS 128
 | 
					 | 
				
			||||||
#define gettid()        syscall(SYS_gettid)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
static int getProcessorID(cpu_set_t* cpu_set)
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
    int processorId;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    for (processorId = 0; processorId < MAX_NUM_THREADS; processorId++) {
 | 
					 | 
				
			||||||
        if (CPU_ISSET(processorId, cpu_set)) {
 | 
					 | 
				
			||||||
            break;
 | 
					 | 
				
			||||||
        }
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
    return processorId;
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
int affinity_getProcessorId()
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
    cpu_set_t cpu_set;
 | 
					 | 
				
			||||||
    CPU_ZERO(&cpu_set);
 | 
					 | 
				
			||||||
    sched_getaffinity(gettid(), sizeof(cpu_set_t), &cpu_set);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    return getProcessorID(&cpu_set);
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
void affinity_pinThread(int processorId)
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
    cpu_set_t cpuset;
 | 
					 | 
				
			||||||
    pthread_t thread;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    thread = pthread_self();
 | 
					 | 
				
			||||||
    CPU_ZERO(&cpuset);
 | 
					 | 
				
			||||||
    CPU_SET(processorId, &cpuset);
 | 
					 | 
				
			||||||
    pthread_setaffinity_np(thread, sizeof(cpu_set_t), &cpuset);
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
void affinity_pinProcess(int processorId)
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
    cpu_set_t cpuset;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    CPU_ZERO(&cpuset);
 | 
					 | 
				
			||||||
    CPU_SET(processorId, &cpuset);
 | 
					 | 
				
			||||||
    sched_setaffinity(0, sizeof(cpu_set_t), &cpuset);
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
#endif /*_OPENMP*/
 | 
					 | 
				
			||||||
#endif /*__linux__*/
 | 
					 | 
				
			||||||
@@ -1,14 +0,0 @@
 | 
				
			|||||||
/*
 | 
					 | 
				
			||||||
 * Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
 | 
					 | 
				
			||||||
 * All rights reserved.
 | 
					 | 
				
			||||||
 * Use of this source code is governed by a MIT-style
 | 
					 | 
				
			||||||
 * license that can be found in the LICENSE file.
 | 
					 | 
				
			||||||
 */
 | 
					 | 
				
			||||||
#ifndef AFFINITY_H
 | 
					 | 
				
			||||||
#define AFFINITY_H
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
extern int affinity_getProcessorId();
 | 
					 | 
				
			||||||
extern void affinity_pinProcess(int);
 | 
					 | 
				
			||||||
extern void affinity_pinThread(int);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
#endif /*AFFINITY_H*/
 | 
					 | 
				
			||||||
@@ -1,35 +0,0 @@
 | 
				
			|||||||
/*
 | 
					 | 
				
			||||||
 * Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
 | 
					 | 
				
			||||||
 * All rights reserved.
 | 
					 | 
				
			||||||
 * Use of this source code is governed by a MIT-style
 | 
					 | 
				
			||||||
 * license that can be found in the LICENSE file.
 | 
					 | 
				
			||||||
 */
 | 
					 | 
				
			||||||
#include <errno.h>
 | 
					 | 
				
			||||||
#include <stdio.h>
 | 
					 | 
				
			||||||
#include <stdlib.h>
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
void* allocate(int alignment, size_t bytesize)
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
    int errorCode;
 | 
					 | 
				
			||||||
    void* ptr;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    errorCode = posix_memalign(&ptr, alignment, bytesize);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    if (errorCode) {
 | 
					 | 
				
			||||||
        if (errorCode == EINVAL) {
 | 
					 | 
				
			||||||
            fprintf(stderr, "Error: Alignment parameter is not a power of two\n");
 | 
					 | 
				
			||||||
            exit(EXIT_FAILURE);
 | 
					 | 
				
			||||||
        }
 | 
					 | 
				
			||||||
        if (errorCode == ENOMEM) {
 | 
					 | 
				
			||||||
            fprintf(stderr, "Error: Insufficient memory to fulfill the request\n");
 | 
					 | 
				
			||||||
            exit(EXIT_FAILURE);
 | 
					 | 
				
			||||||
        }
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    if (ptr == NULL) {
 | 
					 | 
				
			||||||
        fprintf(stderr, "Error: posix_memalign failed!\n");
 | 
					 | 
				
			||||||
        exit(EXIT_FAILURE);
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    return ptr;
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
@@ -1,13 +0,0 @@
 | 
				
			|||||||
/*
 | 
					 | 
				
			||||||
 * Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
 | 
					 | 
				
			||||||
 * All rights reserved.
 | 
					 | 
				
			||||||
 * Use of this source code is governed by a MIT-style
 | 
					 | 
				
			||||||
 * license that can be found in the LICENSE file.
 | 
					 | 
				
			||||||
 */
 | 
					 | 
				
			||||||
#ifndef __ALLOCATE_H_
 | 
					 | 
				
			||||||
#define __ALLOCATE_H_
 | 
					 | 
				
			||||||
#include <stdlib.h>
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
extern void* allocate(int alignment, size_t bytesize);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
#endif
 | 
					 | 
				
			||||||
@@ -1,54 +0,0 @@
 | 
				
			|||||||
/*
 | 
					 | 
				
			||||||
 * =======================================================================================
 | 
					 | 
				
			||||||
 *
 | 
					 | 
				
			||||||
 *      Author:   Jan Eitzinger (je), jan.eitzinger@fau.de
 | 
					 | 
				
			||||||
 *      Copyright (c) 2020 RRZE, University Erlangen-Nuremberg
 | 
					 | 
				
			||||||
 *
 | 
					 | 
				
			||||||
 *      Permission is hereby granted, free of charge, to any person obtaining a copy
 | 
					 | 
				
			||||||
 *      of this software and associated documentation files (the "Software"), to deal
 | 
					 | 
				
			||||||
 *      in the Software without restriction, including without limitation the rights
 | 
					 | 
				
			||||||
 *      to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 | 
					 | 
				
			||||||
 *      copies of the Software, and to permit persons to whom the Software is
 | 
					 | 
				
			||||||
 *      furnished to do so, subject to the following conditions:
 | 
					 | 
				
			||||||
 *
 | 
					 | 
				
			||||||
 *      The above copyright notice and this permission notice shall be included in all
 | 
					 | 
				
			||||||
 *      copies or substantial portions of the Software.
 | 
					 | 
				
			||||||
 *
 | 
					 | 
				
			||||||
 *      THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 | 
					 | 
				
			||||||
 *      IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 | 
					 | 
				
			||||||
 *      FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 | 
					 | 
				
			||||||
 *      AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 | 
					 | 
				
			||||||
 *      LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 | 
					 | 
				
			||||||
 *      OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 | 
					 | 
				
			||||||
 *      SOFTWARE.
 | 
					 | 
				
			||||||
 *
 | 
					 | 
				
			||||||
 * =======================================================================================
 | 
					 | 
				
			||||||
 */
 | 
					 | 
				
			||||||
#ifndef LIKWID_MARKERS_H
 | 
					 | 
				
			||||||
#define LIKWID_MARKERS_H
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
#ifdef LIKWID_PERFMON
 | 
					 | 
				
			||||||
#include <likwid.h>
 | 
					 | 
				
			||||||
#define LIKWID_MARKER_INIT                likwid_markerInit()
 | 
					 | 
				
			||||||
#define LIKWID_MARKER_THREADINIT          likwid_markerThreadInit()
 | 
					 | 
				
			||||||
#define LIKWID_MARKER_SWITCH              likwid_markerNextGroup()
 | 
					 | 
				
			||||||
#define LIKWID_MARKER_REGISTER(regionTag) likwid_markerRegisterRegion(regionTag)
 | 
					 | 
				
			||||||
#define LIKWID_MARKER_START(regionTag)    likwid_markerStartRegion(regionTag)
 | 
					 | 
				
			||||||
#define LIKWID_MARKER_STOP(regionTag)     likwid_markerStopRegion(regionTag)
 | 
					 | 
				
			||||||
#define LIKWID_MARKER_CLOSE               likwid_markerClose()
 | 
					 | 
				
			||||||
#define LIKWID_MARKER_RESET(regionTag)    likwid_markerResetRegion(regionTag)
 | 
					 | 
				
			||||||
#define LIKWID_MARKER_GET(regionTag, nevents, events, time, count)                       \
 | 
					 | 
				
			||||||
    likwid_markerGetRegion(regionTag, nevents, events, time, count)
 | 
					 | 
				
			||||||
#else /* LIKWID_PERFMON */
 | 
					 | 
				
			||||||
#define LIKWID_MARKER_INIT
 | 
					 | 
				
			||||||
#define LIKWID_MARKER_THREADINIT
 | 
					 | 
				
			||||||
#define LIKWID_MARKER_SWITCH
 | 
					 | 
				
			||||||
#define LIKWID_MARKER_REGISTER(regionTag)
 | 
					 | 
				
			||||||
#define LIKWID_MARKER_START(regionTag)
 | 
					 | 
				
			||||||
#define LIKWID_MARKER_STOP(regionTag)
 | 
					 | 
				
			||||||
#define LIKWID_MARKER_CLOSE
 | 
					 | 
				
			||||||
#define LIKWID_MARKER_GET(regionTag, nevents, events, time, count)
 | 
					 | 
				
			||||||
#define LIKWID_MARKER_RESET(regionTag)
 | 
					 | 
				
			||||||
#endif /* LIKWID_PERFMON */
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
#endif /*LIKWID_MARKERS_H*/
 | 
					 | 
				
			||||||
@@ -1,77 +0,0 @@
 | 
				
			|||||||
/*
 | 
					 | 
				
			||||||
 * Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
 | 
					 | 
				
			||||||
 * All rights reserved.
 | 
					 | 
				
			||||||
 * Use of this source code is governed by a MIT-style
 | 
					 | 
				
			||||||
 * license that can be found in the LICENSE file.
 | 
					 | 
				
			||||||
 */
 | 
					 | 
				
			||||||
#include <float.h>
 | 
					 | 
				
			||||||
#include <limits.h>
 | 
					 | 
				
			||||||
#include <stdio.h>
 | 
					 | 
				
			||||||
#include <stdlib.h>
 | 
					 | 
				
			||||||
#include <unistd.h>
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
#include "parameter.h"
 | 
					 | 
				
			||||||
#include "progress.h"
 | 
					 | 
				
			||||||
#include "solver.h"
 | 
					 | 
				
			||||||
#include "timing.h"
 | 
					 | 
				
			||||||
#include <mpi.h>
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
int main(int argc, char** argv)
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
    int rank;
 | 
					 | 
				
			||||||
    double S, E;
 | 
					 | 
				
			||||||
    Parameter params;
 | 
					 | 
				
			||||||
    Solver solver;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    MPI_Init(&argc, &argv);
 | 
					 | 
				
			||||||
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
 | 
					 | 
				
			||||||
    initParameter(¶ms);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    if (argc != 2) {
 | 
					 | 
				
			||||||
        printf("Usage: %s <configFile>\n", argv[0]);
 | 
					 | 
				
			||||||
        exit(EXIT_SUCCESS);
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    readParameter(¶ms, argv[1]);
 | 
					 | 
				
			||||||
    if (rank == 0) {
 | 
					 | 
				
			||||||
        printParameter(¶ms);
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
    initSolver(&solver, ¶ms);
 | 
					 | 
				
			||||||
    initProgress(solver.te);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    double tau = solver.tau;
 | 
					 | 
				
			||||||
    double te  = solver.te;
 | 
					 | 
				
			||||||
    double t   = 0.0;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    S = getTimeStamp();
 | 
					 | 
				
			||||||
    while (t <= te) {
 | 
					 | 
				
			||||||
        if (tau > 0.0) {
 | 
					 | 
				
			||||||
            computeTimestep(&solver);
 | 
					 | 
				
			||||||
        }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        setBoundaryConditions(&solver);
 | 
					 | 
				
			||||||
        setSpecialBoundaryCondition(&solver);
 | 
					 | 
				
			||||||
        computeFG(&solver);
 | 
					 | 
				
			||||||
        computeRHS(&solver);
 | 
					 | 
				
			||||||
        solve(&solver);
 | 
					 | 
				
			||||||
        adaptUV(&solver);
 | 
					 | 
				
			||||||
        t += solver.dt;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
#ifdef VERBOSE
 | 
					 | 
				
			||||||
        if (rank == 0) {
 | 
					 | 
				
			||||||
            printf("TIME %f , TIMESTEP %f\n", t, solver.dt);
 | 
					 | 
				
			||||||
        }
 | 
					 | 
				
			||||||
#else
 | 
					 | 
				
			||||||
        printProgress(t);
 | 
					 | 
				
			||||||
#endif
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
    E = getTimeStamp();
 | 
					 | 
				
			||||||
    stopProgress();
 | 
					 | 
				
			||||||
    if (rank == 0) {
 | 
					 | 
				
			||||||
        printf("Solution took %.2fs\n", E - S);
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
    collectResult(&solver);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    MPI_Finalize();
 | 
					 | 
				
			||||||
    return EXIT_SUCCESS;
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
@@ -1,111 +0,0 @@
 | 
				
			|||||||
/*
 | 
					 | 
				
			||||||
 * Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
 | 
					 | 
				
			||||||
 * All rights reserved. This file is part of nusif-solver.
 | 
					 | 
				
			||||||
 * Use of this source code is governed by a MIT style
 | 
					 | 
				
			||||||
 * license that can be found in the LICENSE file.
 | 
					 | 
				
			||||||
 */
 | 
					 | 
				
			||||||
#include <stdio.h>
 | 
					 | 
				
			||||||
#include <stdlib.h>
 | 
					 | 
				
			||||||
#include <string.h>
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
#include "parameter.h"
 | 
					 | 
				
			||||||
#include "util.h"
 | 
					 | 
				
			||||||
#define MAXLINE 4096
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
void initParameter(Parameter* param)
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
    param->xlength = 1.0;
 | 
					 | 
				
			||||||
    param->ylength = 1.0;
 | 
					 | 
				
			||||||
    param->imax    = 100;
 | 
					 | 
				
			||||||
    param->jmax    = 100;
 | 
					 | 
				
			||||||
    param->itermax = 1000;
 | 
					 | 
				
			||||||
    param->eps     = 0.0001;
 | 
					 | 
				
			||||||
    param->omg     = 1.7;
 | 
					 | 
				
			||||||
    param->re      = 100.0;
 | 
					 | 
				
			||||||
    param->gamma   = 0.9;
 | 
					 | 
				
			||||||
    param->tau     = 0.5;
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
void readParameter(Parameter* param, const char* filename)
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
    FILE* fp = fopen(filename, "r");
 | 
					 | 
				
			||||||
    char line[MAXLINE];
 | 
					 | 
				
			||||||
    int i;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    if (!fp) {
 | 
					 | 
				
			||||||
        fprintf(stderr, "Could not open parameter file: %s\n", filename);
 | 
					 | 
				
			||||||
        exit(EXIT_FAILURE);
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    while (!feof(fp)) {
 | 
					 | 
				
			||||||
        line[0] = '\0';
 | 
					 | 
				
			||||||
        fgets(line, MAXLINE, fp);
 | 
					 | 
				
			||||||
        for (i = 0; line[i] != '\0' && line[i] != '#'; i++)
 | 
					 | 
				
			||||||
            ;
 | 
					 | 
				
			||||||
        line[i] = '\0';
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        char* tok = strtok(line, " ");
 | 
					 | 
				
			||||||
        char* val = strtok(NULL, " ");
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
#define PARSE_PARAM(p, f)                                                                \
 | 
					 | 
				
			||||||
    if (strncmp(tok, #p, sizeof(#p) / sizeof(#p[0]) - 1) == 0) {                         \
 | 
					 | 
				
			||||||
        param->p = f(val);                                                               \
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
#define PARSE_STRING(p) PARSE_PARAM(p, strdup)
 | 
					 | 
				
			||||||
#define PARSE_INT(p)    PARSE_PARAM(p, atoi)
 | 
					 | 
				
			||||||
#define PARSE_REAL(p)   PARSE_PARAM(p, atof)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        if (tok != NULL && val != NULL) {
 | 
					 | 
				
			||||||
            PARSE_REAL(xlength);
 | 
					 | 
				
			||||||
            PARSE_REAL(ylength);
 | 
					 | 
				
			||||||
            PARSE_INT(imax);
 | 
					 | 
				
			||||||
            PARSE_INT(jmax);
 | 
					 | 
				
			||||||
            PARSE_INT(itermax);
 | 
					 | 
				
			||||||
            PARSE_REAL(eps);
 | 
					 | 
				
			||||||
            PARSE_REAL(omg);
 | 
					 | 
				
			||||||
            PARSE_REAL(re);
 | 
					 | 
				
			||||||
            PARSE_REAL(tau);
 | 
					 | 
				
			||||||
            PARSE_REAL(gamma);
 | 
					 | 
				
			||||||
            PARSE_REAL(dt);
 | 
					 | 
				
			||||||
            PARSE_REAL(te);
 | 
					 | 
				
			||||||
            PARSE_REAL(gx);
 | 
					 | 
				
			||||||
            PARSE_REAL(gy);
 | 
					 | 
				
			||||||
            PARSE_STRING(name);
 | 
					 | 
				
			||||||
            PARSE_INT(bcLeft);
 | 
					 | 
				
			||||||
            PARSE_INT(bcRight);
 | 
					 | 
				
			||||||
            PARSE_INT(bcBottom);
 | 
					 | 
				
			||||||
            PARSE_INT(bcTop);
 | 
					 | 
				
			||||||
            PARSE_REAL(u_init);
 | 
					 | 
				
			||||||
            PARSE_REAL(v_init);
 | 
					 | 
				
			||||||
            PARSE_REAL(p_init);
 | 
					 | 
				
			||||||
        }
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    fclose(fp);
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
void printParameter(Parameter* param)
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
    printf("Parameters for %s\n", param->name);
 | 
					 | 
				
			||||||
    printf("Boundary conditions Left:%d Right:%d Bottom:%d Top:%d\n",
 | 
					 | 
				
			||||||
        param->bcLeft,
 | 
					 | 
				
			||||||
        param->bcRight,
 | 
					 | 
				
			||||||
        param->bcBottom,
 | 
					 | 
				
			||||||
        param->bcTop);
 | 
					 | 
				
			||||||
    printf("\tReynolds number: %.2f\n", param->re);
 | 
					 | 
				
			||||||
    printf("\tInit arrays: U:%.2f V:%.2f P:%.2f\n",
 | 
					 | 
				
			||||||
        param->u_init,
 | 
					 | 
				
			||||||
        param->v_init,
 | 
					 | 
				
			||||||
        param->p_init);
 | 
					 | 
				
			||||||
    printf("Geometry data:\n");
 | 
					 | 
				
			||||||
    printf("\tDomain box size (x, y): %.2f, %.2f\n", param->xlength, param->ylength);
 | 
					 | 
				
			||||||
    printf("\tCells (x, y): %d, %d\n", param->imax, param->jmax);
 | 
					 | 
				
			||||||
    printf("Timestep parameters:\n");
 | 
					 | 
				
			||||||
    printf("\tDefault stepsize: %.2f, Final time %.2f\n", param->dt, param->te);
 | 
					 | 
				
			||||||
    printf("\tTau factor: %.2f\n", param->tau);
 | 
					 | 
				
			||||||
    printf("Iterative solver parameters:\n");
 | 
					 | 
				
			||||||
    printf("\tMax iterations: %d\n", param->itermax);
 | 
					 | 
				
			||||||
    printf("\tepsilon (stopping tolerance) : %f\n", param->eps);
 | 
					 | 
				
			||||||
    printf("\tgamma (stopping tolerance) : %f\n", param->gamma);
 | 
					 | 
				
			||||||
    printf("\tomega (SOR relaxation): %f\n", param->omg);
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
@@ -1,26 +0,0 @@
 | 
				
			|||||||
/*
 | 
					 | 
				
			||||||
 * Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
 | 
					 | 
				
			||||||
 * All rights reserved. This file is part of nusif-solver.
 | 
					 | 
				
			||||||
 * Use of this source code is governed by a MIT style
 | 
					 | 
				
			||||||
 * license that can be found in the LICENSE file.
 | 
					 | 
				
			||||||
 */
 | 
					 | 
				
			||||||
#ifndef __PARAMETER_H_
 | 
					 | 
				
			||||||
#define __PARAMETER_H_
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
typedef struct {
 | 
					 | 
				
			||||||
    double xlength, ylength;
 | 
					 | 
				
			||||||
    int imax, jmax;
 | 
					 | 
				
			||||||
    int itermax;
 | 
					 | 
				
			||||||
    double eps, omg;
 | 
					 | 
				
			||||||
    double re, tau, gamma;
 | 
					 | 
				
			||||||
    double te, dt;
 | 
					 | 
				
			||||||
    double gx, gy;
 | 
					 | 
				
			||||||
    char* name;
 | 
					 | 
				
			||||||
    int bcLeft, bcRight, bcBottom, bcTop;
 | 
					 | 
				
			||||||
    double u_init, v_init, p_init;
 | 
					 | 
				
			||||||
} Parameter;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
void initParameter(Parameter*);
 | 
					 | 
				
			||||||
void readParameter(Parameter*, const char*);
 | 
					 | 
				
			||||||
void printParameter(Parameter*);
 | 
					 | 
				
			||||||
#endif
 | 
					 | 
				
			||||||
@@ -1,60 +0,0 @@
 | 
				
			|||||||
/*
 | 
					 | 
				
			||||||
 * Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
 | 
					 | 
				
			||||||
 * All rights reserved. This file is part of nusif-solver.
 | 
					 | 
				
			||||||
 * Use of this source code is governed by a MIT style
 | 
					 | 
				
			||||||
 * license that can be found in the LICENSE file.
 | 
					 | 
				
			||||||
 */
 | 
					 | 
				
			||||||
#include <math.h>
 | 
					 | 
				
			||||||
#include <mpi.h>
 | 
					 | 
				
			||||||
#include <stdio.h>
 | 
					 | 
				
			||||||
#include <stdlib.h>
 | 
					 | 
				
			||||||
#include <string.h>
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
#include "progress.h"
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
static double _end;
 | 
					 | 
				
			||||||
static int _current;
 | 
					 | 
				
			||||||
static int _rank = -1;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
void initProgress(double end)
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
    MPI_Comm_rank(MPI_COMM_WORLD, &_rank);
 | 
					 | 
				
			||||||
    _end     = end;
 | 
					 | 
				
			||||||
    _current = 0;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    if (_rank == 0) {
 | 
					 | 
				
			||||||
        printf("[          ]");
 | 
					 | 
				
			||||||
        fflush(stdout);
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
void printProgress(double current)
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
    if (_rank == 0) {
 | 
					 | 
				
			||||||
        int new = (int)rint((current / _end) * 10.0);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        if (new > _current) {
 | 
					 | 
				
			||||||
            char progress[11];
 | 
					 | 
				
			||||||
            _current    = new;
 | 
					 | 
				
			||||||
            progress[0] = 0;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
            for (int i = 0; i < 10; i++) {
 | 
					 | 
				
			||||||
                if (i < _current) {
 | 
					 | 
				
			||||||
                    sprintf(progress + strlen(progress), "#");
 | 
					 | 
				
			||||||
                } else {
 | 
					 | 
				
			||||||
                    sprintf(progress + strlen(progress), " ");
 | 
					 | 
				
			||||||
                }
 | 
					 | 
				
			||||||
            }
 | 
					 | 
				
			||||||
            printf("\r[%s]", progress);
 | 
					 | 
				
			||||||
        }
 | 
					 | 
				
			||||||
        fflush(stdout);
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
void stopProgress()
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
    if (_rank == 0) {
 | 
					 | 
				
			||||||
        printf("\n");
 | 
					 | 
				
			||||||
        fflush(stdout);
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
@@ -1,14 +0,0 @@
 | 
				
			|||||||
/*
 | 
					 | 
				
			||||||
 * Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
 | 
					 | 
				
			||||||
 * All rights reserved.
 | 
					 | 
				
			||||||
 * Use of this source code is governed by a MIT-style
 | 
					 | 
				
			||||||
 * license that can be found in the LICENSE file.
 | 
					 | 
				
			||||||
 */
 | 
					 | 
				
			||||||
#ifndef __PROGRESS_H_
 | 
					 | 
				
			||||||
#define __PROGRESS_H_
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
extern void initProgress(double);
 | 
					 | 
				
			||||||
extern void printProgress(double);
 | 
					 | 
				
			||||||
extern void stopProgress();
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
#endif
 | 
					 | 
				
			||||||
@@ -1,833 +0,0 @@
 | 
				
			|||||||
/*
 | 
					 | 
				
			||||||
 * Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
 | 
					 | 
				
			||||||
 * All rights reserved. This file is part of nusif-solver.
 | 
					 | 
				
			||||||
 * Use of this source code is governed by a MIT style
 | 
					 | 
				
			||||||
 * license that can be found in the LICENSE file.
 | 
					 | 
				
			||||||
 */
 | 
					 | 
				
			||||||
#include <float.h>
 | 
					 | 
				
			||||||
#include <math.h>
 | 
					 | 
				
			||||||
#include <mpi.h>
 | 
					 | 
				
			||||||
#include <stdio.h>
 | 
					 | 
				
			||||||
#include <stdlib.h>
 | 
					 | 
				
			||||||
#include <string.h>
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
#include "allocate.h"
 | 
					 | 
				
			||||||
#include "parameter.h"
 | 
					 | 
				
			||||||
#include "solver.h"
 | 
					 | 
				
			||||||
#include "util.h"
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
#define P(i, j)   p[(j) * (imaxLocal + 2) + (i)]
 | 
					 | 
				
			||||||
#define F(i, j)   f[(j) * (imaxLocal + 2) + (i)]
 | 
					 | 
				
			||||||
#define G(i, j)   g[(j) * (imaxLocal + 2) + (i)]
 | 
					 | 
				
			||||||
#define U(i, j)   u[(j) * (imaxLocal + 2) + (i)]
 | 
					 | 
				
			||||||
#define V(i, j)   v[(j) * (imaxLocal + 2) + (i)]
 | 
					 | 
				
			||||||
#define RHS(i, j) rhs[(j) * (imaxLocal + 2) + (i)]
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
#define IDIM 0
 | 
					 | 
				
			||||||
#define JDIM 1
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
static int sizeOfRank(int rank, int size, int N)
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
    return N / size + ((N % size > rank) ? 1 : 0);
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
void print(Solver* solver, double* grid)
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
    int imaxLocal = solver->imaxLocal;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    for (int i = 0; i < solver->size; i++) {
 | 
					 | 
				
			||||||
        if (i == solver->rank) {
 | 
					 | 
				
			||||||
            printf(
 | 
					 | 
				
			||||||
                "### RANK %d #######################################################\n",
 | 
					 | 
				
			||||||
                solver->rank);
 | 
					 | 
				
			||||||
            for (int j = 0; j < solver->jmaxLocal + 2; j++) {
 | 
					 | 
				
			||||||
                printf("%02d: ", j);
 | 
					 | 
				
			||||||
                for (int i = 0; i < solver->imaxLocal + 2; i++) {
 | 
					 | 
				
			||||||
                    printf("%12.8f  ", grid[j * (imaxLocal + 2) + i]);
 | 
					 | 
				
			||||||
                }
 | 
					 | 
				
			||||||
                printf("\n");
 | 
					 | 
				
			||||||
            }
 | 
					 | 
				
			||||||
            fflush(stdout);
 | 
					 | 
				
			||||||
        }
 | 
					 | 
				
			||||||
        MPI_Barrier(MPI_COMM_WORLD);
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
static void exchange(Solver* solver, double* grid)
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
    int counts[4] = { 1, 1, 1, 1 };
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    MPI_Neighbor_alltoallw(grid,
 | 
					 | 
				
			||||||
        counts,
 | 
					 | 
				
			||||||
        solver->sdispls,
 | 
					 | 
				
			||||||
        solver->bufferTypes,
 | 
					 | 
				
			||||||
        grid,
 | 
					 | 
				
			||||||
        counts,
 | 
					 | 
				
			||||||
        solver->rdispls,
 | 
					 | 
				
			||||||
        solver->bufferTypes,
 | 
					 | 
				
			||||||
        solver->comm);
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
static void shift(Solver* solver)
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
    MPI_Request requests[4] = { MPI_REQUEST_NULL,
 | 
					 | 
				
			||||||
        MPI_REQUEST_NULL,
 | 
					 | 
				
			||||||
        MPI_REQUEST_NULL,
 | 
					 | 
				
			||||||
        MPI_REQUEST_NULL };
 | 
					 | 
				
			||||||
    double* f               = solver->f;
 | 
					 | 
				
			||||||
    double* g               = solver->g;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    /* shift G */
 | 
					 | 
				
			||||||
    double* buf = g + 1;
 | 
					 | 
				
			||||||
    /* receive ghost cells from bottom neighbor */
 | 
					 | 
				
			||||||
    MPI_Irecv(buf,
 | 
					 | 
				
			||||||
        1,
 | 
					 | 
				
			||||||
        solver->bufferTypes[2],
 | 
					 | 
				
			||||||
        solver->jNeighbours[0],
 | 
					 | 
				
			||||||
        0,
 | 
					 | 
				
			||||||
        solver->comm,
 | 
					 | 
				
			||||||
        &requests[0]);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    buf = g + (solver->jmaxLocal) * (solver->imaxLocal + 2) + 1;
 | 
					 | 
				
			||||||
    /* send ghost cells to top neighbor */
 | 
					 | 
				
			||||||
    MPI_Isend(buf,
 | 
					 | 
				
			||||||
        1,
 | 
					 | 
				
			||||||
        solver->bufferTypes[2],
 | 
					 | 
				
			||||||
        solver->jNeighbours[1],
 | 
					 | 
				
			||||||
        0,
 | 
					 | 
				
			||||||
        solver->comm,
 | 
					 | 
				
			||||||
        &requests[1]);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    /* shift F */
 | 
					 | 
				
			||||||
    buf = f + (solver->imaxLocal + 2);
 | 
					 | 
				
			||||||
    /* receive ghost cells from left neighbor */
 | 
					 | 
				
			||||||
    MPI_Irecv(buf,
 | 
					 | 
				
			||||||
        1,
 | 
					 | 
				
			||||||
        solver->bufferTypes[0],
 | 
					 | 
				
			||||||
        solver->iNeighbours[0],
 | 
					 | 
				
			||||||
        1,
 | 
					 | 
				
			||||||
        solver->comm,
 | 
					 | 
				
			||||||
        &requests[2]);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    buf = f + (solver->imaxLocal + 2) + (solver->imaxLocal);
 | 
					 | 
				
			||||||
    /* send ghost cells to right neighbor */
 | 
					 | 
				
			||||||
    MPI_Isend(buf,
 | 
					 | 
				
			||||||
        1,
 | 
					 | 
				
			||||||
        solver->bufferTypes[0],
 | 
					 | 
				
			||||||
        solver->iNeighbours[1],
 | 
					 | 
				
			||||||
        1,
 | 
					 | 
				
			||||||
        solver->comm,
 | 
					 | 
				
			||||||
        &requests[3]);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    MPI_Waitall(4, requests, MPI_STATUSES_IGNORE);
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
void debugExchange(Solver* solver)
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
    for (int i = 0; i < (solver->imaxLocal + 2) * (solver->jmaxLocal + 2); i++) {
 | 
					 | 
				
			||||||
        solver->p[i] = solver->rank;
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
    exchange(solver, solver->p);
 | 
					 | 
				
			||||||
    print(solver, solver->p);
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
static void assembleResult(Solver* solver,
 | 
					 | 
				
			||||||
    double* src,
 | 
					 | 
				
			||||||
    double* dst,
 | 
					 | 
				
			||||||
    int imaxLocal[],
 | 
					 | 
				
			||||||
    int jmaxLocal[],
 | 
					 | 
				
			||||||
    int offset[])
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
    MPI_Request* requests;
 | 
					 | 
				
			||||||
    int numRequests = 1;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    if (solver->rank == 0) {
 | 
					 | 
				
			||||||
        numRequests = solver->size + 1;
 | 
					 | 
				
			||||||
    } else {
 | 
					 | 
				
			||||||
        numRequests = 1;
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    requests = (MPI_Request*)malloc(numRequests * sizeof(MPI_Request));
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    /* all ranks send their bulk array */
 | 
					 | 
				
			||||||
    MPI_Datatype bulkType;
 | 
					 | 
				
			||||||
    const int ndims     = 2;
 | 
					 | 
				
			||||||
    int oldSizes[ndims] = { solver->jmaxLocal + 2, solver->imaxLocal + 2 };
 | 
					 | 
				
			||||||
    int newSizes[ndims] = { solver->jmaxLocal, solver->imaxLocal };
 | 
					 | 
				
			||||||
    int starts[ndims]   = { 1, 1 };
 | 
					 | 
				
			||||||
    MPI_Type_create_subarray(2,
 | 
					 | 
				
			||||||
        oldSizes,
 | 
					 | 
				
			||||||
        newSizes,
 | 
					 | 
				
			||||||
        starts,
 | 
					 | 
				
			||||||
        MPI_ORDER_C,
 | 
					 | 
				
			||||||
        MPI_DOUBLE,
 | 
					 | 
				
			||||||
        &bulkType);
 | 
					 | 
				
			||||||
    MPI_Type_commit(&bulkType);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    MPI_Isend(src, 1, bulkType, 0, 0, solver->comm, &requests[0]);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    /* rank 0 assembles the subdomains */
 | 
					 | 
				
			||||||
    if (solver->rank == 0) {
 | 
					 | 
				
			||||||
        for (int i = 0; i < solver->size; i++) {
 | 
					 | 
				
			||||||
            MPI_Datatype domainType;
 | 
					 | 
				
			||||||
            MPI_Type_vector(jmaxLocal[i],
 | 
					 | 
				
			||||||
                imaxLocal[i],
 | 
					 | 
				
			||||||
                solver->imax,
 | 
					 | 
				
			||||||
                MPI_DOUBLE,
 | 
					 | 
				
			||||||
                &domainType);
 | 
					 | 
				
			||||||
            MPI_Type_commit(&domainType);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
            MPI_Irecv(dst + offset[i],
 | 
					 | 
				
			||||||
                1,
 | 
					 | 
				
			||||||
                domainType,
 | 
					 | 
				
			||||||
                i,
 | 
					 | 
				
			||||||
                0,
 | 
					 | 
				
			||||||
                solver->comm,
 | 
					 | 
				
			||||||
                &requests[i + 1]);
 | 
					 | 
				
			||||||
        }
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    MPI_Waitall(numRequests, requests, MPI_STATUSES_IGNORE);
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
static int sum(int* sizes, int position)
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
    int sum = 0;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    for (int i = 0; i < position; i++) {
 | 
					 | 
				
			||||||
        sum += sizes[i];
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    return sum;
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
void collectResult(Solver* solver)
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
    double* Pall = NULL;
 | 
					 | 
				
			||||||
    double* Uall = NULL;
 | 
					 | 
				
			||||||
    double* Vall = NULL;
 | 
					 | 
				
			||||||
    int offset[solver->size];
 | 
					 | 
				
			||||||
    int imaxLocal[solver->size];
 | 
					 | 
				
			||||||
    int jmaxLocal[solver->size];
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    MPI_Gather(&solver->imaxLocal, 1, MPI_INT, imaxLocal, 1, MPI_INT, 0, MPI_COMM_WORLD);
 | 
					 | 
				
			||||||
    MPI_Gather(&solver->jmaxLocal, 1, MPI_INT, jmaxLocal, 1, MPI_INT, 0, MPI_COMM_WORLD);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    if (solver->rank == 0) {
 | 
					 | 
				
			||||||
        Pall = allocate(64, (solver->imax) * (solver->jmax) * sizeof(double));
 | 
					 | 
				
			||||||
        Uall = allocate(64, (solver->imax) * (solver->jmax) * sizeof(double));
 | 
					 | 
				
			||||||
        Vall = allocate(64, (solver->imax) * (solver->jmax) * sizeof(double));
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        for (int i = 0; i < solver->size; i++) {
 | 
					 | 
				
			||||||
            int coords[2];
 | 
					 | 
				
			||||||
            MPI_Cart_coords(solver->comm, i, 2, coords);
 | 
					 | 
				
			||||||
            int ioffset = sum(imaxLocal, coords[0]);
 | 
					 | 
				
			||||||
            int joffset = sum(jmaxLocal, coords[1]);
 | 
					 | 
				
			||||||
            offset[i]   = (joffset * solver->imax) + ioffset;
 | 
					 | 
				
			||||||
            printf("Rank: %d, Coords(i,j): %d %d, Size(i,j): %d %d, Offset(i,j): %d %d\n",
 | 
					 | 
				
			||||||
                i,
 | 
					 | 
				
			||||||
                coords[0],
 | 
					 | 
				
			||||||
                coords[1],
 | 
					 | 
				
			||||||
                imaxLocal[i],
 | 
					 | 
				
			||||||
                jmaxLocal[i],
 | 
					 | 
				
			||||||
                ioffset,
 | 
					 | 
				
			||||||
                joffset);
 | 
					 | 
				
			||||||
        }
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    /* collect P */
 | 
					 | 
				
			||||||
    assembleResult(solver, solver->p, Pall, imaxLocal, jmaxLocal, offset);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    /* collect U */
 | 
					 | 
				
			||||||
    assembleResult(solver, solver->u, Uall, imaxLocal, jmaxLocal, offset);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    /* collect V */
 | 
					 | 
				
			||||||
    assembleResult(solver, solver->v, Vall, imaxLocal, jmaxLocal, offset);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    /* write to disk */
 | 
					 | 
				
			||||||
    if (solver->rank == 0) writeResult(solver, Pall, Uall, Vall);
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
static void printConfig(Solver* solver)
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
    if (solver->rank == 0) {
 | 
					 | 
				
			||||||
        printf("Parameters for #%s#\n", solver->problem);
 | 
					 | 
				
			||||||
        printf("Boundary conditions Top:%d Bottom:%d Left:%d Right:%d\n",
 | 
					 | 
				
			||||||
            solver->bcTop,
 | 
					 | 
				
			||||||
            solver->bcBottom,
 | 
					 | 
				
			||||||
            solver->bcLeft,
 | 
					 | 
				
			||||||
            solver->bcRight);
 | 
					 | 
				
			||||||
        printf("\tReynolds number: %.2f\n", solver->re);
 | 
					 | 
				
			||||||
        printf("\tGx Gy: %.2f %.2f\n", solver->gx, solver->gy);
 | 
					 | 
				
			||||||
        printf("Geometry data:\n");
 | 
					 | 
				
			||||||
        printf("\tDomain box size (x, y): %.2f, %.2f\n",
 | 
					 | 
				
			||||||
            solver->xlength,
 | 
					 | 
				
			||||||
            solver->ylength);
 | 
					 | 
				
			||||||
        printf("\tCells (x, y): %d, %d\n", solver->imax, solver->jmax);
 | 
					 | 
				
			||||||
        printf("Timestep parameters:\n");
 | 
					 | 
				
			||||||
        printf("\tDefault stepsize: %.2f, Final time %.2f\n", solver->dt, solver->te);
 | 
					 | 
				
			||||||
        printf("\tdt bound: %.6f\n", solver->dtBound);
 | 
					 | 
				
			||||||
        printf("\tTau factor: %.2f\n", solver->tau);
 | 
					 | 
				
			||||||
        printf("Iterative solver parameters:\n");
 | 
					 | 
				
			||||||
        printf("\tMax iterations: %d\n", solver->itermax);
 | 
					 | 
				
			||||||
        printf("\tepsilon (stopping tolerance) : %f\n", solver->eps);
 | 
					 | 
				
			||||||
        printf("\tgamma factor: %f\n", solver->gamma);
 | 
					 | 
				
			||||||
        printf("\tomega (SOR relaxation): %f\n", solver->omega);
 | 
					 | 
				
			||||||
        printf("Communication parameters:\n");
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
    for (int i = 0; i < solver->size; i++) {
 | 
					 | 
				
			||||||
        if (i == solver->rank) {
 | 
					 | 
				
			||||||
            printf("\tRank %d of %d\n", solver->rank, solver->size);
 | 
					 | 
				
			||||||
            printf("\tNeighbours (b, t, l, r): %d, %d, %d, %d\n",
 | 
					 | 
				
			||||||
                solver->jNeighbours[0],
 | 
					 | 
				
			||||||
                solver->jNeighbours[1],
 | 
					 | 
				
			||||||
                solver->iNeighbours[0],
 | 
					 | 
				
			||||||
                solver->iNeighbours[1]);
 | 
					 | 
				
			||||||
            printf("\tCoordinates %d,%d\n", solver->coords[0], solver->coords[1]);
 | 
					 | 
				
			||||||
            printf("\tLocal domain size: %dx%d\n", solver->imaxLocal, solver->jmaxLocal);
 | 
					 | 
				
			||||||
            fflush(stdout);
 | 
					 | 
				
			||||||
        }
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
void initSolver(Solver* solver, Parameter* params)
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
    solver->problem  = params->name;
 | 
					 | 
				
			||||||
    solver->bcTop    = params->bcTop;
 | 
					 | 
				
			||||||
    solver->bcBottom = params->bcBottom;
 | 
					 | 
				
			||||||
    solver->bcLeft   = params->bcLeft;
 | 
					 | 
				
			||||||
    solver->bcRight  = params->bcRight;
 | 
					 | 
				
			||||||
    solver->imax     = params->imax;
 | 
					 | 
				
			||||||
    solver->jmax     = params->jmax;
 | 
					 | 
				
			||||||
    solver->xlength  = params->xlength;
 | 
					 | 
				
			||||||
    solver->ylength  = params->ylength;
 | 
					 | 
				
			||||||
    solver->dx       = params->xlength / params->imax;
 | 
					 | 
				
			||||||
    solver->dy       = params->ylength / params->jmax;
 | 
					 | 
				
			||||||
    solver->eps      = params->eps;
 | 
					 | 
				
			||||||
    solver->omega    = params->omg;
 | 
					 | 
				
			||||||
    solver->itermax  = params->itermax;
 | 
					 | 
				
			||||||
    solver->re       = params->re;
 | 
					 | 
				
			||||||
    solver->gx       = params->gx;
 | 
					 | 
				
			||||||
    solver->gy       = params->gy;
 | 
					 | 
				
			||||||
    solver->dt       = params->dt;
 | 
					 | 
				
			||||||
    solver->te       = params->te;
 | 
					 | 
				
			||||||
    solver->tau      = params->tau;
 | 
					 | 
				
			||||||
    solver->gamma    = params->gamma;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    /* setup communication */
 | 
					 | 
				
			||||||
    MPI_Comm_rank(MPI_COMM_WORLD, &(solver->rank));
 | 
					 | 
				
			||||||
    MPI_Comm_size(MPI_COMM_WORLD, &(solver->size));
 | 
					 | 
				
			||||||
    int dims[NDIMS]    = { 0, 0 };
 | 
					 | 
				
			||||||
    int periods[NDIMS] = { 0, 0 };
 | 
					 | 
				
			||||||
    MPI_Dims_create(solver->size, NDIMS, dims);
 | 
					 | 
				
			||||||
    MPI_Cart_create(MPI_COMM_WORLD, NDIMS, dims, periods, 0, &solver->comm);
 | 
					 | 
				
			||||||
    MPI_Cart_shift(solver->comm,
 | 
					 | 
				
			||||||
        IDIM,
 | 
					 | 
				
			||||||
        1,
 | 
					 | 
				
			||||||
        &solver->iNeighbours[0],
 | 
					 | 
				
			||||||
        &solver->iNeighbours[1]);
 | 
					 | 
				
			||||||
    MPI_Cart_shift(solver->comm,
 | 
					 | 
				
			||||||
        JDIM,
 | 
					 | 
				
			||||||
        1,
 | 
					 | 
				
			||||||
        &solver->jNeighbours[0],
 | 
					 | 
				
			||||||
        &solver->jNeighbours[1]);
 | 
					 | 
				
			||||||
    MPI_Cart_get(solver->comm, NDIMS, solver->dims, periods, solver->coords);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    solver->imaxLocal = sizeOfRank(solver->coords[IDIM], dims[IDIM], solver->imax);
 | 
					 | 
				
			||||||
    solver->jmaxLocal = sizeOfRank(solver->coords[JDIM], dims[JDIM], solver->jmax);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    MPI_Datatype jBufferType;
 | 
					 | 
				
			||||||
    MPI_Type_contiguous(solver->imaxLocal, MPI_DOUBLE, &jBufferType);
 | 
					 | 
				
			||||||
    MPI_Type_commit(&jBufferType);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    MPI_Datatype iBufferType;
 | 
					 | 
				
			||||||
    MPI_Type_vector(solver->jmaxLocal,
 | 
					 | 
				
			||||||
        1,
 | 
					 | 
				
			||||||
        solver->imaxLocal + 2,
 | 
					 | 
				
			||||||
        MPI_DOUBLE,
 | 
					 | 
				
			||||||
        &iBufferType);
 | 
					 | 
				
			||||||
    MPI_Type_commit(&iBufferType);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    // in the order of the dimensions i->0, j->1
 | 
					 | 
				
			||||||
    // first negative direction, then positive direction
 | 
					 | 
				
			||||||
    size_t dblsize         = sizeof(double);
 | 
					 | 
				
			||||||
    int imaxLocal          = solver->imaxLocal;
 | 
					 | 
				
			||||||
    int jmaxLocal          = solver->jmaxLocal;
 | 
					 | 
				
			||||||
    solver->bufferTypes[0] = iBufferType; // left
 | 
					 | 
				
			||||||
    solver->bufferTypes[1] = iBufferType; // right
 | 
					 | 
				
			||||||
    solver->bufferTypes[2] = jBufferType; // bottom
 | 
					 | 
				
			||||||
    solver->bufferTypes[3] = jBufferType; // top
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    solver->sdispls[0] = ((imaxLocal + 2) + 1) * dblsize;               // send left
 | 
					 | 
				
			||||||
    solver->sdispls[1] = ((imaxLocal + 2) + imaxLocal) * dblsize;       // send right
 | 
					 | 
				
			||||||
    solver->sdispls[2] = ((imaxLocal + 2) + 1) * dblsize;               // send bottom
 | 
					 | 
				
			||||||
    solver->sdispls[3] = ((jmaxLocal) * (imaxLocal + 2) + 1) * dblsize; // send top
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    solver->rdispls[0] = (imaxLocal + 2) * dblsize;                         // recv left
 | 
					 | 
				
			||||||
    solver->rdispls[1] = ((imaxLocal + 2) + (imaxLocal + 1)) * dblsize;     // recv right
 | 
					 | 
				
			||||||
    solver->rdispls[2] = 1 * dblsize;                                       // recv bottom
 | 
					 | 
				
			||||||
    solver->rdispls[3] = ((jmaxLocal + 1) * (imaxLocal + 2) + 1) * dblsize; // recv top
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    /* allocate arrays */
 | 
					 | 
				
			||||||
    size_t bytesize = (imaxLocal + 2) * (jmaxLocal + 2) * sizeof(double);
 | 
					 | 
				
			||||||
    solver->u       = allocate(64, bytesize);
 | 
					 | 
				
			||||||
    solver->v       = allocate(64, bytesize);
 | 
					 | 
				
			||||||
    solver->p       = allocate(64, bytesize);
 | 
					 | 
				
			||||||
    solver->rhs     = allocate(64, bytesize);
 | 
					 | 
				
			||||||
    solver->f       = allocate(64, bytesize);
 | 
					 | 
				
			||||||
    solver->g       = allocate(64, bytesize);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    for (int i = 0; i < (imaxLocal + 2) * (jmaxLocal + 2); i++) {
 | 
					 | 
				
			||||||
        solver->u[i]   = params->u_init;
 | 
					 | 
				
			||||||
        solver->v[i]   = params->v_init;
 | 
					 | 
				
			||||||
        solver->p[i]   = params->p_init;
 | 
					 | 
				
			||||||
        solver->rhs[i] = 0.0;
 | 
					 | 
				
			||||||
        solver->f[i]   = 0.0;
 | 
					 | 
				
			||||||
        solver->g[i]   = 0.0;
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    double dx          = solver->dx;
 | 
					 | 
				
			||||||
    double dy          = solver->dy;
 | 
					 | 
				
			||||||
    double inv_sqr_sum = 1.0 / (dx * dx) + 1.0 / (dy * dy);
 | 
					 | 
				
			||||||
    solver->dtBound    = 0.5 * solver->re * 1.0 / inv_sqr_sum;
 | 
					 | 
				
			||||||
#ifdef VERBOSE
 | 
					 | 
				
			||||||
    printConfig(solver);
 | 
					 | 
				
			||||||
#endif
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
void computeRHS(Solver* solver)
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
    int imaxLocal = solver->imaxLocal;
 | 
					 | 
				
			||||||
    int jmaxLocal = solver->jmaxLocal;
 | 
					 | 
				
			||||||
    double idx    = 1.0 / solver->dx;
 | 
					 | 
				
			||||||
    double idy    = 1.0 / solver->dy;
 | 
					 | 
				
			||||||
    double idt    = 1.0 / solver->dt;
 | 
					 | 
				
			||||||
    double* rhs   = solver->rhs;
 | 
					 | 
				
			||||||
    double* f     = solver->f;
 | 
					 | 
				
			||||||
    double* g     = solver->g;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    shift(solver);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    for (int j = 1; j < jmaxLocal + 1; j++) {
 | 
					 | 
				
			||||||
        for (int i = 1; i < imaxLocal + 1; i++) {
 | 
					 | 
				
			||||||
            RHS(i, j) = ((F(i, j) - F(i - 1, j)) * idx + (G(i, j) - G(i, j - 1)) * idy) *
 | 
					 | 
				
			||||||
                        idt;
 | 
					 | 
				
			||||||
        }
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
int solve(Solver* solver)
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
    int imax      = solver->imax;
 | 
					 | 
				
			||||||
    int jmax      = solver->jmax;
 | 
					 | 
				
			||||||
    int imaxLocal = solver->imaxLocal;
 | 
					 | 
				
			||||||
    int jmaxLocal = solver->jmaxLocal;
 | 
					 | 
				
			||||||
    double eps    = solver->eps;
 | 
					 | 
				
			||||||
    int itermax   = solver->itermax;
 | 
					 | 
				
			||||||
    double dx2    = solver->dx * solver->dx;
 | 
					 | 
				
			||||||
    double dy2    = solver->dy * solver->dy;
 | 
					 | 
				
			||||||
    double idx2   = 1.0 / dx2;
 | 
					 | 
				
			||||||
    double idy2   = 1.0 / dy2;
 | 
					 | 
				
			||||||
    // identical to 1/((2/dx2)+(2/dy2))
 | 
					 | 
				
			||||||
    double factor = solver->omega * 0.5 * (dx2 * dy2) / (dx2 + dy2);
 | 
					 | 
				
			||||||
    double* p     = solver->p;
 | 
					 | 
				
			||||||
    double* rhs   = solver->rhs;
 | 
					 | 
				
			||||||
    double epssq  = eps * eps;
 | 
					 | 
				
			||||||
    int it        = 0;
 | 
					 | 
				
			||||||
    double res    = 1.0;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    while ((res >= epssq) && (it < itermax)) {
 | 
					 | 
				
			||||||
        res = 0.0;
 | 
					 | 
				
			||||||
        exchange(solver, p);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        for (int j = 1; j < jmaxLocal + 1; j++) {
 | 
					 | 
				
			||||||
            for (int i = 1; i < imaxLocal + 1; i++) {
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
                double r = RHS(i, j) -
 | 
					 | 
				
			||||||
                           ((P(i + 1, j) - 2.0 * P(i, j) + P(i - 1, j)) * idx2 +
 | 
					 | 
				
			||||||
                               (P(i, j + 1) - 2.0 * P(i, j) + P(i, j - 1)) * idy2);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
                P(i, j) -= (factor * r);
 | 
					 | 
				
			||||||
                res += (r * r);
 | 
					 | 
				
			||||||
            }
 | 
					 | 
				
			||||||
        }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        if (solver->coords[JDIM] == 0) { // set bottom bc
 | 
					 | 
				
			||||||
            for (int i = 1; i < imaxLocal + 1; i++) {
 | 
					 | 
				
			||||||
                P(i, 0) = P(i, 1);
 | 
					 | 
				
			||||||
            }
 | 
					 | 
				
			||||||
        }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        if (solver->coords[JDIM] == (solver->dims[JDIM] - 1)) { // set top bc
 | 
					 | 
				
			||||||
            for (int i = 1; i < imaxLocal + 1; i++) {
 | 
					 | 
				
			||||||
                P(i, jmaxLocal + 1) = P(i, jmaxLocal);
 | 
					 | 
				
			||||||
            }
 | 
					 | 
				
			||||||
        }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        if (solver->coords[IDIM] == 0) { // set left bc
 | 
					 | 
				
			||||||
            for (int j = 1; j < jmaxLocal + 1; j++) {
 | 
					 | 
				
			||||||
                P(0, j) = P(1, j);
 | 
					 | 
				
			||||||
            }
 | 
					 | 
				
			||||||
        }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        if (solver->coords[IDIM] == (solver->dims[IDIM] - 1)) { // set right bc
 | 
					 | 
				
			||||||
            for (int j = 1; j < jmaxLocal + 1; j++) {
 | 
					 | 
				
			||||||
                P(imaxLocal + 1, j) = P(imaxLocal, j);
 | 
					 | 
				
			||||||
            }
 | 
					 | 
				
			||||||
        }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        MPI_Allreduce(MPI_IN_PLACE, &res, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
 | 
					 | 
				
			||||||
        res = res / (double)(imax * jmax);
 | 
					 | 
				
			||||||
#ifdef DEBUG
 | 
					 | 
				
			||||||
        if (solver->rank == 0) {
 | 
					 | 
				
			||||||
            printf("%d Residuum: %e\n", it, res);
 | 
					 | 
				
			||||||
        }
 | 
					 | 
				
			||||||
#endif
 | 
					 | 
				
			||||||
        it++;
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
#ifdef VERBOSE
 | 
					 | 
				
			||||||
    if (solver->rank == 0) {
 | 
					 | 
				
			||||||
        printf("Solver took %d iterations to reach %f\n", it, sqrt(res));
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
#endif
 | 
					 | 
				
			||||||
    if (res < eps) {
 | 
					 | 
				
			||||||
        return 0;
 | 
					 | 
				
			||||||
    } else {
 | 
					 | 
				
			||||||
        return 1;
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
static double maxElement(Solver* solver, double* m)
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
    int size      = (solver->imaxLocal + 2) * (solver->jmaxLocal + 2);
 | 
					 | 
				
			||||||
    double maxval = DBL_MIN;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    for (int i = 0; i < size; i++) {
 | 
					 | 
				
			||||||
        maxval = MAX(maxval, fabs(m[i]));
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    MPI_Allreduce(MPI_IN_PLACE, &maxval, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD);
 | 
					 | 
				
			||||||
    return maxval;
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
void computeTimestep(Solver* solver)
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
    double dt   = solver->dtBound;
 | 
					 | 
				
			||||||
    double dx   = solver->dx;
 | 
					 | 
				
			||||||
    double dy   = solver->dy;
 | 
					 | 
				
			||||||
    double umax = maxElement(solver, solver->u);
 | 
					 | 
				
			||||||
    double vmax = maxElement(solver, solver->v);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    if (umax > 0) {
 | 
					 | 
				
			||||||
        dt = (dt > dx / umax) ? dx / umax : dt;
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
    if (vmax > 0) {
 | 
					 | 
				
			||||||
        dt = (dt > dy / vmax) ? dy / vmax : dt;
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    solver->dt = dt * solver->tau;
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
void setBoundaryConditions(Solver* solver)
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
    int imaxLocal = solver->imaxLocal;
 | 
					 | 
				
			||||||
    int jmaxLocal = solver->jmaxLocal;
 | 
					 | 
				
			||||||
    double* u     = solver->u;
 | 
					 | 
				
			||||||
    double* v     = solver->v;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    // Northern boundary
 | 
					 | 
				
			||||||
    if (solver->coords[JDIM] == (solver->dims[JDIM] - 1)) { // set top bc
 | 
					 | 
				
			||||||
        switch (solver->bcTop) {
 | 
					 | 
				
			||||||
        case NOSLIP:
 | 
					 | 
				
			||||||
            for (int i = 1; i < imaxLocal + 1; i++) {
 | 
					 | 
				
			||||||
                V(i, jmaxLocal)     = 0.0;
 | 
					 | 
				
			||||||
                U(i, jmaxLocal + 1) = -U(i, jmaxLocal);
 | 
					 | 
				
			||||||
            }
 | 
					 | 
				
			||||||
            break;
 | 
					 | 
				
			||||||
        case SLIP:
 | 
					 | 
				
			||||||
            for (int i = 1; i < imaxLocal + 1; i++) {
 | 
					 | 
				
			||||||
                V(i, jmaxLocal)     = 0.0;
 | 
					 | 
				
			||||||
                U(i, jmaxLocal + 1) = U(i, jmaxLocal);
 | 
					 | 
				
			||||||
            }
 | 
					 | 
				
			||||||
            break;
 | 
					 | 
				
			||||||
        case OUTFLOW:
 | 
					 | 
				
			||||||
            for (int i = 1; i < imaxLocal + 1; i++) {
 | 
					 | 
				
			||||||
                U(i, jmaxLocal + 1) = U(i, jmaxLocal);
 | 
					 | 
				
			||||||
                V(i, jmaxLocal)     = V(i, jmaxLocal - 1);
 | 
					 | 
				
			||||||
            }
 | 
					 | 
				
			||||||
            break;
 | 
					 | 
				
			||||||
        case PERIODIC:
 | 
					 | 
				
			||||||
            break;
 | 
					 | 
				
			||||||
        }
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    // Southern boundary
 | 
					 | 
				
			||||||
    if (solver->coords[JDIM] == 0) { // set bottom bc
 | 
					 | 
				
			||||||
        switch (solver->bcBottom) {
 | 
					 | 
				
			||||||
        case NOSLIP:
 | 
					 | 
				
			||||||
            for (int i = 1; i < imaxLocal + 1; i++) {
 | 
					 | 
				
			||||||
                V(i, 0) = 0.0;
 | 
					 | 
				
			||||||
                U(i, 0) = -U(i, 1);
 | 
					 | 
				
			||||||
            }
 | 
					 | 
				
			||||||
            break;
 | 
					 | 
				
			||||||
        case SLIP:
 | 
					 | 
				
			||||||
            for (int i = 1; i < imaxLocal + 1; i++) {
 | 
					 | 
				
			||||||
                V(i, 0) = 0.0;
 | 
					 | 
				
			||||||
                U(i, 0) = U(i, 1);
 | 
					 | 
				
			||||||
            }
 | 
					 | 
				
			||||||
            break;
 | 
					 | 
				
			||||||
        case OUTFLOW:
 | 
					 | 
				
			||||||
            for (int i = 1; i < imaxLocal + 1; i++) {
 | 
					 | 
				
			||||||
                U(i, 0) = U(i, 1);
 | 
					 | 
				
			||||||
                V(i, 0) = V(i, 1);
 | 
					 | 
				
			||||||
            }
 | 
					 | 
				
			||||||
            break;
 | 
					 | 
				
			||||||
        case PERIODIC:
 | 
					 | 
				
			||||||
            break;
 | 
					 | 
				
			||||||
        }
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    // Eastern boundary
 | 
					 | 
				
			||||||
    if (solver->coords[IDIM] == (solver->dims[IDIM] - 1)) { // set right bc
 | 
					 | 
				
			||||||
        switch (solver->bcRight) {
 | 
					 | 
				
			||||||
        case NOSLIP:
 | 
					 | 
				
			||||||
            for (int j = 1; j < jmaxLocal + 1; j++) {
 | 
					 | 
				
			||||||
                U(imaxLocal, j)     = 0.0;
 | 
					 | 
				
			||||||
                V(imaxLocal + 1, j) = -V(imaxLocal, j);
 | 
					 | 
				
			||||||
            }
 | 
					 | 
				
			||||||
            break;
 | 
					 | 
				
			||||||
        case SLIP:
 | 
					 | 
				
			||||||
            for (int j = 1; j < jmaxLocal + 1; j++) {
 | 
					 | 
				
			||||||
                U(imaxLocal, j)     = 0.0;
 | 
					 | 
				
			||||||
                V(imaxLocal + 1, j) = V(imaxLocal, j);
 | 
					 | 
				
			||||||
            }
 | 
					 | 
				
			||||||
            break;
 | 
					 | 
				
			||||||
        case OUTFLOW:
 | 
					 | 
				
			||||||
            for (int j = 1; j < jmaxLocal + 1; j++) {
 | 
					 | 
				
			||||||
                U(imaxLocal, j)     = U(imaxLocal - 1, j);
 | 
					 | 
				
			||||||
                V(imaxLocal + 1, j) = V(imaxLocal, j);
 | 
					 | 
				
			||||||
            }
 | 
					 | 
				
			||||||
            break;
 | 
					 | 
				
			||||||
        case PERIODIC:
 | 
					 | 
				
			||||||
            break;
 | 
					 | 
				
			||||||
        }
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    // Western boundary
 | 
					 | 
				
			||||||
    if (solver->coords[IDIM] == 0) { // set left bc
 | 
					 | 
				
			||||||
        switch (solver->bcLeft) {
 | 
					 | 
				
			||||||
        case NOSLIP:
 | 
					 | 
				
			||||||
            for (int j = 1; j < jmaxLocal + 1; j++) {
 | 
					 | 
				
			||||||
                U(0, j) = 0.0;
 | 
					 | 
				
			||||||
                V(0, j) = -V(1, j);
 | 
					 | 
				
			||||||
            }
 | 
					 | 
				
			||||||
            break;
 | 
					 | 
				
			||||||
        case SLIP:
 | 
					 | 
				
			||||||
            for (int j = 1; j < jmaxLocal + 1; j++) {
 | 
					 | 
				
			||||||
                U(0, j) = 0.0;
 | 
					 | 
				
			||||||
                V(0, j) = V(1, j);
 | 
					 | 
				
			||||||
            }
 | 
					 | 
				
			||||||
            break;
 | 
					 | 
				
			||||||
        case OUTFLOW:
 | 
					 | 
				
			||||||
            for (int j = 1; j < jmaxLocal + 1; j++) {
 | 
					 | 
				
			||||||
                U(0, j) = U(1, j);
 | 
					 | 
				
			||||||
                V(0, j) = V(1, j);
 | 
					 | 
				
			||||||
            }
 | 
					 | 
				
			||||||
            break;
 | 
					 | 
				
			||||||
        case PERIODIC:
 | 
					 | 
				
			||||||
            break;
 | 
					 | 
				
			||||||
        }
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
void setSpecialBoundaryCondition(Solver* solver)
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
    int imaxLocal = solver->imaxLocal;
 | 
					 | 
				
			||||||
    int jmaxLocal = solver->jmaxLocal;
 | 
					 | 
				
			||||||
    double* u     = solver->u;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    if (strcmp(solver->problem, "dcavity") == 0) {
 | 
					 | 
				
			||||||
        if (solver->coords[JDIM] == (solver->dims[JDIM] - 1)) { // set top bc
 | 
					 | 
				
			||||||
            for (int i = 1; i < imaxLocal + 1; i++) {
 | 
					 | 
				
			||||||
                U(i, jmaxLocal + 1) = 2.0 - U(i, jmaxLocal);
 | 
					 | 
				
			||||||
            }
 | 
					 | 
				
			||||||
        }
 | 
					 | 
				
			||||||
    } else if (strcmp(solver->problem, "canal") == 0) {
 | 
					 | 
				
			||||||
        if (solver->coords[IDIM] == 0) { // set left bc
 | 
					 | 
				
			||||||
            double ylength = solver->ylength;
 | 
					 | 
				
			||||||
            double dy      = solver->dy;
 | 
					 | 
				
			||||||
            int rest       = solver->jmax % solver->size;
 | 
					 | 
				
			||||||
            int yc         = solver->rank * (solver->jmax / solver->size) +
 | 
					 | 
				
			||||||
                     MIN(rest, solver->rank);
 | 
					 | 
				
			||||||
            double ys = dy * (yc + 0.5);
 | 
					 | 
				
			||||||
            double y;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
            /* printf("RANK %d yc: %d ys: %f\n", solver->rank, yc, ys); */
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
            for (int j = 1; j < jmaxLocal + 1; j++) {
 | 
					 | 
				
			||||||
                y       = ys + dy * (j - 0.5);
 | 
					 | 
				
			||||||
                U(0, j) = y * (ylength - y) * 4.0 / (ylength * ylength);
 | 
					 | 
				
			||||||
            }
 | 
					 | 
				
			||||||
        }
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
    /* print(solver, solver->u); */
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
void computeFG(Solver* solver)
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
    double* u        = solver->u;
 | 
					 | 
				
			||||||
    double* v        = solver->v;
 | 
					 | 
				
			||||||
    double* f        = solver->f;
 | 
					 | 
				
			||||||
    double* g        = solver->g;
 | 
					 | 
				
			||||||
    int imaxLocal    = solver->imaxLocal;
 | 
					 | 
				
			||||||
    int jmaxLocal    = solver->jmaxLocal;
 | 
					 | 
				
			||||||
    double gx        = solver->gx;
 | 
					 | 
				
			||||||
    double gy        = solver->gy;
 | 
					 | 
				
			||||||
    double gamma     = solver->gamma;
 | 
					 | 
				
			||||||
    double dt        = solver->dt;
 | 
					 | 
				
			||||||
    double inverseRe = 1.0 / solver->re;
 | 
					 | 
				
			||||||
    double inverseDx = 1.0 / solver->dx;
 | 
					 | 
				
			||||||
    double inverseDy = 1.0 / solver->dy;
 | 
					 | 
				
			||||||
    double du2dx, dv2dy, duvdx, duvdy;
 | 
					 | 
				
			||||||
    double du2dx2, du2dy2, dv2dx2, dv2dy2;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    exchange(solver, u);
 | 
					 | 
				
			||||||
    exchange(solver, v);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    for (int j = 1; j < jmaxLocal + 1; j++) {
 | 
					 | 
				
			||||||
        for (int i = 1; i < imaxLocal + 1; i++) {
 | 
					 | 
				
			||||||
            du2dx = inverseDx * 0.25 *
 | 
					 | 
				
			||||||
                        ((U(i, j) + U(i + 1, j)) * (U(i, j) + U(i + 1, j)) -
 | 
					 | 
				
			||||||
                            (U(i, j) + U(i - 1, j)) * (U(i, j) + U(i - 1, j))) +
 | 
					 | 
				
			||||||
                    gamma * inverseDx * 0.25 *
 | 
					 | 
				
			||||||
                        (fabs(U(i, j) + U(i + 1, j)) * (U(i, j) - U(i + 1, j)) +
 | 
					 | 
				
			||||||
                            fabs(U(i, j) + U(i - 1, j)) * (U(i, j) - U(i - 1, j)));
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
            duvdy = inverseDy * 0.25 *
 | 
					 | 
				
			||||||
                        ((V(i, j) + V(i + 1, j)) * (U(i, j) + U(i, j + 1)) -
 | 
					 | 
				
			||||||
                            (V(i, j - 1) + V(i + 1, j - 1)) * (U(i, j) + U(i, j - 1))) +
 | 
					 | 
				
			||||||
                    gamma * inverseDy * 0.25 *
 | 
					 | 
				
			||||||
                        (fabs(V(i, j) + V(i + 1, j)) * (U(i, j) - U(i, j + 1)) +
 | 
					 | 
				
			||||||
                            fabs(V(i, j - 1) + V(i + 1, j - 1)) *
 | 
					 | 
				
			||||||
                                (U(i, j) - U(i, j - 1)));
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
            du2dx2  = inverseDx * inverseDx * (U(i + 1, j) - 2.0 * U(i, j) + U(i - 1, j));
 | 
					 | 
				
			||||||
            du2dy2  = inverseDy * inverseDy * (U(i, j + 1) - 2.0 * U(i, j) + U(i, j - 1));
 | 
					 | 
				
			||||||
            F(i, j) = U(i, j) + dt * (inverseRe * (du2dx2 + du2dy2) - du2dx - duvdy + gx);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
            duvdx = inverseDx * 0.25 *
 | 
					 | 
				
			||||||
                        ((U(i, j) + U(i, j + 1)) * (V(i, j) + V(i + 1, j)) -
 | 
					 | 
				
			||||||
                            (U(i - 1, j) + U(i - 1, j + 1)) * (V(i, j) + V(i - 1, j))) +
 | 
					 | 
				
			||||||
                    gamma * inverseDx * 0.25 *
 | 
					 | 
				
			||||||
                        (fabs(U(i, j) + U(i, j + 1)) * (V(i, j) - V(i + 1, j)) +
 | 
					 | 
				
			||||||
                            fabs(U(i - 1, j) + U(i - 1, j + 1)) *
 | 
					 | 
				
			||||||
                                (V(i, j) - V(i - 1, j)));
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
            dv2dy = inverseDy * 0.25 *
 | 
					 | 
				
			||||||
                        ((V(i, j) + V(i, j + 1)) * (V(i, j) + V(i, j + 1)) -
 | 
					 | 
				
			||||||
                            (V(i, j) + V(i, j - 1)) * (V(i, j) + V(i, j - 1))) +
 | 
					 | 
				
			||||||
                    gamma * inverseDy * 0.25 *
 | 
					 | 
				
			||||||
                        (fabs(V(i, j) + V(i, j + 1)) * (V(i, j) - V(i, j + 1)) +
 | 
					 | 
				
			||||||
                            fabs(V(i, j) + V(i, j - 1)) * (V(i, j) - V(i, j - 1)));
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
            dv2dx2  = inverseDx * inverseDx * (V(i + 1, j) - 2.0 * V(i, j) + V(i - 1, j));
 | 
					 | 
				
			||||||
            dv2dy2  = inverseDy * inverseDy * (V(i, j + 1) - 2.0 * V(i, j) + V(i, j - 1));
 | 
					 | 
				
			||||||
            G(i, j) = V(i, j) + dt * (inverseRe * (dv2dx2 + dv2dy2) - duvdx - dv2dy + gy);
 | 
					 | 
				
			||||||
        }
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    /* ----------------------------- boundary of F --------------------------- */
 | 
					 | 
				
			||||||
    if (solver->coords[IDIM] == 0) { // set left bc
 | 
					 | 
				
			||||||
        for (int j = 1; j < jmaxLocal + 1; j++) {
 | 
					 | 
				
			||||||
            F(0, j) = U(0, j);
 | 
					 | 
				
			||||||
        }
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    if (solver->coords[IDIM] == (solver->dims[IDIM] - 1)) { // set right bc
 | 
					 | 
				
			||||||
        for (int j = 1; j < jmaxLocal + 1; j++) {
 | 
					 | 
				
			||||||
            F(imaxLocal, j) = U(imaxLocal, j);
 | 
					 | 
				
			||||||
        }
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    /* ----------------------------- boundary of G --------------------------- */
 | 
					 | 
				
			||||||
    if (solver->coords[JDIM] == 0) { // set bottom bc
 | 
					 | 
				
			||||||
        for (int i = 1; i < imaxLocal + 1; i++) {
 | 
					 | 
				
			||||||
            G(i, 0) = V(i, 0);
 | 
					 | 
				
			||||||
        }
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    if (solver->coords[JDIM] == (solver->dims[JDIM] - 1)) { // set top bc
 | 
					 | 
				
			||||||
        for (int i = 1; i < imaxLocal + 1; i++) {
 | 
					 | 
				
			||||||
            G(i, jmaxLocal) = V(i, jmaxLocal);
 | 
					 | 
				
			||||||
        }
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
void adaptUV(Solver* solver)
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
    int imaxLocal  = solver->imaxLocal;
 | 
					 | 
				
			||||||
    int jmaxLocal  = solver->jmaxLocal;
 | 
					 | 
				
			||||||
    double* p      = solver->p;
 | 
					 | 
				
			||||||
    double* u      = solver->u;
 | 
					 | 
				
			||||||
    double* v      = solver->v;
 | 
					 | 
				
			||||||
    double* f      = solver->f;
 | 
					 | 
				
			||||||
    double* g      = solver->g;
 | 
					 | 
				
			||||||
    double factorX = solver->dt / solver->dx;
 | 
					 | 
				
			||||||
    double factorY = solver->dt / solver->dy;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    for (int j = 1; j < jmaxLocal + 1; j++) {
 | 
					 | 
				
			||||||
        for (int i = 1; i < imaxLocal + 1; i++) {
 | 
					 | 
				
			||||||
            U(i, j) = F(i, j) - (P(i + 1, j) - P(i, j)) * factorX;
 | 
					 | 
				
			||||||
            V(i, j) = G(i, j) - (P(i, j + 1) - P(i, j)) * factorY;
 | 
					 | 
				
			||||||
        }
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
void writeResult(Solver* solver, double* p, double* u, double* v)
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
    int imax  = solver->imax;
 | 
					 | 
				
			||||||
    int jmax  = solver->jmax;
 | 
					 | 
				
			||||||
    double dx = solver->dx;
 | 
					 | 
				
			||||||
    double dy = solver->dy;
 | 
					 | 
				
			||||||
    double x = 0.0, y = 0.0;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    FILE* fp;
 | 
					 | 
				
			||||||
    fp = fopen("pressure.dat", "w");
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    if (fp == NULL) {
 | 
					 | 
				
			||||||
        printf("Error!\n");
 | 
					 | 
				
			||||||
        exit(EXIT_FAILURE);
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    for (int j = 1; j < jmax; j++) {
 | 
					 | 
				
			||||||
        y = (double)(j - 0.5) * dy;
 | 
					 | 
				
			||||||
        for (int i = 1; i < imax; i++) {
 | 
					 | 
				
			||||||
            x = (double)(i - 0.5) * dx;
 | 
					 | 
				
			||||||
            fprintf(fp, "%.2f %.2f %f\n", x, y, p[j * (imax) + i]);
 | 
					 | 
				
			||||||
        }
 | 
					 | 
				
			||||||
        fprintf(fp, "\n");
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    fclose(fp);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    fp = fopen("velocity.dat", "w");
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    if (fp == NULL) {
 | 
					 | 
				
			||||||
        printf("Error!\n");
 | 
					 | 
				
			||||||
        exit(EXIT_FAILURE);
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    for (int j = 1; j < jmax; j++) {
 | 
					 | 
				
			||||||
        y = dy * (j - 0.5);
 | 
					 | 
				
			||||||
        for (int i = 1; i < imax; i++) {
 | 
					 | 
				
			||||||
            x            = dx * (i - 0.5);
 | 
					 | 
				
			||||||
            double vel_u = (u[j * (imax) + i] + u[j * (imax) + (i - 1)]) / 2.0;
 | 
					 | 
				
			||||||
            double vel_v = (v[j * (imax) + i] + v[(j - 1) * (imax) + i]) / 2.0;
 | 
					 | 
				
			||||||
            double len   = sqrt((vel_u * vel_u) + (vel_v * vel_v));
 | 
					 | 
				
			||||||
            fprintf(fp, "%.2f %.2f %f %f %f\n", x, y, vel_u, vel_v, len);
 | 
					 | 
				
			||||||
        }
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    fclose(fp);
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
@@ -1,58 +0,0 @@
 | 
				
			|||||||
/*
 | 
					 | 
				
			||||||
 * Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
 | 
					 | 
				
			||||||
 * All rights reserved. This file is part of nusif-solver.
 | 
					 | 
				
			||||||
 * Use of this source code is governed by a MIT style
 | 
					 | 
				
			||||||
 * license that can be found in the LICENSE file.
 | 
					 | 
				
			||||||
 */
 | 
					 | 
				
			||||||
#ifndef __SOLVER_H_
 | 
					 | 
				
			||||||
#define __SOLVER_H_
 | 
					 | 
				
			||||||
#include "parameter.h"
 | 
					 | 
				
			||||||
#include <mpi.h>
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
#define NDIMS 2
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
enum BC { NOSLIP = 1, SLIP, OUTFLOW, PERIODIC };
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
typedef struct {
 | 
					 | 
				
			||||||
    /* geometry and grid information */
 | 
					 | 
				
			||||||
    double dx, dy;
 | 
					 | 
				
			||||||
    int imax, jmax;
 | 
					 | 
				
			||||||
    double xlength, ylength;
 | 
					 | 
				
			||||||
    /* arrays */
 | 
					 | 
				
			||||||
    double *p, *rhs;
 | 
					 | 
				
			||||||
    double *f, *g;
 | 
					 | 
				
			||||||
    double *u, *v;
 | 
					 | 
				
			||||||
    /* parameters */
 | 
					 | 
				
			||||||
    double eps, omega;
 | 
					 | 
				
			||||||
    double re, tau, gamma;
 | 
					 | 
				
			||||||
    double gx, gy;
 | 
					 | 
				
			||||||
    /* time stepping */
 | 
					 | 
				
			||||||
    int itermax;
 | 
					 | 
				
			||||||
    double dt, te;
 | 
					 | 
				
			||||||
    double dtBound;
 | 
					 | 
				
			||||||
    char* problem;
 | 
					 | 
				
			||||||
    int bcLeft, bcRight, bcBottom, bcTop;
 | 
					 | 
				
			||||||
    /* mpi */
 | 
					 | 
				
			||||||
    int rank;
 | 
					 | 
				
			||||||
    int size;
 | 
					 | 
				
			||||||
    MPI_Comm comm;
 | 
					 | 
				
			||||||
    MPI_Datatype bufferTypes[NDIMS * 2];
 | 
					 | 
				
			||||||
    MPI_Aint sdispls[NDIMS * 2], rdispls[NDIMS * 2];
 | 
					 | 
				
			||||||
    int iNeighbours[NDIMS], jNeighbours[NDIMS];
 | 
					 | 
				
			||||||
    int coords[NDIMS], dims[NDIMS];
 | 
					 | 
				
			||||||
    int imaxLocal, jmaxLocal;
 | 
					 | 
				
			||||||
} Solver;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
void initSolver(Solver*, Parameter*);
 | 
					 | 
				
			||||||
void computeRHS(Solver*);
 | 
					 | 
				
			||||||
int solve(Solver*);
 | 
					 | 
				
			||||||
void computeTimestep(Solver*);
 | 
					 | 
				
			||||||
void setBoundaryConditions(Solver*);
 | 
					 | 
				
			||||||
void setSpecialBoundaryCondition(Solver*);
 | 
					 | 
				
			||||||
void computeFG(Solver*);
 | 
					 | 
				
			||||||
void adaptUV(Solver*);
 | 
					 | 
				
			||||||
void collectResult(Solver*);
 | 
					 | 
				
			||||||
void writeResult(Solver*, double*, double*, double*);
 | 
					 | 
				
			||||||
void debugExchange(Solver*);
 | 
					 | 
				
			||||||
void print(Solver*, double*);
 | 
					 | 
				
			||||||
#endif
 | 
					 | 
				
			||||||
@@ -1,24 +0,0 @@
 | 
				
			|||||||
/*
 | 
					 | 
				
			||||||
 * Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
 | 
					 | 
				
			||||||
 * All rights reserved.
 | 
					 | 
				
			||||||
 * Use of this source code is governed by a MIT-style
 | 
					 | 
				
			||||||
 * license that can be found in the LICENSE file.
 | 
					 | 
				
			||||||
 */
 | 
					 | 
				
			||||||
#include <stdlib.h>
 | 
					 | 
				
			||||||
#include <time.h>
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
double getTimeStamp()
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
    struct timespec ts;
 | 
					 | 
				
			||||||
    clock_gettime(CLOCK_MONOTONIC, &ts);
 | 
					 | 
				
			||||||
    return (double)ts.tv_sec + (double)ts.tv_nsec * 1.e-9;
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
double getTimeResolution()
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
    struct timespec ts;
 | 
					 | 
				
			||||||
    clock_getres(CLOCK_MONOTONIC, &ts);
 | 
					 | 
				
			||||||
    return (double)ts.tv_sec + (double)ts.tv_nsec * 1.e-9;
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
double getTimeStamp_() { return getTimeStamp(); }
 | 
					 | 
				
			||||||
@@ -1,14 +0,0 @@
 | 
				
			|||||||
/*
 | 
					 | 
				
			||||||
 * Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
 | 
					 | 
				
			||||||
 * All rights reserved.
 | 
					 | 
				
			||||||
 * Use of this source code is governed by a MIT-style
 | 
					 | 
				
			||||||
 * license that can be found in the LICENSE file.
 | 
					 | 
				
			||||||
 */
 | 
					 | 
				
			||||||
#ifndef __TIMING_H_
 | 
					 | 
				
			||||||
#define __TIMING_H_
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
extern double getTimeStamp();
 | 
					 | 
				
			||||||
extern double getTimeResolution();
 | 
					 | 
				
			||||||
extern double getTimeStamp_();
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
#endif // __TIMING_H_
 | 
					 | 
				
			||||||
@@ -1,22 +0,0 @@
 | 
				
			|||||||
/*
 | 
					 | 
				
			||||||
 * Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
 | 
					 | 
				
			||||||
 * All rights reserved.
 | 
					 | 
				
			||||||
 * Use of this source code is governed by a MIT-style
 | 
					 | 
				
			||||||
 * license that can be found in the LICENSE file.
 | 
					 | 
				
			||||||
 */
 | 
					 | 
				
			||||||
#ifndef __UTIL_H_
 | 
					 | 
				
			||||||
#define __UTIL_H_
 | 
					 | 
				
			||||||
#define HLINE                                                                            \
 | 
					 | 
				
			||||||
    "----------------------------------------------------------------------------\n"
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
#ifndef MIN
 | 
					 | 
				
			||||||
#define MIN(x, y) ((x) < (y) ? (x) : (y))
 | 
					 | 
				
			||||||
#endif
 | 
					 | 
				
			||||||
#ifndef MAX
 | 
					 | 
				
			||||||
#define MAX(x, y) ((x) > (y) ? (x) : (y))
 | 
					 | 
				
			||||||
#endif
 | 
					 | 
				
			||||||
#ifndef ABS
 | 
					 | 
				
			||||||
#define ABS(a) ((a) >= 0 ? (a) : -(a))
 | 
					 | 
				
			||||||
#endif
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
#endif // __UTIL_H_
 | 
					 | 
				
			||||||
@@ -1,7 +0,0 @@
 | 
				
			|||||||
set terminal png size 1024,768 enhanced font ,12
 | 
					 | 
				
			||||||
set output 'p.png'
 | 
					 | 
				
			||||||
set datafile separator whitespace
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
set grid
 | 
					 | 
				
			||||||
set hidden3d
 | 
					 | 
				
			||||||
splot 'pressure.dat' using 1:2:3 with lines
 | 
					 | 
				
			||||||
@@ -1,5 +0,0 @@
 | 
				
			|||||||
set terminal png size 1800,768 enhanced font ,12
 | 
					 | 
				
			||||||
set output 'velocity.png'
 | 
					 | 
				
			||||||
set datafile separator whitespace
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
plot 'velocity.dat' using 1:2:3:4:5 with vectors filled head size 0.01,20,60 lc palette
 | 
					 | 
				
			||||||
@@ -1,5 +1,5 @@
 | 
				
			|||||||
#=======================================================================================
 | 
					#=======================================================================================
 | 
				
			||||||
# Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
 | 
					# Copyright (C)  NHR@FAU, University Erlangen-Nuremberg.
 | 
				
			||||||
# All rights reserved.
 | 
					# All rights reserved.
 | 
				
			||||||
# Use of this source code is governed by a MIT-style
 | 
					# Use of this source code is governed by a MIT-style
 | 
				
			||||||
# license that can be found in the LICENSE file.
 | 
					# license that can be found in the LICENSE file.
 | 
				
			||||||
@@ -18,9 +18,10 @@ include $(MAKE_DIR)/include_$(TAG).mk
 | 
				
			|||||||
INCLUDES  += -I$(SRC_DIR) -I$(BUILD_DIR)
 | 
					INCLUDES  += -I$(SRC_DIR) -I$(BUILD_DIR)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
VPATH     = $(SRC_DIR)
 | 
					VPATH     = $(SRC_DIR)
 | 
				
			||||||
SRC       = $(wildcard $(SRC_DIR)/*.c)
 | 
					SRC       = $(filter-out $(wildcard $(SRC_DIR)/*-*.c),$(wildcard $(SRC_DIR)/*.c))
 | 
				
			||||||
ASM       = $(patsubst $(SRC_DIR)/%.c, $(BUILD_DIR)/%.s, $(SRC))
 | 
					ASM       = $(patsubst $(SRC_DIR)/%.c, $(BUILD_DIR)/%.s, $(SRC))
 | 
				
			||||||
OBJ       = $(patsubst $(SRC_DIR)/%.c, $(BUILD_DIR)/%.o, $(SRC))
 | 
					OBJ       = $(patsubst $(SRC_DIR)/%.c, $(BUILD_DIR)/%.o, $(SRC))
 | 
				
			||||||
 | 
					OBJ      += $(BUILD_DIR)/comm-$(COMM_TYPE).o
 | 
				
			||||||
SOURCES   = $(SRC) $(wildcard $(SRC_DIR)/*.h)
 | 
					SOURCES   = $(SRC) $(wildcard $(SRC_DIR)/*.h)
 | 
				
			||||||
CPPFLAGS := $(CPPFLAGS) $(DEFINES) $(OPTIONS) $(INCLUDES)
 | 
					CPPFLAGS := $(CPPFLAGS) $(DEFINES) $(OPTIONS) $(INCLUDES)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -1,6 +1,8 @@
 | 
				
			|||||||
# Supported: GCC, CLANG, ICC
 | 
					# Supported: GCC, CLANG, ICC
 | 
				
			||||||
TAG ?= CLANG
 | 
					TAG ?= CLANG
 | 
				
			||||||
 | 
					ENABLE_MPI ?= true
 | 
				
			||||||
ENABLE_OPENMP ?= false
 | 
					ENABLE_OPENMP ?= false
 | 
				
			||||||
 | 
					COMM_TYPE ?= v3
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#Feature options
 | 
					#Feature options
 | 
				
			||||||
OPTIONS +=  -DARRAY_ALIGNMENT=64
 | 
					OPTIONS +=  -DARRAY_ALIGNMENT=64
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -1,4 +1,10 @@
 | 
				
			|||||||
 | 
					ifeq ($(ENABLE_MPI),true)
 | 
				
			||||||
CC   = mpicc
 | 
					CC   = mpicc
 | 
				
			||||||
 | 
					DEFINES  = -D_MPI
 | 
				
			||||||
 | 
					else
 | 
				
			||||||
 | 
					CC   = cc
 | 
				
			||||||
 | 
					endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
GCC  = cc
 | 
					GCC  = cc
 | 
				
			||||||
LINKER = $(CC)
 | 
					LINKER = $(CC)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -9,8 +15,7 @@ LIBS     = # -lomp
 | 
				
			|||||||
endif
 | 
					endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
VERSION  = --version
 | 
					VERSION  = --version
 | 
				
			||||||
CFLAGS   = -Ofast -std=c99 $(OPENMP)
 | 
					CFLAGS   = -Ofast -std=c17
 | 
				
			||||||
#CFLAGS   = -Ofast -fnt-store=aggressive  -std=c99 $(OPENMP) #AMD CLANG
 | 
					LFLAGS   = $(OPENMP) -lm
 | 
				
			||||||
LFLAGS   = $(OPENMP)
 | 
					DEFINES  += -D_GNU_SOURCE# -DDEBUG
 | 
				
			||||||
DEFINES  = -D_GNU_SOURCE# -DDEBUG
 | 
					INCLUDES = -I/opt/homebrew/include
 | 
				
			||||||
INCLUDES = -I/usr/local/include
 | 
					 | 
				
			||||||
 
 | 
				
			|||||||
@@ -1,4 +1,10 @@
 | 
				
			|||||||
 | 
					ifeq ($(ENABLE_MPI),true)
 | 
				
			||||||
 | 
					CC   = mpicc
 | 
				
			||||||
 | 
					DEFINES  = -D_MPI
 | 
				
			||||||
 | 
					else
 | 
				
			||||||
CC   = gcc
 | 
					CC   = gcc
 | 
				
			||||||
 | 
					endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
GCC  = gcc
 | 
					GCC  = gcc
 | 
				
			||||||
LINKER = $(CC)
 | 
					LINKER = $(CC)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -9,6 +15,6 @@ endif
 | 
				
			|||||||
VERSION  = --version
 | 
					VERSION  = --version
 | 
				
			||||||
CFLAGS   = -Ofast -ffreestanding -std=c99 $(OPENMP)
 | 
					CFLAGS   = -Ofast -ffreestanding -std=c99 $(OPENMP)
 | 
				
			||||||
LFLAGS   = $(OPENMP)
 | 
					LFLAGS   = $(OPENMP)
 | 
				
			||||||
DEFINES  = -D_GNU_SOURCE
 | 
					DEFINES  += -D_GNU_SOURCE
 | 
				
			||||||
INCLUDES =
 | 
					INCLUDES =
 | 
				
			||||||
LIBS     =
 | 
					LIBS     =
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -1,4 +1,10 @@
 | 
				
			|||||||
 | 
					ifeq ($(ENABLE_MPI),true)
 | 
				
			||||||
CC   = mpiicc
 | 
					CC   = mpiicc
 | 
				
			||||||
 | 
					DEFINES  = -D_MPI
 | 
				
			||||||
 | 
					else
 | 
				
			||||||
 | 
					CC = icc
 | 
				
			||||||
 | 
					endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
GCC  = gcc
 | 
					GCC  = gcc
 | 
				
			||||||
LINKER = $(CC)
 | 
					LINKER = $(CC)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -9,6 +15,6 @@ endif
 | 
				
			|||||||
VERSION  = --version
 | 
					VERSION  = --version
 | 
				
			||||||
CFLAGS   =  -O3 -xHost -qopt-zmm-usage=high -std=c99 $(OPENMP)
 | 
					CFLAGS   =  -O3 -xHost -qopt-zmm-usage=high -std=c99 $(OPENMP)
 | 
				
			||||||
LFLAGS   = $(OPENMP)
 | 
					LFLAGS   = $(OPENMP)
 | 
				
			||||||
DEFINES  = -D_GNU_SOURCE
 | 
					DEFINES  += -D_GNU_SOURCE# -DDEBUG
 | 
				
			||||||
INCLUDES =
 | 
					INCLUDES =
 | 
				
			||||||
LIBS     =
 | 
					LIBS     =
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -1,5 +1,5 @@
 | 
				
			|||||||
/*
 | 
					/*
 | 
				
			||||||
 * Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
 | 
					 * Copyright (C)  NHR@FAU, University Erlangen-Nuremberg.
 | 
				
			||||||
 * All rights reserved.
 | 
					 * All rights reserved.
 | 
				
			||||||
 * Use of this source code is governed by a MIT-style
 | 
					 * Use of this source code is governed by a MIT-style
 | 
				
			||||||
 * license that can be found in the LICENSE file.
 | 
					 * license that can be found in the LICENSE file.
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -1,5 +1,5 @@
 | 
				
			|||||||
/*
 | 
					/*
 | 
				
			||||||
 * Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
 | 
					 * Copyright (C)  NHR@FAU, University Erlangen-Nuremberg.
 | 
				
			||||||
 * All rights reserved.
 | 
					 * All rights reserved.
 | 
				
			||||||
 * Use of this source code is governed by a MIT-style
 | 
					 * Use of this source code is governed by a MIT-style
 | 
				
			||||||
 * license that can be found in the LICENSE file.
 | 
					 * license that can be found in the LICENSE file.
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -1,14 +1,17 @@
 | 
				
			|||||||
/*
 | 
					/*
 | 
				
			||||||
 * Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
 | 
					 * Copyright (C)  NHR@FAU, University Erlangen-Nuremberg.
 | 
				
			||||||
 * All rights reserved.
 | 
					 * All rights reserved.
 | 
				
			||||||
 * Use of this source code is governed by a MIT-style
 | 
					 * Use of this source code is governed by a MIT-style
 | 
				
			||||||
 * license that can be found in the LICENSE file.
 | 
					 * license that can be found in the LICENSE file.
 | 
				
			||||||
 */
 | 
					 */
 | 
				
			||||||
#include <errno.h>
 | 
					#include <errno.h>
 | 
				
			||||||
 | 
					#include <stddef.h>
 | 
				
			||||||
#include <stdio.h>
 | 
					#include <stdio.h>
 | 
				
			||||||
#include <stdlib.h>
 | 
					#include <stdlib.h>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
void* allocate(int alignment, size_t bytesize)
 | 
					#include "allocate.h"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					void* allocate(size_t alignment, size_t bytesize)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
    int errorCode;
 | 
					    int errorCode;
 | 
				
			||||||
    void* ptr;
 | 
					    void* ptr;
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -1,5 +1,5 @@
 | 
				
			|||||||
/*
 | 
					/*
 | 
				
			||||||
 * Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
 | 
					 * Copyright (C)  NHR@FAU, University Erlangen-Nuremberg.
 | 
				
			||||||
 * All rights reserved.
 | 
					 * All rights reserved.
 | 
				
			||||||
 * Use of this source code is governed by a MIT-style
 | 
					 * Use of this source code is governed by a MIT-style
 | 
				
			||||||
 * license that can be found in the LICENSE file.
 | 
					 * license that can be found in the LICENSE file.
 | 
				
			||||||
@@ -8,6 +8,6 @@
 | 
				
			|||||||
#define __ALLOCATE_H_
 | 
					#define __ALLOCATE_H_
 | 
				
			||||||
#include <stdlib.h>
 | 
					#include <stdlib.h>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
extern void* allocate(int alignment, size_t bytesize);
 | 
					extern void* allocate(size_t alignment, size_t bytesize);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
 
 | 
				
			|||||||
							
								
								
									
										247
									
								
								BasicSolver/2D-mpi/src/comm-v1.c
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										247
									
								
								BasicSolver/2D-mpi/src/comm-v1.c
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,247 @@
 | 
				
			|||||||
 | 
					/*
 | 
				
			||||||
 | 
					 * Copyright (C)  NHR@FAU, University Erlangen-Nuremberg.
 | 
				
			||||||
 | 
					 * All rights reserved. This file is part of nusif-solver.
 | 
				
			||||||
 | 
					 * Use of this source code is governed by a MIT style
 | 
				
			||||||
 | 
					 * license that can be found in the LICENSE file.
 | 
				
			||||||
 | 
					 */
 | 
				
			||||||
 | 
					#include <stdio.h>
 | 
				
			||||||
 | 
					#include <stdlib.h>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#include "comm.h"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#if defined(_MPI)
 | 
				
			||||||
 | 
					// subroutines local to this module
 | 
				
			||||||
 | 
					static int sizeOfRank(int rank, int size, int N)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					    return N / size + ((N % size > rank) ? 1 : 0);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static int sum(int* sizes, int position)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					    int sum = 0;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    for (int i = 0; i < position; i++) {
 | 
				
			||||||
 | 
					        sum += sizes[i];
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    return sum;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static void gatherArray(
 | 
				
			||||||
 | 
					    Comm* c, int cnt, int* rcvCounts, int* displs, double* src, double* dst)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					    double* sendbuffer = src + (c->imaxLocal + 2);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    if (c->rank == 0) {
 | 
				
			||||||
 | 
					        sendbuffer = src;
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    MPI_Gatherv(sendbuffer,
 | 
				
			||||||
 | 
					        cnt,
 | 
				
			||||||
 | 
					        MPI_DOUBLE,
 | 
				
			||||||
 | 
					        dst,
 | 
				
			||||||
 | 
					        rcvCounts,
 | 
				
			||||||
 | 
					        displs,
 | 
				
			||||||
 | 
					        MPI_DOUBLE,
 | 
				
			||||||
 | 
					        0,
 | 
				
			||||||
 | 
					        MPI_COMM_WORLD);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					#endif // defined _MPI
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// exported subroutines
 | 
				
			||||||
 | 
					void commReduction(double* v, int op)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					#if defined(_MPI)
 | 
				
			||||||
 | 
					    if (op == MAX) {
 | 
				
			||||||
 | 
					        MPI_Allreduce(MPI_IN_PLACE, v, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD);
 | 
				
			||||||
 | 
					    } else if (op == SUM) {
 | 
				
			||||||
 | 
					        MPI_Allreduce(MPI_IN_PLACE, v, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					int commIsBoundary(Comm* c, int direction)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					#if defined(_MPI)
 | 
				
			||||||
 | 
					    switch (direction) {
 | 
				
			||||||
 | 
					    case LEFT:
 | 
				
			||||||
 | 
					        return 1;
 | 
				
			||||||
 | 
					        break;
 | 
				
			||||||
 | 
					    case RIGHT:
 | 
				
			||||||
 | 
					        return 1;
 | 
				
			||||||
 | 
					        break;
 | 
				
			||||||
 | 
					    case BOTTOM:
 | 
				
			||||||
 | 
					        return c->rank == 0;
 | 
				
			||||||
 | 
					        break;
 | 
				
			||||||
 | 
					    case TOP:
 | 
				
			||||||
 | 
					        return c->rank == (c->size - 1);
 | 
				
			||||||
 | 
					        break;
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    return 1;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					void commExchange(Comm* c, double* grid)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					#if defined(_MPI)
 | 
				
			||||||
 | 
					    MPI_Request requests[4] = { MPI_REQUEST_NULL,
 | 
				
			||||||
 | 
					        MPI_REQUEST_NULL,
 | 
				
			||||||
 | 
					        MPI_REQUEST_NULL,
 | 
				
			||||||
 | 
					        MPI_REQUEST_NULL };
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    /* exchange ghost cells with top neighbor */
 | 
				
			||||||
 | 
					    if (c->rank + 1 < c->size) {
 | 
				
			||||||
 | 
					        int top     = c->rank + 1;
 | 
				
			||||||
 | 
					        double* src = grid + (c->jmaxLocal) * (c->imaxLocal + 2) + 1;
 | 
				
			||||||
 | 
					        double* dst = grid + (c->jmaxLocal + 1) * (c->imaxLocal + 2) + 1;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        MPI_Isend(src, c->imaxLocal, MPI_DOUBLE, top, 1, MPI_COMM_WORLD, &requests[0]);
 | 
				
			||||||
 | 
					        MPI_Irecv(dst, c->imaxLocal, MPI_DOUBLE, top, 2, MPI_COMM_WORLD, &requests[1]);
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    /* exchange ghost cells with bottom neighbor */
 | 
				
			||||||
 | 
					    if (c->rank > 0) {
 | 
				
			||||||
 | 
					        int bottom  = c->rank - 1;
 | 
				
			||||||
 | 
					        double* src = grid + (c->imaxLocal + 2) + 1;
 | 
				
			||||||
 | 
					        double* dst = grid + 1;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        MPI_Isend(src, c->imaxLocal, MPI_DOUBLE, bottom, 2, MPI_COMM_WORLD, &requests[2]);
 | 
				
			||||||
 | 
					        MPI_Irecv(dst, c->imaxLocal, MPI_DOUBLE, bottom, 1, MPI_COMM_WORLD, &requests[3]);
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    MPI_Waitall(4, requests, MPI_STATUSES_IGNORE);
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					void commShift(Comm* c, double* f, double* g)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					#if defined(_MPI)
 | 
				
			||||||
 | 
					    MPI_Request requests[2] = { MPI_REQUEST_NULL, MPI_REQUEST_NULL };
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    /* shift G */
 | 
				
			||||||
 | 
					    /* receive ghost cells from bottom neighbor */
 | 
				
			||||||
 | 
					    if (c->rank > 0) {
 | 
				
			||||||
 | 
					        int bottom = c->rank - 1;
 | 
				
			||||||
 | 
					        MPI_Irecv(g + 1,
 | 
				
			||||||
 | 
					            c->imaxLocal,
 | 
				
			||||||
 | 
					            MPI_DOUBLE,
 | 
				
			||||||
 | 
					            bottom,
 | 
				
			||||||
 | 
					            0,
 | 
				
			||||||
 | 
					            MPI_COMM_WORLD,
 | 
				
			||||||
 | 
					            &requests[0]);
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    if (c->rank + 1 < c->size) {
 | 
				
			||||||
 | 
					        int top     = c->rank + 1;
 | 
				
			||||||
 | 
					        double* buf = g + (c->jmaxLocal) * (c->imaxLocal + 2) + 1;
 | 
				
			||||||
 | 
					        /* send ghost cells to top neighbor */
 | 
				
			||||||
 | 
					        MPI_Isend(buf, c->imaxLocal, MPI_DOUBLE, top, 0, MPI_COMM_WORLD, &requests[1]);
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    MPI_Waitall(2, requests, MPI_STATUSES_IGNORE);
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// TODO Merge with seq
 | 
				
			||||||
 | 
					void commCollectResult(Comm* c,
 | 
				
			||||||
 | 
					    double* ug,
 | 
				
			||||||
 | 
					    double* vg,
 | 
				
			||||||
 | 
					    double* pg,
 | 
				
			||||||
 | 
					    double* u,
 | 
				
			||||||
 | 
					    double* v,
 | 
				
			||||||
 | 
					    double* p,
 | 
				
			||||||
 | 
					    int jmax,
 | 
				
			||||||
 | 
					    int imax)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					    int *rcvCounts, *displs;
 | 
				
			||||||
 | 
					    int cnt = c->jmaxLocal * (imax + 2);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    if (c->rank == 0) {
 | 
				
			||||||
 | 
					        rcvCounts = (int*)malloc(c->size * sizeof(int));
 | 
				
			||||||
 | 
					        displs    = (int*)malloc(c->size * sizeof(int));
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    if (c->rank == 0 && c->size == 1) {
 | 
				
			||||||
 | 
					        cnt = (c->jmaxLocal + 2) * (imax + 2);
 | 
				
			||||||
 | 
					    } else if (c->rank == 0 || c->rank == (c->size - 1)) {
 | 
				
			||||||
 | 
					        cnt = (c->jmaxLocal + 1) * (imax + 2);
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    MPI_Gather(&cnt, 1, MPI_INTEGER, rcvCounts, 1, MPI_INTEGER, 0, MPI_COMM_WORLD);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    if (c->rank == 0) {
 | 
				
			||||||
 | 
					        displs[0]  = 0;
 | 
				
			||||||
 | 
					        int cursor = rcvCounts[0];
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        for (int i = 1; i < c->size; i++) {
 | 
				
			||||||
 | 
					            displs[i] = cursor;
 | 
				
			||||||
 | 
					            cursor += rcvCounts[i];
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    gatherArray(c, cnt, rcvCounts, displs, p, pg);
 | 
				
			||||||
 | 
					    gatherArray(c, cnt, rcvCounts, displs, u, ug);
 | 
				
			||||||
 | 
					    gatherArray(c, cnt, rcvCounts, displs, v, vg);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					void commPrintConfig(Comm* c)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					#if defined(_MPI)
 | 
				
			||||||
 | 
					    fflush(stdout);
 | 
				
			||||||
 | 
					    MPI_Barrier(MPI_COMM_WORLD);
 | 
				
			||||||
 | 
					    if (commIsMaster(c)) {
 | 
				
			||||||
 | 
					        printf("Communication setup:\n");
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    for (int i = 0; i < c->size; i++) {
 | 
				
			||||||
 | 
					        if (i == c->rank) {
 | 
				
			||||||
 | 
					            printf("\tRank %d of %d\n", c->rank, c->size);
 | 
				
			||||||
 | 
					            printf("\tNeighbours (bottom, top, left, right): %d %d, %d, %d\n",
 | 
				
			||||||
 | 
					                c->neighbours[BOTTOM],
 | 
				
			||||||
 | 
					                c->neighbours[TOP],
 | 
				
			||||||
 | 
					                c->neighbours[LEFT],
 | 
				
			||||||
 | 
					                c->neighbours[RIGHT]);
 | 
				
			||||||
 | 
					            printf("\tCoordinates (j,i) %d %d\n", c->coords[JDIM], c->coords[IDIM]);
 | 
				
			||||||
 | 
					            printf("\tLocal domain size (j,i) %dx%d\n", c->jmaxLocal, c->imaxLocal);
 | 
				
			||||||
 | 
					            fflush(stdout);
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    MPI_Barrier(MPI_COMM_WORLD);
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					void commInit(Comm* c, int argc, char** argv)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					#if defined(_MPI)
 | 
				
			||||||
 | 
					    MPI_Init(&argc, &argv);
 | 
				
			||||||
 | 
					    MPI_Comm_rank(MPI_COMM_WORLD, &(c->rank));
 | 
				
			||||||
 | 
					    MPI_Comm_size(MPI_COMM_WORLD, &(c->size));
 | 
				
			||||||
 | 
					#else
 | 
				
			||||||
 | 
					    c->rank = 0;
 | 
				
			||||||
 | 
					    c->size = 1;
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					void commPartition(Comm* c, int jmax, int imax)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					#if defined(_MPI)
 | 
				
			||||||
 | 
					    c->imaxLocal = imax;
 | 
				
			||||||
 | 
					    c->jmaxLocal = sizeOfRank(c->rank, c->size, jmax);
 | 
				
			||||||
 | 
					#else
 | 
				
			||||||
 | 
					    c->imaxLocal = imax;
 | 
				
			||||||
 | 
					    c->jmaxLocal = jmax;
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					void commFinalize(Comm* c)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					#if defined(_MPI)
 | 
				
			||||||
 | 
					    for (int i = 0; i < NDIRS; i++) {
 | 
				
			||||||
 | 
					        MPI_Type_free(&c->sbufferTypes[i]);
 | 
				
			||||||
 | 
					        MPI_Type_free(&c->rbufferTypes[i]);
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    MPI_Finalize();
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
							
								
								
									
										355
									
								
								BasicSolver/2D-mpi/src/comm-v2.c
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										355
									
								
								BasicSolver/2D-mpi/src/comm-v2.c
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,355 @@
 | 
				
			|||||||
 | 
					/*
 | 
				
			||||||
 | 
					 * Copyright (C)  NHR@FAU, University Erlangen-Nuremberg.
 | 
				
			||||||
 | 
					 * All rights reserved. This file is part of nusif-solver.
 | 
				
			||||||
 | 
					 * Use of this source code is governed by a MIT style
 | 
				
			||||||
 | 
					 * license that can be found in the LICENSE file.
 | 
				
			||||||
 | 
					 */
 | 
				
			||||||
 | 
					#include <stddef.h>
 | 
				
			||||||
 | 
					#include <stdio.h>
 | 
				
			||||||
 | 
					#include <stdlib.h>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#include "comm.h"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#if defined(_MPI)
 | 
				
			||||||
 | 
					// subroutines local to this module
 | 
				
			||||||
 | 
					static int sizeOfRank(int rank, int size, int N)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					    return N / size + ((N % size > rank) ? 1 : 0);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static void setupCommunication(Comm* c, int direction, int layer)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					    MPI_Datatype type;
 | 
				
			||||||
 | 
					    size_t dblsize = sizeof(double);
 | 
				
			||||||
 | 
					    int imaxLocal  = c->imaxLocal;
 | 
				
			||||||
 | 
					    int jmaxLocal  = c->jmaxLocal;
 | 
				
			||||||
 | 
					    int sizes[NDIMS];
 | 
				
			||||||
 | 
					    int subSizes[NDIMS];
 | 
				
			||||||
 | 
					    int starts[NDIMS];
 | 
				
			||||||
 | 
					    int offset = 0;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static void assembleResult(Comm* c,
 | 
				
			||||||
 | 
					    double* src,
 | 
				
			||||||
 | 
					    double* dst,
 | 
				
			||||||
 | 
					    int imaxLocal[],
 | 
				
			||||||
 | 
					    int jmaxLocal[],
 | 
				
			||||||
 | 
					    int offset[],
 | 
				
			||||||
 | 
					    int jmax,
 | 
				
			||||||
 | 
					    int imax)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					    MPI_Request* requests;
 | 
				
			||||||
 | 
					    int numRequests = 1;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    if (c->rank == 0) {
 | 
				
			||||||
 | 
					        numRequests = c->size + 1;
 | 
				
			||||||
 | 
					    } else {
 | 
				
			||||||
 | 
					        numRequests = 1;
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    requests = (MPI_Request*)malloc(numRequests * sizeof(MPI_Request));
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    /* all ranks send their bulk array */
 | 
				
			||||||
 | 
					    MPI_Datatype bulkType;
 | 
				
			||||||
 | 
					    int oldSizes[NDIMS] = { c->jmaxLocal + 2, c->imaxLocal + 2 };
 | 
				
			||||||
 | 
					    int newSizes[NDIMS] = { c->jmaxLocal, c->imaxLocal };
 | 
				
			||||||
 | 
					    int starts[NDIMS]   = { 1, 1 };
 | 
				
			||||||
 | 
					    MPI_Type_create_subarray(NDIMS,
 | 
				
			||||||
 | 
					        oldSizes,
 | 
				
			||||||
 | 
					        newSizes,
 | 
				
			||||||
 | 
					        starts,
 | 
				
			||||||
 | 
					        MPI_ORDER_C,
 | 
				
			||||||
 | 
					        MPI_DOUBLE,
 | 
				
			||||||
 | 
					        &bulkType);
 | 
				
			||||||
 | 
					    MPI_Type_commit(&bulkType);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    MPI_Isend(src, 1, bulkType, 0, 0, c->comm, &requests[0]);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    /* rank 0 assembles the subdomains */
 | 
				
			||||||
 | 
					    if (c->rank == 0) {
 | 
				
			||||||
 | 
					        for (int i = 0; i < c->size; i++) {
 | 
				
			||||||
 | 
					            MPI_Datatype domainType;
 | 
				
			||||||
 | 
					            int oldSizes[NDIMS] = { jmax, imax };
 | 
				
			||||||
 | 
					            int newSizes[NDIMS] = { jmaxLocal[i], imaxLocal[i] };
 | 
				
			||||||
 | 
					            int starts[NDIMS]   = { offset[i * NDIMS + JDIM], offset[i * NDIMS + IDIM] };
 | 
				
			||||||
 | 
					            MPI_Type_create_subarray(NDIMS,
 | 
				
			||||||
 | 
					                oldSizes,
 | 
				
			||||||
 | 
					                newSizes,
 | 
				
			||||||
 | 
					                starts,
 | 
				
			||||||
 | 
					                MPI_ORDER_C,
 | 
				
			||||||
 | 
					                MPI_DOUBLE,
 | 
				
			||||||
 | 
					                &domainType);
 | 
				
			||||||
 | 
					            MPI_Type_commit(&domainType);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					            MPI_Irecv(dst, 1, domainType, i, 0, c->comm, &requests[i + 1]);
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    MPI_Waitall(numRequests, requests, MPI_STATUSES_IGNORE);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static int sum(int* sizes, int position)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					    int sum = 0;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    for (int i = 0; i < position; i++) {
 | 
				
			||||||
 | 
					        sum += sizes[i];
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    return sum;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					#endif // defined _MPI
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// exported subroutines
 | 
				
			||||||
 | 
					void commReduction(double* v, int op)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					#if defined(_MPI)
 | 
				
			||||||
 | 
					    if (op == MAX) {
 | 
				
			||||||
 | 
					        MPI_Allreduce(MPI_IN_PLACE, v, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD);
 | 
				
			||||||
 | 
					    } else if (op == SUM) {
 | 
				
			||||||
 | 
					        MPI_Allreduce(MPI_IN_PLACE, v, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					int commIsBoundary(Comm* c, int direction)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					#if defined(_MPI)
 | 
				
			||||||
 | 
					    switch (direction) {
 | 
				
			||||||
 | 
					    case LEFT:
 | 
				
			||||||
 | 
					        return c->coords[IDIM] == 0;
 | 
				
			||||||
 | 
					        break;
 | 
				
			||||||
 | 
					    case RIGHT:
 | 
				
			||||||
 | 
					        return c->coords[IDIM] == (c->dims[IDIM] - 1);
 | 
				
			||||||
 | 
					        break;
 | 
				
			||||||
 | 
					    case BOTTOM:
 | 
				
			||||||
 | 
					        return c->coords[JDIM] == 0;
 | 
				
			||||||
 | 
					        break;
 | 
				
			||||||
 | 
					    case TOP:
 | 
				
			||||||
 | 
					        return c->coords[JDIM] == (c->dims[JDIM] - 1);
 | 
				
			||||||
 | 
					        break;
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    return 1;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					void commExchange(Comm* c, double* grid)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					#if defined(_MPI)
 | 
				
			||||||
 | 
					    double* buf[8];
 | 
				
			||||||
 | 
					    MPI_Request requests[8];
 | 
				
			||||||
 | 
					    for (int i = 0; i < 8; i++)
 | 
				
			||||||
 | 
					        requests[i] = MPI_REQUEST_NULL;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    buf[0] = grid + 1;                                           // recv bottom
 | 
				
			||||||
 | 
					    buf[1] = grid + (c->imaxLocal + 2) + 1;                      // send bottom
 | 
				
			||||||
 | 
					    buf[2] = grid + (c->jmaxLocal + 1) * (c->imaxLocal + 2) + 1; // recv top
 | 
				
			||||||
 | 
					    buf[3] = grid + (c->jmaxLocal) * (c->imaxLocal + 2) + 1;     // send top
 | 
				
			||||||
 | 
					    buf[4] = grid + (c->imaxLocal + 2);                          // recv left
 | 
				
			||||||
 | 
					    buf[5] = grid + (c->imaxLocal + 2) + 1;                      // send left
 | 
				
			||||||
 | 
					    buf[6] = grid + (c->imaxLocal + 2) + (c->imaxLocal + 1);     // recv right
 | 
				
			||||||
 | 
					    buf[7] = grid + (c->imaxLocal + 2) + (c->imaxLocal);         // send right
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    for (int i = 2; i < 4; i++) {
 | 
				
			||||||
 | 
					        int tag = 0;
 | 
				
			||||||
 | 
					        if (c->neighbours[i] != MPI_PROC_NULL) {
 | 
				
			||||||
 | 
					            tag = c->neighbours[i];
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					        /* exchange ghost cells with bottom/top neighbor */
 | 
				
			||||||
 | 
					        MPI_Irecv(buf[i * 2],
 | 
				
			||||||
 | 
					            1,
 | 
				
			||||||
 | 
					            c->rbufferTypes[0],
 | 
				
			||||||
 | 
					            c->neighbours[i],
 | 
				
			||||||
 | 
					            tag,
 | 
				
			||||||
 | 
					            c->comm,
 | 
				
			||||||
 | 
					            &requests[i * 2]);
 | 
				
			||||||
 | 
					        MPI_Isend(buf[(i * 2) + 1],
 | 
				
			||||||
 | 
					            1,
 | 
				
			||||||
 | 
					            c->rbufferTypes[0],
 | 
				
			||||||
 | 
					            c->neighbours[i],
 | 
				
			||||||
 | 
					            c->rank,
 | 
				
			||||||
 | 
					            c->comm,
 | 
				
			||||||
 | 
					            &requests[i * 2 + 1]);
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    for (int i = 0; i < 2; i++) {
 | 
				
			||||||
 | 
					        int tag = 0;
 | 
				
			||||||
 | 
					        if (c->neighbours[i] != MPI_PROC_NULL) {
 | 
				
			||||||
 | 
					            tag = c->neighbours[i];
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					        /* exchange ghost cells with left/right neighbor */
 | 
				
			||||||
 | 
					        MPI_Irecv(buf[i * 2 + 4],
 | 
				
			||||||
 | 
					            1,
 | 
				
			||||||
 | 
					            c->sbufferTypes[0],
 | 
				
			||||||
 | 
					            c->neighbours[i],
 | 
				
			||||||
 | 
					            tag,
 | 
				
			||||||
 | 
					            c->comm,
 | 
				
			||||||
 | 
					            &requests[i * 2 + 4]);
 | 
				
			||||||
 | 
					        MPI_Isend(buf[i * 2 + 5],
 | 
				
			||||||
 | 
					            1,
 | 
				
			||||||
 | 
					            c->sbufferTypes[0],
 | 
				
			||||||
 | 
					            c->neighbours[i],
 | 
				
			||||||
 | 
					            c->rank,
 | 
				
			||||||
 | 
					            c->comm,
 | 
				
			||||||
 | 
					            &requests[(i * 2) + 5]);
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    MPI_Waitall(8, requests, MPI_STATUSES_IGNORE);
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					void commShift(Comm* c, double* f, double* g)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					#if defined(_MPI)
 | 
				
			||||||
 | 
					    MPI_Request requests[4] = { MPI_REQUEST_NULL,
 | 
				
			||||||
 | 
					        MPI_REQUEST_NULL,
 | 
				
			||||||
 | 
					        MPI_REQUEST_NULL,
 | 
				
			||||||
 | 
					        MPI_REQUEST_NULL };
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    /* shift G */
 | 
				
			||||||
 | 
					    double* buf = g + 1;
 | 
				
			||||||
 | 
					    /* receive ghost cells from bottom neighbor */
 | 
				
			||||||
 | 
					    MPI_Irecv(buf,
 | 
				
			||||||
 | 
					        1,
 | 
				
			||||||
 | 
					        c->rbufferTypes[0],
 | 
				
			||||||
 | 
					        c->neighbours[BOTTOM],
 | 
				
			||||||
 | 
					        0,
 | 
				
			||||||
 | 
					        c->comm,
 | 
				
			||||||
 | 
					        &requests[0]);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    buf = g + (c->jmaxLocal) * (c->imaxLocal + 2) + 1;
 | 
				
			||||||
 | 
					    /* send ghost cells to top neighbor */
 | 
				
			||||||
 | 
					    MPI_Isend(buf, 1, c->rbufferTypes[0], c->neighbours[TOP], 0, c->comm, &requests[1]);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    /* shift F */
 | 
				
			||||||
 | 
					    buf = f + (c->imaxLocal + 2);
 | 
				
			||||||
 | 
					    /* receive ghost cells from left neighbor */
 | 
				
			||||||
 | 
					    MPI_Irecv(buf, 1, c->sbufferTypes[0], c->neighbours[LEFT], 1, c->comm, &requests[2]);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    buf = f + (c->imaxLocal + 2) + (c->imaxLocal);
 | 
				
			||||||
 | 
					    /* send ghost cells to right neighbor */
 | 
				
			||||||
 | 
					    MPI_Isend(buf, 1, c->sbufferTypes[0], c->neighbours[RIGHT], 1, c->comm, &requests[3]);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    MPI_Waitall(4, requests, MPI_STATUSES_IGNORE);
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					void commCollectResult(Comm* c,
 | 
				
			||||||
 | 
					    double* ug,
 | 
				
			||||||
 | 
					    double* vg,
 | 
				
			||||||
 | 
					    double* pg,
 | 
				
			||||||
 | 
					    double* u,
 | 
				
			||||||
 | 
					    double* v,
 | 
				
			||||||
 | 
					    double* p,
 | 
				
			||||||
 | 
					    int jmax,
 | 
				
			||||||
 | 
					    int imax)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					    int offset[c->size * NDIMS];
 | 
				
			||||||
 | 
					    int imaxLocal[c->size];
 | 
				
			||||||
 | 
					    int jmaxLocal[c->size];
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    MPI_Gather(&c->imaxLocal, 1, MPI_INT, imaxLocal, 1, MPI_INT, 0, MPI_COMM_WORLD);
 | 
				
			||||||
 | 
					    MPI_Gather(&c->jmaxLocal, 1, MPI_INT, jmaxLocal, 1, MPI_INT, 0, MPI_COMM_WORLD);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    if (c->rank == 0) {
 | 
				
			||||||
 | 
					        for (int i = 0; i < c->size; i++) {
 | 
				
			||||||
 | 
					            int coords[NDIMS];
 | 
				
			||||||
 | 
					            MPI_Cart_coords(c->comm, i, NDIMS, coords);
 | 
				
			||||||
 | 
					            offset[i * NDIMS + IDIM] = sum(imaxLocal, coords[IDIM]);
 | 
				
			||||||
 | 
					            offset[i * NDIMS + JDIM] = sum(jmaxLocal, coords[JDIM]);
 | 
				
			||||||
 | 
					            printf("Rank: %d, Coords(j,i): %d %d, Size(j,i): %d %d "
 | 
				
			||||||
 | 
					                   "Offset(j,i): %d %d\n",
 | 
				
			||||||
 | 
					                i,
 | 
				
			||||||
 | 
					                coords[JDIM],
 | 
				
			||||||
 | 
					                coords[IDIM],
 | 
				
			||||||
 | 
					                jmaxLocal[i],
 | 
				
			||||||
 | 
					                imaxLocal[i],
 | 
				
			||||||
 | 
					                offset[i * NDIMS + JDIM],
 | 
				
			||||||
 | 
					                offset[i * NDIMS + IDIM]);
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    /* collect P */
 | 
				
			||||||
 | 
					    assembleResult(c, p, pg, imaxLocal, jmaxLocal, offset, jmax, imax);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    /* collect U */
 | 
				
			||||||
 | 
					    assembleResult(c, u, ug, imaxLocal, jmaxLocal, offset, jmax, imax);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    /* collect V */
 | 
				
			||||||
 | 
					    assembleResult(c, v, vg, imaxLocal, jmaxLocal, offset, jmax, imax);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					void commPrintConfig(Comm* c)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					#if defined(_MPI)
 | 
				
			||||||
 | 
					    fflush(stdout);
 | 
				
			||||||
 | 
					    MPI_Barrier(MPI_COMM_WORLD);
 | 
				
			||||||
 | 
					    if (commIsMaster(c)) {
 | 
				
			||||||
 | 
					        printf("Communication setup:\n");
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    for (int i = 0; i < c->size; i++) {
 | 
				
			||||||
 | 
					        if (i == c->rank) {
 | 
				
			||||||
 | 
					            printf("\tRank %d of %d\n", c->rank, c->size);
 | 
				
			||||||
 | 
					            printf("\tNeighbours (bottom, top, left, right): %d %d, %d, %d\n",
 | 
				
			||||||
 | 
					                c->neighbours[BOTTOM],
 | 
				
			||||||
 | 
					                c->neighbours[TOP],
 | 
				
			||||||
 | 
					                c->neighbours[LEFT],
 | 
				
			||||||
 | 
					                c->neighbours[RIGHT]);
 | 
				
			||||||
 | 
					            printf("\tCoordinates (j,i) %d %d\n", c->coords[JDIM], c->coords[IDIM]);
 | 
				
			||||||
 | 
					            printf("\tLocal domain size (j,i) %dx%d\n", c->jmaxLocal, c->imaxLocal);
 | 
				
			||||||
 | 
					            fflush(stdout);
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    MPI_Barrier(MPI_COMM_WORLD);
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					void commInit(Comm* c, int argc, char** argv)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					#if defined(_MPI)
 | 
				
			||||||
 | 
					    MPI_Init(&argc, &argv);
 | 
				
			||||||
 | 
					    MPI_Comm_rank(MPI_COMM_WORLD, &(c->rank));
 | 
				
			||||||
 | 
					    MPI_Comm_size(MPI_COMM_WORLD, &(c->size));
 | 
				
			||||||
 | 
					#else
 | 
				
			||||||
 | 
					    c->rank = 0;
 | 
				
			||||||
 | 
					    c->size = 1;
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					void commPartition(Comm* c, int jmax, int imax)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					#if defined(_MPI)
 | 
				
			||||||
 | 
					    int dims[NDIMS]    = { 0, 0 };
 | 
				
			||||||
 | 
					    int periods[NDIMS] = { 0, 0 };
 | 
				
			||||||
 | 
					    MPI_Dims_create(c->size, NDIMS, dims);
 | 
				
			||||||
 | 
					    MPI_Cart_create(MPI_COMM_WORLD, NDIMS, dims, periods, 0, &c->comm);
 | 
				
			||||||
 | 
					    MPI_Cart_shift(c->comm, IDIM, 1, &c->neighbours[LEFT], &c->neighbours[RIGHT]);
 | 
				
			||||||
 | 
					    MPI_Cart_shift(c->comm, JDIM, 1, &c->neighbours[BOTTOM], &c->neighbours[TOP]);
 | 
				
			||||||
 | 
					    MPI_Cart_get(c->comm, NDIMS, c->dims, periods, c->coords);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    c->imaxLocal = sizeOfRank(c->rank, dims[IDIM], imax);
 | 
				
			||||||
 | 
					    c->jmaxLocal = sizeOfRank(c->rank, dims[JDIM], jmax);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    MPI_Type_contiguous(c->imaxLocal, MPI_DOUBLE, &c->rbufferTypes[0]);
 | 
				
			||||||
 | 
					    MPI_Type_commit(&c->rbufferTypes[0]);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    MPI_Type_vector(c->jmaxLocal, 1, c->imaxLocal + 2, MPI_DOUBLE, &c->sbufferTypes[0]);
 | 
				
			||||||
 | 
					    MPI_Type_commit(&c->sbufferTypes[0]);
 | 
				
			||||||
 | 
					#else
 | 
				
			||||||
 | 
					    c->imaxLocal = imax;
 | 
				
			||||||
 | 
					    c->jmaxLocal = jmax;
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					void commFinalize(Comm* c)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					#if defined(_MPI)
 | 
				
			||||||
 | 
					    for (int i = 0; i < NDIRS; i++) {
 | 
				
			||||||
 | 
					        MPI_Type_free(&c->sbufferTypes[i]);
 | 
				
			||||||
 | 
					        MPI_Type_free(&c->rbufferTypes[i]);
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    MPI_Finalize();
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
@@ -1,5 +1,5 @@
 | 
				
			|||||||
/*
 | 
					/*
 | 
				
			||||||
 * Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
 | 
					 * Copyright (C)  NHR@FAU, University Erlangen-Nuremberg.
 | 
				
			||||||
 * All rights reserved. This file is part of nusif-solver.
 | 
					 * All rights reserved. This file is part of nusif-solver.
 | 
				
			||||||
 * Use of this source code is governed by a MIT style
 | 
					 * Use of this source code is governed by a MIT style
 | 
				
			||||||
 * license that can be found in the LICENSE file.
 | 
					 * license that can be found in the LICENSE file.
 | 
				
			||||||
@@ -10,6 +10,7 @@
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
#include "comm.h"
 | 
					#include "comm.h"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#if defined(_MPI)
 | 
				
			||||||
// subroutines local to this module
 | 
					// subroutines local to this module
 | 
				
			||||||
static int sizeOfRank(int rank, int size, int N)
 | 
					static int sizeOfRank(int rank, int size, int N)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
@@ -146,19 +147,23 @@ static int sum(int* sizes, int position)
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
    return sum;
 | 
					    return sum;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					#endif // defined _MPI
 | 
				
			||||||
 | 
					
 | 
				
			||||||
// exported subroutines
 | 
					// exported subroutines
 | 
				
			||||||
void commReduction(double* v, int op)
 | 
					void commReduction(double* v, int op)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
 | 
					#if defined(_MPI)
 | 
				
			||||||
    if (op == MAX) {
 | 
					    if (op == MAX) {
 | 
				
			||||||
        MPI_Allreduce(MPI_IN_PLACE, v, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD);
 | 
					        MPI_Allreduce(MPI_IN_PLACE, v, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD);
 | 
				
			||||||
    } else if (op == SUM) {
 | 
					    } else if (op == SUM) {
 | 
				
			||||||
        MPI_Allreduce(MPI_IN_PLACE, v, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
 | 
					        MPI_Allreduce(MPI_IN_PLACE, v, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
int commIsBoundary(Comm* c, int direction)
 | 
					int commIsBoundary(Comm* c, int direction)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
 | 
					#if defined(_MPI)
 | 
				
			||||||
    switch (direction) {
 | 
					    switch (direction) {
 | 
				
			||||||
    case LEFT:
 | 
					    case LEFT:
 | 
				
			||||||
        return c->coords[IDIM] == 0;
 | 
					        return c->coords[IDIM] == 0;
 | 
				
			||||||
@@ -173,12 +178,14 @@ int commIsBoundary(Comm* c, int direction)
 | 
				
			|||||||
        return c->coords[JDIM] == (c->dims[JDIM] - 1);
 | 
					        return c->coords[JDIM] == (c->dims[JDIM] - 1);
 | 
				
			||||||
        break;
 | 
					        break;
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    return 0;
 | 
					    return 1;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
void commExchange(Comm* c, double* grid)
 | 
					void commExchange(Comm* c, double* grid)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
 | 
					#if defined(_MPI)
 | 
				
			||||||
    int counts[NDIRS]      = { 1, 1, 1, 1 };
 | 
					    int counts[NDIRS]      = { 1, 1, 1, 1 };
 | 
				
			||||||
    MPI_Aint displs[NDIRS] = { 0, 0, 0, 0 };
 | 
					    MPI_Aint displs[NDIRS] = { 0, 0, 0, 0 };
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -191,10 +198,12 @@ void commExchange(Comm* c, double* grid)
 | 
				
			|||||||
        displs,
 | 
					        displs,
 | 
				
			||||||
        c->rbufferTypes,
 | 
					        c->rbufferTypes,
 | 
				
			||||||
        c->comm);
 | 
					        c->comm);
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
void commShift(Comm* c, double* f, double* g)
 | 
					void commShift(Comm* c, double* f, double* g)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
 | 
					#if defined(_MPI)
 | 
				
			||||||
    MPI_Request requests[4] = { MPI_REQUEST_NULL,
 | 
					    MPI_Request requests[4] = { MPI_REQUEST_NULL,
 | 
				
			||||||
        MPI_REQUEST_NULL,
 | 
					        MPI_REQUEST_NULL,
 | 
				
			||||||
        MPI_REQUEST_NULL,
 | 
					        MPI_REQUEST_NULL,
 | 
				
			||||||
@@ -227,8 +236,10 @@ void commShift(Comm* c, double* f, double* g)
 | 
				
			|||||||
        &requests[3]);
 | 
					        &requests[3]);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    MPI_Waitall(4, requests, MPI_STATUSES_IGNORE);
 | 
					    MPI_Waitall(4, requests, MPI_STATUSES_IGNORE);
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// TODO Merge with seq
 | 
				
			||||||
void commCollectResult(Comm* c,
 | 
					void commCollectResult(Comm* c,
 | 
				
			||||||
    double* ug,
 | 
					    double* ug,
 | 
				
			||||||
    double* vg,
 | 
					    double* vg,
 | 
				
			||||||
@@ -276,6 +287,7 @@ void commCollectResult(Comm* c,
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
void commPrintConfig(Comm* c)
 | 
					void commPrintConfig(Comm* c)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
 | 
					#if defined(_MPI)
 | 
				
			||||||
    fflush(stdout);
 | 
					    fflush(stdout);
 | 
				
			||||||
    MPI_Barrier(MPI_COMM_WORLD);
 | 
					    MPI_Barrier(MPI_COMM_WORLD);
 | 
				
			||||||
    if (commIsMaster(c)) {
 | 
					    if (commIsMaster(c)) {
 | 
				
			||||||
@@ -296,13 +308,24 @@ void commPrintConfig(Comm* c)
 | 
				
			|||||||
        }
 | 
					        }
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
    MPI_Barrier(MPI_COMM_WORLD);
 | 
					    MPI_Barrier(MPI_COMM_WORLD);
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
void commInit(Comm* c, int jmax, int imax)
 | 
					void commInit(Comm* c, int argc, char** argv)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
    /* setup communication */
 | 
					#if defined(_MPI)
 | 
				
			||||||
 | 
					    MPI_Init(&argc, &argv);
 | 
				
			||||||
    MPI_Comm_rank(MPI_COMM_WORLD, &(c->rank));
 | 
					    MPI_Comm_rank(MPI_COMM_WORLD, &(c->rank));
 | 
				
			||||||
    MPI_Comm_size(MPI_COMM_WORLD, &(c->size));
 | 
					    MPI_Comm_size(MPI_COMM_WORLD, &(c->size));
 | 
				
			||||||
 | 
					#else
 | 
				
			||||||
 | 
					    c->rank = 0;
 | 
				
			||||||
 | 
					    c->size = 1;
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					void commPartition(Comm* c, int jmax, int imax)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					#if defined(_MPI)
 | 
				
			||||||
    int dims[NDIMS]    = { 0, 0 };
 | 
					    int dims[NDIMS]    = { 0, 0 };
 | 
				
			||||||
    int periods[NDIMS] = { 0, 0 };
 | 
					    int periods[NDIMS] = { 0, 0 };
 | 
				
			||||||
    MPI_Dims_create(c->size, NDIMS, dims);
 | 
					    MPI_Dims_create(c->size, NDIMS, dims);
 | 
				
			||||||
@@ -323,4 +346,20 @@ void commInit(Comm* c, int jmax, int imax)
 | 
				
			|||||||
    setupCommunication(c, BOTTOM, HALO);
 | 
					    setupCommunication(c, BOTTOM, HALO);
 | 
				
			||||||
    setupCommunication(c, TOP, BULK);
 | 
					    setupCommunication(c, TOP, BULK);
 | 
				
			||||||
    setupCommunication(c, TOP, HALO);
 | 
					    setupCommunication(c, TOP, HALO);
 | 
				
			||||||
 | 
					#else
 | 
				
			||||||
 | 
					    c->imaxLocal = imax;
 | 
				
			||||||
 | 
					    c->jmaxLocal = jmax;
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					void commFinalize(Comm* c)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					#if defined(_MPI)
 | 
				
			||||||
 | 
					    for (int i = 0; i < NDIRS; i++) {
 | 
				
			||||||
 | 
					        MPI_Type_free(&c->sbufferTypes[i]);
 | 
				
			||||||
 | 
					        MPI_Type_free(&c->rbufferTypes[i]);
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    MPI_Finalize();
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
@@ -1,12 +1,14 @@
 | 
				
			|||||||
/*
 | 
					/*
 | 
				
			||||||
 * Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
 | 
					 * Copyright (C)  NHR@FAU, University Erlangen-Nuremberg.
 | 
				
			||||||
 * All rights reserved. This file is part of nusif-solver.
 | 
					 * All rights reserved. This file is part of nusif-solver.
 | 
				
			||||||
 * Use of this source code is governed by a MIT style
 | 
					 * Use of this source code is governed by a MIT style
 | 
				
			||||||
 * license that can be found in the LICENSE file.
 | 
					 * license that can be found in the LICENSE file.
 | 
				
			||||||
 */
 | 
					 */
 | 
				
			||||||
#ifndef __COMM_H_
 | 
					#ifndef __COMM_H_
 | 
				
			||||||
#define __COMM_H_
 | 
					#define __COMM_H_
 | 
				
			||||||
 | 
					#if defined(_MPI)
 | 
				
			||||||
#include <mpi.h>
 | 
					#include <mpi.h>
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
enum direction { LEFT = 0, RIGHT, BOTTOM, TOP, NDIRS };
 | 
					enum direction { LEFT = 0, RIGHT, BOTTOM, TOP, NDIRS };
 | 
				
			||||||
enum dimension { JDIM = 0, IDIM, NDIMS };
 | 
					enum dimension { JDIM = 0, IDIM, NDIMS };
 | 
				
			||||||
@@ -16,15 +18,19 @@ enum op { MAX = 0, SUM };
 | 
				
			|||||||
typedef struct {
 | 
					typedef struct {
 | 
				
			||||||
    int rank;
 | 
					    int rank;
 | 
				
			||||||
    int size;
 | 
					    int size;
 | 
				
			||||||
 | 
					#if defined(_MPI)
 | 
				
			||||||
    MPI_Comm comm;
 | 
					    MPI_Comm comm;
 | 
				
			||||||
    MPI_Datatype sbufferTypes[NDIRS];
 | 
					    MPI_Datatype sbufferTypes[NDIRS];
 | 
				
			||||||
    MPI_Datatype rbufferTypes[NDIRS];
 | 
					    MPI_Datatype rbufferTypes[NDIRS];
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
    int neighbours[NDIRS];
 | 
					    int neighbours[NDIRS];
 | 
				
			||||||
    int coords[NDIMS], dims[NDIMS];
 | 
					    int coords[NDIMS], dims[NDIMS];
 | 
				
			||||||
    int imaxLocal, jmaxLocal;
 | 
					    int imaxLocal, jmaxLocal;
 | 
				
			||||||
} Comm;
 | 
					} Comm;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
extern void commInit(Comm* c, int jmax, int imax);
 | 
					extern void commInit(Comm* c, int argc, char** argv);
 | 
				
			||||||
 | 
					extern void commFinalize(Comm* c);
 | 
				
			||||||
 | 
					extern void commPartition(Comm* c, int jmax, int imax);
 | 
				
			||||||
extern void commPrintConfig(Comm*);
 | 
					extern void commPrintConfig(Comm*);
 | 
				
			||||||
extern void commExchange(Comm*, double*);
 | 
					extern void commExchange(Comm*, double*);
 | 
				
			||||||
extern void commShift(Comm* c, double* f, double* g);
 | 
					extern void commShift(Comm* c, double* f, double* g);
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -1,95 +1,84 @@
 | 
				
			|||||||
/*
 | 
					/*
 | 
				
			||||||
 * Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
 | 
					 * Copyright (C)  NHR@FAU, University Erlangen-Nuremberg.
 | 
				
			||||||
 * All rights reserved.
 | 
					 * All rights reserved.
 | 
				
			||||||
 * Use of this source code is governed by a MIT-style
 | 
					 * Use of this source code is governed by a MIT-style
 | 
				
			||||||
 * license that can be found in the LICENSE file.
 | 
					 * license that can be found in the LICENSE file.
 | 
				
			||||||
 */
 | 
					 */
 | 
				
			||||||
#include <float.h>
 | 
					 | 
				
			||||||
#include <limits.h>
 | 
					 | 
				
			||||||
#include <stdio.h>
 | 
					#include <stdio.h>
 | 
				
			||||||
#include <stdlib.h>
 | 
					#include <stdlib.h>
 | 
				
			||||||
#include <unistd.h>
 | 
					#include <unistd.h>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#include "allocate.h"
 | 
					#include "allocate.h"
 | 
				
			||||||
 | 
					#include "comm.h"
 | 
				
			||||||
#include "parameter.h"
 | 
					#include "parameter.h"
 | 
				
			||||||
#include "progress.h"
 | 
					#include "progress.h"
 | 
				
			||||||
#include "solver.h"
 | 
					#include "solver.h"
 | 
				
			||||||
#include "timing.h"
 | 
					#include "timing.h"
 | 
				
			||||||
#include <mpi.h>
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
int main(int argc, char** argv)
 | 
					int main(int argc, char** argv)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
    int rank;
 | 
					    int rank;
 | 
				
			||||||
    double S, E;
 | 
					    double timeStart, timeStop;
 | 
				
			||||||
    Parameter params;
 | 
					    Parameter p;
 | 
				
			||||||
    Solver solver;
 | 
					    Solver s;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    MPI_Init(&argc, &argv);
 | 
					    commInit(&s.comm, argc, argv);
 | 
				
			||||||
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
 | 
					    initParameter(&p);
 | 
				
			||||||
    initParameter(¶ms);
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
    if (argc != 2) {
 | 
					    if (argc != 2) {
 | 
				
			||||||
        printf("Usage: %s <configFile>\n", argv[0]);
 | 
					        printf("Usage: %s <configFile>\n", argv[0]);
 | 
				
			||||||
        exit(EXIT_SUCCESS);
 | 
					        exit(EXIT_SUCCESS);
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    readParameter(¶ms, argv[1]);
 | 
					    readParameter(&p, argv[1]);
 | 
				
			||||||
    if (rank == 0) {
 | 
					    commPartition(&s.comm, p.jmax, p.imax);
 | 
				
			||||||
        printParameter(¶ms);
 | 
					    if (commIsMaster(&s.comm)) {
 | 
				
			||||||
 | 
					        printParameter(&p);
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
    initSolver(&solver, ¶ms);
 | 
					    initSolver(&s, &p);
 | 
				
			||||||
    /* debugExchange(&solver); */
 | 
					#ifndef VERBOSE
 | 
				
			||||||
    /* exit(EXIT_SUCCESS); */
 | 
					    initProgress(s.te);
 | 
				
			||||||
    initProgress(solver.te);
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    double tau = solver.tau;
 | 
					    double tau = s.tau;
 | 
				
			||||||
    double te  = solver.te;
 | 
					    double te  = s.te;
 | 
				
			||||||
    double t   = 0.0;
 | 
					    double t   = 0.0;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    S = getTimeStamp();
 | 
					    timeStart = getTimeStamp();
 | 
				
			||||||
    while (t <= te) {
 | 
					    while (t <= te) {
 | 
				
			||||||
        if (tau > 0.0) {
 | 
					        if (tau > 0.0) computeTimestep(&s);
 | 
				
			||||||
            computeTimestep(&solver);
 | 
					        setBoundaryConditions(&s);
 | 
				
			||||||
        }
 | 
					        setSpecialBoundaryCondition(&s);
 | 
				
			||||||
 | 
					        computeFG(&s);
 | 
				
			||||||
        setBoundaryConditions(&solver);
 | 
					        computeRHS(&s);
 | 
				
			||||||
        setSpecialBoundaryCondition(&solver);
 | 
					        solve(&s);
 | 
				
			||||||
        computeFG(&solver);
 | 
					        adaptUV(&s);
 | 
				
			||||||
        computeRHS(&solver);
 | 
					        t += s.dt;
 | 
				
			||||||
        solve(&solver);
 | 
					 | 
				
			||||||
        adaptUV(&solver);
 | 
					 | 
				
			||||||
        t += solver.dt;
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
#ifdef VERBOSE
 | 
					#ifdef VERBOSE
 | 
				
			||||||
        if (rank == 0) {
 | 
					        if (commIsMaster(&s.comm)) {
 | 
				
			||||||
            printf("TIME %f , TIMESTEP %f\n", t, solver.dt);
 | 
					            printf("TIME %f , TIMESTEP %f\n", t, s.dt);
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
#else
 | 
					#else
 | 
				
			||||||
        printProgress(t);
 | 
					        printProgress(t);
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
    E = getTimeStamp();
 | 
					    timeStop = getTimeStamp();
 | 
				
			||||||
 | 
					#ifndef VERBOSE
 | 
				
			||||||
    stopProgress();
 | 
					    stopProgress();
 | 
				
			||||||
    if (rank == 0) {
 | 
					#endif
 | 
				
			||||||
        printf("Solution took %.2fs\n", E - S);
 | 
					    if (commIsMaster(&s.comm)) {
 | 
				
			||||||
 | 
					        printf("Solution took %.2fs\n", timeStop - timeStart);
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
    size_t bytesize = solver.imax * solver.jmax * sizeof(double);
 | 
					    size_t bytesize = s.imax * s.jmax * sizeof(double);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    double* ug = allocate(64, bytesize);
 | 
					    double* ug = allocate(64, bytesize);
 | 
				
			||||||
    double* vg = allocate(64, bytesize);
 | 
					    double* vg = allocate(64, bytesize);
 | 
				
			||||||
    double* pg = allocate(64, bytesize);
 | 
					    double* pg = allocate(64, bytesize);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    commCollectResult(&solver.comm,
 | 
					    commCollectResult(&s.comm, ug, vg, pg, s.u, s.v, s.p, s.jmax, s.imax);
 | 
				
			||||||
        ug,
 | 
					    writeResult(&s, ug, vg, pg);
 | 
				
			||||||
        vg,
 | 
					 | 
				
			||||||
        pg,
 | 
					 | 
				
			||||||
        solver.u,
 | 
					 | 
				
			||||||
        solver.v,
 | 
					 | 
				
			||||||
        solver.p,
 | 
					 | 
				
			||||||
        solver.jmax,
 | 
					 | 
				
			||||||
        solver.imax);
 | 
					 | 
				
			||||||
    writeResult(&solver, ug, vg, pg);
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
    MPI_Finalize();
 | 
					    commFinalize(&s.comm);
 | 
				
			||||||
    return EXIT_SUCCESS;
 | 
					    return EXIT_SUCCESS;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -1,5 +1,5 @@
 | 
				
			|||||||
/*
 | 
					/*
 | 
				
			||||||
 * Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
 | 
					 * Copyright (C)  NHR@FAU, University Erlangen-Nuremberg.
 | 
				
			||||||
 * All rights reserved. This file is part of nusif-solver.
 | 
					 * All rights reserved. This file is part of nusif-solver.
 | 
				
			||||||
 * Use of this source code is governed by a MIT style
 | 
					 * Use of this source code is governed by a MIT style
 | 
				
			||||||
 * license that can be found in the LICENSE file.
 | 
					 * license that can be found in the LICENSE file.
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -1,5 +1,5 @@
 | 
				
			|||||||
/*
 | 
					/*
 | 
				
			||||||
 * Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
 | 
					 * Copyright (C)  NHR@FAU, University Erlangen-Nuremberg.
 | 
				
			||||||
 * All rights reserved. This file is part of nusif-solver.
 | 
					 * All rights reserved. This file is part of nusif-solver.
 | 
				
			||||||
 * Use of this source code is governed by a MIT style
 | 
					 * Use of this source code is governed by a MIT style
 | 
				
			||||||
 * license that can be found in the LICENSE file.
 | 
					 * license that can be found in the LICENSE file.
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -1,5 +1,5 @@
 | 
				
			|||||||
/*
 | 
					/*
 | 
				
			||||||
 * Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
 | 
					 * Copyright (C)  NHR@FAU, University Erlangen-Nuremberg.
 | 
				
			||||||
 * All rights reserved. This file is part of nusif-solver.
 | 
					 * All rights reserved. This file is part of nusif-solver.
 | 
				
			||||||
 * Use of this source code is governed by a MIT style
 | 
					 * Use of this source code is governed by a MIT style
 | 
				
			||||||
 * license that can be found in the LICENSE file.
 | 
					 * license that can be found in the LICENSE file.
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -1,5 +1,5 @@
 | 
				
			|||||||
/*
 | 
					/*
 | 
				
			||||||
 * Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
 | 
					 * Copyright (C)  NHR@FAU, University Erlangen-Nuremberg.
 | 
				
			||||||
 * All rights reserved.
 | 
					 * All rights reserved.
 | 
				
			||||||
 * Use of this source code is governed by a MIT-style
 | 
					 * Use of this source code is governed by a MIT-style
 | 
				
			||||||
 * license that can be found in the LICENSE file.
 | 
					 * license that can be found in the LICENSE file.
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -1,5 +1,5 @@
 | 
				
			|||||||
/*
 | 
					/*
 | 
				
			||||||
 * Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
 | 
					 * Copyright (C)  NHR@FAU, University Erlangen-Nuremberg.
 | 
				
			||||||
 * All rights reserved. This file is part of nusif-solver.
 | 
					 * All rights reserved. This file is part of nusif-solver.
 | 
				
			||||||
 * Use of this source code is governed by a MIT style
 | 
					 * Use of this source code is governed by a MIT style
 | 
				
			||||||
 * license that can be found in the LICENSE file.
 | 
					 * license that can be found in the LICENSE file.
 | 
				
			||||||
@@ -75,8 +75,6 @@ void initSolver(Solver* s, Parameter* params)
 | 
				
			|||||||
    s->tau      = params->tau;
 | 
					    s->tau      = params->tau;
 | 
				
			||||||
    s->gamma    = params->gamma;
 | 
					    s->gamma    = params->gamma;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    commInit(&s->comm, s->jmax, s->imax);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    /* allocate arrays */
 | 
					    /* allocate arrays */
 | 
				
			||||||
    int imaxLocal = s->comm.imaxLocal;
 | 
					    int imaxLocal = s->comm.imaxLocal;
 | 
				
			||||||
    int jmaxLocal = s->comm.jmaxLocal;
 | 
					    int jmaxLocal = s->comm.jmaxLocal;
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -1,5 +1,5 @@
 | 
				
			|||||||
/*
 | 
					/*
 | 
				
			||||||
 * Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
 | 
					 * Copyright (C)  NHR@FAU, University Erlangen-Nuremberg.
 | 
				
			||||||
 * All rights reserved. This file is part of nusif-solver.
 | 
					 * All rights reserved. This file is part of nusif-solver.
 | 
				
			||||||
 * Use of this source code is governed by a MIT style
 | 
					 * Use of this source code is governed by a MIT style
 | 
				
			||||||
 * license that can be found in the LICENSE file.
 | 
					 * license that can be found in the LICENSE file.
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -1,5 +1,5 @@
 | 
				
			|||||||
/*
 | 
					/*
 | 
				
			||||||
 * Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
 | 
					 * Copyright (C)  NHR@FAU, University Erlangen-Nuremberg.
 | 
				
			||||||
 * All rights reserved.
 | 
					 * All rights reserved.
 | 
				
			||||||
 * Use of this source code is governed by a MIT-style
 | 
					 * Use of this source code is governed by a MIT-style
 | 
				
			||||||
 * license that can be found in the LICENSE file.
 | 
					 * license that can be found in the LICENSE file.
 | 
				
			||||||
@@ -7,18 +7,16 @@
 | 
				
			|||||||
#include <stdlib.h>
 | 
					#include <stdlib.h>
 | 
				
			||||||
#include <time.h>
 | 
					#include <time.h>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
double getTimeStamp()
 | 
					double getTimeStamp(void)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
    struct timespec ts;
 | 
					    struct timespec ts;
 | 
				
			||||||
    clock_gettime(CLOCK_MONOTONIC, &ts);
 | 
					    clock_gettime(CLOCK_MONOTONIC, &ts);
 | 
				
			||||||
    return (double)ts.tv_sec + (double)ts.tv_nsec * 1.e-9;
 | 
					    return (double)ts.tv_sec + (double)ts.tv_nsec * 1.e-9;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
double getTimeResolution()
 | 
					double getTimeResolution(void)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
    struct timespec ts;
 | 
					    struct timespec ts;
 | 
				
			||||||
    clock_getres(CLOCK_MONOTONIC, &ts);
 | 
					    clock_getres(CLOCK_MONOTONIC, &ts);
 | 
				
			||||||
    return (double)ts.tv_sec + (double)ts.tv_nsec * 1.e-9;
 | 
					    return (double)ts.tv_sec + (double)ts.tv_nsec * 1.e-9;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					 | 
				
			||||||
double getTimeStamp_() { return getTimeStamp(); }
 | 
					 | 
				
			||||||
 
 | 
				
			|||||||
@@ -1,5 +1,5 @@
 | 
				
			|||||||
/*
 | 
					/*
 | 
				
			||||||
 * Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
 | 
					 * Copyright (C)  NHR@FAU, University Erlangen-Nuremberg.
 | 
				
			||||||
 * All rights reserved.
 | 
					 * All rights reserved.
 | 
				
			||||||
 * Use of this source code is governed by a MIT-style
 | 
					 * Use of this source code is governed by a MIT-style
 | 
				
			||||||
 * license that can be found in the LICENSE file.
 | 
					 * license that can be found in the LICENSE file.
 | 
				
			||||||
@@ -7,8 +7,7 @@
 | 
				
			|||||||
#ifndef __TIMING_H_
 | 
					#ifndef __TIMING_H_
 | 
				
			||||||
#define __TIMING_H_
 | 
					#define __TIMING_H_
 | 
				
			||||||
 | 
					
 | 
				
			||||||
extern double getTimeStamp();
 | 
					extern double getTimeStamp(void);
 | 
				
			||||||
extern double getTimeResolution();
 | 
					extern double getTimeResolution(void);
 | 
				
			||||||
extern double getTimeStamp_();
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
#endif // __TIMING_H_
 | 
					#endif // __TIMING_H_
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -1,5 +1,5 @@
 | 
				
			|||||||
/*
 | 
					/*
 | 
				
			||||||
 * Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
 | 
					 * Copyright (C)  NHR@FAU, University Erlangen-Nuremberg.
 | 
				
			||||||
 * All rights reserved.
 | 
					 * All rights reserved.
 | 
				
			||||||
 * Use of this source code is governed by a MIT-style
 | 
					 * Use of this source code is governed by a MIT-style
 | 
				
			||||||
 * license that can be found in the LICENSE file.
 | 
					 * license that can be found in the LICENSE file.
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -1,71 +0,0 @@
 | 
				
			|||||||
#=======================================================================================
 | 
					 | 
				
			||||||
# Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
 | 
					 | 
				
			||||||
# All rights reserved.
 | 
					 | 
				
			||||||
# Use of this source code is governed by a MIT-style
 | 
					 | 
				
			||||||
# license that can be found in the LICENSE file.
 | 
					 | 
				
			||||||
#=======================================================================================
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
#CONFIGURE BUILD SYSTEM
 | 
					 | 
				
			||||||
TARGET	   = exe-$(TAG)
 | 
					 | 
				
			||||||
BUILD_DIR  = ./$(TAG)
 | 
					 | 
				
			||||||
SRC_DIR    = ./src
 | 
					 | 
				
			||||||
MAKE_DIR   = ./
 | 
					 | 
				
			||||||
Q         ?= @
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
#DO NOT EDIT BELOW
 | 
					 | 
				
			||||||
include $(MAKE_DIR)/config.mk
 | 
					 | 
				
			||||||
include $(MAKE_DIR)/include_$(TAG).mk
 | 
					 | 
				
			||||||
INCLUDES  += -I$(SRC_DIR) -I$(BUILD_DIR)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
VPATH     = $(SRC_DIR)
 | 
					 | 
				
			||||||
SRC       = $(wildcard $(SRC_DIR)/*.c)
 | 
					 | 
				
			||||||
ASM       = $(patsubst $(SRC_DIR)/%.c, $(BUILD_DIR)/%.s, $(SRC))
 | 
					 | 
				
			||||||
OBJ       = $(patsubst $(SRC_DIR)/%.c, $(BUILD_DIR)/%.o, $(SRC))
 | 
					 | 
				
			||||||
SOURCES   = $(SRC) $(wildcard $(SRC_DIR)/*.h)
 | 
					 | 
				
			||||||
CPPFLAGS := $(CPPFLAGS) $(DEFINES) $(OPTIONS) $(INCLUDES)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
${TARGET}: $(BUILD_DIR) $(OBJ)
 | 
					 | 
				
			||||||
	$(info ===>  LINKING  $(TARGET))
 | 
					 | 
				
			||||||
	$(Q)${LINKER} ${LFLAGS} -o $(TARGET) $(OBJ) $(LIBS)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
$(BUILD_DIR)/%.o:  %.c $(MAKE_DIR)/include_$(TAG).mk $(MAKE_DIR)/config.mk
 | 
					 | 
				
			||||||
	$(info ===>  COMPILE  $@)
 | 
					 | 
				
			||||||
	$(CC) -c $(CPPFLAGS) $(CFLAGS) $< -o $@
 | 
					 | 
				
			||||||
	$(Q)$(GCC) $(CPPFLAGS) -MT $(@:.d=.o) -MM  $< > $(BUILD_DIR)/$*.d
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
$(BUILD_DIR)/%.s:  %.c
 | 
					 | 
				
			||||||
	$(info ===>  GENERATE ASM  $@)
 | 
					 | 
				
			||||||
	$(CC) -S $(CPPFLAGS) $(CFLAGS) $< -o $@
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
.PHONY: clean distclean tags info asm format
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
clean:
 | 
					 | 
				
			||||||
	$(info ===>  CLEAN)
 | 
					 | 
				
			||||||
	@rm -rf $(BUILD_DIR)
 | 
					 | 
				
			||||||
	@rm -f tags
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
distclean: clean
 | 
					 | 
				
			||||||
	$(info ===>  DIST CLEAN)
 | 
					 | 
				
			||||||
	@rm -f $(TARGET)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
info:
 | 
					 | 
				
			||||||
	$(info $(CFLAGS))
 | 
					 | 
				
			||||||
	$(Q)$(CC) $(VERSION)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
asm:  $(BUILD_DIR) $(ASM)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
tags:
 | 
					 | 
				
			||||||
	$(info ===>  GENERATE TAGS)
 | 
					 | 
				
			||||||
	$(Q)ctags -R
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
format:
 | 
					 | 
				
			||||||
	@for src in $(SOURCES) ; do \
 | 
					 | 
				
			||||||
		echo "Formatting $$src" ; \
 | 
					 | 
				
			||||||
		clang-format -i $$src ; \
 | 
					 | 
				
			||||||
	done
 | 
					 | 
				
			||||||
	@echo "Done"
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
$(BUILD_DIR):
 | 
					 | 
				
			||||||
	@mkdir $(BUILD_DIR)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
-include $(OBJ:.o=.d)
 | 
					 | 
				
			||||||
@@ -1,78 +0,0 @@
 | 
				
			|||||||
# C source skeleton
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
## Build
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
1. Configure the toolchain and additional options in `config.mk`:
 | 
					 | 
				
			||||||
```
 | 
					 | 
				
			||||||
# Supported: GCC, CLANG, ICC
 | 
					 | 
				
			||||||
TAG ?= GCC
 | 
					 | 
				
			||||||
ENABLE_OPENMP ?= false
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
OPTIONS +=  -DARRAY_ALIGNMENT=64
 | 
					 | 
				
			||||||
#OPTIONS +=  -DVERBOSE
 | 
					 | 
				
			||||||
#OPTIONS +=  -DVERBOSE_AFFINITY
 | 
					 | 
				
			||||||
#OPTIONS +=  -DVERBOSE_DATASIZE
 | 
					 | 
				
			||||||
#OPTIONS +=  -DVERBOSE_TIMER
 | 
					 | 
				
			||||||
```
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
The verbosity options enable detailed output about solver, affinity settings, allocation sizes and timer resolution.
 | 
					 | 
				
			||||||
For debugging you may want to set the VERBOSE option:
 | 
					 | 
				
			||||||
```
 | 
					 | 
				
			||||||
# Supported: GCC, CLANG, ICC
 | 
					 | 
				
			||||||
TAG ?= GCC
 | 
					 | 
				
			||||||
ENABLE_OPENMP ?= false
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
OPTIONS +=  -DARRAY_ALIGNMENT=64
 | 
					 | 
				
			||||||
OPTIONS +=  -DVERBOSE
 | 
					 | 
				
			||||||
#OPTIONS +=  -DVERBOSE_AFFINITY
 | 
					 | 
				
			||||||
#OPTIONS +=  -DVERBOSE_DATASIZE
 | 
					 | 
				
			||||||
#OPTIONS +=  -DVERBOSE_TIMER
 | 
					 | 
				
			||||||
`
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
2. Build with:
 | 
					 | 
				
			||||||
```
 | 
					 | 
				
			||||||
make
 | 
					 | 
				
			||||||
```
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
You can build multiple toolchains in the same directory, but notice that the Makefile is only acting on the one currently set.
 | 
					 | 
				
			||||||
Intermediate build results are located in the `<TOOLCHAIN>` directory.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
To output the executed commands use:
 | 
					 | 
				
			||||||
```
 | 
					 | 
				
			||||||
make Q=
 | 
					 | 
				
			||||||
```
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
3. Clean up with:
 | 
					 | 
				
			||||||
```
 | 
					 | 
				
			||||||
make clean
 | 
					 | 
				
			||||||
```
 | 
					 | 
				
			||||||
to clean intermediate build results.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
```
 | 
					 | 
				
			||||||
make distclean
 | 
					 | 
				
			||||||
```
 | 
					 | 
				
			||||||
to clean intermediate build results and binary.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
4. (Optional) Generate assembler:
 | 
					 | 
				
			||||||
```
 | 
					 | 
				
			||||||
make asm
 | 
					 | 
				
			||||||
```
 | 
					 | 
				
			||||||
The assembler files will also be located in the `<TOOLCHAIN>` directory.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
## Usage
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
You have to provide a parameter file describing the problem you want to solve:
 | 
					 | 
				
			||||||
```
 | 
					 | 
				
			||||||
./exe-CLANG  dcavity.par
 | 
					 | 
				
			||||||
```
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
Examples are given in in dcavity (a lid driven cavity test case) and canal (simulating a empty canal).
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
You can plot the resulting velocity and pressure fields using gnuplot:
 | 
					 | 
				
			||||||
```
 | 
					 | 
				
			||||||
gnuplot vector.plot
 | 
					 | 
				
			||||||
```
 | 
					 | 
				
			||||||
and for the pressure:
 | 
					 | 
				
			||||||
```
 | 
					 | 
				
			||||||
gnuplot surface.plot
 | 
					 | 
				
			||||||
```
 | 
					 | 
				
			||||||
Some files were not shown because too many files have changed in this diff Show More
		Reference in New Issue
	
	Block a user