331 changed files with 18625 additions and 8575 deletions
--- a/.clangd
+++ b/.clangd
@@ -1,3 +1,3 @@
-CompileFlags:                     # Tweak the parse settings
-  Add: [-I/usr/local/include]             # treat all files as C++, enable more warnings
+CompileFlags:
+  Add: [-I/usr/local/include, -I/opt/homebrew/include, -D_MPI]
  Compiler: clang
--- a/BasicSolver/2D-mpi-v1/canal.par
+++ b/BasicSolver/2D-mpi-v1/canal.par
@@ -1,46 +0,0 @@
-#==============================================================================
-#                            Laminar Canal Flow
-#==============================================================================
-
-# Problem specific Data:
-# ---------------------
-
-name canal             # name of flow setup
-
-bcLeft    3            #  flags for boundary conditions
-bcRight   3            #  1 = no-slip      3 = outflow
-bcBottom  1            #  2 = free-slip    4 = periodic
-bcTop     1            #
-
-gx     0.0      # Body forces (e.g. gravity)
-gy     0.0      #
-
-re            100.0	   # Reynolds number
-
-u_init        1.0      # initial value for velocity in x-direction
-v_init        0.0      # initial value for velocity in y-direction
-p_init        0.0      # initial value for pressure
-
-# Geometry Data:
-# -------------
-
-xlength       30.0     # domain size in x-direction
-ylength       4.0	   # domain size in y-direction
-imax          200      # number of interior cells in x-direction
-jmax          50	   # number of interior cells in y-direction
-
-# Time Data:
-# ---------
-
-te       100.0   # final time
-dt       0.02    # time stepsize
-tau      0.5     # safety factor for time stepsize control (<0 constant delt)
-
-# Pressure Iteration Data:
-# -----------------------
-
-itermax       500       # maximal number of pressure iteration in one time step
-eps           0.00001   # stopping tolerance for pressure iteration
-omg           1.8       # relaxation parameter for SOR iteration
-gamma         0.9       # upwind differencing factor gamma
-#===============================================================================
--- a/BasicSolver/2D-mpi-v1/dcavity.par
+++ b/BasicSolver/2D-mpi-v1/dcavity.par
@@ -1,46 +0,0 @@
-#==============================================================================
-#                              Driven Cavity
-#==============================================================================
-
-# Problem specific Data:
-# ---------------------
-
-name dcavity        # name of flow setup
-
-bcLeft   1			#  flags for boundary conditions
-bcRight  1			#  1 = no-slip      3 = outflow
-bcBottom 1			#  2 = free-slip    4 = periodic
-bcTop    1			#
-
-gx    0.0			# Body forces (e.g. gravity)
-gy    0.0			#
-
-re    500.0		    # Reynolds number
-
-u_init    0.0		# initial value for velocity in x-direction
-v_init    0.0		# initial value for velocity in y-direction
-p_init    0.0		# initial value for pressure
-
-# Geometry Data:
-# -------------
-
-xlength    1.0		# domain size in x-direction
-ylength    1.0		# domain size in y-direction
-imax       100		# number of interior cells in x-direction
-jmax       100		# number of interior cells in y-direction
-
-# Time Data:
-# ---------
-
-te      25.0		# final time
-dt     0.02	    # time stepsize
-tau     0.5		# safety factor for time stepsize control (<0 constant delt)
-
-# Pressure Iteration Data:
-# -----------------------
-
-itermax  1000		# maximal number of pressure iteration in one time step
-eps      0.001		# stopping tolerance for pressure iteration
-omg      1.7		# relaxation parameter for SOR iteration
-gamma    0.9		# upwind differencing factor gamma
-#===============================================================================
--- a/BasicSolver/2D-mpi-v1/include_CLANG.mk
+++ b/BasicSolver/2D-mpi-v1/include_CLANG.mk
@@ -1,16 +0,0 @@
-CC   = mpicc
-GCC  = cc
-LINKER = $(CC)
-
-ifeq ($(ENABLE_OPENMP),true)
-OPENMP   = -fopenmp
-#OPENMP   = -Xpreprocessor -fopenmp #required on Macos with homebrew libomp
-LIBS     = # -lomp
-endif
-
-VERSION  = --version
-CFLAGS   = -Ofast -std=c99 $(OPENMP)
-#CFLAGS   = -Ofast -fnt-store=aggressive  -std=c99 $(OPENMP) #AMD CLANG
-LFLAGS   = $(OPENMP)
-DEFINES  = -D_GNU_SOURCE# -DDEBUG
-INCLUDES = -I/usr/local/include
--- a/BasicSolver/2D-mpi-v1/src/affinity.c
+++ b/BasicSolver/2D-mpi-v1/src/affinity.c
@@ -1,61 +0,0 @@
-/*
- * Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
- * All rights reserved.
- * Use of this source code is governed by a MIT-style
- * license that can be found in the LICENSE file.
- */
-#ifdef __linux__
-#ifdef _OPENMP
-#include <pthread.h>
-#include <sched.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <sys/syscall.h>
-#include <sys/types.h>
-#include <unistd.h>
-
-#define MAX_NUM_THREADS 128
-#define gettid()        syscall(SYS_gettid)
-
-static int getProcessorID(cpu_set_t* cpu_set)
-{
-    int processorId;
-
-    for (processorId = 0; processorId < MAX_NUM_THREADS; processorId++) {
-        if (CPU_ISSET(processorId, cpu_set)) {
-            break;
-        }
-    }
-    return processorId;
-}
-
-int affinity_getProcessorId()
-{
-    cpu_set_t cpu_set;
-    CPU_ZERO(&cpu_set);
-    sched_getaffinity(gettid(), sizeof(cpu_set_t), &cpu_set);
-
-    return getProcessorID(&cpu_set);
-}
-
-void affinity_pinThread(int processorId)
-{
-    cpu_set_t cpuset;
-    pthread_t thread;
-
-    thread = pthread_self();
-    CPU_ZERO(&cpuset);
-    CPU_SET(processorId, &cpuset);
-    pthread_setaffinity_np(thread, sizeof(cpu_set_t), &cpuset);
-}
-
-void affinity_pinProcess(int processorId)
-{
-    cpu_set_t cpuset;
-
-    CPU_ZERO(&cpuset);
-    CPU_SET(processorId, &cpuset);
-    sched_setaffinity(0, sizeof(cpu_set_t), &cpuset);
-}
-#endif /*_OPENMP*/
-#endif /*__linux__*/
--- a/BasicSolver/2D-mpi-v1/src/affinity.h
+++ b/BasicSolver/2D-mpi-v1/src/affinity.h
@@ -1,14 +0,0 @@
-/*
- * Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
- * All rights reserved.
- * Use of this source code is governed by a MIT-style
- * license that can be found in the LICENSE file.
- */
-#ifndef AFFINITY_H
-#define AFFINITY_H
-
-extern int affinity_getProcessorId();
-extern void affinity_pinProcess(int);
-extern void affinity_pinThread(int);
-
-#endif /*AFFINITY_H*/
--- a/BasicSolver/2D-mpi-v1/src/main.c
+++ b/BasicSolver/2D-mpi-v1/src/main.c
@@ -1,79 +0,0 @@
-/*
- * Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
- * All rights reserved.
- * Use of this source code is governed by a MIT-style
- * license that can be found in the LICENSE file.
- */
-#include <float.h>
-#include <limits.h>
-#include <mpi.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <unistd.h>
-
-#include "parameter.h"
-#include "progress.h"
-#include "solver.h"
-#include "timing.h"
-#include <mpi.h>
-
-int main(int argc, char** argv)
-{
-    int rank;
-    double S, E;
-    Parameter params;
-    Solver solver;
-
-    MPI_Init(&argc, &argv);
-    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
-    initParameter(&params);
-
-    if (argc != 2) {
-        printf("Usage: %s <configFile>\n", argv[0]);
-        exit(EXIT_SUCCESS);
-    }
-
-    readParameter(&params, argv[1]);
-    if (rank == 0) {
-        printParameter(&params);
-    }
-    initSolver(&solver, &params);
-    initProgress(solver.te);
-
-    double tau = solver.tau;
-    double te  = solver.te;
-    double t   = 0.0;
-
-    S = getTimeStamp();
-    while (t <= te) {
-        if (tau > 0.0) {
-            computeTimestep(&solver);
-        }
-
-        setBoundaryConditions(&solver);
-        setSpecialBoundaryCondition(&solver);
-        computeFG(&solver);
-        computeRHS(&solver);
-        solve(&solver);
-        adaptUV(&solver);
-        /* exit(EXIT_SUCCESS); */
-        t += solver.dt;
-
-#ifdef VERBOSE
-        if (rank == 0) {
-            printf("TIME %f , TIMESTEP %f\n", t, solver.dt);
-        }
-#else
-        printProgress(t);
-#endif
-    }
-    E = getTimeStamp();
-    stopProgress();
-    if (rank == 0) {
-        printf("Solution took %.2fs\n", E - S);
-    }
-    collectResult(&solver);
-
-    MPI_Finalize();
-    return EXIT_SUCCESS;
-}
--- a/BasicSolver/2D-mpi-v1/src/parameter.h
+++ b/BasicSolver/2D-mpi-v1/src/parameter.h
@@ -1,26 +0,0 @@
-/*
- * Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
- * All rights reserved. This file is part of nusif-solver.
- * Use of this source code is governed by a MIT style
- * license that can be found in the LICENSE file.
- */
-#ifndef __PARAMETER_H_
-#define __PARAMETER_H_
-
-typedef struct {
-    double xlength, ylength;
-    int imax, jmax;
-    int itermax;
-    double eps, omg;
-    double re, tau, gamma;
-    double te, dt;
-    double gx, gy;
-    char* name;
-    int bcLeft, bcRight, bcBottom, bcTop;
-    double u_init, v_init, p_init;
-} Parameter;
-
-void initParameter(Parameter*);
-void readParameter(Parameter*, const char*);
-void printParameter(Parameter*);
-#endif
--- a/BasicSolver/2D-mpi-v1/src/progress.c
+++ b/BasicSolver/2D-mpi-v1/src/progress.c
@@ -1,60 +0,0 @@
-/*
- * Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
- * All rights reserved. This file is part of nusif-solver.
- * Use of this source code is governed by a MIT style
- * license that can be found in the LICENSE file.
- */
-#include <math.h>
-#include <mpi.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-#include "progress.h"
-
-static double _end;
-static int _current;
-static int _rank = -1;
-
-void initProgress(double end)
-{
-    MPI_Comm_rank(MPI_COMM_WORLD, &_rank);
-    _end     = end;
-    _current = 0;
-
-    if (_rank == 0) {
-        printf("[          ]");
-        fflush(stdout);
-    }
-}
-
-void printProgress(double current)
-{
-    if (_rank == 0) {
-        int new = (int)rint((current / _end) * 10.0);
-
-        if (new > _current) {
-            char progress[11];
-            _current    = new;
-            progress[0] = 0;
-
-            for (int i = 0; i < 10; i++) {
-                if (i < _current) {
-                    sprintf(progress + strlen(progress), "#");
-                } else {
-                    sprintf(progress + strlen(progress), " ");
-                }
-            }
-            printf("\r[%s]", progress);
-        }
-        fflush(stdout);
-    }
-}
-
-void stopProgress()
-{
-    if (_rank == 0) {
-        printf("\n");
-        fflush(stdout);
-    }
-}
--- a/BasicSolver/2D-mpi-v1/src/solver.c
+++ b/BasicSolver/2D-mpi-v1/src/solver.c
@@ -1,689 +0,0 @@
-/*
- * Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
- * All rights reserved. This file is part of nusif-solver.
- * Use of this source code is governed by a MIT style
- * license that can be found in the LICENSE file.
- */
-#include <float.h>
-#include <math.h>
-#include <mpi.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-#include "allocate.h"
-#include "parameter.h"
-#include "solver.h"
-#include "util.h"
-
-#define P(i, j)   p[(j) * (imax + 2) + (i)]
-#define F(i, j)   f[(j) * (imax + 2) + (i)]
-#define G(i, j)   g[(j) * (imax + 2) + (i)]
-#define U(i, j)   u[(j) * (imax + 2) + (i)]
-#define V(i, j)   v[(j) * (imax + 2) + (i)]
-#define RHS(i, j) rhs[(j) * (imax + 2) + (i)]
-
-static int sizeOfRank(int rank, int size, int N)
-{
-    return N / size + ((N % size > rank) ? 1 : 0);
-}
-
-static void print(Solver* solver, double* grid)
-{
-    int imax = solver->imax;
-
-    for (int i = 0; i < solver->size; i++) {
-        if (i == solver->rank) {
-            printf("### RANK %d "
-                   "#######################################################\n",
-                solver->rank);
-            for (int j = 0; j < solver->jmaxLocal + 2; j++) {
-                printf("%02d: ", j);
-                for (int i = 0; i < solver->imax + 2; i++) {
-                    printf("%12.8f  ", grid[j * (imax + 2) + i]);
-                }
-                printf("\n");
-            }
-            fflush(stdout);
-        }
-        MPI_Barrier(MPI_COMM_WORLD);
-    }
-}
-
-static void exchange(Solver* solver, double* grid)
-{
-    MPI_Request requests[4] = { MPI_REQUEST_NULL,
-        MPI_REQUEST_NULL,
-        MPI_REQUEST_NULL,
-        MPI_REQUEST_NULL };
-
-    /* exchange ghost cells with top neighbor */
-    if (solver->rank + 1 < solver->size) {
-        int top     = solver->rank + 1;
-        double* src = grid + (solver->jmaxLocal) * (solver->imax + 2) + 1;
-        double* dst = grid + (solver->jmaxLocal + 1) * (solver->imax + 2) + 1;
-
-        MPI_Isend(src, solver->imax, MPI_DOUBLE, top, 1, MPI_COMM_WORLD, &requests[0]);
-        MPI_Irecv(dst, solver->imax, MPI_DOUBLE, top, 2, MPI_COMM_WORLD, &requests[1]);
-    }
-
-    /* exchange ghost cells with bottom neighbor */
-    if (solver->rank > 0) {
-        int bottom  = solver->rank - 1;
-        double* src = grid + (solver->imax + 2) + 1;
-        double* dst = grid + 1;
-
-        MPI_Isend(src, solver->imax, MPI_DOUBLE, bottom, 2, MPI_COMM_WORLD, &requests[2]);
-        MPI_Irecv(dst, solver->imax, MPI_DOUBLE, bottom, 1, MPI_COMM_WORLD, &requests[3]);
-    }
-
-    MPI_Waitall(4, requests, MPI_STATUSES_IGNORE);
-}
-
-static void shift(Solver* solver)
-{
-    MPI_Request requests[2] = { MPI_REQUEST_NULL, MPI_REQUEST_NULL };
-    double* g               = solver->g;
-
-    /* shift G */
-    /* receive ghost cells from bottom neighbor */
-    if (solver->rank > 0) {
-        int bottom = solver->rank - 1;
-        MPI_Irecv(g + 1,
-            solver->imax,
-            MPI_DOUBLE,
-            bottom,
-            0,
-            MPI_COMM_WORLD,
-            &requests[0]);
-    }
-
-    if (solver->rank + 1 < solver->size) {
-        int top     = solver->rank + 1;
-        double* buf = g + (solver->jmaxLocal) * (solver->imax + 2) + 1;
-        /* send ghost cells to top neighbor */
-        MPI_Isend(buf, solver->imax, MPI_DOUBLE, top, 0, MPI_COMM_WORLD, &requests[1]);
-    }
-
-    MPI_Waitall(2, requests, MPI_STATUSES_IGNORE);
-}
-
-void collectResult(Solver* solver)
-{
-    double* Pall = NULL;
-    double* Uall = NULL;
-    double* Vall = NULL;
-    int *rcvCounts, *displs;
-
-    if (solver->rank == 0) {
-        Pall = allocate(64, (solver->imax + 2) * (solver->jmax + 2) * sizeof(double));
-        Uall = allocate(64, (solver->imax + 2) * (solver->jmax + 2) * sizeof(double));
-        Vall = allocate(64, (solver->imax + 2) * (solver->jmax + 2) * sizeof(double));
-        rcvCounts    = (int*)malloc(solver->size * sizeof(int));
-        displs       = (int*)malloc(solver->size * sizeof(int));
-        rcvCounts[0] = solver->jmaxLocal * (solver->imax + 2);
-        displs[0]    = 0;
-        int cursor   = rcvCounts[0];
-
-        for (int i = 1; i < solver->size; i++) {
-            rcvCounts[i] = sizeOfRank(i, solver->size, solver->jmax) * (solver->imax + 2);
-            displs[i]    = cursor;
-            cursor += rcvCounts[i];
-        }
-    }
-
-    int cnt            = solver->jmaxLocal * (solver->imax + 2);
-    double* sendbuffer = solver->p + (solver->imax + 2);
-    MPI_Gatherv(sendbuffer,
-        cnt,
-        MPI_DOUBLE,
-        Pall,
-        rcvCounts,
-        displs,
-        MPI_DOUBLE,
-        0,
-        MPI_COMM_WORLD);
-    sendbuffer = solver->u + (solver->imax + 2);
-    MPI_Gatherv(sendbuffer,
-        cnt,
-        MPI_DOUBLE,
-        Uall,
-        rcvCounts,
-        displs,
-        MPI_DOUBLE,
-        0,
-        MPI_COMM_WORLD);
-    sendbuffer = solver->v + (solver->imax + 2);
-    MPI_Gatherv(sendbuffer,
-        cnt,
-        MPI_DOUBLE,
-        Vall,
-        rcvCounts,
-        displs,
-        MPI_DOUBLE,
-        0,
-        MPI_COMM_WORLD);
-
-    if (solver->rank == 0) {
-        writeResult(solver, Pall, Uall, Vall);
-    }
-}
-
-static void printConfig(Solver* solver)
-{
-    if (solver->rank == 0) {
-        printf("Parameters for #%s#\n", solver->problem);
-        printf("Boundary conditions Left:%d Right:%d Bottom:%d Top:%d\n",
-            solver->bcLeft,
-            solver->bcRight,
-            solver->bcBottom,
-            solver->bcTop);
-        printf("\tReynolds number: %.2f\n", solver->re);
-        printf("\tGx Gy: %.2f %.2f\n", solver->gx, solver->gy);
-        printf("Geometry data:\n");
-        printf("\tDomain box size (x, y): %.2f, %.2f\n",
-            solver->xlength,
-            solver->ylength);
-        printf("\tCells (x, y): %d, %d\n", solver->imax, solver->jmax);
-        printf("Timestep parameters:\n");
-        printf("\tDefault stepsize: %.2f, Final time %.2f\n", solver->dt, solver->te);
-        printf("\tdt bound: %.6f\n", solver->dtBound);
-        printf("\tTau factor: %.2f\n", solver->tau);
-        printf("Iterative solver parameters:\n");
-        printf("\tMax iterations: %d\n", solver->itermax);
-        printf("\tepsilon (stopping tolerance) : %f\n", solver->eps);
-        printf("\tgamma factor: %f\n", solver->gamma);
-        printf("\tomega (SOR relaxation): %f\n", solver->omega);
-        printf("Communication parameters:\n");
-    }
-    for (int i = 0; i < solver->size; i++) {
-        if (i == solver->rank) {
-            printf("\tRank %d of %d\n", solver->rank, solver->size);
-            printf("\tLocal domain size: %dx%d\n", solver->imax, solver->jmaxLocal);
-            fflush(stdout);
-        }
-    }
-}
-
-void initSolver(Solver* solver, Parameter* params)
-{
-    MPI_Comm_rank(MPI_COMM_WORLD, &(solver->rank));
-    MPI_Comm_size(MPI_COMM_WORLD, &(solver->size));
-    solver->problem   = params->name;
-    solver->bcLeft    = params->bcLeft;
-    solver->bcRight   = params->bcRight;
-    solver->bcBottom  = params->bcBottom;
-    solver->bcTop     = params->bcTop;
-    solver->imax      = params->imax;
-    solver->jmax      = params->jmax;
-    solver->jmaxLocal = sizeOfRank(solver->rank, solver->size, solver->jmax);
-    solver->xlength   = params->xlength;
-    solver->ylength   = params->ylength;
-    solver->dx        = params->xlength / params->imax;
-    solver->dy        = params->ylength / params->jmax;
-    solver->eps       = params->eps;
-    solver->omega     = params->omg;
-    solver->itermax   = params->itermax;
-    solver->re        = params->re;
-    solver->gx        = params->gx;
-    solver->gy        = params->gy;
-    solver->dt        = params->dt;
-    solver->te        = params->te;
-    solver->tau       = params->tau;
-    solver->gamma     = params->gamma;
-
-    int imax        = solver->imax;
-    int jmaxLocal   = solver->jmaxLocal;
-    size_t bytesize = (imax + 2) * (jmaxLocal + 2) * sizeof(double);
-    solver->u       = allocate(64, bytesize);
-    solver->v       = allocate(64, bytesize);
-    solver->p       = allocate(64, bytesize);
-    solver->rhs     = allocate(64, bytesize);
-    solver->f       = allocate(64, bytesize);
-    solver->g       = allocate(64, bytesize);
-
-    for (int i = 0; i < (imax + 2) * (jmaxLocal + 2); i++) {
-        solver->u[i]   = params->u_init;
-        solver->v[i]   = params->v_init;
-        solver->p[i]   = params->p_init;
-        solver->rhs[i] = 0.0;
-        solver->f[i]   = 0.0;
-        solver->g[i]   = 0.0;
-    }
-
-    double dx          = solver->dx;
-    double dy          = solver->dy;
-    double inv_sqr_sum = 1.0 / (dx * dx) + 1.0 / (dy * dy);
-    solver->dtBound    = 0.5 * solver->re * 1.0 / inv_sqr_sum;
-#ifdef VERBOSE
-    printConfig(solver);
-#endif
-}
-
-void computeRHS(Solver* solver)
-{
-    int imax      = solver->imax;
-    int jmaxLocal = solver->jmaxLocal;
-    double idx    = 1.0 / solver->dx;
-    double idy    = 1.0 / solver->dy;
-    double idt    = 1.0 / solver->dt;
-    double* rhs   = solver->rhs;
-    double* f     = solver->f;
-    double* g     = solver->g;
-
-    shift(solver);
-
-    for (int j = 1; j < jmaxLocal + 1; j++) {
-        for (int i = 1; i < imax + 1; i++) {
-            RHS(i, j) = ((F(i, j) - F(i - 1, j)) * idx + (G(i, j) - G(i, j - 1)) * idy) *
-                        idt;
-        }
-    }
-}
-
-void solve(Solver* solver)
-{
-    int imax      = solver->imax;
-    int jmax      = solver->jmax;
-    int jmaxLocal = solver->jmaxLocal;
-    double eps    = solver->eps;
-    int itermax   = solver->itermax;
-    double dx2    = solver->dx * solver->dx;
-    double dy2    = solver->dy * solver->dy;
-    double idx2   = 1.0 / dx2;
-    double idy2   = 1.0 / dy2;
-    double factor = solver->omega * 0.5 * (dx2 * dy2) / (dx2 + dy2);
-    double* p     = solver->p;
-    double* rhs   = solver->rhs;
-    double epssq  = eps * eps;
-    int it        = 0;
-    double res    = 1.0;
-
-    while ((res >= epssq) && (it < itermax)) {
-        res = 0.0;
-        exchange(solver, p);
-
-        for (int j = 1; j < jmaxLocal + 1; j++) {
-            for (int i = 1; i < imax + 1; i++) {
-
-                double r = RHS(i, j) -
-                           ((P(i + 1, j) - 2.0 * P(i, j) + P(i - 1, j)) * idx2 +
-                               (P(i, j + 1) - 2.0 * P(i, j) + P(i, j - 1)) * idy2);
-
-                P(i, j) -= (factor * r);
-                res += (r * r);
-            }
-        }
-
-        if (solver->rank == 0) {
-            for (int i = 1; i < imax + 1; i++) {
-                P(i, 0) = P(i, 1);
-            }
-        }
-
-        if (solver->rank == (solver->size - 1)) {
-            for (int i = 1; i < imax + 1; i++) {
-                P(i, jmaxLocal + 1) = P(i, jmaxLocal);
-            }
-        }
-
-        for (int j = 1; j < jmaxLocal + 1; j++) {
-            P(0, j)        = P(1, j);
-            P(imax + 1, j) = P(imax, j);
-        }
-
-        MPI_Allreduce(MPI_IN_PLACE, &res, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
-        res = res / (double)(imax * jmax);
-#ifdef DEBUG
-        if (solver->rank == 0) {
-            printf("%d Residuum: %e\n", it, res);
-        }
-#endif
-        it++;
-    }
-
-#ifdef VERBOSE
-    if (solver->rank == 0) {
-        printf("Solver took %d iterations to reach %f\n", it, sqrt(res));
-    }
-#endif
-}
-
-static double maxElement(Solver* solver, double* m)
-{
-    int size      = (solver->imax + 2) * (solver->jmaxLocal + 2);
-    double maxval = DBL_MIN;
-
-    for (int i = 0; i < size; i++) {
-        maxval = MAX(maxval, fabs(m[i]));
-    }
-
-    MPI_Allreduce(MPI_IN_PLACE, &maxval, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD);
-    return maxval;
-}
-
-void normalizePressure(Solver* solver)
-{
-    int size    = (solver->imax + 2) * (solver->jmaxLocal + 2);
-    double* p   = solver->p;
-    double avgP = 0.0;
-
-    for (int i = 0; i < size; i++) {
-        avgP += p[i];
-    }
-    MPI_Allreduce(MPI_IN_PLACE, &avgP, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
-    avgP /= (solver->imax + 2) * (solver->jmax + 2);
-
-    for (int i = 0; i < size; i++) {
-        p[i] = p[i] - avgP;
-    }
-}
-
-void computeTimestep(Solver* solver)
-{
-    double dt   = solver->dtBound;
-    double dx   = solver->dx;
-    double dy   = solver->dy;
-    double umax = maxElement(solver, solver->u);
-    double vmax = maxElement(solver, solver->v);
-
-    if (umax > 0) {
-        dt = (dt > dx / umax) ? dx / umax : dt;
-    }
-    if (vmax > 0) {
-        dt = (dt > dy / vmax) ? dy / vmax : dt;
-    }
-
-    solver->dt = dt * solver->tau;
-}
-
-void setBoundaryConditions(Solver* solver)
-{
-    int imax      = solver->imax;
-    int jmaxLocal = solver->jmaxLocal;
-    double* u     = solver->u;
-    double* v     = solver->v;
-
-    // Left boundary
-    switch (solver->bcLeft) {
-    case NOSLIP:
-        for (int j = 1; j < jmaxLocal + 1; j++) {
-            U(0, j) = 0.0;
-            V(0, j) = -V(1, j);
-        }
-        break;
-    case SLIP:
-        for (int j = 1; j < jmaxLocal + 1; j++) {
-            U(0, j) = 0.0;
-            V(0, j) = V(1, j);
-        }
-        break;
-    case OUTFLOW:
-        for (int j = 1; j < jmaxLocal + 1; j++) {
-            U(0, j) = U(1, j);
-            V(0, j) = V(1, j);
-        }
-        break;
-    case PERIODIC:
-        break;
-    }
-
-    // Right boundary
-    switch (solver->bcRight) {
-    case NOSLIP:
-        for (int j = 1; j < jmaxLocal + 1; j++) {
-            U(imax, j)     = 0.0;
-            V(imax + 1, j) = -V(imax, j);
-        }
-        break;
-    case SLIP:
-        for (int j = 1; j < jmaxLocal + 1; j++) {
-            U(imax, j)     = 0.0;
-            V(imax + 1, j) = V(imax, j);
-        }
-        break;
-    case OUTFLOW:
-        for (int j = 1; j < jmaxLocal + 1; j++) {
-            U(imax, j)     = U(imax - 1, j);
-            V(imax + 1, j) = V(imax, j);
-        }
-        break;
-    case PERIODIC:
-        break;
-    }
-
-    // Bottom boundary
-    if (solver->rank == 0) {
-        switch (solver->bcBottom) {
-        case NOSLIP:
-            for (int i = 1; i < imax + 1; i++) {
-                V(i, 0) = 0.0;
-                U(i, 0) = -U(i, 1);
-            }
-            break;
-        case SLIP:
-            for (int i = 1; i < imax + 1; i++) {
-                V(i, 0) = 0.0;
-                U(i, 0) = U(i, 1);
-            }
-            break;
-        case OUTFLOW:
-            for (int i = 1; i < imax + 1; i++) {
-                U(i, 0) = U(i, 1);
-                V(i, 0) = V(i, 1);
-            }
-            break;
-        case PERIODIC:
-            break;
-        }
-    }
-
-    // Top boundary
-    if (solver->rank == (solver->size - 1)) {
-        switch (solver->bcTop) {
-        case NOSLIP:
-            for (int i = 1; i < imax + 1; i++) {
-                V(i, jmaxLocal)     = 0.0;
-                U(i, jmaxLocal + 1) = -U(i, jmaxLocal);
-            }
-            break;
-        case SLIP:
-            for (int i = 1; i < imax + 1; i++) {
-                V(i, jmaxLocal)     = 0.0;
-                U(i, jmaxLocal + 1) = U(i, jmaxLocal);
-            }
-            break;
-        case OUTFLOW:
-            for (int i = 1; i < imax + 1; i++) {
-                U(i, jmaxLocal + 1) = U(i, jmaxLocal);
-                V(i, jmaxLocal)     = V(i, jmaxLocal - 1);
-            }
-            break;
-        case PERIODIC:
-            break;
-        }
-    }
-}
-
-void setSpecialBoundaryCondition(Solver* solver)
-{
-    int imax      = solver->imax;
-    int jmaxLocal = solver->jmaxLocal;
-    double* u     = solver->u;
-
-    if (strcmp(solver->problem, "dcavity") == 0) {
-        if (solver->rank == (solver->size - 1)) {
-            for (int i = 1; i < imax; i++) {
-                U(i, jmaxLocal + 1) = 2.0 - U(i, jmaxLocal);
-            }
-        }
-    } else if (strcmp(solver->problem, "canal") == 0) {
-        double ylength = solver->ylength;
-        double dy      = solver->dy;
-        int rest       = solver->jmax % solver->size;
-        int yc = solver->rank * (solver->jmax / solver->size) + MIN(rest, solver->rank);
-        double ys = dy * (yc + 0.5);
-        double y;
-
-        /* printf("RANK %d yc: %d ys: %f\n", solver->rank, yc, ys); */
-
-        for (int j = 1; j < jmaxLocal + 1; j++) {
-            y       = ys + dy * (j - 0.5);
-            U(0, j) = y * (ylength - y) * 4.0 / (ylength * ylength);
-        }
-    }
-    /* print(solver, solver->u); */
-}
-
-void computeFG(Solver* solver)
-{
-    double* u        = solver->u;
-    double* v        = solver->v;
-    double* f        = solver->f;
-    double* g        = solver->g;
-    int imax         = solver->imax;
-    int jmaxLocal    = solver->jmaxLocal;
-    double gx        = solver->gx;
-    double gy        = solver->gy;
-    double gamma     = solver->gamma;
-    double dt        = solver->dt;
-    double inverseRe = 1.0 / solver->re;
-    double inverseDx = 1.0 / solver->dx;
-    double inverseDy = 1.0 / solver->dy;
-    double du2dx, dv2dy, duvdx, duvdy;
-    double du2dx2, du2dy2, dv2dx2, dv2dy2;
-
-    exchange(solver, u);
-    exchange(solver, v);
-
-    for (int j = 1; j < jmaxLocal + 1; j++) {
-        for (int i = 1; i < imax + 1; i++) {
-            du2dx = inverseDx * 0.25 *
-                        ((U(i, j) + U(i + 1, j)) * (U(i, j) + U(i + 1, j)) -
-                            (U(i, j) + U(i - 1, j)) * (U(i, j) + U(i - 1, j))) +
-                    gamma * inverseDx * 0.25 *
-                        (fabs(U(i, j) + U(i + 1, j)) * (U(i, j) - U(i + 1, j)) +
-                            fabs(U(i, j) + U(i - 1, j)) * (U(i, j) - U(i - 1, j)));
-
-            duvdy = inverseDy * 0.25 *
-                        ((V(i, j) + V(i + 1, j)) * (U(i, j) + U(i, j + 1)) -
-                            (V(i, j - 1) + V(i + 1, j - 1)) * (U(i, j) + U(i, j - 1))) +
-                    gamma * inverseDy * 0.25 *
-                        (fabs(V(i, j) + V(i + 1, j)) * (U(i, j) - U(i, j + 1)) +
-                            fabs(V(i, j - 1) + V(i + 1, j - 1)) *
-                                (U(i, j) - U(i, j - 1)));
-
-            du2dx2  = inverseDx * inverseDx * (U(i + 1, j) - 2.0 * U(i, j) + U(i - 1, j));
-            du2dy2  = inverseDy * inverseDy * (U(i, j + 1) - 2.0 * U(i, j) + U(i, j - 1));
-            F(i, j) = U(i, j) + dt * (inverseRe * (du2dx2 + du2dy2) - du2dx - duvdy + gx);
-
-            duvdx = inverseDx * 0.25 *
-                        ((U(i, j) + U(i, j + 1)) * (V(i, j) + V(i + 1, j)) -
-                            (U(i - 1, j) + U(i - 1, j + 1)) * (V(i, j) + V(i - 1, j))) +
-                    gamma * inverseDx * 0.25 *
-                        (fabs(U(i, j) + U(i, j + 1)) * (V(i, j) - V(i + 1, j)) +
-                            fabs(U(i - 1, j) + U(i - 1, j + 1)) *
-                                (V(i, j) - V(i - 1, j)));
-
-            dv2dy = inverseDy * 0.25 *
-                        ((V(i, j) + V(i, j + 1)) * (V(i, j) + V(i, j + 1)) -
-                            (V(i, j) + V(i, j - 1)) * (V(i, j) + V(i, j - 1))) +
-                    gamma * inverseDy * 0.25 *
-                        (fabs(V(i, j) + V(i, j + 1)) * (V(i, j) - V(i, j + 1)) +
-                            fabs(V(i, j) + V(i, j - 1)) * (V(i, j) - V(i, j - 1)));
-
-            dv2dx2  = inverseDx * inverseDx * (V(i + 1, j) - 2.0 * V(i, j) + V(i - 1, j));
-            dv2dy2  = inverseDy * inverseDy * (V(i, j + 1) - 2.0 * V(i, j) + V(i, j - 1));
-            G(i, j) = V(i, j) + dt * (inverseRe * (dv2dx2 + dv2dy2) - duvdx - dv2dy + gy);
-        }
-    }
-
-    /* ----------------------------- boundary of F ---------------------------
-     */
-    for (int j = 1; j < jmaxLocal + 1; j++) {
-        F(0, j)    = U(0, j);
-        F(imax, j) = U(imax, j);
-    }
-
-    /* ----------------------------- boundary of G ---------------------------
-     */
-    if (solver->rank == 0) {
-        for (int i = 1; i < imax + 1; i++) {
-            G(i, 0) = V(i, 0);
-        }
-    }
-
-    if (solver->rank == (solver->size - 1)) {
-        for (int i = 1; i < imax + 1; i++) {
-            G(i, jmaxLocal) = V(i, jmaxLocal);
-        }
-    }
-}
-
-void adaptUV(Solver* solver)
-{
-    int imax       = solver->imax;
-    int jmaxLocal  = solver->jmaxLocal;
-    double* p      = solver->p;
-    double* u      = solver->u;
-    double* v      = solver->v;
-    double* f      = solver->f;
-    double* g      = solver->g;
-    double factorX = solver->dt / solver->dx;
-    double factorY = solver->dt / solver->dy;
-
-    for (int j = 1; j < jmaxLocal + 1; j++) {
-        for (int i = 1; i < imax + 1; i++) {
-            U(i, j) = F(i, j) - (P(i + 1, j) - P(i, j)) * factorX;
-            V(i, j) = G(i, j) - (P(i, j + 1) - P(i, j)) * factorY;
-        }
-    }
-}
-
-void writeResult(Solver* solver, double* p, double* u, double* v)
-{
-    int imax  = solver->imax;
-    int jmax  = solver->jmax;
-    double dx = solver->dx;
-    double dy = solver->dy;
-    double x = 0.0, y = 0.0;
-
-    FILE* fp;
-    fp = fopen("pressure.dat", "w");
-
-    if (fp == NULL) {
-        printf("Error!\n");
-        exit(EXIT_FAILURE);
-    }
-
-    for (int j = 1; j < jmax + 1; j++) {
-        y = (double)(j - 0.5) * dy;
-        for (int i = 1; i < imax + 1; i++) {
-            x = (double)(i - 0.5) * dx;
-            fprintf(fp, "%.2f %.2f %f\n", x, y, P(i, j));
-        }
-        fprintf(fp, "\n");
-    }
-
-    fclose(fp);
-
-    fp = fopen("velocity.dat", "w");
-
-    if (fp == NULL) {
-        printf("Error!\n");
-        exit(EXIT_FAILURE);
-    }
-
-    for (int j = 1; j < jmax + 1; j++) {
-        y = dy * (j - 0.5);
-        for (int i = 1; i < imax + 1; i++) {
-            x            = dx * (i - 0.5);
-            double vel_u = (U(i, j) + U(i - 1, j)) / 2.0;
-            double vel_v = (V(i, j) + V(i, j - 1)) / 2.0;
-            double len   = sqrt((vel_u * vel_u) + (vel_v * vel_v));
-            fprintf(fp, "%.2f %.2f %f %f %f\n", x, y, vel_u, vel_v, len);
-        }
-    }
-
-    fclose(fp);
-}
--- a/BasicSolver/2D-mpi-v1/src/solver.h
+++ b/BasicSolver/2D-mpi-v1/src/solver.h
@@ -1,49 +0,0 @@
-/*
- * Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
- * All rights reserved. This file is part of nusif-solver.
- * Use of this source code is governed by a MIT style
- * license that can be found in the LICENSE file.
- */
-#ifndef __SOLVER_H_
-#define __SOLVER_H_
-#include "parameter.h"
-
-enum BC { NOSLIP = 1, SLIP, OUTFLOW, PERIODIC };
-
-typedef struct {
-    /* geometry and grid information */
-    double dx, dy;
-    int imax, jmax;
-    int jmaxLocal;
-    double xlength, ylength;
-    /* arrays */
-    double *p, *rhs;
-    double *f, *g;
-    double *u, *v;
-    /* parameters */
-    double eps, omega;
-    double re, tau, gamma;
-    double gx, gy;
-    /* time stepping */
-    int itermax;
-    double dt, te;
-    double dtBound;
-    char* problem;
-    int bcLeft, bcRight, bcBottom, bcTop;
-    /* mpi */
-    int rank;
-    int size;
-} Solver;
-
-void initSolver(Solver*, Parameter*);
-void computeRHS(Solver*);
-void solve(Solver*);
-void normalizePressure(Solver*);
-void computeTimestep(Solver*);
-void setBoundaryConditions(Solver*);
-void setSpecialBoundaryCondition(Solver*);
-void computeFG(Solver*);
-void adaptUV(Solver*);
-void collectResult(Solver*);
-void writeResult(Solver*, double*, double*, double*);
-#endif
--- a/BasicSolver/2D-mpi-v1/src/util.h
+++ b/BasicSolver/2D-mpi-v1/src/util.h
@@ -1,23 +0,0 @@
-/*
- * Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
- * All rights reserved.
- * Use of this source code is governed by a MIT-style
- * license that can be found in the LICENSE file.
- */
-#ifndef __UTIL_H_
-#define __UTIL_H_
-#define HLINE                                                                            \
-    "------------------------------------------------------------------------"           \
-    "----\n"
-
-#ifndef MIN
-#define MIN(x, y) ((x) < (y) ? (x) : (y))
-#endif
-#ifndef MAX
-#define MAX(x, y) ((x) > (y) ? (x) : (y))
-#endif
-#ifndef ABS
-#define ABS(a) ((a) >= 0 ? (a) : -(a))
-#endif
-
-#endif // __UTIL_H_
--- a/BasicSolver/2D-mpi-v1/vector.plot
+++ b/BasicSolver/2D-mpi-v1/vector.plot
@@ -1,5 +0,0 @@
-set terminal png size 1800,768 enhanced font ,12
-set output 'velocity.png'
-set datafile separator whitespace
-
-plot 'velocity.dat' using 1:2:3:4:5 with vectors filled head size 0.01,20,60 lc palette
--- a/BasicSolver/2D-mpi-v2/README.md
+++ b/BasicSolver/2D-mpi-v2/README.md
@@ -1,48 +0,0 @@
-# C source skeleton
-
-## Build
-
-1. Configure the toolchain and additional options in `config.mk`:
-```
-# Supported: GCC, CLANG, ICC
-TAG ?= GCC
-ENABLE_OPENMP ?= false
-
-OPTIONS +=  -DARRAY_ALIGNMENT=64
-#OPTIONS +=  -DVERBOSE_AFFINITY
-#OPTIONS +=  -DVERBOSE_DATASIZE
-#OPTIONS +=  -DVERBOSE_TIMER
-```
-
-The verbosity options enable detailed output about affinity settings, allocation sizes and timer resolution.
-
-
-2. Build with:
-```
-make
-```
-
-You can build multiple toolchains in the same directory, but notice that the Makefile is only acting on the one currently set.
-Intermediate build results are located in the `<TOOLCHAIN>` directory.
-
-To output the executed commands use:
-```
-make Q=
-```
-
-3. Clean up with:
-```
-make clean
-```
-to clean intermediate build results.
-
-```
-make distclean
-```
-to clean intermediate build results and binary.
-
-4. (Optional) Generate assembler:
-```
-make asm
-```
-The assembler files will also be located in the `<TOOLCHAIN>` directory.
--- a/BasicSolver/2D-mpi-v2/config.mk
+++ b/BasicSolver/2D-mpi-v2/config.mk
@@ -1,10 +0,0 @@
-# Supported: GCC, CLANG, ICC
-TAG ?= CLANG
-ENABLE_OPENMP ?= false
-
-#Feature options
-OPTIONS +=  -DARRAY_ALIGNMENT=64
-# OPTIONS +=  -DVERBOSE
-#OPTIONS +=  -DVERBOSE_AFFINITY
-#OPTIONS +=  -DVERBOSE_DATASIZE
-#OPTIONS +=  -DVERBOSE_TIMER
--- a/BasicSolver/2D-mpi-v2/dcavity.par
+++ b/BasicSolver/2D-mpi-v2/dcavity.par
@@ -1,46 +0,0 @@
-#==============================================================================
-#                              Driven Cavity
-#==============================================================================
-
-# Problem specific Data:
-# ---------------------
-
-name dcavity        # name of flow setup
-
-bcN    1			#  flags for boundary conditions
-bcE    1			#  1 = no-slip      3 = outflow
-bcS    1			#  2 = free-slip    4 = periodic
-bcW    1			#
-
-gx    0.0			# Body forces (e.g. gravity)
-gy    0.0			#
-
-re    1000.0		    # Reynolds number
-
-u_init    0.0		# initial value for velocity in x-direction
-v_init    0.0		# initial value for velocity in y-direction
-p_init    0.0		# initial value for pressure
-
-# Geometry Data:
-# -------------
-
-xlength    1.0		# domain size in x-direction
-ylength    1.0		# domain size in y-direction
-imax       100		# number of interior cells in x-direction
-jmax       100		# number of interior cells in y-direction
-
-# Time Data:
-# ---------
-
-te      10.0		# final time
-dt     0.02	    # time stepsize
-tau     0.5		# safety factor for time stepsize control (<0 constant delt)
-
-# Pressure Iteration Data:
-# -----------------------
-
-itermax  1000		# maximal number of pressure iteration in one time step
-eps      0.001		# stopping tolerance for pressure iteration
-omg      1.7		# relaxation parameter for SOR iteration
-gamma    0.9		# upwind differencing factor gamma
-#===============================================================================
--- a/BasicSolver/2D-mpi-v2/include_CLANG.mk
+++ b/BasicSolver/2D-mpi-v2/include_CLANG.mk
@@ -1,16 +0,0 @@
-CC   = mpicc
-GCC  = cc
-LINKER = $(CC)
-
-ifeq ($(ENABLE_OPENMP),true)
-OPENMP   = -fopenmp
-#OPENMP   = -Xpreprocessor -fopenmp #required on Macos with homebrew libomp
-LIBS     = # -lomp
-endif
-
-VERSION  = --version
-CFLAGS   = -Ofast -std=c99 $(OPENMP)
-#CFLAGS   = -Ofast -fnt-store=aggressive  -std=c99 $(OPENMP) #AMD CLANG
-LFLAGS   = $(OPENMP)
-DEFINES  = -D_GNU_SOURCE# -DDEBUG
-INCLUDES = -I/usr/local/include
--- a/BasicSolver/2D-mpi-v2/include_ICC.mk
+++ b/BasicSolver/2D-mpi-v2/include_ICC.mk
@@ -1,14 +0,0 @@
-CC   = mpiicc
-GCC  = gcc
-LINKER = $(CC)
-
-ifeq ($(ENABLE_OPENMP),true)
-OPENMP   = -qopenmp
-endif
-
-VERSION  = --version
-CFLAGS   =  -O3 -xHost -qopt-zmm-usage=high -std=c99 $(OPENMP)
-LFLAGS   = $(OPENMP)
-DEFINES  = -D_GNU_SOURCE
-INCLUDES =
-LIBS     =
--- a/BasicSolver/2D-mpi-v2/src/affinity.c
+++ b/BasicSolver/2D-mpi-v2/src/affinity.c
@@ -1,61 +0,0 @@
-/*
- * Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
- * All rights reserved.
- * Use of this source code is governed by a MIT-style
- * license that can be found in the LICENSE file.
- */
-#ifdef __linux__
-#ifdef _OPENMP
-#include <pthread.h>
-#include <sched.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <sys/syscall.h>
-#include <sys/types.h>
-#include <unistd.h>
-
-#define MAX_NUM_THREADS 128
-#define gettid()        syscall(SYS_gettid)
-
-static int getProcessorID(cpu_set_t* cpu_set)
-{
-    int processorId;
-
-    for (processorId = 0; processorId < MAX_NUM_THREADS; processorId++) {
-        if (CPU_ISSET(processorId, cpu_set)) {
-            break;
-        }
-    }
-    return processorId;
-}
-
-int affinity_getProcessorId()
-{
-    cpu_set_t cpu_set;
-    CPU_ZERO(&cpu_set);
-    sched_getaffinity(gettid(), sizeof(cpu_set_t), &cpu_set);
-
-    return getProcessorID(&cpu_set);
-}
-
-void affinity_pinThread(int processorId)
-{
-    cpu_set_t cpuset;
-    pthread_t thread;
-
-    thread = pthread_self();
-    CPU_ZERO(&cpuset);
-    CPU_SET(processorId, &cpuset);
-    pthread_setaffinity_np(thread, sizeof(cpu_set_t), &cpuset);
-}
-
-void affinity_pinProcess(int processorId)
-{
-    cpu_set_t cpuset;
-
-    CPU_ZERO(&cpuset);
-    CPU_SET(processorId, &cpuset);
-    sched_setaffinity(0, sizeof(cpu_set_t), &cpuset);
-}
-#endif /*_OPENMP*/
-#endif /*__linux__*/
--- a/BasicSolver/2D-mpi-v2/src/main.c
+++ b/BasicSolver/2D-mpi-v2/src/main.c
@@ -1,80 +0,0 @@
-/*
- * Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
- * All rights reserved.
- * Use of this source code is governed by a MIT-style
- * license that can be found in the LICENSE file.
- */
-#include <float.h>
-#include <limits.h>
-#include <mpi.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <unistd.h>
-
-#include "parameter.h"
-#include "progress.h"
-#include "solver.h"
-#include "timing.h"
-
-int main(int argc, char** argv)
-{
-    int rank;
-    double S, E;
-    Parameter params;
-    Solver solver;
-
-    MPI_Init(&argc, &argv);
-    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
-    initParameter(&params);
-
-    if (argc != 2) {
-        printf("Usage: %s <configFile>\n", argv[0]);
-        exit(EXIT_SUCCESS);
-    }
-
-    readParameter(&params, argv[1]);
-    if (rank == 0) {
-        printParameter(&params);
-    }
-    initSolver(&solver, &params);
-    /* debugExchange(&solver); */
-    /* debugBC(&solver); */
-    /* exit(EXIT_SUCCESS); */
-    initProgress(solver.te);
-
-    double tau = solver.tau;
-    double te  = solver.te;
-    double t   = 0.0;
-
-    S = getTimeStamp();
-    while (t <= te) {
-        if (tau > 0.0) {
-            computeTimestep(&solver);
-        }
-
-        setBoundaryConditions(&solver);
-        setSpecialBoundaryCondition(&solver);
-        computeFG(&solver);
-        computeRHS(&solver);
-        solve(&solver);
-        adaptUV(&solver);
-        t += solver.dt;
-
-#ifdef VERBOSE
-        if (rank == 0) {
-            printf("TIME %f , TIMESTEP %f\n", t, solver.dt);
-        }
-#else
-        printProgress(t);
-#endif
-    }
-    E = getTimeStamp();
-    stopProgress();
-    if (rank == 0) {
-        printf("Solution took %.2fs\n", E - S);
-    }
-    collectResult(&solver);
-
-    MPI_Finalize();
-    return EXIT_SUCCESS;
-}
--- a/BasicSolver/2D-mpi-v2/src/parameter.c
+++ b/BasicSolver/2D-mpi-v2/src/parameter.c
@@ -1,108 +0,0 @@
-/*
- * Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
- * All rights reserved. This file is part of nusif-solver.
- * Use of this source code is governed by a MIT style
- * license that can be found in the LICENSE file.
- */
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-#include "parameter.h"
-#include "util.h"
-#define MAXLINE 4096
-
-void initParameter(Parameter* param)
-{
-    param->xlength = 1.0;
-    param->ylength = 1.0;
-    param->imax    = 100;
-    param->jmax    = 100;
-    param->itermax = 1000;
-    param->eps     = 0.0001;
-    param->omg     = 1.8;
-}
-
-void readParameter(Parameter* param, const char* filename)
-{
-    FILE* fp = fopen(filename, "r");
-    char line[MAXLINE];
-    int i;
-
-    if (!fp) {
-        fprintf(stderr, "Could not open parameter file: %s\n", filename);
-        exit(EXIT_FAILURE);
-    }
-
-    while (!feof(fp)) {
-        line[0] = '\0';
-        fgets(line, MAXLINE, fp);
-        for (i = 0; line[i] != '\0' && line[i] != '#'; i++)
-            ;
-        line[i] = '\0';
-
-        char* tok = strtok(line, " ");
-        char* val = strtok(NULL, " ");
-
-#define PARSE_PARAM(p, f)                                                                \
-    if (strncmp(tok, #p, sizeof(#p) / sizeof(#p[0]) - 1) == 0) {                         \
-        param->p = f(val);                                                               \
-    }
-#define PARSE_STRING(p) PARSE_PARAM(p, strdup)
-#define PARSE_INT(p)    PARSE_PARAM(p, atoi)
-#define PARSE_REAL(p)   PARSE_PARAM(p, atof)
-
-        if (tok != NULL && val != NULL) {
-            PARSE_REAL(xlength);
-            PARSE_REAL(ylength);
-            PARSE_INT(imax);
-            PARSE_INT(jmax);
-            PARSE_INT(itermax);
-            PARSE_REAL(eps);
-            PARSE_REAL(omg);
-            PARSE_REAL(re);
-            PARSE_REAL(tau);
-            PARSE_REAL(gamma);
-            PARSE_REAL(dt);
-            PARSE_REAL(te);
-            PARSE_REAL(gx);
-            PARSE_REAL(gy);
-            PARSE_STRING(name);
-            PARSE_INT(bcN);
-            PARSE_INT(bcS);
-            PARSE_INT(bcE);
-            PARSE_INT(bcW);
-            PARSE_REAL(u_init);
-            PARSE_REAL(v_init);
-            PARSE_REAL(p_init);
-        }
-    }
-
-    fclose(fp);
-}
-
-void printParameter(Parameter* param)
-{
-    printf("Parameters for %s\n", param->name);
-    printf("Boundary conditions N:%d E:%d S:%d W:%d\n",
-        param->bcN,
-        param->bcE,
-        param->bcS,
-        param->bcW);
-    printf("\tReynolds number: %.2f\n", param->re);
-    printf("\tInit arrays: U:%.2f V:%.2f P:%.2f\n",
-        param->u_init,
-        param->v_init,
-        param->p_init);
-    printf("Geometry data:\n");
-    printf("\tDomain box size (x, y): %.2f, %.2f\n", param->xlength, param->ylength);
-    printf("\tCells (x, y): %d, %d\n", param->imax, param->jmax);
-    printf("Timestep parameters:\n");
-    printf("\tDefault stepsize: %.2f, Final time %.2f\n", param->dt, param->te);
-    printf("\tTau factor: %.2f\n", param->tau);
-    printf("Iterative solver parameters:\n");
-    printf("\tMax iterations: %d\n", param->itermax);
-    printf("\tepsilon (stopping tolerance) : %f\n", param->eps);
-    printf("\tgamma (stopping tolerance) : %f\n", param->gamma);
-    printf("\tomega (SOR relaxation): %f\n", param->omg);
-}
--- a/BasicSolver/2D-mpi-v2/src/parameter.h
+++ b/BasicSolver/2D-mpi-v2/src/parameter.h
@@ -1,26 +0,0 @@
-/*
- * Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
- * All rights reserved. This file is part of nusif-solver.
- * Use of this source code is governed by a MIT style
- * license that can be found in the LICENSE file.
- */
-#ifndef __PARAMETER_H_
-#define __PARAMETER_H_
-
-typedef struct {
-    double xlength, ylength;
-    int imax, jmax;
-    int itermax;
-    double eps, omg;
-    double re, tau, gamma;
-    double te, dt;
-    double gx, gy;
-    char* name;
-    int bcN, bcS, bcE, bcW;
-    double u_init, v_init, p_init;
-} Parameter;
-
-void initParameter(Parameter*);
-void readParameter(Parameter*, const char*);
-void printParameter(Parameter*);
-#endif
--- a/BasicSolver/2D-mpi-v2/src/progress.c
+++ b/BasicSolver/2D-mpi-v2/src/progress.c
@@ -1,60 +0,0 @@
-/*
- * Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
- * All rights reserved. This file is part of nusif-solver.
- * Use of this source code is governed by a MIT style
- * license that can be found in the LICENSE file.
- */
-#include <math.h>
-#include <mpi.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-#include "progress.h"
-
-static double _end;
-static int _current;
-static int _rank = -1;
-
-void initProgress(double end)
-{
-    MPI_Comm_rank(MPI_COMM_WORLD, &_rank);
-    _end     = end;
-    _current = 0;
-
-    if (_rank == 0) {
-        printf("[          ]");
-        fflush(stdout);
-    }
-}
-
-void printProgress(double current)
-{
-    if (_rank == 0) {
-        int new = (int)rint((current / _end) * 10.0);
-
-        if (new > _current) {
-            char progress[11];
-            _current    = new;
-            progress[0] = 0;
-
-            for (int i = 0; i < 10; i++) {
-                if (i < _current) {
-                    sprintf(progress + strlen(progress), "#");
-                } else {
-                    sprintf(progress + strlen(progress), " ");
-                }
-            }
-            printf("\r[%s]", progress);
-        }
-        fflush(stdout);
-    }
-}
-
-void stopProgress()
-{
-    if (_rank == 0) {
-        printf("\n");
-        fflush(stdout);
-    }
-}
--- a/BasicSolver/2D-mpi-v2/src/solver.c
+++ b/BasicSolver/2D-mpi-v2/src/solver.c
@@ -1,900 +0,0 @@
-/*
- * Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
- * All rights reserved. This file is part of nusif-solver.
- * Use of this source code is governed by a MIT style
- * license that can be found in the LICENSE file.
- */
-#include <float.h>
-#include <math.h>
-#include <mpi.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-#include "allocate.h"
-#include "parameter.h"
-#include "solver.h"
-#include "util.h"
-
-#define P(i, j)   p[(j) * (imaxLocal + 2) + (i)]
-#define F(i, j)   f[(j) * (imaxLocal + 2) + (i)]
-#define G(i, j)   g[(j) * (imaxLocal + 2) + (i)]
-#define U(i, j)   u[(j) * (imaxLocal + 2) + (i)]
-#define V(i, j)   v[(j) * (imaxLocal + 2) + (i)]
-#define RHS(i, j) rhs[(j) * (imaxLocal + 2) + (i)]
-
-#define NDIMS 2
-#define IDIM  0
-#define JDIM  1
-
-static int sizeOfRank(int rank, int size, int N)
-{
-    return N / size + ((N % size > rank) ? 1 : 0);
-}
-
-void print(Solver* solver, double* grid)
-{
-    int imaxLocal = solver->imaxLocal;
-
-    for (int i = 0; i < solver->size; i++) {
-        if (i == solver->rank) {
-            printf(
-                "### RANK %d #######################################################\n",
-                solver->rank);
-            for (int j = 0; j < solver->jmaxLocal + 2; j++) {
-                printf("%02d: ", j);
-                for (int i = 0; i < solver->imaxLocal + 2; i++) {
-                    printf("%12.8f  ", grid[j * (imaxLocal + 2) + i]);
-                }
-                printf("\n");
-            }
-            fflush(stdout);
-        }
-        MPI_Barrier(MPI_COMM_WORLD);
-    }
-}
-
-static void exchange(Solver* solver, double* grid)
-{
-    double* buf[8];
-    MPI_Request requests[8];
-    for (int i = 0; i < 8; i++)
-        requests[i] = MPI_REQUEST_NULL;
-
-    buf[0] = grid + 1;                                                     // recv bottom
-    buf[1] = grid + (solver->imaxLocal + 2) + 1;                           // send bottom
-    buf[2] = grid + (solver->jmaxLocal + 1) * (solver->imaxLocal + 2) + 1; // recv top
-    buf[3] = grid + (solver->jmaxLocal) * (solver->imaxLocal + 2) + 1;     // send top
-    buf[4] = grid + (solver->imaxLocal + 2);                               // recv left
-    buf[5] = grid + (solver->imaxLocal + 2) + 1;                           // send left
-    buf[6] = grid + (solver->imaxLocal + 2) + (solver->imaxLocal + 1);     // recv right
-    buf[7] = grid + (solver->imaxLocal + 2) + (solver->imaxLocal);         // send right
-
-    for (int i = 0; i < 2; i++) {
-        int tag = 0;
-        if (solver->jNeighbours[i] != MPI_PROC_NULL) {
-            tag = solver->jNeighbours[i];
-        }
-        /* exchange ghost cells with bottom/top neighbor */
-        MPI_Irecv(buf[i * 2],
-            1,
-            solver->jBufferType,
-            solver->jNeighbours[i],
-            tag,
-            solver->comm,
-            &requests[i * 2]);
-        MPI_Isend(buf[(i * 2) + 1],
-            1,
-            solver->jBufferType,
-            solver->jNeighbours[i],
-            solver->rank,
-            solver->comm,
-            &requests[i * 2 + 1]);
-
-        tag = 0;
-        if (solver->iNeighbours[i] != MPI_PROC_NULL) {
-            tag = solver->iNeighbours[i];
-        }
-        /* exchange ghost cells with left/right neighbor */
-        MPI_Irecv(buf[i * 2 + 4],
-            1,
-            solver->iBufferType,
-            solver->iNeighbours[i],
-            tag,
-            solver->comm,
-            &requests[i * 2 + 4]);
-        MPI_Isend(buf[i * 2 + 5],
-            1,
-            solver->iBufferType,
-            solver->iNeighbours[i],
-            solver->rank,
-            solver->comm,
-            &requests[(i * 2) + 5]);
-    }
-
-    MPI_Waitall(8, requests, MPI_STATUSES_IGNORE);
-}
-
-static void shift(Solver* solver)
-{
-    MPI_Request requests[4] = { MPI_REQUEST_NULL,
-        MPI_REQUEST_NULL,
-        MPI_REQUEST_NULL,
-        MPI_REQUEST_NULL };
-    double* f               = solver->f;
-    double* g               = solver->g;
-
-    /* shift G */
-    double* buf = g + 1;
-    /* receive ghost cells from bottom neighbor */
-    MPI_Irecv(buf,
-        1,
-        solver->jBufferType,
-        solver->jNeighbours[0],
-        0,
-        solver->comm,
-        &requests[0]);
-
-    buf = g + (solver->jmaxLocal) * (solver->imaxLocal + 2) + 1;
-    /* send ghost cells to top neighbor */
-    MPI_Isend(buf,
-        1,
-        solver->jBufferType,
-        solver->jNeighbours[1],
-        0,
-        solver->comm,
-        &requests[1]);
-
-    /* shift F */
-    buf = f + (solver->imaxLocal + 2);
-    /* receive ghost cells from left neighbor */
-    MPI_Irecv(buf,
-        1,
-        solver->iBufferType,
-        solver->iNeighbours[0],
-        1,
-        solver->comm,
-        &requests[2]);
-
-    buf = f + (solver->imaxLocal + 2) + (solver->imaxLocal);
-    /* send ghost cells to right neighbor */
-    MPI_Isend(buf,
-        1,
-        solver->iBufferType,
-        solver->iNeighbours[1],
-        1,
-        solver->comm,
-        &requests[3]);
-
-    MPI_Waitall(4, requests, MPI_STATUSES_IGNORE);
-}
-
-void debugExchange(Solver* solver)
-{
-    int imaxLocal = solver->imaxLocal;
-    int jmaxLocal = solver->jmaxLocal;
-
-    for (int j = 0; j < jmaxLocal + 2; j++) {
-        for (int i = 0; i < solver->imaxLocal + 2; i++) {
-            solver->p[j * (imaxLocal + 2) + i] = solver->rank + 0.01 * i + 0.0001 * j;
-        }
-    }
-    collectResult(solver);
-    /* print(solver, solver->p); */
-}
-
-void debugBC(Solver* solver)
-{
-    int imaxLocal = solver->imaxLocal;
-    int jmaxLocal = solver->jmaxLocal;
-    double* v     = solver->v;
-
-    // Northern boundary
-    if (solver->coords[JDIM] == (solver->dims[JDIM] - 1)) { // set top bc
-        for (int i = 1; i < imaxLocal + 1; i++) {
-            V(i, jmaxLocal + 1) = 10.0 + solver->rank;
-        }
-    }
-
-    // Eastern boundary
-    if (solver->coords[IDIM] == (solver->dims[IDIM] - 1)) { // set right bc
-        for (int j = 1; j < jmaxLocal + 1; j++) {
-            V(imaxLocal + 1, j) = 20.0 + solver->rank;
-        }
-    }
-
-    // Southern boundary
-    if (solver->coords[JDIM] == 0) { // set bottom bc
-        for (int i = 1; i < imaxLocal + 1; i++) {
-            V(i, 0) = 30.0 + solver->rank;
-        }
-    }
-
-    // Western boundary
-    if (solver->coords[IDIM] == 0) { // set left bc
-        for (int j = 1; j < jmaxLocal + 1; j++) {
-            V(0, j) = 40.0 + solver->rank;
-        }
-    }
-    print(solver, solver->v);
-}
-
-static void assembleResult(Solver* solver,
-    double* src,
-    double* dst,
-    int imaxLocal[],
-    int jmaxLocal[],
-    int offset[])
-{
-    MPI_Request* requests;
-    int numRequests = 1;
-
-    if (solver->rank == 0) {
-        numRequests = solver->size + 1;
-    } else {
-        numRequests = 1;
-    }
-
-    requests = (MPI_Request*)malloc(numRequests * sizeof(MPI_Request));
-
-    /* all ranks send their bulk array */
-    MPI_Datatype bulkType;
-    const int ndims     = 2;
-    int oldSizes[ndims] = { solver->jmaxLocal + 2, solver->imaxLocal + 2 };
-    int newSizes[ndims] = { solver->jmaxLocal, solver->imaxLocal };
-    int starts[ndims]   = { 1, 1 };
-    MPI_Type_create_subarray(2,
-        oldSizes,
-        newSizes,
-        starts,
-        MPI_ORDER_C,
-        MPI_DOUBLE,
-        &bulkType);
-    MPI_Type_commit(&bulkType);
-
-    MPI_Isend(src, 1, bulkType, 0, 0, solver->comm, &requests[0]);
-
-    /* rank 0 assembles the subdomains */
-    if (solver->rank == 0) {
-        for (int i = 0; i < solver->size; i++) {
-            MPI_Datatype domainType;
-            MPI_Type_vector(jmaxLocal[i],
-                imaxLocal[i],
-                solver->imax,
-                MPI_DOUBLE,
-                &domainType);
-            MPI_Type_commit(&domainType);
-
-            MPI_Irecv(dst + offset[i],
-                1,
-                domainType,
-                i,
-                0,
-                solver->comm,
-                &requests[i + 1]);
-        }
-    }
-
-    MPI_Waitall(numRequests, requests, MPI_STATUSES_IGNORE);
-}
-
-static int sum(int* sizes, int position)
-{
-    int sum = 0;
-
-    for (int i = 0; i < position; i++) {
-        sum += sizes[i];
-    }
-
-    return sum;
-}
-
-void collectResult(Solver* solver)
-{
-    double* Pall = NULL;
-    double* Uall = NULL;
-    double* Vall = NULL;
-    int offset[solver->size];
-    int imaxLocal[solver->size];
-    int jmaxLocal[solver->size];
-
-    MPI_Gather(&solver->imaxLocal, 1, MPI_INT, imaxLocal, 1, MPI_INT, 0, MPI_COMM_WORLD);
-    MPI_Gather(&solver->jmaxLocal, 1, MPI_INT, jmaxLocal, 1, MPI_INT, 0, MPI_COMM_WORLD);
-
-    if (solver->rank == 0) {
-        Pall = allocate(64, (solver->imax) * (solver->jmax) * sizeof(double));
-        Uall = allocate(64, (solver->imax) * (solver->jmax) * sizeof(double));
-        Vall = allocate(64, (solver->imax) * (solver->jmax) * sizeof(double));
-
-        for (int i = 0; i < solver->size; i++) {
-            int coords[2];
-            MPI_Cart_coords(solver->comm, i, 2, coords);
-            int ioffset = sum(imaxLocal, coords[0]);
-            int joffset = sum(jmaxLocal, coords[1]);
-            offset[i]   = (joffset * solver->imax) + ioffset;
-            printf("Rank: %d, Coords(i,j): %d %d, Size(i,j): %d %d, Offset(i,j): %d %d\n",
-                i,
-                coords[0],
-                coords[1],
-                imaxLocal[i],
-                jmaxLocal[i],
-                ioffset,
-                joffset);
-        }
-    }
-
-    /* collect P */
-    assembleResult(solver, solver->p, Pall, imaxLocal, jmaxLocal, offset);
-
-    /* collect U */
-    assembleResult(solver, solver->u, Uall, imaxLocal, jmaxLocal, offset);
-
-    /* collect V */
-    assembleResult(solver, solver->v, Vall, imaxLocal, jmaxLocal, offset);
-
-    /* write to disk */
-    if (solver->rank == 0) writeResult(solver, Pall, Uall, Vall);
-}
-
-static void printConfig(Solver* solver)
-{
-    if (solver->rank == 0) {
-        printf("Parameters for #%s#\n", solver->problem);
-        printf("Boundary conditions N:%d E:%d S:%d W:%d\n",
-            solver->bcN,
-            solver->bcE,
-            solver->bcS,
-            solver->bcW);
-        printf("\tReynolds number: %.2f\n", solver->re);
-        printf("\tGx Gy: %.2f %.2f\n", solver->gx, solver->gy);
-        printf("Geometry data:\n");
-        printf("\tDomain box size (x, y): %.2f, %.2f\n",
-            solver->xlength,
-            solver->ylength);
-        printf("\tCells (x, y): %d, %d\n", solver->imax, solver->jmax);
-        printf("Timestep parameters:\n");
-        printf("\tDefault stepsize: %.2f, Final time %.2f\n", solver->dt, solver->te);
-        printf("\tdt bound: %.6f\n", solver->dtBound);
-        printf("\tTau factor: %.2f\n", solver->tau);
-        printf("Iterative solver parameters:\n");
-        printf("\tMax iterations: %d\n", solver->itermax);
-        printf("\tepsilon (stopping tolerance) : %f\n", solver->eps);
-        printf("\tgamma factor: %f\n", solver->gamma);
-        printf("\tomega (SOR relaxation): %f\n", solver->omega);
-        printf("Communication parameters:\n");
-    }
-    for (int i = 0; i < solver->size; i++) {
-        if (i == solver->rank) {
-            printf("\tRank %d of %d\n", solver->rank, solver->size);
-            printf("\tNeighbours (b, t, l, r): %d, %d, %d, %d\n",
-                solver->jNeighbours[0],
-                solver->jNeighbours[1],
-                solver->iNeighbours[0],
-                solver->iNeighbours[1]);
-            printf("\tCoordinates %d,%d\n", solver->coords[0], solver->coords[1]);
-            printf("\tLocal domain size: %dx%d\n", solver->imaxLocal, solver->jmaxLocal);
-            fflush(stdout);
-        }
-    }
-}
-
-void initSolver(Solver* solver, Parameter* params)
-{
-    solver->problem = params->name;
-    solver->bcN     = params->bcN;
-    solver->bcS     = params->bcS;
-    solver->bcW     = params->bcW;
-    solver->bcE     = params->bcE;
-    solver->imax    = params->imax;
-    solver->jmax    = params->jmax;
-    solver->xlength = params->xlength;
-    solver->ylength = params->ylength;
-    solver->dx      = params->xlength / params->imax;
-    solver->dy      = params->ylength / params->jmax;
-    solver->eps     = params->eps;
-    solver->omega   = params->omg;
-    solver->itermax = params->itermax;
-    solver->re      = params->re;
-    solver->gx      = params->gx;
-    solver->gy      = params->gy;
-    solver->dt      = params->dt;
-    solver->te      = params->te;
-    solver->tau     = params->tau;
-    solver->gamma   = params->gamma;
-
-    /* setup communication */
-    MPI_Comm_rank(MPI_COMM_WORLD, &(solver->rank));
-    MPI_Comm_size(MPI_COMM_WORLD, &(solver->size));
-    int dims[NDIMS]    = { 0, 0 };
-    int periods[NDIMS] = { 0, 0 };
-    MPI_Dims_create(solver->size, NDIMS, dims);
-    MPI_Cart_create(MPI_COMM_WORLD, NDIMS, dims, periods, 0, &solver->comm);
-    MPI_Cart_shift(solver->comm,
-        IDIM,
-        1,
-        &solver->iNeighbours[0],
-        &solver->iNeighbours[1]);
-    MPI_Cart_shift(solver->comm,
-        JDIM,
-        1,
-        &solver->jNeighbours[0],
-        &solver->jNeighbours[1]);
-    MPI_Cart_get(solver->comm, NDIMS, solver->dims, periods, solver->coords);
-
-    solver->imaxLocal = sizeOfRank(solver->rank, dims[IDIM], solver->imax);
-    solver->jmaxLocal = sizeOfRank(solver->rank, dims[JDIM], solver->jmax);
-
-    MPI_Type_contiguous(solver->imaxLocal, MPI_DOUBLE, &solver->jBufferType);
-    MPI_Type_commit(&solver->jBufferType);
-
-    MPI_Type_vector(solver->jmaxLocal,
-        1,
-        solver->imaxLocal + 2,
-        MPI_DOUBLE,
-        &solver->iBufferType);
-    MPI_Type_commit(&solver->iBufferType);
-
-    /* allocate arrays */
-    int imaxLocal   = solver->imaxLocal;
-    int jmaxLocal   = solver->jmaxLocal;
-    size_t bytesize = (imaxLocal + 2) * (jmaxLocal + 2) * sizeof(double);
-    solver->u       = allocate(64, bytesize);
-    solver->v       = allocate(64, bytesize);
-    solver->p       = allocate(64, bytesize);
-    solver->rhs     = allocate(64, bytesize);
-    solver->f       = allocate(64, bytesize);
-    solver->g       = allocate(64, bytesize);
-
-    for (int i = 0; i < (imaxLocal + 2) * (jmaxLocal + 2); i++) {
-        solver->u[i]   = params->u_init;
-        solver->v[i]   = params->v_init;
-        solver->p[i]   = params->p_init;
-        solver->rhs[i] = 0.0;
-        solver->f[i]   = 0.0;
-        solver->g[i]   = 0.0;
-    }
-
-    double dx          = solver->dx;
-    double dy          = solver->dy;
-    double inv_sqr_sum = 1.0 / (dx * dx) + 1.0 / (dy * dy);
-    solver->dtBound    = 0.5 * solver->re * 1.0 / inv_sqr_sum;
-#ifdef VERBOSE
-    printConfig(solver);
-#endif
-}
-
-void computeRHS(Solver* solver)
-{
-    int imaxLocal = solver->imaxLocal;
-    int jmaxLocal = solver->jmaxLocal;
-    double idx    = 1.0 / solver->dx;
-    double idy    = 1.0 / solver->dy;
-    double idt    = 1.0 / solver->dt;
-    double* rhs   = solver->rhs;
-    double* f     = solver->f;
-    double* g     = solver->g;
-
-    shift(solver);
-
-    for (int j = 1; j < jmaxLocal + 1; j++) {
-        for (int i = 1; i < imaxLocal + 1; i++) {
-            RHS(i, j) = ((F(i, j) - F(i - 1, j)) * idx + (G(i, j) - G(i, j - 1)) * idy) *
-                        idt;
-        }
-    }
-}
-
-int solve(Solver* solver)
-{
-    int imax      = solver->imax;
-    int jmax      = solver->jmax;
-    int imaxLocal = solver->imaxLocal;
-    int jmaxLocal = solver->jmaxLocal;
-    double eps    = solver->eps;
-    int itermax   = solver->itermax;
-    double dx2    = solver->dx * solver->dx;
-    double dy2    = solver->dy * solver->dy;
-    double idx2   = 1.0 / dx2;
-    double idy2   = 1.0 / dy2;
-    double factor = solver->omega * 0.5 * (dx2 * dy2) / (dx2 + dy2);
-    double* p     = solver->p;
-    double* rhs   = solver->rhs;
-    double epssq  = eps * eps;
-    int it        = 0;
-    double res    = 1.0;
-
-    while ((res >= epssq) && (it < itermax)) {
-        res = 0.0;
-        exchange(solver, p);
-
-        for (int j = 1; j < jmaxLocal + 1; j++) {
-            for (int i = 1; i < imaxLocal + 1; i++) {
-
-                double r = RHS(i, j) -
-                           ((P(i + 1, j) - 2.0 * P(i, j) + P(i - 1, j)) * idx2 +
-                               (P(i, j + 1) - 2.0 * P(i, j) + P(i, j - 1)) * idy2);
-
-                P(i, j) -= (factor * r);
-                res += (r * r);
-            }
-        }
-
-        if (solver->coords[JDIM] == 0) { // set bottom bc
-            for (int i = 1; i < imaxLocal + 1; i++) {
-                P(i, 0) = P(i, 1);
-            }
-        }
-
-        if (solver->coords[JDIM] == (solver->dims[JDIM] - 1)) { // set top bc
-            for (int i = 1; i < imaxLocal + 1; i++) {
-                P(i, jmaxLocal + 1) = P(i, jmaxLocal);
-            }
-        }
-
-        if (solver->coords[IDIM] == 0) { // set left bc
-            for (int j = 1; j < jmaxLocal + 1; j++) {
-                P(0, j) = P(1, j);
-            }
-        }
-
-        if (solver->coords[IDIM] == (solver->dims[IDIM] - 1)) { // set right bc
-            for (int j = 1; j < jmaxLocal + 1; j++) {
-                P(imaxLocal + 1, j) = P(imaxLocal, j);
-            }
-        }
-
-        MPI_Allreduce(MPI_IN_PLACE, &res, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
-        res = res / (double)(imax * jmax);
-#ifdef DEBUG
-        if (solver->rank == 0) {
-            printf("%d Residuum: %e\n", it, res);
-        }
-#endif
-        it++;
-    }
-
-#ifdef VERBOSE
-    if (solver->rank == 0) {
-        printf("Solver took %d iterations to reach %f\n", it, sqrt(res));
-    }
-#endif
-    if (res < eps) {
-        return 0;
-    } else {
-        return 1;
-    }
-}
-
-static double maxElement(Solver* solver, double* m)
-{
-    int size      = (solver->imaxLocal + 2) * (solver->jmaxLocal + 2);
-    double maxval = DBL_MIN;
-
-    for (int i = 0; i < size; i++) {
-        maxval = MAX(maxval, fabs(m[i]));
-    }
-
-    MPI_Allreduce(MPI_IN_PLACE, &maxval, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD);
-    return maxval;
-}
-
-void computeTimestep(Solver* solver)
-{
-    double dt   = solver->dtBound;
-    double dx   = solver->dx;
-    double dy   = solver->dy;
-    double umax = maxElement(solver, solver->u);
-    double vmax = maxElement(solver, solver->v);
-
-    if (umax > 0) {
-        dt = (dt > dx / umax) ? dx / umax : dt;
-    }
-    if (vmax > 0) {
-        dt = (dt > dy / vmax) ? dy / vmax : dt;
-    }
-
-    solver->dt = dt * solver->tau;
-}
-
-void setBoundaryConditions(Solver* solver)
-{
-    int imaxLocal = solver->imaxLocal;
-    int jmaxLocal = solver->jmaxLocal;
-    double* u     = solver->u;
-    double* v     = solver->v;
-
-    // Northern boundary
-    if (solver->coords[JDIM] == (solver->dims[JDIM] - 1)) { // set top bc
-        switch (solver->bcN) {
-        case NOSLIP:
-            for (int i = 1; i < imaxLocal + 1; i++) {
-                V(i, jmaxLocal)     = 0.0;
-                U(i, jmaxLocal + 1) = -U(i, jmaxLocal);
-            }
-            break;
-        case SLIP:
-            for (int i = 1; i < imaxLocal + 1; i++) {
-                V(i, jmaxLocal)     = 0.0;
-                U(i, jmaxLocal + 1) = U(i, jmaxLocal);
-            }
-            break;
-        case OUTFLOW:
-            for (int i = 1; i < imaxLocal + 1; i++) {
-                U(i, jmaxLocal + 1) = U(i, jmaxLocal);
-                V(i, jmaxLocal)     = V(i, jmaxLocal - 1);
-            }
-            break;
-        case PERIODIC:
-            break;
-        }
-    }
-
-    // Southern boundary
-    if (solver->coords[JDIM] == 0) { // set bottom bc
-        switch (solver->bcS) {
-        case NOSLIP:
-            for (int i = 1; i < imaxLocal + 1; i++) {
-                V(i, 0) = 0.0;
-                U(i, 0) = -U(i, 1);
-            }
-            break;
-        case SLIP:
-            for (int i = 1; i < imaxLocal + 1; i++) {
-                V(i, 0) = 0.0;
-                U(i, 0) = U(i, 1);
-            }
-            break;
-        case OUTFLOW:
-            for (int i = 1; i < imaxLocal + 1; i++) {
-                U(i, 0) = U(i, 1);
-                V(i, 0) = V(i, 1);
-            }
-            break;
-        case PERIODIC:
-            break;
-        }
-    }
-
-    // Eastern boundary
-    if (solver->coords[IDIM] == (solver->dims[IDIM] - 1)) { // set right bc
-        switch (solver->bcE) {
-        case NOSLIP:
-            for (int j = 1; j < jmaxLocal + 1; j++) {
-                U(imaxLocal, j)     = 0.0;
-                V(imaxLocal + 1, j) = -V(imaxLocal, j);
-            }
-            break;
-        case SLIP:
-            for (int j = 1; j < jmaxLocal + 1; j++) {
-                U(imaxLocal, j)     = 0.0;
-                V(imaxLocal + 1, j) = V(imaxLocal, j);
-            }
-            break;
-        case OUTFLOW:
-            for (int j = 1; j < jmaxLocal + 1; j++) {
-                U(imaxLocal, j)     = U(imaxLocal - 1, j);
-                V(imaxLocal + 1, j) = V(imaxLocal, j);
-            }
-            break;
-        case PERIODIC:
-            break;
-        }
-    }
-
-    // Western boundary
-    if (solver->coords[IDIM] == 0) { // set left bc
-        switch (solver->bcW) {
-        case NOSLIP:
-            for (int j = 1; j < jmaxLocal + 1; j++) {
-                U(0, j) = 0.0;
-                V(0, j) = -V(1, j);
-            }
-            break;
-        case SLIP:
-            for (int j = 1; j < jmaxLocal + 1; j++) {
-                U(0, j) = 0.0;
-                V(0, j) = V(1, j);
-            }
-            break;
-        case OUTFLOW:
-            for (int j = 1; j < jmaxLocal + 1; j++) {
-                U(0, j) = U(1, j);
-                V(0, j) = V(1, j);
-            }
-            break;
-        case PERIODIC:
-            break;
-        }
-    }
-}
-
-void setSpecialBoundaryCondition(Solver* solver)
-{
-    int imaxLocal = solver->imaxLocal;
-    int jmaxLocal = solver->jmaxLocal;
-    double* u     = solver->u;
-
-    if (strcmp(solver->problem, "dcavity") == 0) {
-        if (solver->coords[JDIM] == (solver->dims[JDIM] - 1)) { // set top bc
-            for (int i = 1; i < imaxLocal + 1; i++) {
-                U(i, jmaxLocal + 1) = 2.0 - U(i, jmaxLocal);
-            }
-        }
-    } else if (strcmp(solver->problem, "canal") == 0) {
-        if (solver->coords[IDIM] == 0) { // set left bc
-            double ylength = solver->ylength;
-            double dy      = solver->dy;
-            int rest       = solver->jmax % solver->size;
-            int yc         = solver->rank * (solver->jmax / solver->size) +
-                     MIN(rest, solver->rank);
-            double ys = dy * (yc + 0.5);
-            double y;
-
-            /* printf("RANK %d yc: %d ys: %f\n", solver->rank, yc, ys); */
-
-            for (int j = 1; j < jmaxLocal + 1; j++) {
-                y       = ys + dy * (j - 0.5);
-                U(0, j) = y * (ylength - y) * 4.0 / (ylength * ylength);
-            }
-        }
-    }
-    /* print(solver, solver->u); */
-}
-
-void computeFG(Solver* solver)
-{
-    double* u        = solver->u;
-    double* v        = solver->v;
-    double* f        = solver->f;
-    double* g        = solver->g;
-    int imaxLocal    = solver->imaxLocal;
-    int jmaxLocal    = solver->jmaxLocal;
-    double gx        = solver->gx;
-    double gy        = solver->gy;
-    double gamma     = solver->gamma;
-    double dt        = solver->dt;
-    double inverseRe = 1.0 / solver->re;
-    double inverseDx = 1.0 / solver->dx;
-    double inverseDy = 1.0 / solver->dy;
-    double du2dx, dv2dy, duvdx, duvdy;
-    double du2dx2, du2dy2, dv2dx2, dv2dy2;
-
-    exchange(solver, u);
-    exchange(solver, v);
-
-    for (int j = 1; j < jmaxLocal + 1; j++) {
-        for (int i = 1; i < imaxLocal + 1; i++) {
-            du2dx = inverseDx * 0.25 *
-                        ((U(i, j) + U(i + 1, j)) * (U(i, j) + U(i + 1, j)) -
-                            (U(i, j) + U(i - 1, j)) * (U(i, j) + U(i - 1, j))) +
-                    gamma * inverseDx * 0.25 *
-                        (fabs(U(i, j) + U(i + 1, j)) * (U(i, j) - U(i + 1, j)) +
-                            fabs(U(i, j) + U(i - 1, j)) * (U(i, j) - U(i - 1, j)));
-
-            duvdy = inverseDy * 0.25 *
-                        ((V(i, j) + V(i + 1, j)) * (U(i, j) + U(i, j + 1)) -
-                            (V(i, j - 1) + V(i + 1, j - 1)) * (U(i, j) + U(i, j - 1))) +
-                    gamma * inverseDy * 0.25 *
-                        (fabs(V(i, j) + V(i + 1, j)) * (U(i, j) - U(i, j + 1)) +
-                            fabs(V(i, j - 1) + V(i + 1, j - 1)) *
-                                (U(i, j) - U(i, j - 1)));
-
-            du2dx2  = inverseDx * inverseDx * (U(i + 1, j) - 2.0 * U(i, j) + U(i - 1, j));
-            du2dy2  = inverseDy * inverseDy * (U(i, j + 1) - 2.0 * U(i, j) + U(i, j - 1));
-            F(i, j) = U(i, j) + dt * (inverseRe * (du2dx2 + du2dy2) - du2dx - duvdy + gx);
-
-            duvdx = inverseDx * 0.25 *
-                        ((U(i, j) + U(i, j + 1)) * (V(i, j) + V(i + 1, j)) -
-                            (U(i - 1, j) + U(i - 1, j + 1)) * (V(i, j) + V(i - 1, j))) +
-                    gamma * inverseDx * 0.25 *
-                        (fabs(U(i, j) + U(i, j + 1)) * (V(i, j) - V(i + 1, j)) +
-                            fabs(U(i - 1, j) + U(i - 1, j + 1)) *
-                                (V(i, j) - V(i - 1, j)));
-
-            dv2dy = inverseDy * 0.25 *
-                        ((V(i, j) + V(i, j + 1)) * (V(i, j) + V(i, j + 1)) -
-                            (V(i, j) + V(i, j - 1)) * (V(i, j) + V(i, j - 1))) +
-                    gamma * inverseDy * 0.25 *
-                        (fabs(V(i, j) + V(i, j + 1)) * (V(i, j) - V(i, j + 1)) +
-                            fabs(V(i, j) + V(i, j - 1)) * (V(i, j) - V(i, j - 1)));
-
-            dv2dx2  = inverseDx * inverseDx * (V(i + 1, j) - 2.0 * V(i, j) + V(i - 1, j));
-            dv2dy2  = inverseDy * inverseDy * (V(i, j + 1) - 2.0 * V(i, j) + V(i, j - 1));
-            G(i, j) = V(i, j) + dt * (inverseRe * (dv2dx2 + dv2dy2) - duvdx - dv2dy + gy);
-        }
-    }
-
-    /* ----------------------------- boundary of F --------------------------- */
-    if (solver->coords[IDIM] == 0) { // set left bc
-        for (int j = 1; j < jmaxLocal + 1; j++) {
-            F(0, j) = U(0, j);
-        }
-    }
-
-    if (solver->coords[IDIM] == (solver->dims[IDIM] - 1)) { // set right bc
-        for (int j = 1; j < jmaxLocal + 1; j++) {
-            F(imaxLocal, j) = U(imaxLocal, j);
-        }
-    }
-
-    /* ----------------------------- boundary of G --------------------------- */
-    if (solver->coords[JDIM] == 0) { // set bottom bc
-        for (int i = 1; i < imaxLocal + 1; i++) {
-            G(i, 0) = V(i, 0);
-        }
-    }
-
-    if (solver->coords[JDIM] == (solver->dims[JDIM] - 1)) { // set top bc
-        for (int i = 1; i < imaxLocal + 1; i++) {
-            G(i, jmaxLocal) = V(i, jmaxLocal);
-        }
-    }
-}
-
-void adaptUV(Solver* solver)
-{
-    int imaxLocal  = solver->imaxLocal;
-    int jmaxLocal  = solver->jmaxLocal;
-    double* p      = solver->p;
-    double* u      = solver->u;
-    double* v      = solver->v;
-    double* f      = solver->f;
-    double* g      = solver->g;
-    double factorX = solver->dt / solver->dx;
-    double factorY = solver->dt / solver->dy;
-
-    for (int j = 1; j < jmaxLocal + 1; j++) {
-        for (int i = 1; i < imaxLocal + 1; i++) {
-            U(i, j) = F(i, j) - (P(i + 1, j) - P(i, j)) * factorX;
-            V(i, j) = G(i, j) - (P(i, j + 1) - P(i, j)) * factorY;
-        }
-    }
-}
-
-void writeResult(Solver* solver, double* p, double* u, double* v)
-{
-    int imax  = solver->imax;
-    int jmax  = solver->jmax;
-    double dx = solver->dx;
-    double dy = solver->dy;
-    double x = 0.0, y = 0.0;
-
-    FILE* fp;
-    fp = fopen("pressure.dat", "w");
-
-    if (fp == NULL) {
-        printf("Error!\n");
-        exit(EXIT_FAILURE);
-    }
-
-    for (int j = 1; j < jmax; j++) {
-        y = (double)(j - 0.5) * dy;
-        for (int i = 1; i < imax; i++) {
-            x = (double)(i - 0.5) * dx;
-            fprintf(fp, "%.2f %.2f %f\n", x, y, p[j * (imax) + i]);
-        }
-        fprintf(fp, "\n");
-    }
-
-    fclose(fp);
-
-    fp = fopen("velocity.dat", "w");
-
-    if (fp == NULL) {
-        printf("Error!\n");
-        exit(EXIT_FAILURE);
-    }
-
-    for (int j = 1; j < jmax; j++) {
-        y = dy * (j - 0.5);
-        for (int i = 1; i < imax; i++) {
-            x            = dx * (i - 0.5);
-            double vel_u = (u[j * (imax) + i] + u[j * (imax) + (i - 1)]) / 2.0;
-            double vel_v = (v[j * (imax) + i] + v[(j - 1) * (imax) + i]) / 2.0;
-            double len   = sqrt((vel_u * vel_u) + (vel_v * vel_v));
-            fprintf(fp, "%.2f %.2f %f %f %f\n", x, y, vel_u, vel_v, len);
-        }
-    }
-
-    fclose(fp);
-}
--- a/BasicSolver/2D-mpi-v2/src/solver.h
+++ b/BasicSolver/2D-mpi-v2/src/solver.h
@@ -1,56 +0,0 @@
-/*
- * Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
- * All rights reserved. This file is part of nusif-solver.
- * Use of this source code is governed by a MIT style
- * license that can be found in the LICENSE file.
- */
-#ifndef __SOLVER_H_
-#define __SOLVER_H_
-#include "parameter.h"
-#include <mpi.h>
-
-enum BC { NOSLIP = 1, SLIP, OUTFLOW, PERIODIC };
-
-typedef struct {
-    /* geometry and grid information */
-    double dx, dy;
-    int imax, jmax;
-    double xlength, ylength;
-    /* arrays */
-    double *p, *rhs;
-    double *f, *g;
-    double *u, *v;
-    /* parameters */
-    double eps, omega;
-    double re, tau, gamma;
-    double gx, gy;
-    /* time stepping */
-    int itermax;
-    double dt, te;
-    double dtBound;
-    char* problem;
-    int bcN, bcS, bcW, bcE;
-    /* mpi */
-    int rank;
-    int size;
-    MPI_Comm comm;
-    MPI_Datatype iBufferType, jBufferType;
-    int iNeighbours[2], jNeighbours[2];
-    int coords[2], dims[2];
-    int imaxLocal, jmaxLocal;
-} Solver;
-
-void initSolver(Solver*, Parameter*);
-void computeRHS(Solver*);
-int solve(Solver*);
-void computeTimestep(Solver*);
-void setBoundaryConditions(Solver*);
-void setSpecialBoundaryCondition(Solver*);
-void computeFG(Solver*);
-void adaptUV(Solver*);
-void collectResult(Solver*);
-void writeResult(Solver*, double*, double*, double*);
-void debugExchange(Solver*);
-void debugBC(Solver*);
-void print(Solver*, double*);
-#endif
--- a/BasicSolver/2D-mpi-v2/vector.plot
+++ b/BasicSolver/2D-mpi-v2/vector.plot
@@ -1,5 +0,0 @@
-set terminal png size 1800,768 enhanced font ,12
-set output 'velocity.png'
-set datafile separator whitespace
-
-plot 'velocity.dat' using 1:2:3:4:5 with vectors filled head size 0.01,20,60 lc palette
--- a/BasicSolver/2D-mpi-v3/canal.par
+++ b/BasicSolver/2D-mpi-v3/canal.par
@@ -1,46 +0,0 @@
-#==============================================================================
-#                            Laminar Canal Flow
-#==============================================================================
-
-# Problem specific Data:
-# ---------------------
-
-name canal             # name of flow setup
-
-bcN     1              #  flags for boundary conditions
-bcE     3              #  1 = no-slip      3 = outflow
-bcS     1              #  2 = free-slip    4 = periodic
-bcW     3              #
-
-gx     0.0      # Body forces (e.g. gravity)
-gy     0.0      #
-
-re            100.0	   # Reynolds number
-
-u_init        1.0      # initial value for velocity in x-direction
-v_init        0.0      # initial value for velocity in y-direction
-p_init        0.0      # initial value for pressure
-
-# Geometry Data:
-# -------------
-
-xlength       30.0     # domain size in x-direction
-ylength       4.0	   # domain size in y-direction
-imax          200      # number of interior cells in x-direction
-jmax          50	   # number of interior cells in y-direction
-
-# Time Data:
-# ---------
-
-te       100.0   # final time
-dt       0.02    # time stepsize
-tau      0.5     # safety factor for time stepsize control (<0 constant delt)
-
-# Pressure Iteration Data:
-# -----------------------
-
-itermax       500       # maximal number of pressure iteration in one time step
-eps           0.00001   # stopping tolerance for pressure iteration
-omg           1.8       # relaxation parameter for SOR iteration
-gamma         0.9       # upwind differencing factor gamma
-#===============================================================================
--- a/BasicSolver/2D-mpi-v3/include_CLANG.mk
+++ b/BasicSolver/2D-mpi-v3/include_CLANG.mk
@@ -1,16 +0,0 @@
-CC   = mpicc
-GCC  = cc
-LINKER = $(CC)
-
-ifeq ($(ENABLE_OPENMP),true)
-OPENMP   = -fopenmp
-#OPENMP   = -Xpreprocessor -fopenmp #required on Macos with homebrew libomp
-LIBS     = # -lomp
-endif
-
-VERSION  = --version
-CFLAGS   = -Ofast -std=c99 $(OPENMP)
-#CFLAGS   = -Ofast -fnt-store=aggressive  -std=c99 $(OPENMP) #AMD CLANG
-LFLAGS   = $(OPENMP)
-DEFINES  = -D_GNU_SOURCE# -DDEBUG
-INCLUDES = -I/usr/local/include
--- a/BasicSolver/2D-mpi-v3/include_ICC.mk
+++ b/BasicSolver/2D-mpi-v3/include_ICC.mk
@@ -1,14 +0,0 @@
-CC   = mpiicc
-GCC  = gcc
-LINKER = $(CC)
-
-ifeq ($(ENABLE_OPENMP),true)
-OPENMP   = -qopenmp
-endif
-
-VERSION  = --version
-CFLAGS   =  -O3 -xHost -qopt-zmm-usage=high -std=c99 $(OPENMP)
-LFLAGS   = $(OPENMP)
-DEFINES  = -D_GNU_SOURCE
-INCLUDES =
-LIBS     =
--- a/BasicSolver/2D-mpi-v3/src/affinity.h
+++ b/BasicSolver/2D-mpi-v3/src/affinity.h
@@ -1,14 +0,0 @@
-/*
- * Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
- * All rights reserved.
- * Use of this source code is governed by a MIT-style
- * license that can be found in the LICENSE file.
- */
-#ifndef AFFINITY_H
-#define AFFINITY_H
-
-extern int affinity_getProcessorId();
-extern void affinity_pinProcess(int);
-extern void affinity_pinThread(int);
-
-#endif /*AFFINITY_H*/
--- a/BasicSolver/2D-mpi-v3/src/main.c
+++ b/BasicSolver/2D-mpi-v3/src/main.c
@@ -1,77 +0,0 @@
-/*
- * Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
- * All rights reserved.
- * Use of this source code is governed by a MIT-style
- * license that can be found in the LICENSE file.
- */
-#include <float.h>
-#include <limits.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <unistd.h>
-
-#include "parameter.h"
-#include "progress.h"
-#include "solver.h"
-#include "timing.h"
-#include <mpi.h>
-
-int main(int argc, char** argv)
-{
-    int rank;
-    double S, E;
-    Parameter params;
-    Solver solver;
-
-    MPI_Init(&argc, &argv);
-    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
-    initParameter(&params);
-
-    if (argc != 2) {
-        printf("Usage: %s <configFile>\n", argv[0]);
-        exit(EXIT_SUCCESS);
-    }
-
-    readParameter(&params, argv[1]);
-    if (rank == 0) {
-        printParameter(&params);
-    }
-    initSolver(&solver, &params);
-    initProgress(solver.te);
-
-    double tau = solver.tau;
-    double te  = solver.te;
-    double t   = 0.0;
-
-    S = getTimeStamp();
-    while (t <= te) {
-        if (tau > 0.0) {
-            computeTimestep(&solver);
-        }
-
-        setBoundaryConditions(&solver);
-        setSpecialBoundaryCondition(&solver);
-        computeFG(&solver);
-        computeRHS(&solver);
-        solve(&solver);
-        adaptUV(&solver);
-        t += solver.dt;
-
-#ifdef VERBOSE
-        if (rank == 0) {
-            printf("TIME %f , TIMESTEP %f\n", t, solver.dt);
-        }
-#else
-        printProgress(t);
-#endif
-    }
-    E = getTimeStamp();
-    stopProgress();
-    if (rank == 0) {
-        printf("Solution took %.2fs\n", E - S);
-    }
-    collectResult(&solver);
-
-    MPI_Finalize();
-    return EXIT_SUCCESS;
-}
--- a/BasicSolver/2D-mpi-v3/src/progress.c
+++ b/BasicSolver/2D-mpi-v3/src/progress.c
@@ -1,60 +0,0 @@
-/*
- * Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
- * All rights reserved. This file is part of nusif-solver.
- * Use of this source code is governed by a MIT style
- * license that can be found in the LICENSE file.
- */
-#include <math.h>
-#include <mpi.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-#include "progress.h"
-
-static double _end;
-static int _current;
-static int _rank = -1;
-
-void initProgress(double end)
-{
-    MPI_Comm_rank(MPI_COMM_WORLD, &_rank);
-    _end     = end;
-    _current = 0;
-
-    if (_rank == 0) {
-        printf("[          ]");
-        fflush(stdout);
-    }
-}
-
-void printProgress(double current)
-{
-    if (_rank == 0) {
-        int new = (int)rint((current / _end) * 10.0);
-
-        if (new > _current) {
-            char progress[11];
-            _current    = new;
-            progress[0] = 0;
-
-            for (int i = 0; i < 10; i++) {
-                if (i < _current) {
-                    sprintf(progress + strlen(progress), "#");
-                } else {
-                    sprintf(progress + strlen(progress), " ");
-                }
-            }
-            printf("\r[%s]", progress);
-        }
-        fflush(stdout);
-    }
-}
-
-void stopProgress()
-{
-    if (_rank == 0) {
-        printf("\n");
-        fflush(stdout);
-    }
-}
--- a/BasicSolver/2D-mpi-v3/src/solver.c
+++ b/BasicSolver/2D-mpi-v3/src/solver.c
@@ -1,833 +0,0 @@
-/*
- * Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
- * All rights reserved. This file is part of nusif-solver.
- * Use of this source code is governed by a MIT style
- * license that can be found in the LICENSE file.
- */
-#include <float.h>
-#include <math.h>
-#include <mpi.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-#include "allocate.h"
-#include "parameter.h"
-#include "solver.h"
-#include "util.h"
-
-#define P(i, j)   p[(j) * (imaxLocal + 2) + (i)]
-#define F(i, j)   f[(j) * (imaxLocal + 2) + (i)]
-#define G(i, j)   g[(j) * (imaxLocal + 2) + (i)]
-#define U(i, j)   u[(j) * (imaxLocal + 2) + (i)]
-#define V(i, j)   v[(j) * (imaxLocal + 2) + (i)]
-#define RHS(i, j) rhs[(j) * (imaxLocal + 2) + (i)]
-
-#define IDIM 0
-#define JDIM 1
-
-static int sizeOfRank(int rank, int size, int N)
-{
-    return N / size + ((N % size > rank) ? 1 : 0);
-}
-
-void print(Solver* solver, double* grid)
-{
-    int imaxLocal = solver->imaxLocal;
-
-    for (int i = 0; i < solver->size; i++) {
-        if (i == solver->rank) {
-            printf(
-                "### RANK %d #######################################################\n",
-                solver->rank);
-            for (int j = 0; j < solver->jmaxLocal + 2; j++) {
-                printf("%02d: ", j);
-                for (int i = 0; i < solver->imaxLocal + 2; i++) {
-                    printf("%12.8f  ", grid[j * (imaxLocal + 2) + i]);
-                }
-                printf("\n");
-            }
-            fflush(stdout);
-        }
-        MPI_Barrier(MPI_COMM_WORLD);
-    }
-}
-
-static void exchange(Solver* solver, double* grid)
-{
-    int counts[4] = { 1, 1, 1, 1 };
-
-    MPI_Neighbor_alltoallw(grid,
-        counts,
-        solver->sdispls,
-        solver->bufferTypes,
-        grid,
-        counts,
-        solver->rdispls,
-        solver->bufferTypes,
-        solver->comm);
-}
-
-static void shift(Solver* solver)
-{
-    MPI_Request requests[4] = { MPI_REQUEST_NULL,
-        MPI_REQUEST_NULL,
-        MPI_REQUEST_NULL,
-        MPI_REQUEST_NULL };
-    double* f               = solver->f;
-    double* g               = solver->g;
-
-    /* shift G */
-    double* buf = g + 1;
-    /* receive ghost cells from bottom neighbor */
-    MPI_Irecv(buf,
-        1,
-        solver->bufferTypes[2],
-        solver->jNeighbours[0],
-        0,
-        solver->comm,
-        &requests[0]);
-
-    buf = g + (solver->jmaxLocal) * (solver->imaxLocal + 2) + 1;
-    /* send ghost cells to top neighbor */
-    MPI_Isend(buf,
-        1,
-        solver->bufferTypes[2],
-        solver->jNeighbours[1],
-        0,
-        solver->comm,
-        &requests[1]);
-
-    /* shift F */
-    buf = f + (solver->imaxLocal + 2);
-    /* receive ghost cells from left neighbor */
-    MPI_Irecv(buf,
-        1,
-        solver->bufferTypes[0],
-        solver->iNeighbours[0],
-        1,
-        solver->comm,
-        &requests[2]);
-
-    buf = f + (solver->imaxLocal + 2) + (solver->imaxLocal);
-    /* send ghost cells to right neighbor */
-    MPI_Isend(buf,
-        1,
-        solver->bufferTypes[0],
-        solver->iNeighbours[1],
-        1,
-        solver->comm,
-        &requests[3]);
-
-    MPI_Waitall(4, requests, MPI_STATUSES_IGNORE);
-}
-
-void debugExchange(Solver* solver)
-{
-    for (int i = 0; i < (solver->imaxLocal + 2) * (solver->jmaxLocal + 2); i++) {
-        solver->p[i] = solver->rank;
-    }
-    exchange(solver, solver->p);
-    print(solver, solver->p);
-}
-
-static void assembleResult(Solver* solver,
-    double* src,
-    double* dst,
-    int imaxLocal[],
-    int jmaxLocal[],
-    int offset[])
-{
-    MPI_Request* requests;
-    int numRequests = 1;
-
-    if (solver->rank == 0) {
-        numRequests = solver->size + 1;
-    } else {
-        numRequests = 1;
-    }
-
-    requests = (MPI_Request*)malloc(numRequests * sizeof(MPI_Request));
-
-    /* all ranks send their bulk array */
-    MPI_Datatype bulkType;
-    const int ndims     = 2;
-    int oldSizes[ndims] = { solver->jmaxLocal + 2, solver->imaxLocal + 2 };
-    int newSizes[ndims] = { solver->jmaxLocal, solver->imaxLocal };
-    int starts[ndims]   = { 1, 1 };
-    MPI_Type_create_subarray(2,
-        oldSizes,
-        newSizes,
-        starts,
-        MPI_ORDER_C,
-        MPI_DOUBLE,
-        &bulkType);
-    MPI_Type_commit(&bulkType);
-
-    MPI_Isend(src, 1, bulkType, 0, 0, solver->comm, &requests[0]);
-
-    /* rank 0 assembles the subdomains */
-    if (solver->rank == 0) {
-        for (int i = 0; i < solver->size; i++) {
-            MPI_Datatype domainType;
-            MPI_Type_vector(jmaxLocal[i],
-                imaxLocal[i],
-                solver->imax,
-                MPI_DOUBLE,
-                &domainType);
-            MPI_Type_commit(&domainType);
-
-            MPI_Irecv(dst + offset[i],
-                1,
-                domainType,
-                i,
-                0,
-                solver->comm,
-                &requests[i + 1]);
-        }
-    }
-
-    MPI_Waitall(numRequests, requests, MPI_STATUSES_IGNORE);
-}
-
-static int sum(int* sizes, int position)
-{
-    int sum = 0;
-
-    for (int i = 0; i < position; i++) {
-        sum += sizes[i];
-    }
-
-    return sum;
-}
-
-void collectResult(Solver* solver)
-{
-    double* Pall = NULL;
-    double* Uall = NULL;
-    double* Vall = NULL;
-    int offset[solver->size];
-    int imaxLocal[solver->size];
-    int jmaxLocal[solver->size];
-
-    MPI_Gather(&solver->imaxLocal, 1, MPI_INT, imaxLocal, 1, MPI_INT, 0, MPI_COMM_WORLD);
-    MPI_Gather(&solver->jmaxLocal, 1, MPI_INT, jmaxLocal, 1, MPI_INT, 0, MPI_COMM_WORLD);
-
-    if (solver->rank == 0) {
-        Pall = allocate(64, (solver->imax) * (solver->jmax) * sizeof(double));
-        Uall = allocate(64, (solver->imax) * (solver->jmax) * sizeof(double));
-        Vall = allocate(64, (solver->imax) * (solver->jmax) * sizeof(double));
-
-        for (int i = 0; i < solver->size; i++) {
-            int coords[2];
-            MPI_Cart_coords(solver->comm, i, 2, coords);
-            int ioffset = sum(imaxLocal, coords[0]);
-            int joffset = sum(jmaxLocal, coords[1]);
-            offset[i]   = (joffset * solver->imax) + ioffset;
-            printf("Rank: %d, Coords(i,j): %d %d, Size(i,j): %d %d, Offset(i,j): %d %d\n",
-                i,
-                coords[0],
-                coords[1],
-                imaxLocal[i],
-                jmaxLocal[i],
-                ioffset,
-                joffset);
-        }
-    }
-
-    /* collect P */
-    assembleResult(solver, solver->p, Pall, imaxLocal, jmaxLocal, offset);
-
-    /* collect U */
-    assembleResult(solver, solver->u, Uall, imaxLocal, jmaxLocal, offset);
-
-    /* collect V */
-    assembleResult(solver, solver->v, Vall, imaxLocal, jmaxLocal, offset);
-
-    /* write to disk */
-    if (solver->rank == 0) writeResult(solver, Pall, Uall, Vall);
-}
-
-static void printConfig(Solver* solver)
-{
-    if (solver->rank == 0) {
-        printf("Parameters for #%s#\n", solver->problem);
-        printf("Boundary conditions Top:%d Bottom:%d Left:%d Right:%d\n",
-            solver->bcTop,
-            solver->bcBottom,
-            solver->bcLeft,
-            solver->bcRight);
-        printf("\tReynolds number: %.2f\n", solver->re);
-        printf("\tGx Gy: %.2f %.2f\n", solver->gx, solver->gy);
-        printf("Geometry data:\n");
-        printf("\tDomain box size (x, y): %.2f, %.2f\n",
-            solver->xlength,
-            solver->ylength);
-        printf("\tCells (x, y): %d, %d\n", solver->imax, solver->jmax);
-        printf("Timestep parameters:\n");
-        printf("\tDefault stepsize: %.2f, Final time %.2f\n", solver->dt, solver->te);
-        printf("\tdt bound: %.6f\n", solver->dtBound);
-        printf("\tTau factor: %.2f\n", solver->tau);
-        printf("Iterative solver parameters:\n");
-        printf("\tMax iterations: %d\n", solver->itermax);
-        printf("\tepsilon (stopping tolerance) : %f\n", solver->eps);
-        printf("\tgamma factor: %f\n", solver->gamma);
-        printf("\tomega (SOR relaxation): %f\n", solver->omega);
-        printf("Communication parameters:\n");
-    }
-    for (int i = 0; i < solver->size; i++) {
-        if (i == solver->rank) {
-            printf("\tRank %d of %d\n", solver->rank, solver->size);
-            printf("\tNeighbours (b, t, l, r): %d, %d, %d, %d\n",
-                solver->jNeighbours[0],
-                solver->jNeighbours[1],
-                solver->iNeighbours[0],
-                solver->iNeighbours[1]);
-            printf("\tCoordinates %d,%d\n", solver->coords[0], solver->coords[1]);
-            printf("\tLocal domain size: %dx%d\n", solver->imaxLocal, solver->jmaxLocal);
-            fflush(stdout);
-        }
-    }
-}
-
-void initSolver(Solver* solver, Parameter* params)
-{
-    solver->problem  = params->name;
-    solver->bcTop    = params->bcTop;
-    solver->bcBottom = params->bcBottom;
-    solver->bcLeft   = params->bcLeft;
-    solver->bcRight  = params->bcRight;
-    solver->imax     = params->imax;
-    solver->jmax     = params->jmax;
-    solver->xlength  = params->xlength;
-    solver->ylength  = params->ylength;
-    solver->dx       = params->xlength / params->imax;
-    solver->dy       = params->ylength / params->jmax;
-    solver->eps      = params->eps;
-    solver->omega    = params->omg;
-    solver->itermax  = params->itermax;
-    solver->re       = params->re;
-    solver->gx       = params->gx;
-    solver->gy       = params->gy;
-    solver->dt       = params->dt;
-    solver->te       = params->te;
-    solver->tau      = params->tau;
-    solver->gamma    = params->gamma;
-
-    /* setup communication */
-    MPI_Comm_rank(MPI_COMM_WORLD, &(solver->rank));
-    MPI_Comm_size(MPI_COMM_WORLD, &(solver->size));
-    int dims[NDIMS]    = { 0, 0 };
-    int periods[NDIMS] = { 0, 0 };
-    MPI_Dims_create(solver->size, NDIMS, dims);
-    MPI_Cart_create(MPI_COMM_WORLD, NDIMS, dims, periods, 0, &solver->comm);
-    MPI_Cart_shift(solver->comm,
-        IDIM,
-        1,
-        &solver->iNeighbours[0],
-        &solver->iNeighbours[1]);
-    MPI_Cart_shift(solver->comm,
-        JDIM,
-        1,
-        &solver->jNeighbours[0],
-        &solver->jNeighbours[1]);
-    MPI_Cart_get(solver->comm, NDIMS, solver->dims, periods, solver->coords);
-
-    solver->imaxLocal = sizeOfRank(solver->coords[IDIM], dims[IDIM], solver->imax);
-    solver->jmaxLocal = sizeOfRank(solver->coords[JDIM], dims[JDIM], solver->jmax);
-
-    MPI_Datatype jBufferType;
-    MPI_Type_contiguous(solver->imaxLocal, MPI_DOUBLE, &jBufferType);
-    MPI_Type_commit(&jBufferType);
-
-    MPI_Datatype iBufferType;
-    MPI_Type_vector(solver->jmaxLocal,
-        1,
-        solver->imaxLocal + 2,
-        MPI_DOUBLE,
-        &iBufferType);
-    MPI_Type_commit(&iBufferType);
-
-    // in the order of the dimensions i->0, j->1
-    // first negative direction, then positive direction
-    size_t dblsize         = sizeof(double);
-    int imaxLocal          = solver->imaxLocal;
-    int jmaxLocal          = solver->jmaxLocal;
-    solver->bufferTypes[0] = iBufferType; // left
-    solver->bufferTypes[1] = iBufferType; // right
-    solver->bufferTypes[2] = jBufferType; // bottom
-    solver->bufferTypes[3] = jBufferType; // top
-
-    solver->sdispls[0] = ((imaxLocal + 2) + 1) * dblsize;               // send left
-    solver->sdispls[1] = ((imaxLocal + 2) + imaxLocal) * dblsize;       // send right
-    solver->sdispls[2] = ((imaxLocal + 2) + 1) * dblsize;               // send bottom
-    solver->sdispls[3] = ((jmaxLocal) * (imaxLocal + 2) + 1) * dblsize; // send top
-
-    solver->rdispls[0] = (imaxLocal + 2) * dblsize;                         // recv left
-    solver->rdispls[1] = ((imaxLocal + 2) + (imaxLocal + 1)) * dblsize;     // recv right
-    solver->rdispls[2] = 1 * dblsize;                                       // recv bottom
-    solver->rdispls[3] = ((jmaxLocal + 1) * (imaxLocal + 2) + 1) * dblsize; // recv top
-
-    /* allocate arrays */
-    size_t bytesize = (imaxLocal + 2) * (jmaxLocal + 2) * sizeof(double);
-    solver->u       = allocate(64, bytesize);
-    solver->v       = allocate(64, bytesize);
-    solver->p       = allocate(64, bytesize);
-    solver->rhs     = allocate(64, bytesize);
-    solver->f       = allocate(64, bytesize);
-    solver->g       = allocate(64, bytesize);
-
-    for (int i = 0; i < (imaxLocal + 2) * (jmaxLocal + 2); i++) {
-        solver->u[i]   = params->u_init;
-        solver->v[i]   = params->v_init;
-        solver->p[i]   = params->p_init;
-        solver->rhs[i] = 0.0;
-        solver->f[i]   = 0.0;
-        solver->g[i]   = 0.0;
-    }
-
-    double dx          = solver->dx;
-    double dy          = solver->dy;
-    double inv_sqr_sum = 1.0 / (dx * dx) + 1.0 / (dy * dy);
-    solver->dtBound    = 0.5 * solver->re * 1.0 / inv_sqr_sum;
-#ifdef VERBOSE
-    printConfig(solver);
-#endif
-}
-
-void computeRHS(Solver* solver)
-{
-    int imaxLocal = solver->imaxLocal;
-    int jmaxLocal = solver->jmaxLocal;
-    double idx    = 1.0 / solver->dx;
-    double idy    = 1.0 / solver->dy;
-    double idt    = 1.0 / solver->dt;
-    double* rhs   = solver->rhs;
-    double* f     = solver->f;
-    double* g     = solver->g;
-
-    shift(solver);
-
-    for (int j = 1; j < jmaxLocal + 1; j++) {
-        for (int i = 1; i < imaxLocal + 1; i++) {
-            RHS(i, j) = ((F(i, j) - F(i - 1, j)) * idx + (G(i, j) - G(i, j - 1)) * idy) *
-                        idt;
-        }
-    }
-}
-
-int solve(Solver* solver)
-{
-    int imax      = solver->imax;
-    int jmax      = solver->jmax;
-    int imaxLocal = solver->imaxLocal;
-    int jmaxLocal = solver->jmaxLocal;
-    double eps    = solver->eps;
-    int itermax   = solver->itermax;
-    double dx2    = solver->dx * solver->dx;
-    double dy2    = solver->dy * solver->dy;
-    double idx2   = 1.0 / dx2;
-    double idy2   = 1.0 / dy2;
-    // identical to 1/((2/dx2)+(2/dy2))
-    double factor = solver->omega * 0.5 * (dx2 * dy2) / (dx2 + dy2);
-    double* p     = solver->p;
-    double* rhs   = solver->rhs;
-    double epssq  = eps * eps;
-    int it        = 0;
-    double res    = 1.0;
-
-    while ((res >= epssq) && (it < itermax)) {
-        res = 0.0;
-        exchange(solver, p);
-
-        for (int j = 1; j < jmaxLocal + 1; j++) {
-            for (int i = 1; i < imaxLocal + 1; i++) {
-
-                double r = RHS(i, j) -
-                           ((P(i + 1, j) - 2.0 * P(i, j) + P(i - 1, j)) * idx2 +
-                               (P(i, j + 1) - 2.0 * P(i, j) + P(i, j - 1)) * idy2);
-
-                P(i, j) -= (factor * r);
-                res += (r * r);
-            }
-        }
-
-        if (solver->coords[JDIM] == 0) { // set bottom bc
-            for (int i = 1; i < imaxLocal + 1; i++) {
-                P(i, 0) = P(i, 1);
-            }
-        }
-
-        if (solver->coords[JDIM] == (solver->dims[JDIM] - 1)) { // set top bc
-            for (int i = 1; i < imaxLocal + 1; i++) {
-                P(i, jmaxLocal + 1) = P(i, jmaxLocal);
-            }
-        }
-
-        if (solver->coords[IDIM] == 0) { // set left bc
-            for (int j = 1; j < jmaxLocal + 1; j++) {
-                P(0, j) = P(1, j);
-            }
-        }
-
-        if (solver->coords[IDIM] == (solver->dims[IDIM] - 1)) { // set right bc
-            for (int j = 1; j < jmaxLocal + 1; j++) {
-                P(imaxLocal + 1, j) = P(imaxLocal, j);
-            }
-        }
-
-        MPI_Allreduce(MPI_IN_PLACE, &res, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
-        res = res / (double)(imax * jmax);
-#ifdef DEBUG
-        if (solver->rank == 0) {
-            printf("%d Residuum: %e\n", it, res);
-        }
-#endif
-        it++;
-    }
-
-#ifdef VERBOSE
-    if (solver->rank == 0) {
-        printf("Solver took %d iterations to reach %f\n", it, sqrt(res));
-    }
-#endif
-    if (res < eps) {
-        return 0;
-    } else {
-        return 1;
-    }
-}
-
-static double maxElement(Solver* solver, double* m)
-{
-    int size      = (solver->imaxLocal + 2) * (solver->jmaxLocal + 2);
-    double maxval = DBL_MIN;
-
-    for (int i = 0; i < size; i++) {
-        maxval = MAX(maxval, fabs(m[i]));
-    }
-
-    MPI_Allreduce(MPI_IN_PLACE, &maxval, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD);
-    return maxval;
-}
-
-void computeTimestep(Solver* solver)
-{
-    double dt   = solver->dtBound;
-    double dx   = solver->dx;
-    double dy   = solver->dy;
-    double umax = maxElement(solver, solver->u);
-    double vmax = maxElement(solver, solver->v);
-
-    if (umax > 0) {
-        dt = (dt > dx / umax) ? dx / umax : dt;
-    }
-    if (vmax > 0) {
-        dt = (dt > dy / vmax) ? dy / vmax : dt;
-    }
-
-    solver->dt = dt * solver->tau;
-}
-
-void setBoundaryConditions(Solver* solver)
-{
-    int imaxLocal = solver->imaxLocal;
-    int jmaxLocal = solver->jmaxLocal;
-    double* u     = solver->u;
-    double* v     = solver->v;
-
-    // Northern boundary
-    if (solver->coords[JDIM] == (solver->dims[JDIM] - 1)) { // set top bc
-        switch (solver->bcTop) {
-        case NOSLIP:
-            for (int i = 1; i < imaxLocal + 1; i++) {
-                V(i, jmaxLocal)     = 0.0;
-                U(i, jmaxLocal + 1) = -U(i, jmaxLocal);
-            }
-            break;
-        case SLIP:
-            for (int i = 1; i < imaxLocal + 1; i++) {
-                V(i, jmaxLocal)     = 0.0;
-                U(i, jmaxLocal + 1) = U(i, jmaxLocal);
-            }
-            break;
-        case OUTFLOW:
-            for (int i = 1; i < imaxLocal + 1; i++) {
-                U(i, jmaxLocal + 1) = U(i, jmaxLocal);
-                V(i, jmaxLocal)     = V(i, jmaxLocal - 1);
-            }
-            break;
-        case PERIODIC:
-            break;
-        }
-    }
-
-    // Southern boundary
-    if (solver->coords[JDIM] == 0) { // set bottom bc
-        switch (solver->bcBottom) {
-        case NOSLIP:
-            for (int i = 1; i < imaxLocal + 1; i++) {
-                V(i, 0) = 0.0;
-                U(i, 0) = -U(i, 1);
-            }
-            break;
-        case SLIP:
-            for (int i = 1; i < imaxLocal + 1; i++) {
-                V(i, 0) = 0.0;
-                U(i, 0) = U(i, 1);
-            }
-            break;
-        case OUTFLOW:
-            for (int i = 1; i < imaxLocal + 1; i++) {
-                U(i, 0) = U(i, 1);
-                V(i, 0) = V(i, 1);
-            }
-            break;
-        case PERIODIC:
-            break;
-        }
-    }
-
-    // Eastern boundary
-    if (solver->coords[IDIM] == (solver->dims[IDIM] - 1)) { // set right bc
-        switch (solver->bcRight) {
-        case NOSLIP:
-            for (int j = 1; j < jmaxLocal + 1; j++) {
-                U(imaxLocal, j)     = 0.0;
-                V(imaxLocal + 1, j) = -V(imaxLocal, j);
-            }
-            break;
-        case SLIP:
-            for (int j = 1; j < jmaxLocal + 1; j++) {
-                U(imaxLocal, j)     = 0.0;
-                V(imaxLocal + 1, j) = V(imaxLocal, j);
-            }
-            break;
-        case OUTFLOW:
-            for (int j = 1; j < jmaxLocal + 1; j++) {
-                U(imaxLocal, j)     = U(imaxLocal - 1, j);
-                V(imaxLocal + 1, j) = V(imaxLocal, j);
-            }
-            break;
-        case PERIODIC:
-            break;
-        }
-    }
-
-    // Western boundary
-    if (solver->coords[IDIM] == 0) { // set left bc
-        switch (solver->bcLeft) {
-        case NOSLIP:
-            for (int j = 1; j < jmaxLocal + 1; j++) {
-                U(0, j) = 0.0;
-                V(0, j) = -V(1, j);
-            }
-            break;
-        case SLIP:
-            for (int j = 1; j < jmaxLocal + 1; j++) {
-                U(0, j) = 0.0;
-                V(0, j) = V(1, j);
-            }
-            break;
-        case OUTFLOW:
-            for (int j = 1; j < jmaxLocal + 1; j++) {
-                U(0, j) = U(1, j);
-                V(0, j) = V(1, j);
-            }
-            break;
-        case PERIODIC:
-            break;
-        }
-    }
-}
-
-void setSpecialBoundaryCondition(Solver* solver)
-{
-    int imaxLocal = solver->imaxLocal;
-    int jmaxLocal = solver->jmaxLocal;
-    double* u     = solver->u;
-
-    if (strcmp(solver->problem, "dcavity") == 0) {
-        if (solver->coords[JDIM] == (solver->dims[JDIM] - 1)) { // set top bc
-            for (int i = 1; i < imaxLocal + 1; i++) {
-                U(i, jmaxLocal + 1) = 2.0 - U(i, jmaxLocal);
-            }
-        }
-    } else if (strcmp(solver->problem, "canal") == 0) {
-        if (solver->coords[IDIM] == 0) { // set left bc
-            double ylength = solver->ylength;
-            double dy      = solver->dy;
-            int rest       = solver->jmax % solver->size;
-            int yc         = solver->rank * (solver->jmax / solver->size) +
-                     MIN(rest, solver->rank);
-            double ys = dy * (yc + 0.5);
-            double y;
-
-            /* printf("RANK %d yc: %d ys: %f\n", solver->rank, yc, ys); */
-
-            for (int j = 1; j < jmaxLocal + 1; j++) {
-                y       = ys + dy * (j - 0.5);
-                U(0, j) = y * (ylength - y) * 4.0 / (ylength * ylength);
-            }
-        }
-    }
-    /* print(solver, solver->u); */
-}
-
-void computeFG(Solver* solver)
-{
-    double* u        = solver->u;
-    double* v        = solver->v;
-    double* f        = solver->f;
-    double* g        = solver->g;
-    int imaxLocal    = solver->imaxLocal;
-    int jmaxLocal    = solver->jmaxLocal;
-    double gx        = solver->gx;
-    double gy        = solver->gy;
-    double gamma     = solver->gamma;
-    double dt        = solver->dt;
-    double inverseRe = 1.0 / solver->re;
-    double inverseDx = 1.0 / solver->dx;
-    double inverseDy = 1.0 / solver->dy;
-    double du2dx, dv2dy, duvdx, duvdy;
-    double du2dx2, du2dy2, dv2dx2, dv2dy2;
-
-    exchange(solver, u);
-    exchange(solver, v);
-
-    for (int j = 1; j < jmaxLocal + 1; j++) {
-        for (int i = 1; i < imaxLocal + 1; i++) {
-            du2dx = inverseDx * 0.25 *
-                        ((U(i, j) + U(i + 1, j)) * (U(i, j) + U(i + 1, j)) -
-                            (U(i, j) + U(i - 1, j)) * (U(i, j) + U(i - 1, j))) +
-                    gamma * inverseDx * 0.25 *
-                        (fabs(U(i, j) + U(i + 1, j)) * (U(i, j) - U(i + 1, j)) +
-                            fabs(U(i, j) + U(i - 1, j)) * (U(i, j) - U(i - 1, j)));
-
-            duvdy = inverseDy * 0.25 *
-                        ((V(i, j) + V(i + 1, j)) * (U(i, j) + U(i, j + 1)) -
-                            (V(i, j - 1) + V(i + 1, j - 1)) * (U(i, j) + U(i, j - 1))) +
-                    gamma * inverseDy * 0.25 *
-                        (fabs(V(i, j) + V(i + 1, j)) * (U(i, j) - U(i, j + 1)) +
-                            fabs(V(i, j - 1) + V(i + 1, j - 1)) *
-                                (U(i, j) - U(i, j - 1)));
-
-            du2dx2  = inverseDx * inverseDx * (U(i + 1, j) - 2.0 * U(i, j) + U(i - 1, j));
-            du2dy2  = inverseDy * inverseDy * (U(i, j + 1) - 2.0 * U(i, j) + U(i, j - 1));
-            F(i, j) = U(i, j) + dt * (inverseRe * (du2dx2 + du2dy2) - du2dx - duvdy + gx);
-
-            duvdx = inverseDx * 0.25 *
-                        ((U(i, j) + U(i, j + 1)) * (V(i, j) + V(i + 1, j)) -
-                            (U(i - 1, j) + U(i - 1, j + 1)) * (V(i, j) + V(i - 1, j))) +
-                    gamma * inverseDx * 0.25 *
-                        (fabs(U(i, j) + U(i, j + 1)) * (V(i, j) - V(i + 1, j)) +
-                            fabs(U(i - 1, j) + U(i - 1, j + 1)) *
-                                (V(i, j) - V(i - 1, j)));
-
-            dv2dy = inverseDy * 0.25 *
-                        ((V(i, j) + V(i, j + 1)) * (V(i, j) + V(i, j + 1)) -
-                            (V(i, j) + V(i, j - 1)) * (V(i, j) + V(i, j - 1))) +
-                    gamma * inverseDy * 0.25 *
-                        (fabs(V(i, j) + V(i, j + 1)) * (V(i, j) - V(i, j + 1)) +
-                            fabs(V(i, j) + V(i, j - 1)) * (V(i, j) - V(i, j - 1)));
-
-            dv2dx2  = inverseDx * inverseDx * (V(i + 1, j) - 2.0 * V(i, j) + V(i - 1, j));
-            dv2dy2  = inverseDy * inverseDy * (V(i, j + 1) - 2.0 * V(i, j) + V(i, j - 1));
-            G(i, j) = V(i, j) + dt * (inverseRe * (dv2dx2 + dv2dy2) - duvdx - dv2dy + gy);
-        }
-    }
-
-    /* ----------------------------- boundary of F --------------------------- */
-    if (solver->coords[IDIM] == 0) { // set left bc
-        for (int j = 1; j < jmaxLocal + 1; j++) {
-            F(0, j) = U(0, j);
-        }
-    }
-
-    if (solver->coords[IDIM] == (solver->dims[IDIM] - 1)) { // set right bc
-        for (int j = 1; j < jmaxLocal + 1; j++) {
-            F(imaxLocal, j) = U(imaxLocal, j);
-        }
-    }
-
-    /* ----------------------------- boundary of G --------------------------- */
-    if (solver->coords[JDIM] == 0) { // set bottom bc
-        for (int i = 1; i < imaxLocal + 1; i++) {
-            G(i, 0) = V(i, 0);
-        }
-    }
-
-    if (solver->coords[JDIM] == (solver->dims[JDIM] - 1)) { // set top bc
-        for (int i = 1; i < imaxLocal + 1; i++) {
-            G(i, jmaxLocal) = V(i, jmaxLocal);
-        }
-    }
-}
-
-void adaptUV(Solver* solver)
-{
-    int imaxLocal  = solver->imaxLocal;
-    int jmaxLocal  = solver->jmaxLocal;
-    double* p      = solver->p;
-    double* u      = solver->u;
-    double* v      = solver->v;
-    double* f      = solver->f;
-    double* g      = solver->g;
-    double factorX = solver->dt / solver->dx;
-    double factorY = solver->dt / solver->dy;
-
-    for (int j = 1; j < jmaxLocal + 1; j++) {
-        for (int i = 1; i < imaxLocal + 1; i++) {
-            U(i, j) = F(i, j) - (P(i + 1, j) - P(i, j)) * factorX;
-            V(i, j) = G(i, j) - (P(i, j + 1) - P(i, j)) * factorY;
-        }
-    }
-}
-
-void writeResult(Solver* solver, double* p, double* u, double* v)
-{
-    int imax  = solver->imax;
-    int jmax  = solver->jmax;
-    double dx = solver->dx;
-    double dy = solver->dy;
-    double x = 0.0, y = 0.0;
-
-    FILE* fp;
-    fp = fopen("pressure.dat", "w");
-
-    if (fp == NULL) {
-        printf("Error!\n");
-        exit(EXIT_FAILURE);
-    }
-
-    for (int j = 1; j < jmax; j++) {
-        y = (double)(j - 0.5) * dy;
-        for (int i = 1; i < imax; i++) {
-            x = (double)(i - 0.5) * dx;
-            fprintf(fp, "%.2f %.2f %f\n", x, y, p[j * (imax) + i]);
-        }
-        fprintf(fp, "\n");
-    }
-
-    fclose(fp);
-
-    fp = fopen("velocity.dat", "w");
-
-    if (fp == NULL) {
-        printf("Error!\n");
-        exit(EXIT_FAILURE);
-    }
-
-    for (int j = 1; j < jmax; j++) {
-        y = dy * (j - 0.5);
-        for (int i = 1; i < imax; i++) {
-            x            = dx * (i - 0.5);
-            double vel_u = (u[j * (imax) + i] + u[j * (imax) + (i - 1)]) / 2.0;
-            double vel_v = (v[j * (imax) + i] + v[(j - 1) * (imax) + i]) / 2.0;
-            double len   = sqrt((vel_u * vel_u) + (vel_v * vel_v));
-            fprintf(fp, "%.2f %.2f %f %f %f\n", x, y, vel_u, vel_v, len);
-        }
-    }
-
-    fclose(fp);
-}
--- a/BasicSolver/2D-mpi-v3/src/solver.h
+++ b/BasicSolver/2D-mpi-v3/src/solver.h
@@ -1,58 +0,0 @@
-/*
- * Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
- * All rights reserved. This file is part of nusif-solver.
- * Use of this source code is governed by a MIT style
- * license that can be found in the LICENSE file.
- */
-#ifndef __SOLVER_H_
-#define __SOLVER_H_
-#include "parameter.h"
-#include <mpi.h>
-
-#define NDIMS 2
-
-enum BC { NOSLIP = 1, SLIP, OUTFLOW, PERIODIC };
-
-typedef struct {
-    /* geometry and grid information */
-    double dx, dy;
-    int imax, jmax;
-    double xlength, ylength;
-    /* arrays */
-    double *p, *rhs;
-    double *f, *g;
-    double *u, *v;
-    /* parameters */
-    double eps, omega;
-    double re, tau, gamma;
-    double gx, gy;
-    /* time stepping */
-    int itermax;
-    double dt, te;
-    double dtBound;
-    char* problem;
-    int bcLeft, bcRight, bcBottom, bcTop;
-    /* mpi */
-    int rank;
-    int size;
-    MPI_Comm comm;
-    MPI_Datatype bufferTypes[NDIMS * 2];
-    MPI_Aint sdispls[NDIMS * 2], rdispls[NDIMS * 2];
-    int iNeighbours[NDIMS], jNeighbours[NDIMS];
-    int coords[NDIMS], dims[NDIMS];
-    int imaxLocal, jmaxLocal;
-} Solver;
-
-void initSolver(Solver*, Parameter*);
-void computeRHS(Solver*);
-int solve(Solver*);
-void computeTimestep(Solver*);
-void setBoundaryConditions(Solver*);
-void setSpecialBoundaryCondition(Solver*);
-void computeFG(Solver*);
-void adaptUV(Solver*);
-void collectResult(Solver*);
-void writeResult(Solver*, double*, double*, double*);
-void debugExchange(Solver*);
-void print(Solver*, double*);
-#endif
--- a/BasicSolver/2D-mpi-v3/surface.plot
+++ b/BasicSolver/2D-mpi-v3/surface.plot
@@ -1,7 +0,0 @@
-set terminal png size 1024,768 enhanced font ,12
-set output 'p.png'
-set datafile separator whitespace
-
-set grid
-set hidden3d
-splot 'pressure.dat' using 1:2:3 with lines
--- a/BasicSolver/2D-mpi/Makefile
+++ b/BasicSolver/2D-mpi/Makefile
@@ -1,5 +1,5 @@
 #=======================================================================================
-# Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
+# Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
 # All rights reserved.
 # Use of this source code is governed by a MIT-style
 # license that can be found in the LICENSE file.
@@ -18,9 +18,11 @@ include $(MAKE_DIR)/include_$(TAG).mk
 INCLUDES  += -I$(SRC_DIR) -I$(BUILD_DIR)

 VPATH     = $(SRC_DIR)
-SRC       = $(wildcard $(SRC_DIR)/*.c)
+SRC       = $(filter-out $(wildcard $(SRC_DIR)/*-*.c),$(wildcard $(SRC_DIR)/*.c))
 ASM       = $(patsubst $(SRC_DIR)/%.c, $(BUILD_DIR)/%.s, $(SRC))
 OBJ       = $(patsubst $(SRC_DIR)/%.c, $(BUILD_DIR)/%.o, $(SRC))
+OBJ      += $(BUILD_DIR)/comm-$(COMM_TYPE).o
+OBJ      += $(BUILD_DIR)/solver-$(SOLVER).o
 SOURCES   = $(SRC) $(wildcard $(SRC_DIR)/*.h)
 CPPFLAGS := $(CPPFLAGS) $(DEFINES) $(OPTIONS) $(INCLUDES)

@@ -37,9 +39,20 @@ $(BUILD_DIR)/%.s:  %.c
 	$(info ===>  GENERATE ASM  $@)
 	$(CC) -S $(CPPFLAGS) $(CFLAGS) $< -o $@

-.PHONY: clean distclean tags info asm format
+.PHONY: clean distclean vis vis_clean tags info asm format

-clean:
+vis:
+	$(info ===>  GENERATE VISUALIZATION)
+	@gnuplot -e "filename='pressure.dat'" ./surface.plot
+	@gnuplot -e "filename='velocity.dat'" ./vector.plot
+	@gnuplot -e "filename='residual.dat'" ./residual.plot
+
+vis_clean:
+	$(info ===>  CLEAN VISUALIZATION)
+	@rm -f *.dat
+	@rm -f *.png
+
+clean: vis_clean
 	$(info ===>  CLEAN)
 	@rm -rf $(BUILD_DIR)
 	@rm -f tags
@@ -47,6 +60,8 @@ clean:
 distclean: clean
 	$(info ===>  DIST CLEAN)
 	@rm -f $(TARGET)
+	@rm -f *.dat
+	@rm -f *.png

 info:
 	$(info $(CFLAGS))
--- a/BasicSolver/2D-mpi/canal.par
+++ b/BasicSolver/2D-mpi/canal.par
@@ -7,10 +7,10 @@

 name canal             # name of flow setup

-bcN     1              #  flags for boundary conditions
-bcE     3              #  1 = no-slip      3 = outflow
-bcS     1              #  2 = free-slip    4 = periodic
-bcW     3              #
+bcTop      1			#  flags for boundary conditions
+bcBottom   1			#  1 = no-slip      3 = outflow
+bcLeft     3			#  2 = free-slip    4 = periodic
+bcRight    3			#

 gx     0.0      # Body forces (e.g. gravity)
 gy     0.0      #
@@ -27,15 +27,22 @@ p_init        0.0      # initial value for pressure
 xlength       30.0     # domain size in x-direction
 ylength       4.0	   # domain size in y-direction
 imax          200      # number of interior cells in x-direction
-jmax          50	   # number of interior cells in y-direction
+jmax          40	   # number of interior cells in y-direction

 # Time Data:
 # ---------

-te       100.0   # final time
+te       60.0   # final time
 dt       0.02    # time stepsize
 tau      0.5     # safety factor for time stepsize control (<0 constant delt)

+# Multigrid data:
+# ---------
+
+levels        2         # Multigrid levels
+presmooth     5         # Pre-smoothing iterations
+postsmooth    5         # Post-smoothing iterations
+
 # Pressure Iteration Data:
 # -----------------------

--- a/BasicSolver/2D-mpi/config.mk
+++ b/BasicSolver/2D-mpi/config.mk
@@ -1,10 +1,17 @@
 # Supported: GCC, CLANG, ICC
-TAG ?= CLANG
+TAG ?= ICC
+# Supported: true, false
+ENABLE_MPI ?= true
 ENABLE_OPENMP ?= false
+# Supported: rb, mg
+SOLVER ?= mg
+# Supported: v1, v2, v3
+COMM_TYPE ?= v3

 #Feature options
 OPTIONS +=  -DARRAY_ALIGNMENT=64
-#OPTIONS +=  -DVERBOSE
+OPTIONS +=  -DVERBOSE
+# OPTIONS +=  -DTEST
 #OPTIONS +=  -DVERBOSE_AFFINITY
 #OPTIONS +=  -DVERBOSE_DATASIZE
 #OPTIONS +=  -DVERBOSE_TIMER
--- a/BasicSolver/2D-mpi/dcavity.par
+++ b/BasicSolver/2D-mpi/dcavity.par
@@ -15,7 +15,7 @@ bcRight    1			#
 gx    0.0			# Body forces (e.g. gravity)
 gy    0.0			#

-re    10.0		    # Reynolds number
+re    100.0		    # Reynolds number

 u_init    0.0		# initial value for velocity in x-direction
 v_init    0.0		# initial value for velocity in y-direction
@@ -26,15 +26,22 @@ p_init    0.0		# initial value for pressure

 xlength    1.0		# domain size in x-direction
 ylength    1.0		# domain size in y-direction
-imax       100		# number of interior cells in x-direction
-jmax       100		# number of interior cells in y-direction
+imax       128		# number of interior cells in x-direction
+jmax       128		# number of interior cells in y-direction

 # Time Data:
 # ---------

-te      5.0		# final time
-dt     0.02	    # time stepsize
-tau     0.5		# safety factor for time stepsize control (<0 constant delt)
+te      10.0		# final time
+dt      0.02	    # time stepsize
+tau     0.5		    # safety factor for time stepsize control (<0 constant delt)
+
+# Multigrid data:
+# ---------
+
+levels        2         # Multigrid levels
+presmooth     20        # Pre-smoothing iterations
+postsmooth    5         # Post-smoothing iterations

 # Pressure Iteration Data:
 # -----------------------
--- a/BasicSolver/2D-mpi/include_CLANG.mk
+++ b/BasicSolver/2D-mpi/include_CLANG.mk
@@ -1,4 +1,10 @@
+ifeq ($(ENABLE_MPI),true)
 CC   = mpicc
+DEFINES  = -D_MPI
+else
+CC   = cc
+endif
+
 GCC  = cc
 LINKER = $(CC)

@@ -9,8 +15,7 @@ LIBS     = # -lomp
 endif

 VERSION  = --version
-CFLAGS   = -Ofast -std=c99 $(OPENMP)
-#CFLAGS   = -Ofast -fnt-store=aggressive  -std=c99 $(OPENMP) #AMD CLANG
-LFLAGS   = $(OPENMP)
-DEFINES  = -D_GNU_SOURCE# -DDEBUG
-INCLUDES = -I/usr/local/include
+CFLAGS   = -Ofast -std=c17
+LFLAGS   = $(OPENMP) -lm
+DEFINES  += -D_GNU_SOURCE# -DDEBUG
+INCLUDES = -I/opt/homebrew/include
--- a/BasicSolver/2D-mpi/include_GCC.mk
+++ b/BasicSolver/2D-mpi/include_GCC.mk
@@ -1,4 +1,10 @@
+ifeq ($(ENABLE_MPI),true)
+CC   = mpicc
+DEFINES  = -D_MPI
+else
 CC   = gcc
+endif
+
 GCC  = gcc
 LINKER = $(CC)

@@ -9,6 +15,6 @@ endif
 VERSION  = --version
 CFLAGS   = -Ofast -ffreestanding -std=c99 $(OPENMP)
 LFLAGS   = $(OPENMP)
-DEFINES  = -D_GNU_SOURCE
+DEFINES  += -D_GNU_SOURCE
 INCLUDES =
 LIBS     =
--- a/BasicSolver/2D-mpi/include_ICC.mk
+++ b/BasicSolver/2D-mpi/include_ICC.mk
@@ -1,4 +1,10 @@
+ifeq ($(ENABLE_MPI),true)
 CC   = mpiicc
+DEFINES  = -D_MPI
+else
+CC = icc
+endif
+
 GCC  = gcc
 LINKER = $(CC)

@@ -9,6 +15,6 @@ endif
 VERSION  = --version
 CFLAGS   =  -O3 -xHost -qopt-zmm-usage=high -std=c99 $(OPENMP)
 LFLAGS   = $(OPENMP)
-DEFINES  = -D_GNU_SOURCE
+DEFINES  += -D_GNU_SOURCE# -DDEBUG
 INCLUDES =
 LIBS     =
--- a/BasicSolver/2D-mpi/residual.plot
+++ b/BasicSolver/2D-mpi/residual.plot
@@ -0,0 +1,9 @@
+set terminal png size 1800,768 enhanced font ,12
+set output 'residual.png'
+set datafile separator whitespace
+set xlabel "Timestep"
+set ylabel "Residual"
+
+set logscale y 2
+
+plot 'residual.dat' using 1:2 title "Residual"
--- a/BasicSolver/2D-mpi/src/affinity.c
+++ b/BasicSolver/2D-mpi/src/affinity.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
+ * Copyright (C)  NHR@FAU, University Erlangen-Nuremberg.
 * All rights reserved.
 * Use of this source code is governed by a MIT-style
 * license that can be found in the LICENSE file.
--- a/BasicSolver/2D-mpi/src/affinity.h
+++ b/BasicSolver/2D-mpi/src/affinity.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
+ * Copyright (C)  NHR@FAU, University Erlangen-Nuremberg.
 * All rights reserved.
 * Use of this source code is governed by a MIT-style
 * license that can be found in the LICENSE file.
--- a/BasicSolver/2D-mpi/src/allocate.c
+++ b/BasicSolver/2D-mpi/src/allocate.c
@@ -1,14 +1,17 @@
 /*
- * Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
+ * Copyright (C)  NHR@FAU, University Erlangen-Nuremberg.
 * All rights reserved.
 * Use of this source code is governed by a MIT-style
 * license that can be found in the LICENSE file.
 */
 #include <errno.h>
+#include <stddef.h>
 #include <stdio.h>
 #include <stdlib.h>

-void* allocate(int alignment, size_t bytesize)
+#include "allocate.h"
+
+void* allocate(size_t alignment, size_t bytesize)
 {
    int errorCode;
    void* ptr;
--- a/BasicSolver/2D-mpi/src/allocate.h
+++ b/BasicSolver/2D-mpi/src/allocate.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
+ * Copyright (C)  NHR@FAU, University Erlangen-Nuremberg.
 * All rights reserved.
 * Use of this source code is governed by a MIT-style
 * license that can be found in the LICENSE file.
@@ -8,6 +8,6 @@
 #define __ALLOCATE_H_
 #include <stdlib.h>

-extern void* allocate(int alignment, size_t bytesize);
+extern void* allocate(size_t alignment, size_t bytesize);

 #endif
--- a/BasicSolver/2D-mpi/src/comm-v1.c
+++ b/BasicSolver/2D-mpi/src/comm-v1.c
@@ -0,0 +1,234 @@
+/*
+ * Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
+ * All rights reserved. This file is part of nusif-solver.
+ * Use of this source code is governed by a MIT style
+ * license that can be found in the LICENSE file.
+ */
+#include <stdlib.h>
+
+#include "comm.h"
+
+#ifdef _MPI
+// subroutines local to this module
+static int sum(int* sizes, int position) // total of sizes[0 .. position-1]
+{
+    int sum = 0;
+
+    // step by one element; a stride of `position` only ever visited sizes[0]
+    for (int i = 0; i < position; i++) {
+        sum += sizes[i];
+    }
+    return sum;
+}
+
+// Thin wrapper around MPI_Gatherv with root 0 on MPI_COMM_WORLD.
+// Rank 0 sends starting at src; every other rank starts (imaxLocal + 2)
+// doubles later, i.e. it skips its first row.
+static void gatherArray(
+    Comm* c, int cnt, int* rcvCounts, int* displs, double* src, double* dst)
+{
+    const int root      = 0;
+    const int rowStride = c->imaxLocal + 2;
+    double* sendStart   = src;
+
+    if (c->rank != root) {
+        sendStart += rowStride;
+    }
+
+    MPI_Gatherv(sendStart, cnt, MPI_DOUBLE, // what this rank contributes
+        dst, rcvCounts, displs, MPI_DOUBLE, // receive side, used on the root
+        root,
+        MPI_COMM_WORLD);
+}
+#endif // defined _MPI
+
+// exported subroutines
+// Reports whether this rank sits on the outer domain edge in `direction`.
+// With _MPI: LEFT and RIGHT always count as boundary, BOTTOM only on rank 0,
+// TOP only on the last rank. Every other case, including any build without
+// _MPI, yields 1.
+int commIsBoundary(Comm* c, int direction)
+{
+    int isBoundary = 1;
+
+#ifdef _MPI
+    if (direction == BOTTOM) {
+        // lowest rank holds the bottom edge
+        isBoundary = (c->rank == 0);
+    } else if (direction == TOP) {
+        // highest rank holds the top edge
+        isBoundary = (c->rank == (c->size - 1));
+    }
+    // LEFT, RIGHT and unknown directions keep the default of 1
+#endif
+
+    return isBoundary;
+}
+
+// Halo swap with the ranks directly above and below (no-op without _MPI).
+// Tag 1 carries rows travelling upwards, tag 2 rows travelling downwards.
+void commExchange(Comm* c, double* grid)
+{
+#ifdef _MPI
+    const int n      = c->imaxLocal;     // values per exchanged row
+    const int rowLen = c->imaxLocal + 2; // row stride of the local grid
+    const int up     = c->rank + 1;
+    const int down   = c->rank - 1;
+    MPI_Request requests[4];
+
+    for (int r = 0; r < 4; r++) {
+        requests[r] = MPI_REQUEST_NULL; // unused slots are skipped by Waitall
+    }
+
+    if (up < c->size) { // upper neighbour exists
+        double* lastInterior = grid + c->jmaxLocal * rowLen + 1;
+        double* topHalo      = grid + (c->jmaxLocal + 1) * rowLen + 1;
+        MPI_Isend(lastInterior, n, MPI_DOUBLE, up, 1, MPI_COMM_WORLD, &requests[0]);
+        MPI_Irecv(topHalo, n, MPI_DOUBLE, up, 2, MPI_COMM_WORLD, &requests[1]);
+    }
+
+    if (down >= 0) { // lower neighbour exists
+        double* firstInterior = grid + rowLen + 1;
+        double* bottomHalo    = grid + 1;
+        MPI_Isend(firstInterior, n, MPI_DOUBLE, down, 2, MPI_COMM_WORLD, &requests[2]);
+        MPI_Irecv(bottomHalo, n, MPI_DOUBLE, down, 1, MPI_COMM_WORLD, &requests[3]);
+    }
+
+    MPI_Waitall(4, requests, MPI_STATUSES_IGNORE);
+#endif
+}
+
+// Hands the last interior row of g to the rank above, which stores it in its
+// bottom halo row (row 0). f is not touched here.
+// Without _MPI nothing happens.
+void commShift(Comm* c, double* f, double* g)
+{
+#ifdef _MPI
+    const int n    = c->imaxLocal;
+    const int up   = c->rank + 1;
+    const int down = c->rank - 1;
+    MPI_Request pending[2];
+
+    pending[0] = MPI_REQUEST_NULL;
+    pending[1] = MPI_REQUEST_NULL;
+
+    if (down >= 0) {
+        // post the receive into the bottom halo row (row 0, columns 1..n)
+        MPI_Irecv(g + 1, n, MPI_DOUBLE, down, 0, MPI_COMM_WORLD, &pending[0]);
+    }
+
+    if (up < c->size) {
+        // send row jmaxLocal, columns 1..n, to the rank above
+        double* topRow = g + c->jmaxLocal * (c->imaxLocal + 2) + 1;
+        MPI_Isend(topRow, n, MPI_DOUBLE, up, 0, MPI_COMM_WORLD, &pending[1]);
+    }
+
+    MPI_Waitall(2, pending, MPI_STATUSES_IGNORE);
+#endif
+}
+
+/* Gather local p, u, v of all ranks into pg, ug, vg on rank 0 (no-op without
+ * _MPI). Rank 0 sends from its bottom ghost row, other ranks from their first
+ * interior row; the last rank's count also covers its top ghost row. */
+void commCollectResult(Comm* c, double* ug, double* vg, double* pg,
+    double* u, double* v, double* p, int jmax, int imax)
+{
+#ifdef _MPI
+    // receive metadata is only needed (and allocated) on the root rank
+    int *rcvCounts = NULL, *displs = NULL;
+    int cnt = c->jmaxLocal * (imax + 2);
+
+    if (c->rank == 0) {
+        rcvCounts = malloc(c->size * sizeof *rcvCounts);
+        displs    = malloc(c->size * sizeof *displs);
+    }
+
+    if (c->rank == 0 && c->size == 1) {
+        cnt = (c->jmaxLocal + 2) * (imax + 2);
+    } else if (c->rank == 0 || c->rank == (c->size - 1)) {
+        cnt = (c->jmaxLocal + 1) * (imax + 2);
+    }
+
+    // cnt is a C int, so the matching MPI datatype is MPI_INT
+    MPI_Gather(&cnt, 1, MPI_INT, rcvCounts, 1, MPI_INT, 0, MPI_COMM_WORLD);
+
+    if (c->rank == 0) {
+        displs[0]  = 0;
+        int cursor = rcvCounts[0];
+
+        for (int i = 1; i < c->size; i++) {
+            displs[i] = cursor;
+            cursor += rcvCounts[i];
+        }
+    }
+
+    gatherArray(c, cnt, rcvCounts, displs, p, pg);
+    gatherArray(c, cnt, rcvCounts, displs, u, ug);
+    gatherArray(c, cnt, rcvCounts, displs, v, vg);
+    free(displs); // free(NULL) is a no-op on non-root ranks
+    free(rcvCounts);
+#endif
+}
+
+// 1D split along j: every rank keeps all imax columns; its row count comes
+// from sizeOfRank(). Without _MPI the whole imax x jmax grid is local.
+void commPartition(Comm* c, int jmax, int imax)
+{
+    c->imaxLocal = imax;
+#ifdef _MPI
+    // c->coords[] is only filled in below, so derive the share from the rank
+    c->jmaxLocal = sizeOfRank(c->rank, c->size, jmax);
+    c->neighbours[BOTTOM] = c->rank == 0 ? -1 : c->rank - 1;
+    c->neighbours[TOP]    = c->rank == (c->size - 1) ? -1 : c->rank + 1;
+    c->neighbours[LEFT]   = -1;
+    c->neighbours[RIGHT]  = -1;
+
+    c->coords[IDIM] = 0;
+    c->coords[JDIM] = c->rank;
+    c->dims[IDIM]   = 1;
+    c->dims[JDIM]   = c->size;
+#else
+    c->jmaxLocal = jmax;
+#endif
+}
+
+// Prepare newcomm with the given local extents. With _MPI it additionally gets
+// its own duplicate of MPI_COMM_WORLD, rank/size copied from oldcomm, and a
+// 1D layout along j (no LEFT/RIGHT neighbours, dims = 1 x size).
+void commUpdateDatatypes(Comm* oldcomm, Comm* newcomm, int imaxLocal, int jmaxLocal)
+{
+#if defined _MPI
+    newcomm->comm = MPI_COMM_NULL;
+    int result    = MPI_Comm_dup(MPI_COMM_WORLD, &newcomm->comm);
+
+    if (result == MPI_ERR_COMM) {
+        printf("\nNull communicator. Duplication failed !!\n");
+    }
+
+    newcomm->rank = oldcomm->rank;
+    newcomm->size = oldcomm->size;
+
+    newcomm->neighbours[BOTTOM] = newcomm->rank == 0 ? -1 : newcomm->rank - 1;
+    newcomm->neighbours[TOP]    = newcomm->rank == (newcomm->size - 1) ? -1 : newcomm->rank + 1;
+    newcomm->neighbours[LEFT]   = -1;
+    newcomm->neighbours[RIGHT]  = -1;
+
+    newcomm->coords[IDIM] = 0;
+    newcomm->coords[JDIM] = newcomm->rank;
+
+    newcomm->dims[IDIM] = 1;
+    newcomm->dims[JDIM] = newcomm->size;
+#endif
+
+    // Stored exactly as passed in; no scaling happens in this function
+    // (the former "/ 2" stores were always overwritten by these two lines).
+    newcomm->imaxLocal = imaxLocal;
+    newcomm->jmaxLocal = jmaxLocal;
+}
+
+void commFreeCommunicator(Comm* comm) // releases comm->comm; no-op without _MPI
+{
+#ifdef _MPI
+    MPI_Comm_free(&comm->comm);
+#endif
+}
--- a/BasicSolver/2D-mpi/src/comm-v2.c
+++ b/BasicSolver/2D-mpi/src/comm-v2.c
@@ -0,0 +1,342 @@
+/*
+ * Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
+ * All rights reserved. This file is part of nusif-solver.
+ * Use of this source code is governed by a MIT style
+ * license that can be found in the LICENSE file.
+ */
+#include "comm.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#ifdef _MPI
+// subroutines local to this module
+/* Adds the coord entries sizes[init - offset], sizes[init - 2 * offset], ... */
+static int sum(int* sizes, int init, int offset, int coord)
+{
+    int total = 0;
+    for (int k = 1; k <= coord; k++) {
+        total += sizes[init - k * offset];
+    }
+
+    return total;
+}
+
+/* Gather one distributed field on rank 0: every rank sends its interior (plus the
+ * ghost layer on sides that touch the domain boundary) and rank 0 places each piece
+ * in the (jmax + 2) x (imax + 2) array dst. dst is only accessed on rank 0. */
+static void assembleResult(Comm* c, double* src, double* dst, int imax, int jmax)
+{
+    int numRequests       = (c->rank == 0) ? c->size + 1 : 1;
+    MPI_Request* requests = malloc(numRequests * sizeof *requests);
+    if (requests == NULL) {
+        fprintf(stderr, "assembleResult: cannot allocate request array\n");
+        MPI_Abort(c->comm, EXIT_FAILURE);
+    }
+
+    /* all ranks send their bulk array, including the external boundary layer */
+    MPI_Datatype bulkType;
+    int oldSizes[NDIMS] = { c->jmaxLocal + 2, c->imaxLocal + 2 };
+    int newSizes[NDIMS] = { c->jmaxLocal, c->imaxLocal };
+    int starts[NDIMS]   = { 1, 1 };
+
+    if (commIsBoundary(c, LEFT)) {
+        newSizes[CIDIM] += 1;
+        starts[CIDIM] = 0;
+    }
+    if (commIsBoundary(c, RIGHT)) {
+        newSizes[CIDIM] += 1;
+    }
+    if (commIsBoundary(c, BOTTOM)) {
+        newSizes[CJDIM] += 1;
+        starts[CJDIM] = 0;
+    }
+    if (commIsBoundary(c, TOP)) {
+        newSizes[CJDIM] += 1;
+    }
+
+    MPI_Type_create_subarray(NDIMS,
+        oldSizes,
+        newSizes,
+        starts,
+        MPI_ORDER_C,
+        MPI_DOUBLE,
+        &bulkType);
+    MPI_Type_commit(&bulkType);
+    MPI_Isend(src, 1, bulkType, 0, 0, c->comm, &requests[0]);
+
+    /* gathered on c->comm so that index i below is the rank used by MPI_Cart_coords */
+    int newSizesI[c->size];
+    int newSizesJ[c->size];
+    MPI_Gather(&newSizes[CIDIM], 1, MPI_INT, newSizesI, 1, MPI_INT, 0, c->comm);
+    MPI_Gather(&newSizes[CJDIM], 1, MPI_INT, newSizesJ, 1, MPI_INT, 0, c->comm);
+
+    /* rank 0 assembles the subdomains */
+    if (c->rank == 0) {
+        for (int i = 0; i < c->size; i++) {
+            MPI_Datatype domainType;
+            int globalSizes[NDIMS] = { jmax + 2, imax + 2 };
+            int pieceSizes[NDIMS]  = { newSizesJ[i], newSizesI[i] };
+            int coords[NDIMS];
+            MPI_Cart_coords(c->comm, i, NDIMS, coords);
+            int pieceStarts[NDIMS] = { sum(newSizesJ, i, 1, coords[JDIM]),
+                sum(newSizesI, i, c->dims[JDIM], coords[IDIM]) };
+            printf(
+                "Rank: %d, Coords(i,j): %d %d, Size(i,j): %d %d, Target Size(i,j): %d %d "
+                "Starts(i,j): %d %d\n",
+                i,
+                coords[IDIM],
+                coords[JDIM],
+                globalSizes[CIDIM],
+                globalSizes[CJDIM],
+                pieceSizes[CIDIM],
+                pieceSizes[CJDIM],
+                pieceStarts[CIDIM],
+                pieceStarts[CJDIM]);
+            MPI_Type_create_subarray(NDIMS,
+                globalSizes,
+                pieceSizes,
+                pieceStarts,
+                MPI_ORDER_C,
+                MPI_DOUBLE,
+                &domainType);
+            MPI_Type_commit(&domainType);
+            MPI_Irecv(dst, 1, domainType, i, 0, c->comm, &requests[i + 1]);
+            MPI_Type_free(&domainType); /* the pending receive still completes normally */
+        }
+    }
+
+    MPI_Waitall(numRequests, requests, MPI_STATUSES_IGNORE);
+    MPI_Type_free(&bulkType); /* was leaked on every call */
+    free(requests);
+}
+#endif // defined _MPI
+
+// exported subroutines
+/* Non-zero if the subdomain of c touches the global domain boundary in the given
+ * direction. Without MPI, or for an unknown direction, the result is 1. */
+int commIsBoundary(Comm* c, int direction)
+{
+#ifdef _MPI
+    if (direction == LEFT) {
+        return c->coords[IDIM] == 0;
+    }
+    if (direction == RIGHT) {
+        return c->coords[IDIM] == (c->dims[IDIM] - 1);
+    }
+    if (direction == BOTTOM) {
+        return c->coords[JDIM] == 0;
+    }
+    if (direction == TOP) {
+        return c->coords[JDIM] == (c->dims[JDIM] - 1);
+    }
+#endif
+
+    return 1;
+}
+
+/* Halo exchange of grid with all four neighbours using non-blocking point-to-point
+ * messages. Every message is tagged with the rank of its sender. */
+void commExchange(Comm* c, double* grid)
+{
+#ifdef _MPI
+    MPI_Request requests[2 * NDIRS];
+    for (int r = 0; r < 2 * NDIRS; r++) {
+        requests[r] = MPI_REQUEST_NULL;
+    }
+
+    for (int dir = 0; dir < NDIRS; dir++) {
+        int peer          = c->neighbours[dir];
+        MPI_Datatype type = c->bufferTypes[dir];
+        /* expect the sender's rank as tag; 0 when there is no neighbour */
+        int recvTag = (peer != MPI_PROC_NULL) ? peer : 0;
+
+        MPI_Irecv(grid + c->rdispls[dir],
+            1,
+            type,
+            peer,
+            recvTag,
+            c->comm,
+            &requests[2 * dir]);
+        MPI_Isend(grid + c->sdispls[dir],
+            1,
+            type,
+            peer,
+            c->rank,
+            c->comm,
+            &requests[2 * dir + 1]);
+    }
+
+    MPI_Waitall(2 * NDIRS, requests, MPI_STATUSES_IGNORE);
+#endif
+}
+
+/* One-directional shift: g is passed from each rank to its top neighbour and f to
+ * its right neighbour, filling the bottom/left ghost layer of the receiver. */
+void commShift(Comm* c, double* f, double* g)
+{
+#ifdef _MPI
+    const int rowLen = c->imaxLocal + 2; /* row length including ghost cells */
+
+    MPI_Request requests[4] = { MPI_REQUEST_NULL, MPI_REQUEST_NULL,
+        MPI_REQUEST_NULL, MPI_REQUEST_NULL };
+
+    /* G: bottom ghost row (row 0, from column 1) <- bottom neighbour */
+    MPI_Irecv(g + 1,
+        1,
+        c->bufferTypes[BOTTOM],
+        c->neighbours[BOTTOM],
+        0,
+        c->comm,
+        &requests[0]);
+    /* G: last interior row (row jmaxLocal, from column 1) -> top neighbour */
+    MPI_Isend(g + c->jmaxLocal * rowLen + 1,
+        1,
+        c->bufferTypes[TOP],
+        c->neighbours[TOP],
+        0,
+        c->comm,
+        &requests[1]);
+
+    /* F: left ghost column (column 0, from row 1) <- left neighbour */
+    MPI_Irecv(f + rowLen,
+        1,
+        c->bufferTypes[LEFT],
+        c->neighbours[LEFT],
+        1,
+        c->comm,
+        &requests[2]);
+    /* F: last interior column (column imaxLocal, from row 1) -> right neighbour */
+    MPI_Isend(f + rowLen + c->imaxLocal,
+        1,
+        c->bufferTypes[RIGHT],
+        c->neighbours[RIGHT],
+        1,
+        c->comm,
+        &requests[3]);
+
+    MPI_Waitall(4, requests, MPI_STATUSES_IGNORE);
+#endif
+}
+
+/* Collects the distributed fields p, u and v into pg, ug and vg via assembleResult(). */
+void commCollectResult(Comm* c,
+    double* ug,
+    double* vg,
+    double* pg,
+    double* u,
+    double* v,
+    double* p,
+    int imax,
+    int jmax)
+{
+#ifdef _MPI
+    /* gather P, U and V one after the other */
+    double* local[]  = { p, u, v };
+    double* global[] = { pg, ug, vg };
+
+    for (int k = 0; k < 3; k++) {
+        assembleResult(c, local[k], global[k], imax, jmax);
+    }
+#endif
+}
+
+void commPartition(Comm* c, int jmax, int imax)
+{ // 2D Cartesian decomposition: sets c->comm, neighbours, dims, coords, local sizes and halo datatypes
+#ifdef _MPI
+    int dims[NDIMS]    = { 0, 0 }; // zeros leave both extents for MPI_Dims_create to choose
+    int periods[NDIMS] = { 0, 0 }; // non-periodic in both dimensions
+    MPI_Dims_create(c->size, NDIMS, dims);
+    MPI_Cart_create(MPI_COMM_WORLD, NDIMS, dims, periods, 0, &c->comm); // reorder argument is 0
+    MPI_Cart_shift(c->comm, IDIM, 1, &c->neighbours[LEFT], &c->neighbours[RIGHT]);
+    MPI_Cart_shift(c->comm, JDIM, 1, &c->neighbours[BOTTOM], &c->neighbours[TOP]);
+    MPI_Cart_get(c->comm, NDIMS, c->dims, periods, c->coords); // fills c->dims and c->coords
+
+    int imaxLocal = sizeOfRank(c->coords[IDIM], dims[IDIM], imax); // interior cells of this rank in i
+    int jmaxLocal = sizeOfRank(c->coords[JDIM], dims[JDIM], jmax); // interior cells of this rank in j
+
+    c->imaxLocal = imaxLocal;
+    c->jmaxLocal = jmaxLocal;
+
+    MPI_Datatype jBufferType;
+    MPI_Type_contiguous(imaxLocal, MPI_DOUBLE, &jBufferType); // imaxLocal consecutive doubles
+    MPI_Type_commit(&jBufferType);
+
+    MPI_Datatype iBufferType;
+    MPI_Type_vector(jmaxLocal, 1, imaxLocal + 2, MPI_DOUBLE, &iBufferType); // jmaxLocal doubles, stride imaxLocal + 2
+    MPI_Type_commit(&iBufferType);
+
+    c->bufferTypes[LEFT]   = iBufferType;
+    c->bufferTypes[RIGHT]  = iBufferType;
+    c->bufferTypes[BOTTOM] = jBufferType;
+    c->bufferTypes[TOP]    = jBufferType;
+
+    c->sdispls[LEFT]   = (imaxLocal + 2) + 1; // send offsets, counted in doubles (added to a double* in commExchange)
+    c->sdispls[RIGHT]  = (imaxLocal + 2) + imaxLocal;
+    c->sdispls[BOTTOM] = (imaxLocal + 2) + 1;
+    c->sdispls[TOP]    = jmaxLocal * (imaxLocal + 2) + 1;
+
+    c->rdispls[LEFT]   = (imaxLocal + 2); // receive offsets, counted in doubles
+    c->rdispls[RIGHT]  = (imaxLocal + 2) + (imaxLocal + 1);
+    c->rdispls[BOTTOM] = 1;
+    c->rdispls[TOP]    = (jmaxLocal + 1) * (imaxLocal + 2) + 1;
+#else
+    c->imaxLocal = imax; // without MPI the local domain is the whole domain
+    c->jmaxLocal = jmax;
+#endif
+}
+
+void commUpdateDatatypes(Comm* oldcomm, Comm* newcomm, int imaxLocal, int jmaxLocal)
+{ // Fills newcomm from oldcomm: duplicated communicator, copied topology, fresh halo datatypes
+#if defined _MPI
+    newcomm->comm = MPI_COMM_NULL;
+    int result    = MPI_Comm_dup(oldcomm->comm, &newcomm->comm);
+
+    if (result == MPI_ERR_COMM) { // NOTE(review): any other error code passes silently
+        printf("\nNull communicator. Duplication failed !!\n");
+    }
+
+    newcomm->rank = oldcomm->rank;
+    newcomm->size = oldcomm->size;
+
+    memcpy(&newcomm->neighbours, &oldcomm->neighbours, sizeof(oldcomm->neighbours));
+    memcpy(&newcomm->coords, &oldcomm->coords, sizeof(oldcomm->coords));
+    memcpy(&newcomm->dims, &oldcomm->dims, sizeof(oldcomm->dims));
+
+    newcomm->imaxLocal = imaxLocal/2; // stored local size is half of the argument
+    newcomm->jmaxLocal = jmaxLocal/2; // NOTE(review): datatypes and offsets below use the unhalved arguments - confirm
+
+    MPI_Datatype jBufferType;
+    MPI_Type_contiguous(imaxLocal, MPI_DOUBLE, &jBufferType); // imaxLocal consecutive doubles
+    MPI_Type_commit(&jBufferType);
+
+    MPI_Datatype iBufferType;
+    MPI_Type_vector(jmaxLocal, 1, imaxLocal + 2, MPI_DOUBLE, &iBufferType); // jmaxLocal doubles, stride imaxLocal + 2
+    MPI_Type_commit(&iBufferType);
+
+    newcomm->bufferTypes[LEFT]   = iBufferType;
+    newcomm->bufferTypes[RIGHT]  = iBufferType;
+    newcomm->bufferTypes[BOTTOM] = jBufferType;
+    newcomm->bufferTypes[TOP]    = jBufferType;
+
+    newcomm->sdispls[LEFT]   = (imaxLocal + 2) + 1; // send offsets, counted in doubles
+    newcomm->sdispls[RIGHT]  = (imaxLocal + 2) + imaxLocal;
+    newcomm->sdispls[BOTTOM] = (imaxLocal + 2) + 1;
+    newcomm->sdispls[TOP]    = jmaxLocal * (imaxLocal + 2) + 1;
+
+    newcomm->rdispls[LEFT]   = (imaxLocal + 2); // receive offsets, counted in doubles
+    newcomm->rdispls[RIGHT]  = (imaxLocal + 2) + (imaxLocal + 1);
+    newcomm->rdispls[BOTTOM] = 1;
+    newcomm->rdispls[TOP]    = (jmaxLocal + 1) * (imaxLocal + 2) + 1;
+#else
+    newcomm->imaxLocal = imaxLocal; // non-MPI build: sizes are stored as given
+    newcomm->jmaxLocal = jmaxLocal;
+#endif
+}
+
+void commFreeCommunicator(Comm* comm)
+{ // Releases the MPI communicator held in comm; does nothing in non-MPI builds
+#ifdef _MPI
+    MPI_Comm_free(&comm->comm); // NOTE(review): the datatypes in comm->bufferTypes are not freed here
+#endif
+}
--- a/BasicSolver/2D-mpi/src/comm-v3.c
+++ b/BasicSolver/2D-mpi/src/comm-v3.c
@@ -0,0 +1,320 @@
+/*
+ * Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
+ * All rights reserved. This file is part of nusif-solver.
+ * Use of this source code is governed by a MIT style
+ * license that can be found in the LICENSE file.
+ */
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "comm.h"
+
+#ifdef _MPI
+// subroutines local to this module
+/* Adds the coord entries sizes[init - offset], sizes[init - 2 * offset], ... */
+static int sum(int* sizes, int init, int offset, int coord)
+{
+    int total = 0;
+    for (int k = 1; k <= coord; k++) {
+        total += sizes[init - k * offset];
+    }
+
+    return total;
+}
+
+/* Gather one distributed field on rank 0: every rank sends its interior (plus the
+ * ghost layer on sides that touch the domain boundary) and rank 0 places each piece
+ * in the (jmax + 2) x (imax + 2) array dst. dst is only accessed on rank 0. */
+static void assembleResult(Comm* c, double* src, double* dst, int imax, int jmax)
+{
+    int numRequests       = (c->rank == 0) ? c->size + 1 : 1;
+    MPI_Request* requests = malloc(numRequests * sizeof *requests);
+    if (requests == NULL) {
+        fprintf(stderr, "assembleResult: cannot allocate request array\n");
+        MPI_Abort(c->comm, EXIT_FAILURE);
+    }
+
+    /* all ranks send their bulk array, including the external boundary layer */
+    MPI_Datatype bulkType;
+    int oldSizes[NDIMS] = { c->jmaxLocal + 2, c->imaxLocal + 2 };
+    int newSizes[NDIMS] = { c->jmaxLocal, c->imaxLocal };
+    int starts[NDIMS]   = { 1, 1 };
+
+    if (commIsBoundary(c, LEFT)) {
+        newSizes[CIDIM] += 1;
+        starts[CIDIM] = 0;
+    }
+    if (commIsBoundary(c, RIGHT)) {
+        newSizes[CIDIM] += 1;
+    }
+    if (commIsBoundary(c, BOTTOM)) {
+        newSizes[CJDIM] += 1;
+        starts[CJDIM] = 0;
+    }
+    if (commIsBoundary(c, TOP)) {
+        newSizes[CJDIM] += 1;
+    }
+
+    MPI_Type_create_subarray(NDIMS,
+        oldSizes,
+        newSizes,
+        starts,
+        MPI_ORDER_C,
+        MPI_DOUBLE,
+        &bulkType);
+    MPI_Type_commit(&bulkType);
+    MPI_Isend(src, 1, bulkType, 0, 0, c->comm, &requests[0]);
+
+    /* gathered on c->comm so that index i below is the rank used by MPI_Cart_coords */
+    int newSizesI[c->size];
+    int newSizesJ[c->size];
+    MPI_Gather(&newSizes[CIDIM], 1, MPI_INT, newSizesI, 1, MPI_INT, 0, c->comm);
+    MPI_Gather(&newSizes[CJDIM], 1, MPI_INT, newSizesJ, 1, MPI_INT, 0, c->comm);
+
+    /* rank 0 assembles the subdomains */
+    if (c->rank == 0) {
+        for (int i = 0; i < c->size; i++) {
+            MPI_Datatype domainType;
+            int globalSizes[NDIMS] = { jmax + 2, imax + 2 };
+            int pieceSizes[NDIMS]  = { newSizesJ[i], newSizesI[i] };
+            int coords[NDIMS];
+            MPI_Cart_coords(c->comm, i, NDIMS, coords);
+            int pieceStarts[NDIMS] = { sum(newSizesJ, i, 1, coords[JDIM]),
+                sum(newSizesI, i, c->dims[JDIM], coords[IDIM]) };
+            printf(
+                "Rank: %d, Coords(i,j): %d %d, Size(i,j): %d %d, Target Size(i,j): %d %d "
+                "Starts(i,j): %d %d\n",
+                i,
+                coords[IDIM],
+                coords[JDIM],
+                globalSizes[CIDIM],
+                globalSizes[CJDIM],
+                pieceSizes[CIDIM],
+                pieceSizes[CJDIM],
+                pieceStarts[CIDIM],
+                pieceStarts[CJDIM]);
+            MPI_Type_create_subarray(NDIMS,
+                globalSizes,
+                pieceSizes,
+                pieceStarts,
+                MPI_ORDER_C,
+                MPI_DOUBLE,
+                &domainType);
+            MPI_Type_commit(&domainType);
+            MPI_Irecv(dst, 1, domainType, i, 0, c->comm, &requests[i + 1]);
+            MPI_Type_free(&domainType); /* the pending receive still completes normally */
+        }
+    }
+
+    MPI_Waitall(numRequests, requests, MPI_STATUSES_IGNORE);
+    MPI_Type_free(&bulkType); /* was leaked on every call */
+    free(requests);
+}
+#endif // defined _MPI
+
+// exported subroutines
+/* Non-zero if the subdomain of c touches the global domain boundary in the given
+ * direction. Without MPI, or for an unknown direction, the result is 1. */
+int commIsBoundary(Comm* c, int direction)
+{
+#ifdef _MPI
+    if (direction == LEFT) {
+        return c->coords[IDIM] == 0;
+    }
+    if (direction == RIGHT) {
+        return c->coords[IDIM] == (c->dims[IDIM] - 1);
+    }
+    if (direction == BOTTOM) {
+        return c->coords[JDIM] == 0;
+    }
+    if (direction == TOP) {
+        return c->coords[JDIM] == (c->dims[JDIM] - 1);
+    }
+#endif
+
+    return 1;
+}
+
+void commExchange(Comm* c, double* grid)
+{ // Halo exchange of grid with the neighbours of c->comm in one neighbourhood collective
+#ifdef _MPI
+    int counts[NDIRS] = { 1, 1, 1, 1 }; // one datatype instance per direction
+    MPI_Neighbor_alltoallw(grid,
+        counts,
+        c->sdispls, // MPI interprets these displacements in bytes
+        c->bufferTypes,
+        grid, // received into the same array that is sent from
+        counts,
+        c->rdispls, // MPI interprets these displacements in bytes
+        c->bufferTypes,
+        c->comm);
+#endif
+}
+
+/* One-directional shift: g is passed from each rank to its top neighbour and f to
+ * its right neighbour, filling the bottom/left ghost layer of the receiver. */
+void commShift(Comm* c, double* f, double* g)
+{
+#ifdef _MPI
+    const int rowLen = c->imaxLocal + 2; /* row length including ghost cells */
+
+    MPI_Request requests[4] = { MPI_REQUEST_NULL, MPI_REQUEST_NULL,
+        MPI_REQUEST_NULL, MPI_REQUEST_NULL };
+
+    /* G: bottom ghost row (row 0, from column 1) <- bottom neighbour */
+    MPI_Irecv(g + 1,
+        1,
+        c->bufferTypes[BOTTOM],
+        c->neighbours[BOTTOM],
+        0,
+        c->comm,
+        &requests[0]);
+    /* G: last interior row (row jmaxLocal, from column 1) -> top neighbour */
+    MPI_Isend(g + c->jmaxLocal * rowLen + 1,
+        1,
+        c->bufferTypes[TOP],
+        c->neighbours[TOP],
+        0,
+        c->comm,
+        &requests[1]);
+
+    /* F: left ghost column (column 0, from row 1) <- left neighbour */
+    MPI_Irecv(f + rowLen,
+        1,
+        c->bufferTypes[LEFT],
+        c->neighbours[LEFT],
+        1,
+        c->comm,
+        &requests[2]);
+    /* F: last interior column (column imaxLocal, from row 1) -> right neighbour */
+    MPI_Isend(f + rowLen + c->imaxLocal,
+        1,
+        c->bufferTypes[RIGHT],
+        c->neighbours[RIGHT],
+        1,
+        c->comm,
+        &requests[3]);
+
+    MPI_Waitall(4, requests, MPI_STATUSES_IGNORE);
+#endif
+}
+
+/* Collects the distributed fields p, u and v into pg, ug and vg via assembleResult(). */
+void commCollectResult(Comm* c,
+    double* ug,
+    double* vg,
+    double* pg,
+    double* u,
+    double* v,
+    double* p,
+    int imax,
+    int jmax)
+{
+#ifdef _MPI
+    /* gather P, U and V one after the other */
+    double* local[]  = { p, u, v };
+    double* global[] = { pg, ug, vg };
+
+    for (int k = 0; k < 3; k++) {
+        assembleResult(c, local[k], global[k], imax, jmax);
+    }
+#endif
+}
+
+void commPartition(Comm* c, int jmax, int imax)
+{ // 2D Cartesian decomposition: sets c->comm, neighbours, dims, coords, local sizes and halo datatypes
+#ifdef _MPI
+    int dims[NDIMS]    = { 0, 0 }; // zeros leave both extents for MPI_Dims_create to choose
+    int periods[NDIMS] = { 0, 0 }; // non-periodic in both dimensions
+    MPI_Dims_create(c->size, NDIMS, dims);
+    MPI_Cart_create(MPI_COMM_WORLD, NDIMS, dims, periods, 0, &c->comm); // reorder argument is 0
+    MPI_Cart_shift(c->comm, IDIM, 1, &c->neighbours[LEFT], &c->neighbours[RIGHT]);
+    MPI_Cart_shift(c->comm, JDIM, 1, &c->neighbours[BOTTOM], &c->neighbours[TOP]);
+    MPI_Cart_get(c->comm, NDIMS, c->dims, periods, c->coords); // fills c->dims and c->coords
+
+    int imaxLocal = sizeOfRank(c->coords[IDIM], dims[IDIM], imax); // interior cells of this rank in i
+    int jmaxLocal = sizeOfRank(c->coords[JDIM], dims[JDIM], jmax); // interior cells of this rank in j
+
+    c->imaxLocal = imaxLocal;
+    c->jmaxLocal = jmaxLocal;
+
+    MPI_Datatype jBufferType;
+    MPI_Type_contiguous(imaxLocal, MPI_DOUBLE, &jBufferType); // imaxLocal consecutive doubles
+    MPI_Type_commit(&jBufferType);
+
+    MPI_Datatype iBufferType;
+    MPI_Type_vector(jmaxLocal, 1, imaxLocal + 2, MPI_DOUBLE, &iBufferType); // jmaxLocal doubles, stride imaxLocal + 2
+    MPI_Type_commit(&iBufferType);
+
+    c->bufferTypes[LEFT]   = iBufferType;
+    c->bufferTypes[RIGHT]  = iBufferType;
+    c->bufferTypes[BOTTOM] = jBufferType;
+    c->bufferTypes[TOP]    = jBufferType;
+
+    size_t dblsize     = sizeof(double); // offsets below are scaled to bytes for MPI_Neighbor_alltoallw
+    c->sdispls[LEFT]   = ((imaxLocal + 2) + 1) * dblsize;
+    c->sdispls[RIGHT]  = ((imaxLocal + 2) + imaxLocal) * dblsize;
+    c->sdispls[BOTTOM] = ((imaxLocal + 2) + 1) * dblsize;
+    c->sdispls[TOP]    = (jmaxLocal * (imaxLocal + 2) + 1) * dblsize;
+
+    c->rdispls[LEFT]   = (imaxLocal + 2) * dblsize;
+    c->rdispls[RIGHT]  = ((imaxLocal + 2) + (imaxLocal + 1)) * dblsize;
+    c->rdispls[BOTTOM] = 1 * dblsize;
+    c->rdispls[TOP]    = ((jmaxLocal + 1) * (imaxLocal + 2) + 1) * dblsize;
+#else
+    c->imaxLocal = imax; // without MPI the local domain is the whole domain
+    c->jmaxLocal = jmax;
+#endif
+}
+
+/* Derive newcomm from oldcomm: duplicated communicator, same topology, new halo datatypes. */
+void commUpdateDatatypes(Comm* oldcomm, Comm* newcomm, int imaxLocal, int jmaxLocal)
+{
+#if defined _MPI
+    /* start from a copy so that rank, size, neighbours, coords and dims carry over */
+    *newcomm = *oldcomm;
+
+    int result = MPI_Comm_dup(oldcomm->comm, &newcomm->comm);
+    if (result != MPI_SUCCESS) {
+        printf("\nNull communicator. Duplication failed !!\n");
+    }
+
+    newcomm->imaxLocal = imaxLocal / 2; /* NOTE(review): types below use the unhalved sizes - confirm */
+    newcomm->jmaxLocal = jmaxLocal / 2;
+
+    MPI_Datatype jBufferType;
+    MPI_Type_contiguous(imaxLocal, MPI_DOUBLE, &jBufferType);
+    MPI_Type_commit(&jBufferType);
+
+    MPI_Datatype iBufferType;
+    MPI_Type_vector(jmaxLocal, 1, imaxLocal + 2, MPI_DOUBLE, &iBufferType);
+    MPI_Type_commit(&iBufferType);
+
+    newcomm->bufferTypes[LEFT]   = iBufferType;
+    newcomm->bufferTypes[RIGHT]  = iBufferType;
+    newcomm->bufferTypes[BOTTOM] = jBufferType;
+    newcomm->bufferTypes[TOP]    = jBufferType;
+
+    /* MPI_Neighbor_alltoallw takes byte displacements, so scale as commPartition does */
+    size_t dblsize           = sizeof(double);
+    newcomm->sdispls[LEFT]   = ((imaxLocal + 2) + 1) * dblsize;
+    newcomm->sdispls[RIGHT]  = ((imaxLocal + 2) + imaxLocal) * dblsize;
+    newcomm->sdispls[BOTTOM] = ((imaxLocal + 2) + 1) * dblsize;
+    newcomm->sdispls[TOP]    = (jmaxLocal * (imaxLocal + 2) + 1) * dblsize;
+
+    newcomm->rdispls[LEFT]   = (imaxLocal + 2) * dblsize;
+    newcomm->rdispls[RIGHT]  = ((imaxLocal + 2) + (imaxLocal + 1)) * dblsize;
+    newcomm->rdispls[BOTTOM] = 1 * dblsize;
+    newcomm->rdispls[TOP]    = ((jmaxLocal + 1) * (imaxLocal + 2) + 1) * dblsize;
+#else
+    newcomm->imaxLocal = imaxLocal;
+    newcomm->jmaxLocal = jmaxLocal;
+#endif
+}
+
+void commFreeCommunicator(Comm* comm)
+{ // Releases the MPI communicator held in comm; does nothing in non-MPI builds
+#ifdef _MPI
+    MPI_Comm_free(&comm->comm); // NOTE(review): the datatypes in comm->bufferTypes are not freed here
+#endif
+}
--- a/BasicSolver/2D-mpi/src/comm.c
+++ b/BasicSolver/2D-mpi/src/comm.c
@@ -1,281 +1,34 @@
 /*
- * Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
+ * Copyright (C)  NHR@FAU, University Erlangen-Nuremberg.
 * All rights reserved. This file is part of nusif-solver.
 * Use of this source code is governed by a MIT style
 * license that can be found in the LICENSE file.
 */
-#include <stddef.h>
 #include <stdio.h>
 #include <stdlib.h>

 #include "comm.h"

 // subroutines local to this module
-static int sizeOfRank(int rank, int size, int N)
+int sizeOfRank(int rank, int size, int N)
 {
    return N / size + ((N % size > rank) ? 1 : 0);
 }

-static void setupCommunication(Comm* c, int direction, int layer)
-{
-    MPI_Datatype type;
-    size_t dblsize = sizeof(double);
-    int imaxLocal  = c->imaxLocal;
-    int jmaxLocal  = c->jmaxLocal;
-    int sizes[NDIMS];
-    int subSizes[NDIMS];
-    int starts[NDIMS];
-    int offset = 0;
-
-    sizes[IDIM] = imaxLocal + 2;
-    sizes[JDIM] = jmaxLocal + 2;
-
-    if (layer == HALO) {
-        offset = 1;
-    }
-
-    switch (direction) {
-    case LEFT:
-        subSizes[IDIM] = 1;
-        subSizes[JDIM] = jmaxLocal;
-        starts[IDIM]   = 1 - offset;
-        starts[JDIM]   = 1;
-        break;
-    case RIGHT:
-        subSizes[IDIM] = 1;
-        subSizes[JDIM] = jmaxLocal;
-        starts[IDIM]   = imaxLocal + offset;
-        starts[JDIM]   = 1;
-        break;
-    case BOTTOM:
-        subSizes[IDIM] = imaxLocal;
-        subSizes[JDIM] = 1;
-        starts[IDIM]   = 1;
-        starts[JDIM]   = 1 - offset;
-        break;
-    case TOP:
-        subSizes[IDIM] = imaxLocal;
-        subSizes[JDIM] = 1;
-        starts[IDIM]   = 1;
-        starts[JDIM]   = jmaxLocal + offset;
-        break;
-    }
-
-    MPI_Type_create_subarray(NDIMS,
-        sizes,
-        subSizes,
-        starts,
-        MPI_ORDER_C,
-        MPI_DOUBLE,
-        &type);
-    MPI_Type_commit(&type);
-
-    if (layer == HALO) {
-        c->rbufferTypes[direction] = type;
-    } else if (layer == BULK) {
-        c->sbufferTypes[direction] = type;
-    }
-}
-
-static void assembleResult(Comm* c,
-    double* src,
-    double* dst,
-    int imaxLocal[],
-    int jmaxLocal[],
-    int offset[],
-    int jmax,
-    int imax)
-{
-    MPI_Request* requests;
-    int numRequests = 1;
-
-    if (c->rank == 0) {
-        numRequests = c->size + 1;
-    } else {
-        numRequests = 1;
-    }
-
-    requests = (MPI_Request*)malloc(numRequests * sizeof(MPI_Request));
-
-    /* all ranks send their bulk array */
-    MPI_Datatype bulkType;
-    int oldSizes[NDIMS] = { c->jmaxLocal + 2, c->imaxLocal + 2 };
-    int newSizes[NDIMS] = { c->jmaxLocal, c->imaxLocal };
-    int starts[NDIMS]   = { 1, 1 };
-    MPI_Type_create_subarray(NDIMS,
-        oldSizes,
-        newSizes,
-        starts,
-        MPI_ORDER_C,
-        MPI_DOUBLE,
-        &bulkType);
-    MPI_Type_commit(&bulkType);
-
-    MPI_Isend(src, 1, bulkType, 0, 0, c->comm, &requests[0]);
-
-    /* rank 0 assembles the subdomains */
-    if (c->rank == 0) {
-        for (int i = 0; i < c->size; i++) {
-            MPI_Datatype domainType;
-            int oldSizes[NDIMS] = { jmax, imax };
-            int newSizes[NDIMS] = { jmaxLocal[i], imaxLocal[i] };
-            int starts[NDIMS]   = { offset[i * NDIMS + JDIM], offset[i * NDIMS + IDIM] };
-            MPI_Type_create_subarray(NDIMS,
-                oldSizes,
-                newSizes,
-                starts,
-                MPI_ORDER_C,
-                MPI_DOUBLE,
-                &domainType);
-            MPI_Type_commit(&domainType);
-
-            MPI_Irecv(dst, 1, domainType, i, 0, c->comm, &requests[i + 1]);
-        }
-    }
-
-    MPI_Waitall(numRequests, requests, MPI_STATUSES_IGNORE);
-}
-
-static int sum(int* sizes, int position)
-{
-    int sum = 0;
-
-    for (int i = 0; i < position; i++) {
-        sum += sizes[i];
-    }
-
-    return sum;
-}
-
-// exported subroutines
 void commReduction(double* v, int op)
 {
+#ifdef _MPI
    if (op == MAX) {
        MPI_Allreduce(MPI_IN_PLACE, v, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD);
    } else if (op == SUM) {
        MPI_Allreduce(MPI_IN_PLACE, v, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
    }
-}
-
-int commIsBoundary(Comm* c, int direction)
-{
-    switch (direction) {
-    case LEFT:
-        return c->coords[IDIM] == 0;
-        break;
-    case RIGHT:
-        return c->coords[IDIM] == (c->dims[IDIM] - 1);
-        break;
-    case BOTTOM:
-        return c->coords[JDIM] == 0;
-        break;
-    case TOP:
-        return c->coords[JDIM] == (c->dims[JDIM] - 1);
-        break;
-    }
-
-    return 0;
-}
-
-void commExchange(Comm* c, double* grid)
-{
-    int counts[NDIRS]      = { 1, 1, 1, 1 };
-    MPI_Aint displs[NDIRS] = { 0, 0, 0, 0 };
-
-    MPI_Neighbor_alltoallw(grid,
-        counts,
-        displs,
-        c->sbufferTypes,
-        grid,
-        counts,
-        displs,
-        c->rbufferTypes,
-        c->comm);
-}
-
-void commShift(Comm* c, double* f, double* g)
-{
-    MPI_Request requests[4] = { MPI_REQUEST_NULL,
-        MPI_REQUEST_NULL,
-        MPI_REQUEST_NULL,
-        MPI_REQUEST_NULL };
-
-    /* shift G */
-    /* receive ghost cells from bottom neighbor */
-    MPI_Irecv(g,
-        1,
-        c->rbufferTypes[BOTTOM],
-        c->neighbours[BOTTOM],
-        0,
-        c->comm,
-        &requests[0]);
-
-    /* send ghost cells to top neighbor */
-    MPI_Isend(g, 1, c->sbufferTypes[TOP], c->neighbours[TOP], 0, c->comm, &requests[1]);
-
-    /* shift F */
-    /* receive ghost cells from left neighbor */
-    MPI_Irecv(f, 1, c->rbufferTypes[LEFT], c->neighbours[LEFT], 1, c->comm, &requests[2]);
-
-    /* send ghost cells to right neighbor */
-    MPI_Isend(f,
-        1,
-        c->sbufferTypes[RIGHT],
-        c->neighbours[RIGHT],
-        1,
-        c->comm,
-        &requests[3]);
-
-    MPI_Waitall(4, requests, MPI_STATUSES_IGNORE);
-}
-
-void commCollectResult(Comm* c,
-    double* ug,
-    double* vg,
-    double* pg,
-    double* u,
-    double* v,
-    double* p,
-    int jmax,
-    int imax)
-{
-    int offset[c->size * NDIMS];
-    int imaxLocal[c->size];
-    int jmaxLocal[c->size];
-
-    MPI_Gather(&c->imaxLocal, 1, MPI_INT, imaxLocal, 1, MPI_INT, 0, MPI_COMM_WORLD);
-    MPI_Gather(&c->jmaxLocal, 1, MPI_INT, jmaxLocal, 1, MPI_INT, 0, MPI_COMM_WORLD);
-
-    if (c->rank == 0) {
-        for (int i = 0; i < c->size; i++) {
-            int coords[NDIMS];
-            MPI_Cart_coords(c->comm, i, NDIMS, coords);
-            offset[i * NDIMS + IDIM] = sum(imaxLocal, coords[IDIM]);
-            offset[i * NDIMS + JDIM] = sum(jmaxLocal, coords[JDIM]);
-            printf("Rank: %d, Coords(j,i): %d %d, Size(j,i): %d %d "
-                   "Offset(j,i): %d %d\n",
-                i,
-                coords[JDIM],
-                coords[IDIM],
-                jmaxLocal[i],
-                imaxLocal[i],
-                offset[i * NDIMS + JDIM],
-                offset[i * NDIMS + IDIM]);
-        }
-    }
-
-    /* collect P */
-    assembleResult(c, p, pg, imaxLocal, jmaxLocal, offset, jmax, imax);
-
-    /* collect U */
-    assembleResult(c, u, ug, imaxLocal, jmaxLocal, offset, jmax, imax);
-
-    /* collect V */
-    assembleResult(c, v, vg, imaxLocal, jmaxLocal, offset, jmax, imax);
+#endif
 }

 void commPrintConfig(Comm* c)
 {
+#ifdef _MPI
    fflush(stdout);
    MPI_Barrier(MPI_COMM_WORLD);
    if (commIsMaster(c)) {
@@ -290,37 +43,87 @@ void commPrintConfig(Comm* c)
                c->neighbours[TOP],
                c->neighbours[LEFT],
                c->neighbours[RIGHT]);
-            printf("\tCoordinates (j,i) %d %d\n", c->coords[JDIM], c->coords[IDIM]);
-            printf("\tLocal domain size (j,i) %dx%d\n", c->jmaxLocal, c->imaxLocal);
+            printf("\tIs boundary:\n");
+            printf("\t\tLEFT: %d\n", commIsBoundary(c, LEFT));
+            printf("\t\tRIGHT: %d\n", commIsBoundary(c, RIGHT));
+            printf("\t\tBOTTOM: %d\n", commIsBoundary(c, BOTTOM));
+            printf("\t\tTOP: %d\n", commIsBoundary(c, TOP));
+            printf("\tCoordinates (i,j) %d %d\n", c->coords[IDIM], c->coords[JDIM]);
+            printf("\tDims (i,j) %d %d\n", c->dims[IDIM], c->dims[JDIM]);
+            printf("\tLocal domain size (i,j) %dx%d\n", c->imaxLocal, c->jmaxLocal);
            fflush(stdout);
        }
+        MPI_Barrier(MPI_COMM_WORLD);
    }
-    MPI_Barrier(MPI_COMM_WORLD);
+#endif
 }

-void commInit(Comm* c, int jmax, int imax)
+void commInit(Comm* c, int argc, char** argv)
 {
-    /* setup communication */
+#ifdef _MPI
+    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &(c->rank));
    MPI_Comm_size(MPI_COMM_WORLD, &(c->size));
-    int dims[NDIMS]    = { 0, 0 };
-    int periods[NDIMS] = { 0, 0 };
-    MPI_Dims_create(c->size, NDIMS, dims);
-    MPI_Cart_create(MPI_COMM_WORLD, NDIMS, dims, periods, 0, &c->comm);
-    MPI_Cart_shift(c->comm, IDIM, 1, &c->neighbours[LEFT], &c->neighbours[RIGHT]);
-    MPI_Cart_shift(c->comm, JDIM, 1, &c->neighbours[BOTTOM], &c->neighbours[TOP]);
-    MPI_Cart_get(c->comm, NDIMS, c->dims, periods, c->coords);
-
-    c->imaxLocal = sizeOfRank(c->rank, dims[IDIM], imax);
-    c->jmaxLocal = sizeOfRank(c->rank, dims[JDIM], jmax);
-
-    // setup buffer types for communication
-    setupCommunication(c, LEFT, BULK);
-    setupCommunication(c, LEFT, HALO);
-    setupCommunication(c, RIGHT, BULK);
-    setupCommunication(c, RIGHT, HALO);
-    setupCommunication(c, BOTTOM, BULK);
-    setupCommunication(c, BOTTOM, HALO);
-    setupCommunication(c, TOP, BULK);
-    setupCommunication(c, TOP, HALO);
+#else
+    c->rank = 0;
+    c->size = 1;
+#endif
+}
+
+/* Test helper: fills p, f and g (ghost cells included) with the rank of the caller. */
+void commTestInit(Comm* c, double* p, double* f, double* g)
+{
+    const int cols     = c->imaxLocal + 2;
+    const int rows     = c->jmaxLocal + 2;
+    const double value = c->rank;
+
+    for (int row = 0; row < rows; row++) {
+        for (int col = 0; col < cols; col++) {
+            int idx = row * cols + col;
+            p[idx] = f[idx] = g[idx] = value;
+        }
+    }
+}
+
+/* Dumps a (jmax + 2) x (imax + 2) grid as text, one row per line; exits if fopen fails. */
+static void testWriteFile(char* filename, double* grid, int imax, int jmax)
+{
+    const int cols = imax + 2;
+    FILE* out      = fopen(filename, "w");
+    if (!out) {
+        printf("Error!\n");
+        exit(EXIT_FAILURE);
+    }
+
+    for (int j = 0; j < jmax + 2; j++) {
+        for (int i = 0; i < cols; i++) {
+            fprintf(out, "%.2f ", grid[j * cols + i]);
+        }
+        fputc('\n', out);
+    }
+    fclose(out);
+}
+
+/*
+ * Test helper: dumps the local p, f and g arrays of this rank to the files
+ * ptest-<rank>.dat, ftest-<rank>.dat and gtest-<rank>.dat.
+ */
+void commTestWrite(Comm* c, double* p, double* f, double* g)
+{
+    const char prefix[] = { 'p', 'f', 'g' };
+    double* field[]     = { p, f, g };
+    char filename[30];
+
+    for (int k = 0; k < 3; k++) {
+        /* the same buffer is reused for all three file names */
+        snprintf(filename, sizeof filename, "%ctest-%d.dat", prefix[k], c->rank);
+        testWriteFile(filename, field[k], c->imaxLocal, c->jmaxLocal);
+    }
+}
+
+void commFinalize(Comm* c)
+{ // Shuts MPI down; c is not used, and non-MPI builds do nothing
+#ifdef _MPI
+    MPI_Finalize();
+#endif
 }
--- a/BasicSolver/2D-mpi/src/comm.h
+++ b/BasicSolver/2D-mpi/src/comm.h
@@ -1,35 +1,48 @@
 /*
- * Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
+ * Copyright (C)  NHR@FAU, University Erlangen-Nuremberg.
 * All rights reserved. This file is part of nusif-solver.
 * Use of this source code is governed by a MIT style
 * license that can be found in the LICENSE file.
 */
 #ifndef __COMM_H_
 #define __COMM_H_
+#if defined(_MPI)
 #include <mpi.h>
+#endif

 enum direction { LEFT = 0, RIGHT, BOTTOM, TOP, NDIRS };
-enum dimension { JDIM = 0, IDIM, NDIMS };
+enum dimension { IDIM = 0, JDIM, NDIMS };
+enum cdimension { CJDIM = 0, CIDIM };
 enum layer { HALO = 0, BULK };
 enum op { MAX = 0, SUM };

 typedef struct {
    int rank;
    int size;
+#if defined(_MPI)
    MPI_Comm comm;
-    MPI_Datatype sbufferTypes[NDIRS];
-    MPI_Datatype rbufferTypes[NDIRS];
+    MPI_Datatype bufferTypes[NDIRS];
+    MPI_Aint sdispls[NDIRS];
+    MPI_Aint rdispls[NDIRS];
+#endif
    int neighbours[NDIRS];
    int coords[NDIMS], dims[NDIMS];
    int imaxLocal, jmaxLocal;
 } Comm;

-extern void commInit(Comm* c, int jmax, int imax);
+extern int sizeOfRank(int rank, int size, int N);
+extern void commInit(Comm* c, int argc, char** argv);
+extern void commTestInit(Comm* c, double* p, double* f, double* g);
+extern void commTestWrite(Comm* c, double* p, double* f, double* g);
+extern void commFinalize(Comm* c);
+extern void commPartition(Comm* c, int jmax, int imax);
 extern void commPrintConfig(Comm*);
 extern void commExchange(Comm*, double*);
 extern void commShift(Comm* c, double* f, double* g);
 extern void commReduction(double* v, int op);
 extern int commIsBoundary(Comm* c, int direction);
+extern void commUpdateDatatypes(Comm*, Comm*, int, int);
+extern void commFreeCommunicator(Comm*);
 extern void commCollectResult(Comm* c,
    double* ug,
    double* vg,
--- a/BasicSolver/2D-mpi/src/discretization.c
+++ b/BasicSolver/2D-mpi/src/discretization.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
+ * Copyright (C)  NHR@FAU, University Erlangen-Nuremberg.
 * All rights reserved. This file is part of nusif-solver.
 * Use of this source code is governed by a MIT style
 * license that can be found in the LICENSE file.
@@ -12,18 +12,11 @@

 #include "allocate.h"
 #include "comm.h"
+#include "discretization.h"
 #include "parameter.h"
-#include "solver.h"
 #include "util.h"

-#define P(i, j)   p[(j) * (imaxLocal + 2) + (i)]
-#define F(i, j)   f[(j) * (imaxLocal + 2) + (i)]
-#define G(i, j)   g[(j) * (imaxLocal + 2) + (i)]
-#define U(i, j)   u[(j) * (imaxLocal + 2) + (i)]
-#define V(i, j)   v[(j) * (imaxLocal + 2) + (i)]
-#define RHS(i, j) rhs[(j) * (imaxLocal + 2) + (i)]
-
-static void printConfig(Solver* s)
+static void printConfig(Discretization* s)
 {
    if (commIsMaster(&s->comm)) {
        printf("Parameters for #%s#\n", s->problem);
@@ -35,47 +28,41 @@ static void printConfig(Solver* s)
        printf("\tReynolds number: %.2f\n", s->re);
        printf("\tGx Gy: %.2f %.2f\n", s->gx, s->gy);
        printf("Geometry data:\n");
-        printf("\tDomain box size (x, y): %.2f, %.2f\n", s->xlength, s->ylength);
-        printf("\tCells (x, y): %d, %d\n", s->imax, s->jmax);
-        printf("\tCell size (dx, dy): %f, %f\n", s->dx, s->dy);
+        printf("\tDomain box size (x, y): %.2f, %.2f\n",
+            s->grid.xlength,
+            s->grid.ylength);
+        printf("\tCells (x, y): %d, %d\n", s->grid.imax, s->grid.jmax);
+        printf("\tCell size (dx, dy): %f, %f\n", s->grid.dx, s->grid.dy);
        printf("Timestep parameters:\n");
        printf("\tDefault stepsize: %.2f, Final time %.2f\n", s->dt, s->te);
        printf("\tdt bound: %.6f\n", s->dtBound);
        printf("\tTau factor: %.2f\n", s->tau);
        printf("Iterative s parameters:\n");
-        printf("\tMax iterations: %d\n", s->itermax);
-        printf("\tepsilon (stopping tolerance) : %f\n", s->eps);
        printf("\tgamma factor: %f\n", s->gamma);
-        printf("\tomega (SOR relaxation): %f\n", s->omega);
    }
    commPrintConfig(&s->comm);
 }

-void initSolver(Solver* s, Parameter* params)
+void initDiscretiztion(Discretization* s, Parameter* params)
 {
-    s->problem  = params->name;
-    s->bcLeft   = params->bcLeft;
-    s->bcRight  = params->bcRight;
-    s->bcBottom = params->bcBottom;
-    s->bcTop    = params->bcTop;
-    s->imax     = params->imax;
-    s->jmax     = params->jmax;
-    s->xlength  = params->xlength;
-    s->ylength  = params->ylength;
-    s->dx       = params->xlength / params->imax;
-    s->dy       = params->ylength / params->jmax;
-    s->eps      = params->eps;
-    s->omega    = params->omg;
-    s->itermax  = params->itermax;
-    s->re       = params->re;
-    s->gx       = params->gx;
-    s->gy       = params->gy;
-    s->dt       = params->dt;
-    s->te       = params->te;
-    s->tau      = params->tau;
-    s->gamma    = params->gamma;
-
-    commInit(&s->comm, s->jmax, s->imax);
+    s->problem      = params->name;
+    s->bcLeft       = params->bcLeft;
+    s->bcRight      = params->bcRight;
+    s->bcBottom     = params->bcBottom;
+    s->bcTop        = params->bcTop;
+    s->grid.imax    = params->imax;
+    s->grid.jmax    = params->jmax;
+    s->grid.xlength = params->xlength;
+    s->grid.ylength = params->ylength;
+    s->grid.dx      = params->xlength / params->imax;
+    s->grid.dy      = params->ylength / params->jmax;
+    s->re           = params->re;
+    s->gx           = params->gx;
+    s->gy           = params->gy;
+    s->dt           = params->dt;
+    s->te           = params->te;
+    s->tau          = params->tau;
+    s->gamma        = params->gamma;

    /* allocate arrays */
    int imaxLocal = s->comm.imaxLocal;
@@ -98,8 +85,8 @@ void initSolver(Solver* s, Parameter* params)
        s->g[i]   = 0.0;
    }

-    double dx = s->dx;
-    double dy = s->dy;
+    double dx = s->grid.dx;
+    double dy = s->grid.dy;

    double invSqrSum = 1.0 / (dx * dx) + 1.0 / (dy * dy);
    s->dtBound       = 0.5 * s->re * 1.0 / invSqrSum;
@@ -108,12 +95,12 @@ void initSolver(Solver* s, Parameter* params)
 #endif
 }

-void computeRHS(Solver* s)
+void computeRHS(Discretization* s)
 {
    int imaxLocal = s->comm.imaxLocal;
    int jmaxLocal = s->comm.jmaxLocal;
-    double idx    = 1.0 / s->dx;
-    double idy    = 1.0 / s->dy;
+    double idx    = 1.0 / s->grid.dx;
+    double idy    = 1.0 / s->grid.dy;
    double idt    = 1.0 / s->dt;
    double* rhs   = s->rhs;
    double* f     = s->f;
@@ -129,88 +116,7 @@ void computeRHS(Solver* s)
    }
 }

-int solve(Solver* s)
-{
-    int imax      = s->imax;
-    int jmax      = s->jmax;
-    int imaxLocal = s->comm.imaxLocal;
-    int jmaxLocal = s->comm.jmaxLocal;
-    double eps    = s->eps;
-    int itermax   = s->itermax;
-    double dx2    = s->dx * s->dx;
-    double dy2    = s->dy * s->dy;
-    double idx2   = 1.0 / dx2;
-    double idy2   = 1.0 / dy2;
-    double factor = s->omega * 0.5 * (dx2 * dy2) / (dx2 + dy2);
-    double* p     = s->p;
-    double* rhs   = s->rhs;
-    double epssq  = eps * eps;
-    int it        = 0;
-    double res    = 1.0;
-    commExchange(&s->comm, p);
-
-    while ((res >= epssq) && (it < itermax)) {
-        res = 0.0;
-
-        for (int j = 1; j < jmaxLocal + 1; j++) {
-            for (int i = 1; i < imaxLocal + 1; i++) {
-
-                double r = RHS(i, j) -
-                           ((P(i + 1, j) - 2.0 * P(i, j) + P(i - 1, j)) * idx2 +
-                               (P(i, j + 1) - 2.0 * P(i, j) + P(i, j - 1)) * idy2);
-
-                P(i, j) -= (factor * r);
-                res += (r * r);
-            }
-        }
-
-        if (commIsBoundary(&s->comm, BOTTOM)) { // set bottom bc
-            for (int i = 1; i < imaxLocal + 1; i++) {
-                P(i, 0) = P(i, 1);
-            }
-        }
-
-        if (commIsBoundary(&s->comm, TOP)) { // set top bc
-            for (int i = 1; i < imaxLocal + 1; i++) {
-                P(i, jmaxLocal + 1) = P(i, jmaxLocal);
-            }
-        }
-
-        if (commIsBoundary(&s->comm, LEFT)) { // set left bc
-            for (int j = 1; j < jmaxLocal + 1; j++) {
-                P(0, j) = P(1, j);
-            }
-        }
-
-        if (commIsBoundary(&s->comm, RIGHT)) { // set right bc
-            for (int j = 1; j < jmaxLocal + 1; j++) {
-                P(imaxLocal + 1, j) = P(imaxLocal, j);
-            }
-        }
-
-        commReduction(&res, SUM);
-        res = res / (double)(imax * jmax);
-#ifdef DEBUG
-        if (commIsMaster(&s->comm)) {
-            printf("%d Residuum: %e\n", it, res);
-        }
-#endif
-        it++;
-    }
-
-#ifdef VERBOSE
-    if (commIsMaster(&s->comm)) {
-        printf("Solver took %d iterations to reach %f\n", it, sqrt(res));
-    }
-#endif
-    if (res < eps) {
-        return 0;
-    } else {
-        return 1;
-    }
-}
-
-static double maxElement(Solver* s, double* m)
+static double maxElement(Discretization* s, double* m)
 {
    int imaxLocal = s->comm.imaxLocal;
    int jmaxLocal = s->comm.jmaxLocal;
@@ -225,11 +131,11 @@ static double maxElement(Solver* s, double* m)
    return maxval;
 }

-void computeTimestep(Solver* s)
+void computeTimestep(Discretization* s)
 {
    double dt   = s->dtBound;
-    double dx   = s->dx;
-    double dy   = s->dy;
+    double dx   = s->grid.dx;
+    double dy   = s->grid.dy;
    double umax = maxElement(s, s->u);
    double vmax = maxElement(s, s->v);

@@ -243,7 +149,7 @@ void computeTimestep(Solver* s)
    s->dt = dt * s->tau;
 }

-void setBoundaryConditions(Solver* s)
+void setBoundaryConditions(Discretization* s)
 {
    int imaxLocal = s->comm.imaxLocal;
    int jmaxLocal = s->comm.jmaxLocal;
@@ -351,7 +257,7 @@ void setBoundaryConditions(Solver* s)
    }
 }

-void setSpecialBoundaryCondition(Solver* s)
+void setSpecialBoundaryCondition(Discretization* s)
 {
    int imaxLocal = s->comm.imaxLocal;
    int jmaxLocal = s->comm.jmaxLocal;
@@ -365,25 +271,27 @@ void setSpecialBoundaryCondition(Solver* s)
        }
    } else if (strcmp(s->problem, "canal") == 0) {
        if (commIsBoundary(&s->comm, LEFT)) {
-            double ylength = s->ylength;
-            double dy      = s->dy;
-            int rest       = s->jmax % s->comm.size;
-            int yc    = s->comm.rank * (s->jmax / s->comm.size) + MIN(rest, s->comm.rank);
+            double ylength = s->grid.ylength;
+            double dy      = s->grid.dy;
+            int rest       = s->grid.jmax % s->comm.dims[JDIM];
+            int yc         = s->comm.rank * (s->grid.jmax / s->comm.dims[JDIM]) +
+                     MIN(rest, s->comm.rank);
            double ys = dy * (yc + 0.5);
            double y;

-            /* printf("RANK %d yc: %d ys: %f\n", solver->rank, yc, ys); */
+            // printf("RANK %d yc: %d ys: %f\n", s->comm.rank, yc, ys);

            for (int j = 1; j < jmaxLocal + 1; j++) {
                y       = ys + dy * (j - 0.5);
                U(0, j) = y * (ylength - y) * 4.0 / (ylength * ylength);
+
            }
        }
    }
    /* print(solver, solver->u); */
 }

-void computeFG(Solver* s)
+void computeFG(Discretization* s)
 {
    double* u = s->u;
    double* v = s->v;
@@ -398,8 +306,8 @@ void computeFG(Solver* s)
    double gamma     = s->gamma;
    double dt        = s->dt;
    double inverseRe = 1.0 / s->re;
-    double inverseDx = 1.0 / s->dx;
-    double inverseDy = 1.0 / s->dy;
+    double inverseDx = 1.0 / s->grid.dx;
+    double inverseDy = 1.0 / s->grid.dy;
    double du2dx, dv2dy, duvdx, duvdy;
    double du2dx2, du2dy2, dv2dx2, dv2dy2;

@@ -475,7 +383,7 @@ void computeFG(Solver* s)
    }
 }

-void adaptUV(Solver* s)
+void adaptUV(Discretization* s)
 {
    int imaxLocal = s->comm.imaxLocal;
    int jmaxLocal = s->comm.jmaxLocal;
@@ -486,8 +394,8 @@ void adaptUV(Solver* s)
    double* f = s->f;
    double* g = s->g;

-    double factorX = s->dt / s->dx;
-    double factorY = s->dt / s->dy;
+    double factorX = s->dt / s->grid.dx;
+    double factorY = s->dt / s->grid.dy;

    for (int j = 1; j < jmaxLocal + 1; j++) {
        for (int i = 1; i < imaxLocal + 1; i++) {
@@ -497,12 +405,12 @@ void adaptUV(Solver* s)
    }
 }

-void writeResult(Solver* s, double* u, double* v, double* p)
+void writeResult(Discretization* s, double* u, double* v, double* p)
 {
-    int imax  = s->imax;
-    int jmax  = s->jmax;
-    double dx = s->dx;
-    double dy = s->dy;
+    int imax  = s->grid.imax;
+    int jmax  = s->grid.jmax;
+    double dx = s->grid.dx;
+    double dy = s->grid.dy;
    double x = 0.0, y = 0.0;

    FILE* fp;
@@ -513,11 +421,11 @@ void writeResult(Solver* s, double* u, double* v, double* p)
        exit(EXIT_FAILURE);
    }

-    for (int j = 1; j < jmax; j++) {
+    for (int j = 1; j <= jmax; j++) {
        y = (double)(j - 0.5) * dy;
-        for (int i = 1; i < imax; i++) {
+        for (int i = 1; i <= imax; i++) {
            x = (double)(i - 0.5) * dx;
-            fprintf(fp, "%.2f %.2f %f\n", x, y, p[j * (imax) + i]);
+            fprintf(fp, "%.2f %.2f %f\n", x, y, p[j * (imax + 2) + i]);
        }
        fprintf(fp, "\n");
    }
@@ -531,14 +439,14 @@ void writeResult(Solver* s, double* u, double* v, double* p)
        exit(EXIT_FAILURE);
    }

-    for (int j = 1; j < jmax; j++) {
+    for (int j = 1; j <= jmax; j++) {
        y = dy * (j - 0.5);
-        for (int i = 1; i < imax; i++) {
-            x            = dx * (i - 0.5);
-            double vel_u = (u[j * (imax) + i] + u[j * (imax) + (i - 1)]) / 2.0;
-            double vel_v = (v[j * (imax) + i] + v[(j - 1) * (imax) + i]) / 2.0;
-            double len   = sqrt((vel_u * vel_u) + (vel_v * vel_v));
-            fprintf(fp, "%.2f %.2f %f %f %f\n", x, y, vel_u, vel_v, len);
+        for (int i = 1; i <= imax; i++) {
+            x           = dx * (i - 0.5);
+            double velU = (u[j * (imax + 2) + i] + u[j * (imax + 2) + (i - 1)]) / 2.0;
+            double velV = (v[j * (imax + 2) + i] + v[(j - 1) * (imax + 2) + i]) / 2.0;
+            double len  = sqrt((velU * velU) + (velV * velV));
+            fprintf(fp, "%.2f %.2f %f %f %f\n", x, y, velU, velV, len);
        }
    }

--- a/BasicSolver/2D-mpi/src/discretization.h
+++ b/BasicSolver/2D-mpi/src/discretization.h
@@ -0,0 +1,43 @@
+/*
+ * Copyright (C)  NHR@FAU, University Erlangen-Nuremberg.
+ * All rights reserved. This file is part of nusif-solver.
+ * Use of this source code is governed by a MIT style
+ * license that can be found in the LICENSE file.
+ */
+#ifndef __DISCRETIZATION_H_
+#define __DISCRETIZATION_H_
+#include "comm.h"
+#include "grid.h"
+#include "parameter.h"
+
+enum BC { NOSLIP = 1, SLIP, OUTFLOW, PERIODIC };
+
+typedef struct {
+    /* geometry and grid information */
+    Grid grid;
+    /* arrays */
+    double *p, *rhs; /* pressure field and right-hand side handed to solve() */
+    double *f, *g;   /* intermediate fields, see computeFG() */
+    double *u, *v;   /* velocity components */
+    /* parameters */
+    double re, tau, gamma; /* Reynolds number, time step scaling factor, gamma factor */
+    double gx, gy;         /* printed as "Gx Gy" by printConfig() */
+    /* time stepping */
+    double dt, te;  /* step size and final time */
+    double dtBound; /* 0.5 * re / (1/dx^2 + 1/dy^2), set in initDiscretiztion() */
+    char* problem;  /* name of the flow setup, e.g. compared against "canal" */
+    int bcLeft, bcRight, bcBottom, bcTop; /* per-edge flags; presumably enum BC values */
+    /* communication */
+    Comm comm;
+} Discretization;
+
+void initDiscretiztion(Discretization*, Parameter*);
+void computeRHS(Discretization*);
+void normalizePressure(Discretization*);
+void computeTimestep(Discretization*);
+void setBoundaryConditions(Discretization*);
+void setSpecialBoundaryCondition(Discretization*);
+void computeFG(Discretization*);
+void adaptUV(Discretization*);
+void writeResult(Discretization* s, double* u, double* v, double* p);
+#endif
--- a/BasicSolver/2D-mpi/src/grid.h
+++ b/BasicSolver/2D-mpi/src/grid.h
@@ -0,0 +1,16 @@
+/*
+ * Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
+ * All rights reserved. This file is part of nusif-solver.
+ * Use of this source code is governed by a MIT style
+ * license that can be found in the LICENSE file.
+ */
+#ifndef __GRID_H_
+#define __GRID_H_
+
+typedef struct {
+    double dx, dy;           /* cell size: xlength / imax and ylength / jmax */
+    int imax, jmax;          /* number of cells in x and y direction (whole domain) */
+    double xlength, ylength; /* domain box size in x and y direction */
+} Grid;
+
+#endif // __GRID_H_
--- a/BasicSolver/2D-mpi/src/main.c
+++ b/BasicSolver/2D-mpi/src/main.c
@@ -1,95 +1,121 @@
 /*
- * Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
+ * Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
 * All rights reserved.
 * Use of this source code is governed by a MIT-style
 * license that can be found in the LICENSE file.
 */
-#include <float.h>
-#include <limits.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <unistd.h>

 #include "allocate.h"
+#include "comm.h"
+#include "discretization.h"
 #include "parameter.h"
 #include "progress.h"
 #include "solver.h"
 #include "timing.h"
-#include <mpi.h>
+
+/* Write the result files; MPI builds first collect the fields via commCollectResult(). */
+static void writeResults(Discretization* s)
+{
+#ifdef _MPI
+    /* Do the arithmetic in size_t: the plain int product can overflow on large grids. */
+    size_t cells    = (size_t)(s->grid.imax + 2) * (size_t)(s->grid.jmax + 2);
+    size_t bytesize = cells * sizeof(double);
+    double* ug      = allocate(64, bytesize);
+    double* vg      = allocate(64, bytesize);
+    double* pg      = allocate(64, bytesize);
+
+    commCollectResult(&s->comm, ug, vg, pg, s->u, s->v, s->p, s->grid.imax, s->grid.jmax);
+    if (commIsMaster(&s->comm)) writeResult(s, ug, vg, pg);
+
+    free(ug);
+    free(vg);
+    free(pg);
+#else
+    writeResult(s, s->u, s->v, s->p);
+#endif
+}

 int main(int argc, char** argv)
 {
    int rank;
-    double S, E;
-    Parameter params;
-    Solver solver;
+    double timeStart, timeStop;
+    Parameter p;
+    Discretization d;
+    Solver s;

-    MPI_Init(&argc, &argv);
-    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
-    initParameter(&params);
+    commInit(&d.comm, argc, argv);
+    initParameter(&p);
+
+    FILE* fp;
+    if (commIsMaster(&d.comm)) fp = initResidualWriter();

    if (argc != 2) {
        printf("Usage: %s <configFile>\n", argv[0]);
        exit(EXIT_SUCCESS);
    }

-    readParameter(&params, argv[1]);
-    if (rank == 0) {
-        printParameter(&params);
+    readParameter(&p, argv[1]);
+    commPartition(&d.comm, p.jmax, p.imax);
+    if (commIsMaster(&d.comm)) {
+        printParameter(&p);
    }
-    initSolver(&solver, &params);
-    /* debugExchange(&solver); */
-    /* exit(EXIT_SUCCESS); */
-    initProgress(solver.te);

-    double tau = solver.tau;
-    double te  = solver.te;
+    initDiscretiztion(&d, &p);
+    initSolver(&s, &d, &p);
+#ifdef TEST
+    commPrintConfig(&d.comm);
+    commTestInit(&d.comm, d.p, d.f, d.g);
+    commExchange(&d.comm, d.p);
+    commShift(&d.comm, d.f, d.g);
+    commTestWrite(&d.comm, d.p, d.f, d.g);
+    writeResults(&d);
+    commFinalize(&d.comm);
+    exit(EXIT_SUCCESS);
+#endif
+#ifndef VERBOSE
+    initProgress(d.te);
+#endif
+
+    double tau = d.tau;
+    double te  = d.te;
    double t   = 0.0;
+    double res = 0.0;

-    S = getTimeStamp();
+    timeStart = getTimeStamp();
    while (t <= te) {
-        if (tau > 0.0) {
-            computeTimestep(&solver);
-        }
        
-        setBoundaryConditions(&solver);
-        setSpecialBoundaryCondition(&solver);
-        computeFG(&solver);
-        computeRHS(&solver);
-        solve(&solver);
-        adaptUV(&solver);
-        t += solver.dt;
+        if (tau > 0.0) computeTimestep(&d);
+        setBoundaryConditions(&d);
+        setSpecialBoundaryCondition(&d);
+        computeFG(&d);
+        computeRHS(&d);
+        res = solve(&s, d.p, d.rhs);
+        adaptUV(&d);
+
+        if (commIsMaster(&d.comm)) writeResidual(fp, t, res);
+
+        t += d.dt;

 #ifdef VERBOSE
-        if (rank == 0) {
-            printf("TIME %f , TIMESTEP %f\n", t, solver.dt);
+        if (commIsMaster(s.comm)) {
+            printf("TIME %f , TIMESTEP %f\n", t, d.dt);
        }
 #else
        printProgress(t);
 #endif
    }
-    E = getTimeStamp();
+    timeStop = getTimeStamp();
+#ifndef VERBOSE
    stopProgress();
-    if (rank == 0) {
-        printf("Solution took %.2fs\n", E - S);
+#endif
+    if (commIsMaster(s.comm)) {
+        printf("Solution took %.2fs\n", timeStop - timeStart);
    }
-    size_t bytesize = solver.imax * solver.jmax * sizeof(double);
-
-    double* ug = allocate(64, bytesize);
-    double* vg = allocate(64, bytesize);
-    double* pg = allocate(64, bytesize);
-
-    commCollectResult(&solver.comm,
-        ug,
-        vg,
-        pg,
-        solver.u,
-        solver.v,
-        solver.p,
-        solver.jmax,
-        solver.imax);
-    writeResult(&solver, ug, vg, pg);
-
-    MPI_Finalize();
+    if (commIsMaster(&d.comm)) fclose(fp);
+    writeResults(&d);
+    commFinalize(s.comm);
    return EXIT_SUCCESS;
 }
--- a/BasicSolver/2D-mpi/src/parameter.c
+++ b/BasicSolver/2D-mpi/src/parameter.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
+ * Copyright (C)  NHR@FAU, University Erlangen-Nuremberg.
 * All rights reserved. This file is part of nusif-solver.
 * Use of this source code is governed by a MIT style
 * license that can be found in the LICENSE file.
@@ -14,13 +14,16 @@

 void initParameter(Parameter* param)
 {
-    param->xlength = 1.0;
-    param->ylength = 1.0;
-    param->imax    = 100;
-    param->jmax    = 100;
-    param->itermax = 1000;
-    param->eps     = 0.0001;
-    param->omg     = 1.8;
+    param->xlength    = 1.0;
+    param->ylength    = 1.0;
+    param->imax       = 100;
+    param->jmax       = 100;
+    param->itermax    = 1000;
+    param->eps        = 0.0001;
+    param->omg        = 1.8;
+    param->levels     = 5;
+    param->presmooth  = 5;
+    param->postsmooth = 5;
 }

 void readParameter(Parameter* param, const char* filename)
@@ -72,6 +75,9 @@ void readParameter(Parameter* param, const char* filename)
            PARSE_INT(bcRight);
            PARSE_INT(bcBottom);
            PARSE_INT(bcTop);
+            PARSE_INT(levels);
+            PARSE_INT(presmooth);
+            PARSE_INT(postsmooth);
            PARSE_REAL(u_init);
            PARSE_REAL(v_init);
            PARSE_REAL(p_init);
--- a/BasicSolver/2D-mpi/src/parameter.h
+++ b/BasicSolver/2D-mpi/src/parameter.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
+ * Copyright (C)  NHR@FAU, University Erlangen-Nuremberg.
 * All rights reserved. This file is part of nusif-solver.
 * Use of this source code is governed by a MIT style
 * license that can be found in the LICENSE file.
@@ -18,6 +18,7 @@ typedef struct {
    char* name;
    int bcLeft, bcRight, bcBottom, bcTop;
    double u_init, v_init, p_init;
+    int levels, presmooth, postsmooth;
 } Parameter;

 void initParameter(Parameter*);
--- a/BasicSolver/2D-mpi/src/progress.c
+++ b/BasicSolver/2D-mpi/src/progress.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
+ * Copyright (C)  NHR@FAU, University Erlangen-Nuremberg.
 * All rights reserved. This file is part of nusif-solver.
 * Use of this source code is governed by a MIT style
 * license that can be found in the LICENSE file.
@@ -7,54 +7,64 @@
 #include <math.h>
 #include <mpi.h>
 #include <stdio.h>
-#include <stdlib.h>
 #include <string.h>
-
+#include <stdlib.h>
 #include "progress.h"

 static double _end;
 static int _current;
-static int _rank = -1;

 void initProgress(double end)
 {
-    MPI_Comm_rank(MPI_COMM_WORLD, &_rank);
    _end     = end;
    _current = 0;

-    if (_rank == 0) {
-        printf("[          ]");
-        fflush(stdout);
-    }
+    printf("[          ]");
+    fflush(stdout);
 }

 void printProgress(double current)
 {
-    if (_rank == 0) {
-        int new = (int)rint((current / _end) * 10.0);
+    int new = (int)rint((current / _end) * 10.0);

-        if (new > _current) {
-            char progress[11];
-            _current    = new;
-            progress[0] = 0;
+    if (new > _current) {
+        char progress[11];
+        _current    = new;
+        progress[0] = 0;

-            for (int i = 0; i < 10; i++) {
-                if (i < _current) {
-                    sprintf(progress + strlen(progress), "#");
-                } else {
-                    sprintf(progress + strlen(progress), " ");
-                }
+        for (int i = 0; i < 10; i++) {
+            if (i < _current) {
+                sprintf(progress + strlen(progress), "#");
+            } else {
+                sprintf(progress + strlen(progress), " ");
            }
-            printf("\r[%s]", progress);
        }
-        fflush(stdout);
+        printf("\r[%s]", progress);
    }
+    fflush(stdout);
 }

 void stopProgress()
 {
-    if (_rank == 0) {
-        printf("\n");
-        fflush(stdout);
-    }
+    printf("\n");
+    fflush(stdout);
+}
+
+/*
+ * Open "residual.dat" for writing and return the stream, which the caller must
+ * fclose(). Terminates the program if the file cannot be opened.
+ */
+FILE* initResidualWriter(void)
+{
+    FILE* fp = fopen("residual.dat", "w");
+    if (fp == NULL) {
+        perror("Cannot open residual.dat");
+        exit(EXIT_FAILURE);
+    }
+    return fp;
+}
+
+void writeResidual(FILE* fp, double ts, double res)
+{
+    fprintf(fp, "%f, %f\n", ts, res); /* one "ts, res" text line per call */
+}
--- a/BasicSolver/2D-mpi/src/progress.h
+++ b/BasicSolver/2D-mpi/src/progress.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
+ * Copyright (C)  NHR@FAU, University Erlangen-Nuremberg.
 * All rights reserved.
 * Use of this source code is governed by a MIT-style
 * license that can be found in the LICENSE file.
@@ -10,5 +10,6 @@
 extern void initProgress(double);
 extern void printProgress(double);
 extern void stopProgress();
-
+extern FILE* initResidualWriter(void);
+extern void writeResidual(FILE*, double, double);
 #endif
--- a/BasicSolver/2D-mpi/src/solver-mg.c
+++ b/BasicSolver/2D-mpi/src/solver-mg.c
@@ -0,0 +1,302 @@
+/*
+ * Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
+ * All rights reserved. This file is part of nusif-solver.
+ * Use of this source code is governed by a MIT style
+ * license that can be found in the LICENSE file.
+ */
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "allocate.h"
+#include "solver.h"
+#include "util.h"
+
+#define FINEST_LEVEL   0
+#define COARSEST_LEVEL (s->levels - 1)
+// #define S(i, j)        s[(j) * (imaxLocal + 2) + (i)]
+#define E(i, j)   e[(j) * (imaxLocal + 2) + (i)]
+#define R(i, j)   r[(j) * (imaxLocal + 2) + (i)]
+#define OLD(i, j) old[(j) * (imaxLocal + 2) + (i)]
+
+static void restrictMG(Solver* s, int level, Comm* comm)
+{
+    int imaxLocal = comm->imaxLocal;
+    int jmaxLocal = comm->jmaxLocal;
+    /* Fill s->r[level + 1] from s->r[level]; R and OLD both use stride imaxLocal + 2. */
+    double* r   = s->r[level + 1];
+    double* old = s->r[level];
+    /* the stencil reads index 2 * i + 1, i.e. up to imaxLocal + 1 for even sizes */
+#ifdef _MPI
+    commExchange(comm, old);
+#endif
+    /* 3x3 weighted average around (2i, 2j): weights 1-2-1 / 2-4-2 / 1-2-1, sum 16 */
+    for (int j = 1; j < (jmaxLocal / 2) + 1; j++) {
+        for (int i = 1; i < (imaxLocal / 2) + 1; i++) {
+            R(i, j) = (OLD(2 * i - 1, 2 * j - 1) + OLD(2 * i, 2 * j - 1) * 2 +
+                          OLD(2 * i + 1, 2 * j - 1) + OLD(2 * i - 1, 2 * j) * 2 +
+                          OLD(2 * i, 2 * j) * 4 + OLD(2 * i + 1, 2 * j) * 2 +
+                          OLD(2 * i - 1, 2 * j + 1) + OLD(2 * i, 2 * j + 1) * 2 +
+                          OLD(2 * i + 1, 2 * j + 1)) /
+                      16.0;
+        }
+    }
+}
+
+static void prolongate(Solver* s, int level, Comm* comm)
+{
+    int imaxLocal = comm->imaxLocal;
+    int jmaxLocal = comm->jmaxLocal;
+    /* Copy value (i / 2, j / 2) of level + 1 to each even-indexed cell (i, j) of level. */
+    double* old = s->r[level + 1];
+    double* e   = s->r[level];
+    /* NOTE(review): both are s->r, but correct() adds s->e[level] - confirm intent. */
+    for (int j = 2; j < jmaxLocal + 1; j += 2) {
+        for (int i = 2; i < imaxLocal + 1; i += 2) {
+            E(i, j) = OLD(i / 2, j / 2);
+        }
+    }
+}
+
+static void correct(Solver* s, double* p, int level, Comm* comm)
+{
+    double* e     = s->e[level];
+    int imaxLocal = comm->imaxLocal;
+    int jmaxLocal = comm->jmaxLocal;
+    /* add s->e[level] to p for all indices 1..imaxLocal x 1..jmaxLocal */
+    for (int j = 1; j < jmaxLocal + 1; ++j) {
+        for (int i = 1; i < imaxLocal + 1; ++i) {
+            P(i, j) += E(i, j);
+        }
+    }
+}
+
+static void setBoundaryCondition(Solver* s, double* p, int imaxLocal, int jmaxLocal)
+{ // copy the outermost rows/columns 1 and max of p into the layers 0 and max + 1
+#ifdef _MPI
+    if (commIsBoundary(s->comm, BOTTOM)) { // set bottom bc: row 0 copies row 1
+        for (int i = 1; i < imaxLocal + 1; i++) {
+            P(i, 0) = P(i, 1);
+        }
+    }
+
+    if (commIsBoundary(s->comm, TOP)) { // set top bc: row jmaxLocal + 1 copies jmaxLocal
+        for (int i = 1; i < imaxLocal + 1; i++) {
+            P(i, jmaxLocal + 1) = P(i, jmaxLocal);
+        }
+    }
+
+    if (commIsBoundary(s->comm, LEFT)) { // set left bc: column 0 copies column 1
+        for (int j = 1; j < jmaxLocal + 1; j++) {
+            P(0, j) = P(1, j);
+        }
+    }
+
+    if (commIsBoundary(s->comm, RIGHT)) { // set right bc: column imaxLocal + 1 copies imaxLocal
+        for (int j = 1; j < jmaxLocal + 1; j++) {
+            P(imaxLocal + 1, j) = P(imaxLocal, j);
+        }
+    }
+#else // without MPI all four edges are set unconditionally
+    for (int i = 1; i < imaxLocal + 1; i++) {
+        P(i, 0)             = P(i, 1);
+        P(i, jmaxLocal + 1) = P(i, jmaxLocal);
+    }
+
+    for (int j = 1; j < jmaxLocal + 1; j++) {
+        P(0, j)             = P(1, j);
+        P(imaxLocal + 1, j) = P(imaxLocal, j);
+    }
+#endif
+}
+
+/*
+ * One red-black relaxation sweep on p: two passes, each updating every second
+ * cell of a row, with the start column alternating between rows and passes.
+ * In MPI builds p is exchanged with the neighbours before each pass.
+ * No residual is computed here, so the function always returns 0.0; the
+ * callers in this file ignore the result.
+ */
+static double smooth(Solver* s, double* p, double* rhs, int level, Comm* comm)
+{
+    (void)level; /* kept for a uniform signature; not needed by the sweep */
+    int imaxLocal = comm->imaxLocal;
+    int jmaxLocal = comm->jmaxLocal;
+
+    double dx2  = s->grid->dx * s->grid->dx;
+    double dy2  = s->grid->dy * s->grid->dy;
+    double idx2 = 1.0 / dx2;
+    double idy2 = 1.0 / dy2;
+
+    double factor = s->omega * 0.5 * (dx2 * dy2) / (dx2 + dy2);
+    int jsw       = 1;
+
+    for (int pass = 0; pass < 2; pass++) {
+        int isw = jsw;
+#ifdef _MPI
+        commExchange(comm, p);
+#endif
+        for (int j = 1; j < jmaxLocal + 1; j++) {
+            for (int i = isw; i < imaxLocal + 1; i += 2) {
+                P(i, j) -= factor *
+                           (RHS(i, j) -
+                               ((P(i + 1, j) - 2.0 * P(i, j) + P(i - 1, j)) * idx2 +
+                                   (P(i, j + 1) - 2.0 * P(i, j) + P(i, j - 1)) * idy2));
+            }
+            /* toggle between start column 1 and 2 for the next row */
+            isw = 3 - isw;
+        }
+        jsw = 3 - jsw;
+    }
+
+    return 0.0;
+}
+
+/*
+ * Store r = rhs - Laplace(p) for the interior cells in s->r[level] and return the
+ * summed r^2 (reduced over all ranks with MPI) divided by imax * jmax of s->grid.
+ */
+static double calculateResidual(Solver* s, double* p, double* rhs, int level, Comm* comm)
+{
+    int imax      = s->grid->imax;
+    int jmax      = s->grid->jmax;
+    int imaxLocal = comm->imaxLocal;
+    int jmaxLocal = comm->jmaxLocal;
+
+    double dx2  = s->grid->dx * s->grid->dx;
+    double dy2  = s->grid->dy * s->grid->dy;
+    double idx2 = 1.0 / dx2;
+    double idy2 = 1.0 / dy2;
+    double* r   = s->r[level];
+    double res  = 0.0; /* sum of squares; must start at zero */
+    int jsw     = 1;
+
+#ifdef _MPI
+    /* p is only read below, so one exchange serves both passes */
+    commExchange(comm, p);
+#endif
+
+    for (int pass = 0; pass < 2; pass++) {
+        int isw = jsw;
+        for (int j = 1; j < jmaxLocal + 1; j++) {
+            for (int i = isw; i < imaxLocal + 1; i += 2) {
+                R(i, j) = RHS(i, j) -
+                          ((P(i + 1, j) - 2.0 * P(i, j) + P(i - 1, j)) * idx2 +
+                              (P(i, j + 1) - 2.0 * P(i, j) + P(i, j - 1)) * idy2);
+
+                res += (R(i, j) * R(i, j));
+            }
+            isw = 3 - isw;
+        }
+        jsw = 3 - jsw;
+    }
+
+#ifdef _MPI
+    commReduction(&res, SUM);
+#endif
+
+    res = res / (double)(imax * jmax);
+#ifdef DEBUG
+    if (commIsMaster(s->comm)) {
+        printf("%d Residuum: %e\n", level, res); /* tagged with the grid level */
+    }
+#endif
+    return res;
+}
+
+static double multiGrid(Solver* s, double* p, double* rhs, int level, Comm* comm)
+{
+    double res = 0.0;
+    // Recursive cycle for p on `level`; returns the residual computed on this level.
+    // coarsest level: no recursion, 5 smoothing sweeps, res stays 0.0
+    if (level == COARSEST_LEVEL) {
+        for (int i = 0; i < 5; i++) {
+            smooth(s, p, rhs, level, comm);
+        }
+        return res;
+    }
+
+    // pre-smoothing (boundary values are refreshed on the finest level only)
+    for (int i = 0; i < s->presmooth; i++) {
+        smooth(s, p, rhs, level, comm);
+        if (level == FINEST_LEVEL)
+            setBoundaryCondition(s, p, comm->imaxLocal, comm->jmaxLocal);
+    }
+
+    // calculate residuals into s->r[level]
+    res = calculateResidual(s, p, rhs, level, comm);
+
+    // restrict s->r[level] into s->r[level + 1]
+    restrictMG(s, level, comm);
+
+    Comm newcomm;
+    commUpdateDatatypes(s->comm, &newcomm, comm->imaxLocal, comm->jmaxLocal);
+    // NOTE(review): presumably newcomm describes the next coarser grid; the helper
+    // is defined outside this file - confirm what it derives from the sizes passed.
+    // MGSolver on residual and error.
+    multiGrid(s, s->e[level + 1], s->r[level + 1], level + 1, &newcomm);
+
+    commFreeCommunicator(&newcomm);
+
+    // prolongate
+    prolongate(s, level, comm);
+
+    // correct p on this level by adding s->e[level]
+    correct(s, p, level, comm);
+
+    if (level == FINEST_LEVEL)
+        setBoundaryCondition(s, p, comm->imaxLocal, comm->jmaxLocal);
+
+    // post-smoothing
+    for (int i = 0; i < s->postsmooth; i++) {
+        smooth(s, p, rhs, level, comm);
+        if (level == FINEST_LEVEL)
+            setBoundaryCondition(s, p, comm->imaxLocal, comm->jmaxLocal);
+    }
+
+    return res;
+}
+
+/* Copy the solver parameters and allocate zeroed residual/error arrays per level. */
+void initSolver(Solver* s, Discretization* d, Parameter* p)
+{
+    s->eps        = p->eps;
+    s->omega      = p->omg;
+    s->itermax    = p->itermax;
+    s->levels     = p->levels;
+    s->grid       = &d->grid;
+    s->comm       = &d->comm;
+    s->presmooth  = p->presmooth;
+    s->postsmooth = p->postsmooth;
+
+    int levels   = s->levels;
+    size_t cells = (size_t)(s->grid->imax + 2) * (size_t)(s->grid->jmax + 2);
+    if (commIsMaster(s->comm)) printf("Using Multigrid solver with %d levels\n", levels);
+    /* levels < 1 would make COARSEST_LEVEL unreachable for the recursion */
+    s->r = (levels > 0) ? malloc(levels * sizeof(double*)) : NULL;
+    s->e = (levels > 0) ? malloc(levels * sizeof(double*)) : NULL;
+    if (s->r == NULL || s->e == NULL) {
+        fprintf(stderr, "Cannot set up %d multigrid levels\n", levels);
+        exit(EXIT_FAILURE);
+    }
+    for (int j = 0; j < levels; j++) {
+        s->r[j] = allocate(64, cells * sizeof(double));
+        s->e[j] = allocate(64, cells * sizeof(double));
+        for (size_t i = 0; i < cells; i++) {
+            s->r[j][i] = 0.0;
+            s->e[j][i] = 0.0;
+        }
+    }
+}
+
+/*
+ * Run one multigrid cycle for p, starting on the finest level, and return the
+ * residual reported by that cycle.
+ */
+double solve(Solver* s, double* p, double* rhs)
+{
+    const double residual = multiGrid(s, p, rhs, FINEST_LEVEL, s->comm);
+#ifdef VERBOSE
+    if (commIsMaster(s->comm)) printf("Residuum: %.6f\n", residual);
+#endif
+    return residual;
+}
--- a/BasicSolver/2D-mpi/src/solver-rb.c
+++ b/BasicSolver/2D-mpi/src/solver-rb.c
@@ -0,0 +1,106 @@
+/*
+ * Copyright (C)  NHR@FAU, University Erlangen-Nuremberg.
+ * All rights reserved. This file is part of nusif-solver.
+ * Use of this source code is governed by a MIT style
+ * license that can be found in the LICENSE file.
+ */
+#include <math.h>
+#include <stdio.h>
+
+#include "allocate.h"
+#include "comm.h"
+#include "discretization.h"
+#include "parameter.h"
+#include "solver.h"
+#include "util.h"
+// Copy the iteration controls from p; grid and comm point into d, not copies.
+void initSolver(Solver* s, Discretization* d, Parameter* p)
+{
+    s->eps     = p->eps;
+    s->omega   = p->omg;
+    s->itermax = p->itermax;
+    s->grid    = &d->grid;
+    s->comm    = &d->comm;
+}
+
+// Red-black SOR sweeps on p for right-hand side rhs. Stops once the mean squared
+// residual falls below eps^2 or after itermax sweeps, and returns that residual.
+double solve(Solver* s, double* p, double* rhs)
+{
+    int imax      = s->grid->imax;
+    int jmax      = s->grid->jmax;
+    int imaxLocal = s->comm->imaxLocal;
+    int jmaxLocal = s->comm->jmaxLocal;
+    double eps    = s->eps;
+    int itermax   = s->itermax;
+    double dx2    = s->grid->dx * s->grid->dx;
+    double dy2    = s->grid->dy * s->grid->dy;
+    double idx2   = 1.0 / dx2;
+    double idy2   = 1.0 / dy2;
+    double factor = s->omega * 0.5 * (dx2 * dy2) / (dx2 + dy2);
+    double epssq  = eps * eps;
+    int pass, jsw, isw;
+    int it     = 0;
+    double res = 1.0;
+
+    while ((res >= epssq) && (it < itermax)) {
+        // res must hold only this sweep's residual, so restart the sum at zero.
+        res = 0.0;
+        jsw = 1;
+        for (pass = 0; pass < 2; pass++) {
+            isw = jsw;
+            commExchange(s->comm, p);
+            // isw flips per row and jsw per pass: each pass updates every other cell.
+            for (int j = 1; j < jmaxLocal + 1; j++) {
+                for (int i = isw; i < imaxLocal + 1; i += 2) {
+                    double r = RHS(i, j) -
+                               ((P(i + 1, j) - 2.0 * P(i, j) + P(i - 1, j)) * idx2 +
+                                   (P(i, j + 1) - 2.0 * P(i, j) + P(i, j - 1)) * idy2);
+                    P(i, j) -= (factor * r);
+                    res += (r * r);
+                }
+                isw = 3 - isw;
+            }
+            jsw = 3 - jsw;
+        }
+
+        // On sides flagged by commIsBoundary copy the adjacent interior value outward.
+        if (commIsBoundary(s->comm, BOTTOM)) { // set bottom bc
+            for (int i = 1; i < imaxLocal + 1; i++) {
+                P(i, 0) = P(i, 1);
+            }
+        }
+        if (commIsBoundary(s->comm, TOP)) { // set top bc
+            for (int i = 1; i < imaxLocal + 1; i++) {
+                P(i, jmaxLocal + 1) = P(i, jmaxLocal);
+            }
+        }
+        if (commIsBoundary(s->comm, LEFT)) { // set left bc
+            for (int j = 1; j < jmaxLocal + 1; j++) {
+                P(0, j) = P(1, j);
+            }
+        }
+        if (commIsBoundary(s->comm, RIGHT)) { // set right bc
+            for (int j = 1; j < jmaxLocal + 1; j++) {
+                P(imaxLocal + 1, j) = P(imaxLocal, j);
+            }
+        }
+
+        // Sum res through commReduction, then normalize by imax * jmax of the grid.
+        commReduction(&res, SUM);
+        res = res / (double)(imax * jmax);
+#ifdef DEBUG
+        if (commIsMaster(s->comm)) {
+            printf("%d Residuum: %e\n", it, res);
+        }
+#endif
+        it++;
+    }
+
+#ifdef VERBOSE
+    if (commIsMaster(s->comm)) {
+        printf("Solver took %d iterations to reach %f\n", it, sqrt(res));
+    }
+#endif
+    return res;
+}
--- a/BasicSolver/2D-mpi/src/solver.h
+++ b/BasicSolver/2D-mpi/src/solver.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
+ * Copyright (C)  NHR@FAU, University Erlangen-Nuremberg.
 * All rights reserved. This file is part of nusif-solver.
 * Use of this source code is governed by a MIT style
 * license that can be found in the LICENSE file.
@@ -7,41 +7,23 @@
 #ifndef __SOLVER_H_
 #define __SOLVER_H_
 #include "comm.h"
+#include "discretization.h"
+#include "grid.h"
+#include "mpi.h"
 #include "parameter.h"

-enum BC { NOSLIP = 1, SLIP, OUTFLOW, PERIODIC };
-
 typedef struct {
    /* geometry and grid information */
-    double dx, dy;
-    int imax, jmax;
-    double xlength, ylength;
-    /* arrays */
-    double *p, *rhs;
-    double *f, *g;
-    double *u, *v;
+    Grid* grid; // initSolver points this at the Discretization's grid
    /* parameters */
    double eps, omega;
-    double re, tau, gamma;
-    double gx, gy;
-    /* time stepping */
    int itermax;
-    double dt, te;
-    double dtBound;
-    char* problem;
-    int bcLeft, bcRight, bcBottom, bcTop;
+    int levels, presmooth, postsmooth; // multigrid settings copied from Parameter
+    double **r, **e; // per-level arrays; only the multigrid initSolver allocates them
    /* communication */
-    Comm comm;
+    Comm* comm; // initSolver points this at the Discretization's comm
 } Solver;

-void initSolver(Solver*, Parameter*);
-void computeRHS(Solver*);
-int solve(Solver*);
-void normalizePressure(Solver*);
-void computeTimestep(Solver*);
-void setBoundaryConditions(Solver*);
-void setSpecialBoundaryCondition(Solver*);
-void computeFG(Solver*);
-void adaptUV(Solver*);
-void writeResult(Solver* s, double* u, double* v, double* p);
+void initSolver(Solver*, Discretization*, Parameter*);
+double solve(Solver*, double*, double*);
 #endif
--- a/BasicSolver/2D-mpi/src/timing.c
+++ b/BasicSolver/2D-mpi/src/timing.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
+ * Copyright (C)  NHR@FAU, University Erlangen-Nuremberg.
 * All rights reserved.
 * Use of this source code is governed by a MIT-style
 * license that can be found in the LICENSE file.
@@ -7,18 +7,16 @@
 #include <stdlib.h>
 #include <time.h>

-double getTimeStamp()
+double getTimeStamp(void) // CLOCK_MONOTONIC reading in seconds, as a double
 {
    struct timespec ts;
    clock_gettime(CLOCK_MONOTONIC, &ts);
    return (double)ts.tv_sec + (double)ts.tv_nsec * 1.e-9;
 }

-double getTimeResolution()
+double getTimeResolution(void) // clock_getres of CLOCK_MONOTONIC in seconds
 {
    struct timespec ts;
    clock_getres(CLOCK_MONOTONIC, &ts);
    return (double)ts.tv_sec + (double)ts.tv_nsec * 1.e-9;
 }
-
-double getTimeStamp_() { return getTimeStamp(); }
--- a/BasicSolver/2D-mpi/src/timing.h
+++ b/BasicSolver/2D-mpi/src/timing.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
+ * Copyright (C)  NHR@FAU, University Erlangen-Nuremberg.
 * All rights reserved.
 * Use of this source code is governed by a MIT-style
 * license that can be found in the LICENSE file.
@@ -7,8 +7,7 @@
 #ifndef __TIMING_H_
 #define __TIMING_H_

-extern double getTimeStamp();
-extern double getTimeResolution();
-extern double getTimeStamp_();
+extern double getTimeStamp(void);
+extern double getTimeResolution(void);

 #endif // __TIMING_H_
--- a/BasicSolver/2D-mpi/src/util.h
+++ b/BasicSolver/2D-mpi/src/util.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
+ * Copyright (C)  NHR@FAU, University Erlangen-Nuremberg.
 * All rights reserved.
 * Use of this source code is governed by a MIT-style
 * license that can be found in the LICENSE file.
@@ -19,4 +19,11 @@
 #define ABS(a) ((a) >= 0 ? (a) : -(a))
 #endif

+// Cell (i, j) accessors; each expects its array and imaxLocal in the caller's scope.
+#define P(i, j)   p[(j) * (imaxLocal + 2) + (i)]
+#define F(i, j)   f[(j) * (imaxLocal + 2) + (i)]
+#define G(i, j)   g[(j) * (imaxLocal + 2) + (i)]
+#define U(i, j)   u[(j) * (imaxLocal + 2) + (i)]
+#define V(i, j)   v[(j) * (imaxLocal + 2) + (i)]
+#define RHS(i, j) rhs[(j) * (imaxLocal + 2) + (i)]
 #endif // __UTIL_H_
--- a/BasicSolver/2D-mpi/velocity.png
+++ b/BasicSolver/2D-mpi/velocity.png
--- a/BasicSolver/2D-seq-pt/src/affinity.c
+++ b/BasicSolver/2D-seq-pt/src/affinity.c
@@ -1,61 +0,0 @@
-/*
- * Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
- * All rights reserved.
- * Use of this source code is governed by a MIT-style
- * license that can be found in the LICENSE file.
- */
-#ifdef __linux__
-#ifdef _OPENMP
-#include <pthread.h>
-#include <sched.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <sys/syscall.h>
-#include <sys/types.h>
-#include <unistd.h>
-
-#define MAX_NUM_THREADS 128
-#define gettid()        syscall(SYS_gettid)
-
-static int getProcessorID(cpu_set_t* cpu_set)
-{
-    int processorId;
-
-    for (processorId = 0; processorId < MAX_NUM_THREADS; processorId++) {
-        if (CPU_ISSET(processorId, cpu_set)) {
-            break;
-        }
-    }
-    return processorId;
-}
-
-int affinity_getProcessorId()
-{
-    cpu_set_t cpu_set;
-    CPU_ZERO(&cpu_set);
-    sched_getaffinity(gettid(), sizeof(cpu_set_t), &cpu_set);
-
-    return getProcessorID(&cpu_set);
-}
-
-void affinity_pinThread(int processorId)
-{
-    cpu_set_t cpuset;
-    pthread_t thread;
-
-    thread = pthread_self();
-    CPU_ZERO(&cpuset);
-    CPU_SET(processorId, &cpuset);
-    pthread_setaffinity_np(thread, sizeof(cpu_set_t), &cpuset);
-}
-
-void affinity_pinProcess(int processorId)
-{
-    cpu_set_t cpuset;
-
-    CPU_ZERO(&cpuset);
-    CPU_SET(processorId, &cpuset);
-    sched_setaffinity(0, sizeof(cpu_set_t), &cpuset);
-}
-#endif /*_OPENMP*/
-#endif /*__linux__*/
--- a/BasicSolver/2D-seq-pt/src/affinity.h
+++ b/BasicSolver/2D-seq-pt/src/affinity.h
@@ -1,14 +0,0 @@
-/*
- * Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
- * All rights reserved.
- * Use of this source code is governed by a MIT-style
- * license that can be found in the LICENSE file.
- */
-#ifndef AFFINITY_H
-#define AFFINITY_H
-
-extern int affinity_getProcessorId();
-extern void affinity_pinProcess(int);
-extern void affinity_pinThread(int);
-
-#endif /*AFFINITY_H*/
--- a/BasicSolver/2D-seq-pt/src/likwid-marker.h
+++ b/BasicSolver/2D-seq-pt/src/likwid-marker.h
@@ -1,54 +0,0 @@
-/*
- * =======================================================================================
- *
- *      Author:   Jan Eitzinger (je), jan.eitzinger@fau.de
- *      Copyright (c) 2020 RRZE, University Erlangen-Nuremberg
- *
- *      Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- *      furnished to do so, subject to the following conditions:
- *
- *      The above copyright notice and this permission notice shall be included
- * in all copies or substantial portions of the Software.
- *
- *      THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- *      FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- *      LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
- * =======================================================================================
- */
-#ifndef LIKWID_MARKERS_H
-#define LIKWID_MARKERS_H
-
-#ifdef LIKWID_PERFMON
-#include <likwid.h>
-#define LIKWID_MARKER_INIT                likwid_markerInit()
-#define LIKWID_MARKER_THREADINIT          likwid_markerThreadInit()
-#define LIKWID_MARKER_SWITCH              likwid_markerNextGroup()
-#define LIKWID_MARKER_REGISTER(regionTag) likwid_markerRegisterRegion(regionTag)
-#define LIKWID_MARKER_START(regionTag)    likwid_markerStartRegion(regionTag)
-#define LIKWID_MARKER_STOP(regionTag)     likwid_markerStopRegion(regionTag)
-#define LIKWID_MARKER_CLOSE               likwid_markerClose()
-#define LIKWID_MARKER_RESET(regionTag)    likwid_markerResetRegion(regionTag)
-#define LIKWID_MARKER_GET(regionTag, nevents, events, time, count)                       \
-    likwid_markerGetRegion(regionTag, nevents, events, time, count)
-#else /* LIKWID_PERFMON */
-#define LIKWID_MARKER_INIT
-#define LIKWID_MARKER_THREADINIT
-#define LIKWID_MARKER_SWITCH
-#define LIKWID_MARKER_REGISTER(regionTag)
-#define LIKWID_MARKER_START(regionTag)
-#define LIKWID_MARKER_STOP(regionTag)
-#define LIKWID_MARKER_CLOSE
-#define LIKWID_MARKER_GET(regionTag, nevents, events, time, count)
-#define LIKWID_MARKER_RESET(regionTag)
-#endif /* LIKWID_PERFMON */
-
-#endif /*LIKWID_MARKERS_H*/
--- a/BasicSolver/2D-seq-pt/src/main.c
+++ b/BasicSolver/2D-seq-pt/src/main.c
@@ -1,71 +0,0 @@
-/*
- * Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
- * All rights reserved.
- * Use of this source code is governed by a MIT-style
- * license that can be found in the LICENSE file.
- */
-#include <float.h>
-#include <limits.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <unistd.h>
-
-#include "parameter.h"
-#include "progress.h"
-#include "solver.h"
-#include "timing.h"
-#include "trace.h"
-
-int main(int argc, char** argv)
-{
-    double timeStart, timeEnd;
-    Parameter p;
-    Solver s;
-    Tracing t;
-    initParameter(&p);
-
-    if (argc != 2) {
-        printf("Usage: %s <configFile>\n", argv[0]);
-        exit(EXIT_SUCCESS);
-    }
-
-    readParameter(&p, argv[1]);
-    printParameter(&p);
-    initSolver(&s, &p);
-    initTrace(&t, &p);
-#ifndef VERBOSE
-    initProgress(s.te);
-#endif
-
-    double tau  = s.tau;
-    double te   = s.te;
-    double time = 0.0;
-    int nt      = 0;
-
-    timeStart = getTimeStamp();
-    while (time <= te) {
-        if (tau > 0.0) computeTimestep(&s);
-        setBoundaryConditions(&s);
-        setSpecialBoundaryCondition(&s);
-        computeFG(&s);
-        computeRHS(&s);
-        if (nt % 100 == 0) normalizePressure(&s);
-        solve(&s);
-        adaptUV(&s);
-        time += s.dt;
-        nt++;
-
-        trace(&t, s.u, s.v, time);
-
-#ifdef VERBOSE
-        printf("TIME %f , TIMESTEP %f\n", time, s.dt);
-#else
-        printProgress(time);
-#endif
-    }
-    timeEnd = getTimeStamp();
-    stopProgress();
-    printf("Solution took %.2fs\n", timeEnd - timeStart);
-    writeResult(&s);
-    return EXIT_SUCCESS;
-}
--- a/BasicSolver/2D-seq-pt/src/solver.h
+++ b/BasicSolver/2D-seq-pt/src/solver.h
@@ -1,47 +0,0 @@
-/*
- * Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
- * All rights reserved. This file is part of nusif-solver.
- * Use of this source code is governed by a MIT style
- * license that can be found in the LICENSE file.
- */
-#ifndef __SOLVER_H_
-#define __SOLVER_H_
-#include "parameter.h"
-
-#define U(i, j) u[(j) * (imax + 2) + (i)]
-#define V(i, j) v[(j) * (imax + 2) + (i)]
-
-enum BC { NOSLIP = 1, SLIP, OUTFLOW, PERIODIC };
-
-typedef struct {
-    /* geometry and grid information */
-    double dx, dy;
-    int imax, jmax;
-    double xlength, ylength;
-    /* arrays */
-    double *p, *rhs;
-    double *f, *g;
-    double *u, *v;
-    /* parameters */
-    double eps, omega;
-    double re, tau, gamma;
-    double gx, gy;
-    /* time stepping */
-    int itermax;
-    double dt, te;
-    double dtBound;
-    char* problem;
-    int bcLeft, bcRight, bcBottom, bcTop;
-} Solver;
-
-void initSolver(Solver*, Parameter*);
-void computeRHS(Solver*);
-void solve(Solver*);
-void normalizePressure(Solver*);
-void computeTimestep(Solver*);
-void setBoundaryConditions(Solver*);
-void setSpecialBoundaryCondition(Solver*);
-void computeFG(Solver*);
-void adaptUV(Solver*);
-void writeResult(Solver*);
-#endif
--- a/BasicSolver/2D-seq-pt/src/trace.c
+++ b/BasicSolver/2D-seq-pt/src/trace.c
@@ -1,208 +0,0 @@
-/*
- * Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
- * All rights reserved. This file is part of nusif-solver.
- * Use of this source code is governed by a MIT style
- * license that can be found in the LICENSE file.
- */
-#include <stddef.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-#include "trace.h"
-#define U(i, j) u[(j) * (imax + 2) + (i)]
-#define V(i, j) v[(j) * (imax + 2) + (i)]
-
-static int ts = 0;
-
-static void printState(Tracing* t)
-{
-    printf("Cursor: %d Total particles: %d\n", t->cursor, t->totalParticles);
-}
-
-static void advanceParticles(
-    Tracing* t, double delt, double* restrict u, double* restrict v)
-{
-    double delx = t->grid.dx;
-    double dely = t->grid.dy;
-
-    double* m = t->memorypool;
-    int* p    = t->particles;
-    int imax  = t->grid.imax;
-    int jmax  = t->grid.jmax;
-
-    for (int i = 0; i < t->totalParticles; i++) {
-        int particleId = p[i];
-
-        double x = m[particleId * NCOORD + X];
-        double y = m[particleId * NCOORD + Y];
-        // printf("P%d - X %f Y %f\n", i, x, y);
-
-        // Interpolate U
-        int iCoord = (int)(x / delx) + 1;
-        int jCoord = (int)((y + 0.5 * dely) / dely) + 1;
-
-        double x1 = (double)(iCoord - 1) * delx;
-        double y1 = ((double)(jCoord - 1) - 0.5) * dely;
-        double x2 = (double)iCoord * delx;
-        double y2 = ((double)jCoord - 0.5) * dely;
-
-        // printf("U - iCoord %d jCoord %d\n", iCoord, jCoord);
-
-        double un = (1.0 / (delx * dely)) *
-                    ((x2 - x) * (y2 - y) * U(iCoord - 1, jCoord - 1) +
-                        (x - x1) * (y2 - y) * U(iCoord, jCoord - 1) +
-                        (x2 - x) * (y - y1) * U(iCoord - 1, jCoord) +
-                        (x - x1) * (y - y1) * U(iCoord, jCoord));
-
-        double xn                  = x + delt * un;
-        m[particleId * NCOORD + X] = xn;
-
-        // Interpolate V
-        iCoord = (int)((x + 0.5 * delx) / delx) + 1;
-        jCoord = (int)(y / dely) + 1;
-
-        x1 = ((double)(iCoord - 1) - 0.5) * delx;
-        y1 = (double)(jCoord - 1) * dely;
-        x2 = ((double)iCoord - 0.5) * delx;
-        y2 = (double)jCoord * dely;
-
-        // printf("V - iCoord %d jCoord %d\n", iCoord, jCoord);
-
-        double vn = (1.0 / (delx * dely)) *
-                    ((x2 - x) * (y2 - y) * V(iCoord - 1, jCoord - 1) +
-                        (x - x1) * (y2 - y) * V(iCoord, jCoord - 1) +
-                        (x2 - x) * (y - y1) * V(iCoord - 1, jCoord) +
-                        (x - x1) * (y - y1) * V(iCoord, jCoord));
-
-        double yn                  = y + delt * vn;
-        m[particleId * NCOORD + Y] = yn;
-        printf("P%i VEL %f %f dt %f OP %f %f NP %f %f\n", i, un, vn, delt, x, y, xn, yn);
-    }
-
-    double xlength = t->grid.xlength;
-    double ylength = t->grid.ylength;
-    int cntNew     = 0;
-    int tmp[t->totalParticles];
-
-    // Check for particles to remove
-    for (int i = 0; i < t->totalParticles; i++) {
-        int particleId = p[i];
-
-        double x = m[particleId * NCOORD + X];
-        double y = m[particleId * NCOORD + Y];
-
-        if (!((x < 0.0) || (x > xlength) || (y < 0.0) || (y > ylength))) {
-            tmp[cntNew++] = i;
-        }
-    }
-
-    t->totalParticles = cntNew;
-    memcpy(t->particles, tmp, cntNew * sizeof(int));
-}
-
-static void injectParticles(Tracing* t)
-{
-    double* line = t->line;
-    double* m    = t->memorypool;
-
-    for (int i = 0; i < t->numParticles; i++) {
-        printf("Inject %d as %d mem %d\n", i, t->totalParticles, t->cursor);
-        t->particles[t->totalParticles] = t->cursor;
-        m[(t->cursor) * NCOORD + X]     = line[i * NCOORD + X];
-        m[(t->cursor) * NCOORD + Y]     = line[i * NCOORD + Y];
-        t->cursor++;
-        t->totalParticles++;
-    }
-}
-
-static void writeParticles(Tracing* t)
-{
-    FILE* fp;
-    double* m = t->memorypool;
-    int* p    = t->particles;
-
-    char filename[50];
-    snprintf(filename, 50, "particles_%d.dat", ts++);
-    fp = fopen(filename, "w");
-
-    if (fp == NULL) {
-        printf("Error!\n");
-        exit(EXIT_FAILURE);
-    }
-
-    for (int i = 0; i < t->totalParticles; i++) {
-        int particleId = p[i];
-
-        double x = m[particleId * NCOORD + X];
-        double y = m[particleId * NCOORD + Y];
-        fprintf(fp, "%f %f\n", x, y);
-    }
-    fclose(fp);
-}
-
-void trace(Tracing* t, double* restrict u, double* restrict v, double time)
-{
-    if (time >= t->traceStart) {
-        if ((time - t->lastUpdate[INJECT]) > t->traceInject) {
-            printf("Inject at %f\n", time);
-            printState(t);
-            injectParticles(t);
-            t->lastUpdate[INJECT] = time;
-        }
-
-        if ((time - t->lastUpdate[WRITE]) > t->traceWrite) {
-            printf("Write at %f\n", time);
-            writeParticles(t);
-            t->lastUpdate[WRITE] = time;
-        }
-
-        advanceParticles(t, time - t->lastUpdate[ADVANCE], u, v);
-        t->lastUpdate[ADVANCE] = time;
-    }
-}
-
-void initTrace(Tracing* t, Parameter* p)
-{
-    size_t numParticles   = p->nparticles;
-    size_t totalParticles = (size_t)(p->te - p->traceStart) / (size_t)p->traceInject;
-    totalParticles += 2;
-    totalParticles *= numParticles;
-
-    double x1 = p->lineX1;
-    double y1 = p->lineY1;
-    double x2 = p->lineX2;
-    double y2 = p->lineY2;
-
-    for (int i = 0; i < NUMTIMERS; i++) {
-        t->lastUpdate[i] = p->traceStart;
-    }
-    t->grid.imax      = p->imax;
-    t->grid.jmax      = p->jmax;
-    t->grid.xlength   = p->xlength;
-    t->grid.ylength   = p->ylength;
-    t->grid.dx        = p->xlength / p->imax;
-    t->grid.dy        = p->ylength / p->jmax;
-    t->numParticles   = numParticles;
-    t->totalParticles = 0;
-    t->cursor         = 0;
-    t->traceStart     = p->traceStart;
-    t->traceWrite     = p->traceWrite;
-    t->traceInject    = p->traceInject;
-    t->particles      = (int*)malloc(totalParticles * sizeof(int));
-    t->memorypool     = (double*)malloc(totalParticles * NCOORD * sizeof(double));
-    t->line           = (double*)malloc(numParticles * NCOORD * sizeof(double));
-    double* line      = t->line;
-
-    for (int i = 0; i < numParticles; i++) {
-        double spacing = (double)i / (double)(numParticles - 1);
-        double x       = spacing * x1 + (1.0 - spacing) * x2;
-        double y       = spacing * y1 + (1.0 - spacing) * y2;
-
-        printf("S: %f x: %f y: %f\n", spacing, x, y);
-        line[i * NCOORD + X] = x;
-        line[i * NCOORD + Y] = y;
-    }
-}
-
-void freeTrace(Tracing* t) { free(t->line); }
--- a/BasicSolver/2D-seq-pt/src/trace.h
+++ b/BasicSolver/2D-seq-pt/src/trace.h
@@ -1,32 +0,0 @@
-/*
- * Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
- * All rights reserved. This file is part of nusif-solver.
- * Use of this source code is governed by a MIT style
- * license that can be found in the LICENSE file.
- */
-#ifndef __TRACE_H_
-#define __TRACE_H_
-#include "grid.h"
-#include "parameter.h"
-
-typedef enum COORD { X = 0, Y, NCOORD } COORD;
-typedef enum { ADVANCE = 0, INJECT, WRITE, NUMTIMERS } TIMER;
-
-typedef struct Tracing {
-    double traceStart;
-    double traceWrite;
-    double traceInject;
-    double dt;
-    double lastUpdate[NUMTIMERS];
-    double* memorypool;
-    double* line;
-    int cursor;
-    int* particles;
-    int numParticles;
-    int totalParticles;
-    Grid grid;
-} Tracing;
-
-extern void initTrace(Tracing* t, Parameter* p);
-extern void trace(Tracing* t, double* u, double* v, double time);
-#endif
--- a/BasicSolver/2D-seq-pt/src/util.h
+++ b/BasicSolver/2D-seq-pt/src/util.h
@@ -1,23 +0,0 @@
-/*
- * Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
- * All rights reserved.
- * Use of this source code is governed by a MIT-style
- * license that can be found in the LICENSE file.
- */
-#ifndef __UTIL_H_
-#define __UTIL_H_
-#define HLINE                                                                            \
-    "------------------------------------------------------------------------"           \
-    "----\n"
-
-#ifndef MIN
-#define MIN(x, y) ((x) < (y) ? (x) : (y))
-#endif
-#ifndef MAX
-#define MAX(x, y) ((x) > (y) ? (x) : (y))
-#endif
-#ifndef ABS
-#define ABS(a) ((a) >= 0 ? (a) : -(a))
-#endif
-
-#endif // __UTIL_H_
--- a/BasicSolver/2D-seq-pt/surface.plot
+++ b/BasicSolver/2D-seq-pt/surface.plot
@@ -1,7 +0,0 @@
-set terminal png size 1024,768 enhanced font ,12
-set output 'p.png'
-set datafile separator whitespace
-
-set grid
-set hidden3d
-splot 'pressure.dat' using 1:2:3 with lines
--- a/BasicSolver/2D-seq/Makefile
+++ b/BasicSolver/2D-seq/Makefile
@@ -1,5 +1,5 @@
 #=======================================================================================
-# Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
+# Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
 # All rights reserved.
 # Use of this source code is governed by a MIT-style
 # license that can be found in the LICENSE file.
@@ -18,9 +18,10 @@ include $(MAKE_DIR)/include_$(TAG).mk
 INCLUDES  += -I$(SRC_DIR) -I$(BUILD_DIR)

 VPATH     = $(SRC_DIR)
-SRC       = $(wildcard $(SRC_DIR)/*.c)
+SRC       = $(filter-out $(wildcard $(SRC_DIR)/*-*.c),$(wildcard $(SRC_DIR)/*.c))
 ASM       = $(patsubst $(SRC_DIR)/%.c, $(BUILD_DIR)/%.s, $(SRC))
 OBJ       = $(patsubst $(SRC_DIR)/%.c, $(BUILD_DIR)/%.o, $(SRC))
+OBJ      += $(BUILD_DIR)/solver-$(SOLVER).o
 SOURCES   = $(SRC) $(wildcard $(SRC_DIR)/*.h)
 CPPFLAGS := $(CPPFLAGS) $(DEFINES) $(OPTIONS) $(INCLUDES)

@@ -37,9 +38,22 @@ $(BUILD_DIR)/%.s:  %.c
 	$(info ===>  GENERATE ASM  $@)
 	$(CC) -S $(CPPFLAGS) $(CFLAGS) $< -o $@

-.PHONY: clean distclean tags info asm format
+.PHONY: clean distclean vis vis_clean tags info asm format

-clean:
+vis:
+	$(info ===>  GENERATE VISUALIZATION)
+	@gnuplot -e "filename='pressure.dat'" ./surface.plot
+	@gnuplot -e "filename='velocity.dat'" ./vector.plot
+	@gnuplot -e "filename='residual.dat'" ./residual.plot
+
+vis_clean:
+	$(info ===>  CLEAN VISUALIZATION)
+	@rm -f *.dat
+	@rm -f *.png
+	@rm -f ./vis_files/*.dat
+	@rm -f ./vis_files/*.gif
+
+clean: vis_clean
 	$(info ===>  CLEAN)
 	@rm -rf $(BUILD_DIR)
 	@rm -f tags
@@ -47,6 +61,8 @@ clean:
 distclean: clean
 	$(info ===>  DIST CLEAN)
 	@rm -f $(TARGET)
+	@rm -f *.dat
+	@rm -f *.png

 info:
 	$(info $(CFLAGS))
--- a/BasicSolver/2D-seq/canal.par
+++ b/BasicSolver/2D-seq/canal.par
@@ -36,6 +36,13 @@ te      100.0   # final time
 dt      0.02    # time stepsize
 tau     0.5     # safety factor for time stepsize control (<0 constant delt)

+# Multigrid data:
+# ---------
+
+levels        3         # Multigrid levels
+presmooth     5         # Pre-smoothing iterations
+postsmooth    5         # Post-smoothing iterations
+
 # Pressure Iteration Data:
 # -----------------------

--- a/BasicSolver/2D-seq/config.mk
+++ b/BasicSolver/2D-seq/config.mk
@@ -1,12 +1,12 @@
 # Supported: GCC, CLANG, ICC
-TAG ?= CLANG
+TAG ?= ICC
 ENABLE_OPENMP ?= false
+# Supported: sor, rb, mg
+SOLVER ?= mg
+# Run in debug settings
+DEBUG ?= false

 #Feature options
 OPTIONS +=  -DARRAY_ALIGNMENT=64
 OPTIONS +=  -DVERBOSE
 #OPTIONS +=  -DDEBUG
-#OPTIONS +=  -DBOUNDCHECK
-#OPTIONS +=  -DVERBOSE_AFFINITY
-#OPTIONS +=  -DVERBOSE_DATASIZE
-#OPTIONS +=  -DVERBOSE_TIMER
--- a/BasicSolver/2D-seq/dcavity.par
+++ b/BasicSolver/2D-seq/dcavity.par
@@ -15,7 +15,7 @@ bcRight    1			#
 gx    0.0			# Body forces (e.g. gravity)
 gy    0.0			#

-re    10.0		    # Reynolds number
+re    100.0		    # Reynolds number

 u_init    0.0		# initial value for velocity in x-direction
 v_init    0.0		# initial value for velocity in y-direction
@@ -26,8 +26,8 @@ p_init    0.0		# initial value for pressure

 xlength    1.0		# domain size in x-direction
 ylength    1.0		# domain size in y-direction
-imax       40		# number of interior cells in x-direction
-jmax       40		# number of interior cells in y-direction
+imax       128		# number of interior cells in x-direction
+jmax       128		# number of interior cells in y-direction

 # Time Data:
 # ---------
@@ -36,11 +36,19 @@ te      10.0		# final time
 dt      0.02	    # time stepsize
 tau     0.5	    	# safety factor for time stepsize control (<0 constant delt)

-# Pressure Iteration Data:
+# Multigrid data:
+# ---------
+
+levels        2         # Multigrid levels
+presmooth     20         # Pre-smoothing iterations
+postsmooth    5         # Post-smoothing iterations
+
+# Solver Data:
 # -----------------------

 itermax  1000		# maximal number of pressure iteration in one time step
 eps      0.001		# stopping tolerance for pressure iteration
+rho      0.5
 omg      1.7		# relaxation parameter for SOR iteration
 gamma    0.9		# upwind differencing factor gamma
 #===============================================================================
--- a/BasicSolver/2D-seq/include_CLANG.mk
+++ b/BasicSolver/2D-seq/include_CLANG.mk
@@ -2,16 +2,18 @@ CC   = clang
 GCC  = cc
 LINKER = $(CC)

-ifeq ($(ENABLE_OPENMP),true)
+ifeq ($(strip $(ENABLE_OPENMP)),true)
 OPENMP   = -fopenmp
 #OPENMP   = -Xpreprocessor -fopenmp #required on Macos with homebrew libomp
 LIBS     = # -lomp
 endif
+ifeq ($(strip $(DEBUG)),true)
+CFLAGS   = -O0 -g -std=c17
+else
+CFLAGS   = -O3 -std=c17 $(OPENMP)
+endif

 VERSION  = --version
-# CFLAGS   = -O3 -std=c17 $(OPENMP)
-CFLAGS   = -Ofast -std=c17
-#CFLAGS   = -Ofast -fnt-store=aggressive  -std=c99 $(OPENMP) #AMD CLANG
 LFLAGS   = $(OPENMP) -lm
-DEFINES  = -D_GNU_SOURCE# -DDEBUG
+DEFINES  = -D_GNU_SOURCE
 INCLUDES =
--- a/BasicSolver/2D-seq/residual.plot
+++ b/BasicSolver/2D-seq/residual.plot
@@ -0,0 +1,9 @@
+set terminal png size 1800,768 enhanced font ,12
+set output 'residual.png'
+set datafile separator whitespace
+set xlabel "Timestep"
+set ylabel "Residual"
+
+set logscale y 2
+
+plot 'residual.dat' using 1:2 title "Residual"
--- a/BasicSolver/2D-seq/src/affinity.c
+++ b/BasicSolver/2D-seq/src/affinity.c
@@ -1,61 +0,0 @@
-/*
- * Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
- * All rights reserved.
- * Use of this source code is governed by a MIT-style
- * license that can be found in the LICENSE file.
- */
-#ifdef __linux__
-#ifdef _OPENMP
-#include <pthread.h>
-#include <sched.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <sys/syscall.h>
-#include <sys/types.h>
-#include <unistd.h>
-
-#define MAX_NUM_THREADS 128
-#define gettid()        syscall(SYS_gettid)
-
-static int getProcessorID(cpu_set_t* cpu_set)
-{
-    int processorId;
-
-    for (processorId = 0; processorId < MAX_NUM_THREADS; processorId++) {
-        if (CPU_ISSET(processorId, cpu_set)) {
-            break;
-        }
-    }
-    return processorId;
-}
-
-int affinity_getProcessorId()
-{
-    cpu_set_t cpu_set;
-    CPU_ZERO(&cpu_set);
-    sched_getaffinity(gettid(), sizeof(cpu_set_t), &cpu_set);
-
-    return getProcessorID(&cpu_set);
-}
-
-void affinity_pinThread(int processorId)
-{
-    cpu_set_t cpuset;
-    pthread_t thread;
-
-    thread = pthread_self();
-    CPU_ZERO(&cpuset);
-    CPU_SET(processorId, &cpuset);
-    pthread_setaffinity_np(thread, sizeof(cpu_set_t), &cpuset);
-}
-
-void affinity_pinProcess(int processorId)
-{
-    cpu_set_t cpuset;
-
-    CPU_ZERO(&cpuset);
-    CPU_SET(processorId, &cpuset);
-    sched_setaffinity(0, sizeof(cpu_set_t), &cpuset);
-}
-#endif /*_OPENMP*/
-#endif /*__linux__*/
--- a/BasicSolver/2D-seq/src/affinity.h
+++ b/BasicSolver/2D-seq/src/affinity.h
@@ -1,14 +0,0 @@
-/*
- * Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
- * All rights reserved.
- * Use of this source code is governed by a MIT-style
- * license that can be found in the LICENSE file.
- */
-#ifndef AFFINITY_H
-#define AFFINITY_H
-
-extern int affinity_getProcessorId();
-extern void affinity_pinProcess(int);
-extern void affinity_pinThread(int);
-
-#endif /*AFFINITY_H*/
--- a/BasicSolver/2D-seq/src/allocate.c
+++ b/BasicSolver/2D-seq/src/allocate.c
@@ -1,14 +1,17 @@
 /*
- * Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
+ * Copyright (C)  NHR@FAU, University Erlangen-Nuremberg.
 * All rights reserved.
 * Use of this source code is governed by a MIT-style
 * license that can be found in the LICENSE file.
 */
 #include <errno.h>
+#include <stddef.h>
 #include <stdio.h>
 #include <stdlib.h>

-void* allocate(int alignment, size_t bytesize)
+#include "allocate.h"
+
+void* allocate(size_t alignment, size_t bytesize)
 {
    int errorCode;
    void* ptr;
--- a/BasicSolver/2D-seq/src/allocate.h
+++ b/BasicSolver/2D-seq/src/allocate.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
+ * Copyright (C)  NHR@FAU, University Erlangen-Nuremberg.
 * All rights reserved.
 * Use of this source code is governed by a MIT-style
 * license that can be found in the LICENSE file.
@@ -8,6 +8,6 @@
 #define __ALLOCATE_H_
 #include <stdlib.h>

-extern void* allocate(int alignment, size_t bytesize);
+extern void* allocate(size_t alignment, size_t bytesize);

 #endif
--- a/BasicSolver/2D-seq/src/discretization.c
+++ b/BasicSolver/2D-seq/src/discretization.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
+ * Copyright (C)  NHR@FAU, University Erlangen-Nuremberg.
 * All rights reserved. This file is part of nusif-solver.
 * Use of this source code is governed by a MIT style
 * license that can be found in the LICENSE file.
@@ -11,22 +11,17 @@
 #include <string.h>

 #include "allocate.h"
+#include "discretization.h"
 #include "parameter.h"
-#include "solver.h"
 #include "util.h"

-#define P(i, j)   p[(j) * (imax + 2) + (i)]
-#define F(i, j)   f[(j) * (imax + 2) + (i)]
-#define G(i, j)   g[(j) * (imax + 2) + (i)]
-#define RHS(i, j) rhs[(j) * (imax + 2) + (i)]
-
-static void print(Solver* solver, double* grid)
+static void print(Discretization* d, double* grid)
 {
-    int imax = solver->imax;
+    int imax = d->grid.imax;

-    for (int j = 0; j < solver->jmax + 2; j++) {
+    for (int j = 0; j < d->grid.jmax + 2; j++) {
        printf("%02d: ", j);
-        for (int i = 0; i < solver->imax + 2; i++) {
+        for (int i = 0; i < d->grid.imax + 2; i++) {
            printf("%12.8f  ", grid[j * (imax + 2) + i]);
        }
        printf("\n");
@@ -34,92 +29,86 @@ static void print(Solver* solver, double* grid)
    fflush(stdout);
 }

-static void printConfig(Solver* solver)
+static void printConfig(Discretization* d)
 {
-    printf("Parameters for #%s#\n", solver->problem);
+    printf("Parameters for #%s#\n", d->problem);
    printf("Boundary conditions Left:%d Right:%d Bottom:%d Top:%d\n",
-        solver->bcLeft,
-        solver->bcRight,
-        solver->bcBottom,
-        solver->bcTop);
-    printf("\tReynolds number: %.2f\n", solver->re);
-    printf("\tGx Gy: %.2f %.2f\n", solver->gx, solver->gy);
+        d->bcLeft,
+        d->bcRight,
+        d->bcBottom,
+        d->bcTop);
+    printf("\tReynolds number: %.2f\n", d->re);
+    printf("\tGx Gy: %.2f %.2f\n", d->gx, d->gy);
    printf("Geometry data:\n");
-    printf("\tDomain box size (x, y): %.2f, %.2f\n", solver->xlength, solver->ylength);
-    printf("\tCells (x, y): %d, %d\n", solver->imax, solver->jmax);
+    printf("\tDomain box size (x, y): %.2f, %.2f\n", d->grid.xlength, d->grid.ylength);
+    printf("\tCells (x, y): %d, %d\n", d->grid.imax, d->grid.jmax);
    printf("Timestep parameters:\n");
-    printf("\tDefault stepsize: %.2f, Final time %.2f\n", solver->dt, solver->te);
-    printf("\tdt bound: %.6f\n", solver->dtBound);
-    printf("\tTau factor: %.2f\n", solver->tau);
-    printf("Iterative solver parameters:\n");
-    printf("\tMax iterations: %d\n", solver->itermax);
-    printf("\tepsilon (stopping tolerance) : %f\n", solver->eps);
-    printf("\tgamma factor: %f\n", solver->gamma);
-    printf("\tomega (SOR relaxation): %f\n", solver->omega);
+    printf("\tDefault stepsize: %.2f, Final time %.2f\n", d->dt, d->te);
+    printf("\tdt bound: %.6f\n", d->dtBound);
+    printf("\tTau factor: %.2f\n", d->tau);
+    printf("Discretization parameters:\n"); /* heading had become "Iterative d parameters" in the rename */
+    printf("\tgamma factor: %f\n", d->gamma);
 }

-void initSolver(Solver* solver, Parameter* params)
+void initDiscretization(Discretization* d, Parameter* p)
 {
-    solver->problem  = params->name;
-    solver->bcLeft   = params->bcLeft;
-    solver->bcRight  = params->bcRight;
-    solver->bcBottom = params->bcBottom;
-    solver->bcTop    = params->bcTop;
-    solver->imax     = params->imax;
-    solver->jmax     = params->jmax;
-    solver->xlength  = params->xlength;
-    solver->ylength  = params->ylength;
-    solver->dx       = params->xlength / params->imax;
-    solver->dy       = params->ylength / params->jmax;
-    solver->eps      = params->eps;
-    solver->omega    = params->omg;
-    solver->itermax  = params->itermax;
-    solver->re       = params->re;
-    solver->gx       = params->gx;
-    solver->gy       = params->gy;
-    solver->dt       = params->dt;
-    solver->te       = params->te;
-    solver->tau      = params->tau;
-    solver->gamma    = params->gamma;
+    d->problem      = p->name;
+    d->bcLeft       = p->bcLeft;
+    d->bcRight      = p->bcRight;
+    d->bcBottom     = p->bcBottom;
+    d->bcTop        = p->bcTop;
+    d->grid.imax    = p->imax;
+    d->grid.jmax    = p->jmax;
+    d->grid.xlength = p->xlength;
+    d->grid.ylength = p->ylength;
+    d->grid.dx      = p->xlength / p->imax;
+    d->grid.dy      = p->ylength / p->jmax;
+    d->re           = p->re;
+    d->gx           = p->gx;
+    d->gy           = p->gy;
+    d->dt           = p->dt;
+    d->te           = p->te;
+    d->tau          = p->tau;
+    d->gamma        = p->gamma;

-    int imax    = solver->imax;
-    int jmax    = solver->jmax;
+    int imax    = d->grid.imax;
+    int jmax    = d->grid.jmax;
    size_t size = (imax + 2) * (jmax + 2) * sizeof(double);
-    solver->u   = allocate(64, size);
-    solver->v   = allocate(64, size);
-    solver->p   = allocate(64, size);
-    solver->rhs = allocate(64, size);
-    solver->f   = allocate(64, size);
-    solver->g   = allocate(64, size);
+    d->u        = allocate(64, size);
+    d->v        = allocate(64, size);
+    d->p        = allocate(64, size);
+    d->rhs      = allocate(64, size);
+    d->f        = allocate(64, size);
+    d->g        = allocate(64, size);

    for (int i = 0; i < (imax + 2) * (jmax + 2); i++) {
-        solver->u[i]   = params->u_init;
-        solver->v[i]   = params->v_init;
-        solver->p[i]   = params->p_init;
-        solver->rhs[i] = 0.0;
-        solver->f[i]   = 0.0;
-        solver->g[i]   = 0.0;
+        d->u[i]   = p->u_init;
+        d->v[i]   = p->v_init;
+        d->p[i]   = p->p_init;
+        d->rhs[i] = 0.0;
+        d->f[i]   = 0.0;
+        d->g[i]   = 0.0;
    }

-    double dx        = solver->dx;
-    double dy        = solver->dy;
+    double dx        = d->grid.dx;
+    double dy        = d->grid.dy;
    double invSqrSum = 1.0 / (dx * dx) + 1.0 / (dy * dy);
-    solver->dtBound  = 0.5 * solver->re * 1.0 / invSqrSum;
+    d->dtBound       = 0.5 * d->re * 1.0 / invSqrSum;
 #ifdef VERBOSE
-    printConfig(solver);
+    printConfig(d);
 #endif
 }

-void computeRHS(Solver* solver)
+void computeRHS(Discretization* d)
 {
-    int imax    = solver->imax;
-    int jmax    = solver->jmax;
-    double idx  = 1.0 / solver->dx;
-    double idy  = 1.0 / solver->dy;
-    double idt  = 1.0 / solver->dt;
-    double* rhs = solver->rhs;
-    double* f   = solver->f;
-    double* g   = solver->g;
+    int imax    = d->grid.imax;
+    int jmax    = d->grid.jmax;
+    double idx  = 1.0 / d->grid.dx;
+    double idy  = 1.0 / d->grid.dy;
+    double idt  = 1.0 / d->dt;
+    double* rhs = d->rhs;
+    double* f   = d->f;
+    double* g   = d->g;

    for (int j = 1; j < jmax + 1; j++) {
        for (int i = 1; i < imax + 1; i++) {
@@ -129,63 +118,9 @@ void computeRHS(Solver* solver)
    }
 }

-void solve(Solver* solver)
+static double maxElement(Discretization* d, double* m)
 {
-    int imax      = solver->imax;
-    int jmax      = solver->jmax;
-    double eps    = solver->eps;
-    int itermax   = solver->itermax;
-    double dx2    = solver->dx * solver->dx;
-    double dy2    = solver->dy * solver->dy;
-    double idx2   = 1.0 / dx2;
-    double idy2   = 1.0 / dy2;
-    double factor = solver->omega * 0.5 * (dx2 * dy2) / (dx2 + dy2);
-    double* p     = solver->p;
-    double* rhs   = solver->rhs;
-    double epssq  = eps * eps;
-    int it        = 0;
-    double res    = 1.0;
-
-    while ((res >= epssq) && (it < itermax)) {
-        res = 0.0;
-
-        for (int j = 1; j < jmax + 1; j++) {
-            for (int i = 1; i < imax + 1; i++) {
-
-                double r = RHS(i, j) -
-                           ((P(i + 1, j) - 2.0 * P(i, j) + P(i - 1, j)) * idx2 +
-                               (P(i, j + 1) - 2.0 * P(i, j) + P(i, j - 1)) * idy2);
-
-                P(i, j) -= (factor * r);
-                res += (r * r);
-            }
-        }
-
-        for (int i = 1; i < imax + 1; i++) {
-            P(i, 0)        = P(i, 1);
-            P(i, jmax + 1) = P(i, jmax);
-        }
-
-        for (int j = 1; j < jmax + 1; j++) {
-            P(0, j)        = P(1, j);
-            P(imax + 1, j) = P(imax, j);
-        }
-
-        res = res / (double)(imax * jmax);
-#ifdef DEBUG
-        printf("%d Residuum: %e\n", it, res);
-#endif
-        it++;
-    }
-
-#ifdef VERBOSE
-    printf("Solver took %d iterations to reach %f\n", it, sqrt(res));
-#endif
-}
-
-static double maxElement(Solver* solver, double* m)
-{
-    int size      = (solver->imax + 2) * (solver->jmax + 2);
+    int size      = (d->grid.imax + 2) * (d->grid.jmax + 2);
    double maxval = DBL_MIN;

    for (int i = 0; i < size; i++) {
@@ -195,10 +130,10 @@ static double maxElement(Solver* solver, double* m)
    return maxval;
 }

-void normalizePressure(Solver* solver)
+void normalizePressure(Discretization* d)
 {
-    int size    = (solver->imax + 2) * (solver->jmax + 2);
-    double* p   = solver->p;
+    int size    = (d->grid.imax + 2) * (d->grid.jmax + 2);
+    double* p   = d->p;
    double avgP = 0.0;

    for (int i = 0; i < size; i++) {
@@ -211,13 +146,13 @@ void normalizePressure(Solver* solver)
    }
 }

-void computeTimestep(Solver* solver)
+void computeTimestep(Discretization* d)
 {
-    double dt   = solver->dtBound;
-    double dx   = solver->dx;
-    double dy   = solver->dy;
-    double umax = maxElement(solver, solver->u);
-    double vmax = maxElement(solver, solver->v);
+    double dt   = d->dtBound;
+    double dx   = d->grid.dx;
+    double dy   = d->grid.dy;
+    double umax = maxElement(d, d->u);
+    double vmax = maxElement(d, d->v);

    if (umax > 0) {
        dt = (dt > dx / umax) ? dx / umax : dt;
@@ -226,18 +161,18 @@ void computeTimestep(Solver* solver)
        dt = (dt > dy / vmax) ? dy / vmax : dt;
    }

-    solver->dt = dt * solver->tau;
+    d->dt = dt * d->tau;
 }

-void setBoundaryConditions(Solver* solver)
+void setBoundaryConditions(Discretization* d)
 {
-    int imax  = solver->imax;
-    int jmax  = solver->jmax;
-    double* u = solver->u;
-    double* v = solver->v;
+    int imax  = d->grid.imax;
+    int jmax  = d->grid.jmax;
+    double* u = d->u;
+    double* v = d->v;

    // Left boundary
-    switch (solver->bcLeft) {
+    switch (d->bcLeft) {
    case NOSLIP:
        for (int j = 1; j < jmax + 1; j++) {
            U(0, j) = 0.0;
@@ -261,7 +196,7 @@ void setBoundaryConditions(Solver* solver)
    }

    // Right boundary
-    switch (solver->bcRight) {
+    switch (d->bcRight) {
    case NOSLIP:
        for (int j = 1; j < jmax + 1; j++) {
            U(imax, j)     = 0.0;
@@ -285,7 +220,7 @@ void setBoundaryConditions(Solver* solver)
    }

    // Bottom boundary
-    switch (solver->bcBottom) {
+    switch (d->bcBottom) {
    case NOSLIP:
        for (int i = 1; i < imax + 1; i++) {
            V(i, 0) = 0.0;
@@ -309,7 +244,7 @@ void setBoundaryConditions(Solver* solver)
    }

    // Top boundary
-    switch (solver->bcTop) {
+    switch (d->bcTop) {
    case NOSLIP:
        for (int i = 1; i < imax + 1; i++) {
            V(i, jmax)     = 0.0;
@@ -333,19 +268,19 @@ void setBoundaryConditions(Solver* solver)
    }
 }

-void setSpecialBoundaryCondition(Solver* solver)
+void setSpecialBoundaryCondition(Discretization* d)
 {
-    int imax   = solver->imax;
-    int jmax   = solver->jmax;
-    double mDy = solver->dy;
-    double* u  = solver->u;
+    int imax   = d->grid.imax;
+    int jmax   = d->grid.jmax;
+    double mDy = d->grid.dy;
+    double* u  = d->u;

-    if (strcmp(solver->problem, "dcavity") == 0) {
+    if (strcmp(d->problem, "dcavity") == 0) {
        for (int i = 1; i < imax; i++) {
            U(i, jmax + 1) = 2.0 - U(i, jmax);
        }
-    } else if (strcmp(solver->problem, "canal") == 0) {
-        double ylength = solver->ylength;
+    } else if (strcmp(d->problem, "canal") == 0) {
+        double ylength = d->grid.ylength;
        double y;

        for (int j = 1; j < jmax + 1; j++) {
@@ -355,21 +290,21 @@ void setSpecialBoundaryCondition(Solver* solver)
    }
 }

-void computeFG(Solver* solver)
+void computeFG(Discretization* d)
 {
-    double* u        = solver->u;
-    double* v        = solver->v;
-    double* f        = solver->f;
-    double* g        = solver->g;
-    int imax         = solver->imax;
-    int jmax         = solver->jmax;
-    double gx        = solver->gx;
-    double gy        = solver->gy;
-    double gamma     = solver->gamma;
-    double dt        = solver->dt;
-    double inverseRe = 1.0 / solver->re;
-    double inverseDx = 1.0 / solver->dx;
-    double inverseDy = 1.0 / solver->dy;
+    double* u        = d->u;
+    double* v        = d->v;
+    double* f        = d->f;
+    double* g        = d->g;
+    int imax         = d->grid.imax;
+    int jmax         = d->grid.jmax;
+    double gx        = d->gx;
+    double gy        = d->gy;
+    double gamma     = d->gamma;
+    double dt        = d->dt;
+    double inverseRe = 1.0 / d->re;
+    double inverseDx = 1.0 / d->grid.dx;
+    double inverseDy = 1.0 / d->grid.dy;
    double du2dx, dv2dy, duvdx, duvdy;
    double du2dx2, du2dy2, dv2dx2, dv2dy2;

@@ -428,17 +363,17 @@ void computeFG(Solver* solver)
    }
 }

-void adaptUV(Solver* solver)
+void adaptUV(Discretization* d)
 {
-    int imax       = solver->imax;
-    int jmax       = solver->jmax;
-    double* p      = solver->p;
-    double* u      = solver->u;
-    double* v      = solver->v;
-    double* f      = solver->f;
-    double* g      = solver->g;
-    double factorX = solver->dt / solver->dx;
-    double factorY = solver->dt / solver->dy;
+    int imax       = d->grid.imax;
+    int jmax       = d->grid.jmax;
+    double* p      = d->p;
+    double* u      = d->u;
+    double* v      = d->v;
+    double* f      = d->f;
+    double* g      = d->g;
+    double factorX = d->dt / d->grid.dx;
+    double factorY = d->dt / d->grid.dy;

    for (int j = 1; j < jmax + 1; j++) {
        for (int i = 1; i < imax + 1; i++) {
@@ -448,15 +383,15 @@ void adaptUV(Solver* solver)
    }
 }

-void writeResult(Solver* solver)
+void writeResult(Discretization* d)
 {
-    int imax  = solver->imax;
-    int jmax  = solver->jmax;
-    double dx = solver->dx;
-    double dy = solver->dy;
-    double* p = solver->p;
-    double* u = solver->u;
-    double* v = solver->v;
+    int imax  = d->grid.imax;
+    int jmax  = d->grid.jmax;
+    double dx = d->grid.dx;
+    double dy = d->grid.dy;
+    double* p = d->p;
+    double* u = d->u;
+    double* v = d->v;
    double x = 0.0, y = 0.0;

    FILE* fp;
@@ -488,11 +423,11 @@ void writeResult(Solver* solver)
    for (int j = 1; j < jmax + 1; j++) {
        y = dy * (j - 0.5);
        for (int i = 1; i < imax + 1; i++) {
-            x            = dx * (i - 0.5);
-            double vel_u = (U(i, j) + U(i - 1, j)) / 2.0;
-            double vel_v = (V(i, j) + V(i, j - 1)) / 2.0;
-            double len   = sqrt((vel_u * vel_u) + (vel_v * vel_v));
-            fprintf(fp, "%.2f %.2f %f %f %f\n", x, y, vel_u, vel_v, len);
+            x           = dx * (i - 0.5);
+            double velU = (U(i, j) + U(i - 1, j)) / 2.0;
+            double velV = (V(i, j) + V(i, j - 1)) / 2.0;
+            double len  = sqrt((velU * velU) + (velV * velV));
+            fprintf(fp, "%.2f %.2f %f %f %f\n", x, y, velU, velV, len);
        }
    }

--- a/BasicSolver/2D-seq/src/discretization.h
+++ b/BasicSolver/2D-seq/src/discretization.h
@@ -0,0 +1,40 @@
+/*
+ * Copyright (C)  NHR@FAU, University Erlangen-Nuremberg.
+ * All rights reserved. This file is part of nusif-solver.
+ * Use of this source code is governed by a MIT style
+ * license that can be found in the LICENSE file.
+ */
+#ifndef __DISCRETIZATION_H_
+#define __DISCRETIZATION_H_
+#include "grid.h"
+#include "parameter.h"
+
+enum BC { NOSLIP = 1, SLIP, OUTFLOW, PERIODIC };
+/* State of the discretized flow problem: grid, field arrays and parameters. */
+typedef struct {
+    /* geometry and grid information */
+    Grid grid;
+    /* arrays, each (imax + 2) * (jmax + 2) doubles, allocated in initDiscretization */
+    double *p, *rhs;
+    double *f, *g;
+    double *u, *v;
+    /* parameters, copied from Parameter in initDiscretization */
+    double re, tau, gamma;
+    double gx, gy;
+    /* time stepping */
+    double dt, te;
+    double dtBound; /* 0.5 * re / (1 / dx^2 + 1 / dy^2), set in initDiscretization */
+    char* problem;  /* aliases Parameter.name; the string is not copied */
+    int bcLeft, bcRight, bcBottom, bcTop; /* compared against enum BC values */
+} Discretization;
+
+extern void initDiscretization(Discretization*, Parameter*);
+extern void computeRHS(Discretization*);
+extern void normalizePressure(Discretization*);
+extern void computeTimestep(Discretization*);
+extern void setBoundaryConditions(Discretization*);
+extern void setSpecialBoundaryCondition(Discretization*);
+extern void computeFG(Discretization*);
+extern void adaptUV(Discretization*);
+extern void writeResult(Discretization*);
+#endif
--- a/BasicSolver/2D-seq/src/grid.h
+++ b/BasicSolver/2D-seq/src/grid.h
@@ -0,0 +1,16 @@
+/*
+ * Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
+ * All rights reserved. This file is part of nusif-solver.
+ * Use of this source code is governed by a MIT style
+ * license that can be found in the LICENSE file.
+ */
+#ifndef __GRID_H_
+#define __GRID_H_
+/* Geometry of the uniform 2D grid referenced by Discretization and Solver. */
+typedef struct {
+    double dx, dy;           /* cell sizes: xlength / imax and ylength / jmax */
+    int imax, jmax;          /* interior cells per direction; arrays hold imax + 2 by jmax + 2 */
+    double xlength, ylength; /* domain extent in x and y */
+} Grid;
+
+#endif // __GRID_H_
--- a/BasicSolver/2D-seq/src/likwid-marker.h
+++ b/BasicSolver/2D-seq/src/likwid-marker.h
@@ -4,23 +4,23 @@
 *      Author:   Jan Eitzinger (je), jan.eitzinger@fau.de
 *      Copyright (c) 2020 RRZE, University Erlangen-Nuremberg
 *
- *      Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
+ *      Permission is hereby granted, free of charge, to any person obtaining a copy
+ *      of this software and associated documentation files (the "Software"), to deal
+ *      in the Software without restriction, including without limitation the rights
+ *      to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ *      copies of the Software, and to permit persons to whom the Software is
 *      furnished to do so, subject to the following conditions:
 *
- *      The above copyright notice and this permission notice shall be included
- * in all copies or substantial portions of the Software.
+ *      The above copyright notice and this permission notice shall be included in all
+ *      copies or substantial portions of the Software.
 *
- *      THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- *      FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- *      LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
+ *      THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ *      IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ *      FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ *      AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ *      LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ *      OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ *      SOFTWARE.
 *
 * =======================================================================================
 */
--- a/BasicSolver/2D-seq/src/main.c
+++ b/BasicSolver/2D-seq/src/main.c
@@ -1,15 +1,14 @@
 /*
- * Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
+ * Copyright (C)  NHR@FAU, University Erlangen-Nuremberg.
 * All rights reserved.
 * Use of this source code is governed by a MIT-style
 * license that can be found in the LICENSE file.
 */
-#include <float.h>
-#include <limits.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <unistd.h>

+#include "discretization.h"
 #include "parameter.h"
 #include "progress.h"
 #include "solver.h"
@@ -17,50 +16,61 @@

 int main(int argc, char** argv)
 {
-    double S, E;
-    Parameter params;
-    Solver solver;
-    initParameter(&params);
+    double timeStart, timeStop;
+    Parameter p;
+    Discretization d;
+    Solver s;
+
+    initParameter(&p);

    if (argc != 2) {
        printf("Usage: %s <configFile>\n", argv[0]);
        exit(EXIT_SUCCESS);
    }

-    readParameter(&params, argv[1]);
-    printParameter(&params);
-    initSolver(&solver, &params);
+    readParameter(&p, argv[1]);
+    printParameter(&p);
+    initDiscretization(&d, &p);
+    initSolver(&s, &d, &p);
+    /* Opened after the argument check so a usage error cannot truncate residual.dat. */
+    FILE* fp = initResidualWriter();
+
 #ifndef VERBOSE
-    initProgress(solver.te);
+    initProgress(d.te);
 #endif

-    double tau = solver.tau;
-    double te  = solver.te;
+    double tau = d.tau;
+    double te  = d.te;
    double t   = 0.0;
    int nt     = 0;
+    double res = 0.0;

-    S = getTimeStamp();
+    timeStart  = getTimeStamp();
    while (t <= te) {
-        if (tau > 0.0) computeTimestep(&solver);
-        setBoundaryConditions(&solver);
-        setSpecialBoundaryCondition(&solver);
-        computeFG(&solver);
-        computeRHS(&solver);
-        if (nt % 100 == 0) normalizePressure(&solver);
-        solveRB(&solver);
-        adaptUV(&solver);
-        t += solver.dt;
+        if (tau > 0.0) computeTimestep(&d);
+        setBoundaryConditions(&d);
+        setSpecialBoundaryCondition(&d);
+        computeFG(&d);
+        computeRHS(&d);
+        if (nt % 100 == 0) normalizePressure(&d);
+        res = solve(&s, d.p, d.rhs);
+        adaptUV(&d);
+
+        writeResidual(fp, t, res);
+
+        t += d.dt;
        nt++;

 #ifdef VERBOSE
-        printf("TIME %f , TIMESTEP %f\n", t, solver.dt);
+        printf("TIME %f , TIMESTEP %f\n", t, d.dt);
 #else
        printProgress(t);
 #endif
    }
-    E = getTimeStamp();
+    fclose(fp);
+    timeStop = getTimeStamp();
    stopProgress();
-    printf("Solution took %.2fs\n", E - S);
-    writeResult(&solver);
+    printf("Solution took %.2fs\n", timeStop - timeStart);
+    writeResult(&d);
    return EXIT_SUCCESS;
 }
--- a/BasicSolver/2D-seq/src/parameter.c
+++ b/BasicSolver/2D-seq/src/parameter.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
+ * Copyright (C)  NHR@FAU, University Erlangen-Nuremberg.
 * All rights reserved. This file is part of nusif-solver.
 * Use of this source code is governed by a MIT style
 * license that can be found in the LICENSE file.
@@ -9,21 +9,23 @@
 #include <string.h>

 #include "parameter.h"
-#include "util.h"
 #define MAXLINE 4096

 void initParameter(Parameter* param)
 {
    param->xlength = 1.0;
    param->ylength = 1.0;
-    param->imax    = 100;
-    param->jmax    = 100;
+    param->imax    = 128;
+    param->jmax    = 128;
    param->itermax = 1000;
    param->eps     = 0.0001;
    param->omg     = 1.7;
    param->re      = 100.0;
    param->gamma   = 0.9;
    param->tau     = 0.5;
+    param->levels  = 5;
+    param->presmooth = 5;
+    param->postsmooth = 5;
 }

 void readParameter(Parameter* param, const char* filename)
@@ -61,6 +63,7 @@ void readParameter(Parameter* param, const char* filename)
            PARSE_INT(imax);
            PARSE_INT(jmax);
            PARSE_INT(itermax);
+            PARSE_INT(levels);
            PARSE_REAL(eps);
            PARSE_REAL(omg);
            PARSE_REAL(re);
@@ -78,6 +81,8 @@ void readParameter(Parameter* param, const char* filename)
            PARSE_REAL(u_init);
            PARSE_REAL(v_init);
            PARSE_REAL(p_init);
+            PARSE_INT(presmooth);
+            PARSE_INT(postsmooth);
        }
    }

@@ -108,4 +113,5 @@ void printParameter(Parameter* param)
    printf("\tepsilon (stopping tolerance) : %f\n", param->eps);
    printf("\tgamma (stopping tolerance) : %f\n", param->gamma);
    printf("\tomega (SOR relaxation): %f\n", param->omg);
+    printf("\tMultiGrid levels : %d\n", param->levels);
 }
--- a/BasicSolver/2D-seq/src/parameter.h
+++ b/BasicSolver/2D-seq/src/parameter.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
+ * Copyright (C)  NHR@FAU, University Erlangen-Nuremberg.
 * All rights reserved. This file is part of nusif-solver.
 * Use of this source code is governed by a MIT style
 * license that can be found in the LICENSE file.
@@ -10,14 +10,15 @@
 typedef struct {
    double xlength, ylength;
    int imax, jmax;
-    int itermax;
-    double eps, omg;
+    int itermax, levels;
+    double eps, omg, rho;
    double re, tau, gamma;
    double te, dt;
    double gx, gy;
    char* name;
    int bcLeft, bcRight, bcBottom, bcTop;
    double u_init, v_init, p_init;
+    int presmooth, postsmooth;
 } Parameter;

 void initParameter(Parameter*);
--- a/BasicSolver/2D-seq/src/progress.c
+++ b/BasicSolver/2D-seq/src/progress.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
+ * Copyright (C)  NHR@FAU, University Erlangen-Nuremberg.
 * All rights reserved. This file is part of nusif-solver.
 * Use of this source code is governed by a MIT style
 * license that can be found in the LICENSE file.
@@ -49,3 +49,22 @@ void stopProgress()
    printf("\n");
    fflush(stdout);
 }
+
+/* Create (or truncate) residual.dat in the working directory and return the
+ * open stream; this function never closes it. Exits if the open fails. */
+FILE* initResidualWriter(void)
+{
+    FILE* fp = fopen("residual.dat", "w");
+
+    if (fp == NULL) {
+        /* Name the file and the cause instead of printing a bare "Error!". */
+        perror("residual.dat");
+        exit(EXIT_FAILURE);
+    }
+    return fp;
+}
+/* Write one "<ts>, <res>" line to fp, both values formatted with %f. */
+void writeResidual(FILE* fp, double ts, double res)
+{
+    fprintf(fp, "%f, %f\n", ts, res);
+}
--- a/BasicSolver/2D-seq/src/progress.h
+++ b/BasicSolver/2D-seq/src/progress.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
+ * Copyright (C)  NHR@FAU, University Erlangen-Nuremberg.
 * All rights reserved.
 * Use of this source code is governed by a MIT-style
 * license that can be found in the LICENSE file.
@@ -9,6 +9,8 @@

 extern void initProgress(double);
 extern void printProgress(double);
-extern void stopProgress();
+extern void stopProgress(void);
+extern FILE* initResidualWriter(void);
+extern void writeResidual(FILE*, double, double);

 #endif
--- a/BasicSolver/2D-seq/src/solver-mg.c
+++ b/BasicSolver/2D-seq/src/solver-mg.c
@@ -0,0 +1,221 @@
+/*
+ * Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
+ * All rights reserved. This file is part of nusif-solver.
+ * Use of this source code is governed by a MIT style
+ * license that can be found in the LICENSE file.
+ */
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "allocate.h"
+#include "solver.h"
+#include "util.h"
+
+#define FINEST_LEVEL   0
+#define COARSEST_LEVEL (s->levels - 1)
+#define S(i, j)        s[(j) * (imax + 2) + (i)]
+#define E(i, j)        e[(j) * (imax + 2) + (i)]
+#define R(i, j)        r[(j) * (imax + 2) + (i)]
+#define OLD(i, j)      old[(j) * (imax + 2) + (i)]
+
+/* Full-weighting restriction of s->r[level] into s->r[level + 1] (coarse row stride). */
+static void restrictMG(Solver* s, int level, int imax, int jmax)
+{
+    double* r   = s->r[level + 1];
+    double* old = s->r[level];
+    int cimax = imax / 2, cjmax = jmax / 2; /* the extents multiGrid recurses with */
+    /* Coarse bounds keep 2 * i + 1 <= imax + 1; the fine bounds read past the array. */
+    for (int j = 1; j < cjmax + 1; j++) {
+        for (int i = 1; i < cimax + 1; i++) {
+            r[j * (cimax + 2) + i] =
+                (OLD(2 * i - 1, 2 * j - 1) + OLD(2 * i, 2 * j - 1) * 2 + OLD(2 * i + 1, 2 * j - 1) +
+                    OLD(2 * i - 1, 2 * j) * 2 + OLD(2 * i, 2 * j) * 4 + OLD(2 * i + 1, 2 * j) * 2 +
+                    OLD(2 * i - 1, 2 * j + 1) + OLD(2 * i, 2 * j + 1) * 2 + OLD(2 * i + 1, 2 * j + 1)) / 16.0;
+        }
+    }
+}
+/* Copy s->r[level + 1] at (i / 2, j / 2) into s->r[level] at every even (i, j). */
+static void prolongate(Solver* s, int level, int imax, int jmax)
+{
+    double* old = s->r[level + 1];
+    double* e   = s->r[level];
+    /* NOTE(review): correct() adds s->e[level] and multiGrid solves into s->e[level + 1]; neither is touched here - confirm the intended arrays. */
+    for (int j = 2; j < jmax + 1; j += 2) {
+        for (int i = 2; i < imax + 1; i += 2) {
+            E(i, j) = OLD(i / 2, j / 2); /* NOTE(review): OLD uses stride imax + 2, the coarse level is indexed with imax / 2 + 2 */
+        }
+    }
+}
+/* Add s->e[level] to p on all interior cells (1..imax, 1..jmax). */
+static void correct(Solver* s, double* p, int level, int imax, int jmax)
+{
+    double* e = s->e[level];
+    /* NOTE(review): for level > 0 multiGrid passes s->e[level] as p, so e aliases p here - confirm. */
+    for (int j = 1; j < jmax + 1; ++j) {
+        for (int i = 1; i < imax + 1; ++i) {
+            P(i, j) += E(i, j);
+        }
+    }
+}
+/* Copy the outermost interior rows (j = 1, jmax) and then columns (i = 1, imax) of p into the adjacent ghost cells. */
+static void setBoundaryCondition(double* p, int imax, int jmax)
+{
+    for (int col = 1; col <= imax; col++) {
+        P(col, 0)        = P(col, 1);
+        P(col, jmax + 1) = P(col, jmax);
+    }
+
+    for (int row = 1; row <= jmax; row++) {
+        P(0, row)        = P(1, row);
+        P(imax + 1, row) = P(imax, row);
+    }
+}
+
+/* One red-black relaxation sweep over p on a grid with imax x jmax interior
+ * cells: every interior cell is updated once, in two checkerboard passes.
+ * Returns nothing: it was declared double without any return statement, and
+ * every caller discards the result. */
+static void smooth(Solver* s, double* p, double* rhs, int level, int imax, int jmax)
+{
+    /* NOTE(review): the same s->grid->dx, dy are used for every level - confirm. */
+    double dx2    = s->grid->dx * s->grid->dx;
+    double dy2    = s->grid->dy * s->grid->dy;
+    double idx2   = 1.0 / dx2;
+    double idy2   = 1.0 / dy2;
+    double factor = s->omega * 0.5 * (dx2 * dy2) / (dx2 + dy2);
+    int jsw       = 1; /* start column of row 1 in the current pass */
+
+    (void)level; /* kept for the call signature; no per-level data is read */
+
+    for (int pass = 0; pass < 2; pass++) {
+        int isw = jsw;
+
+        for (int j = 1; j < jmax + 1; j++) {
+            for (int i = isw; i < imax + 1; i += 2) {
+                P(i, j) -= factor * (RHS(i, j) -
+                          ((P(i + 1, j) - 2.0 * P(i, j) + P(i - 1, j)) * idx2 +
+                              (P(i, j + 1) - 2.0 * P(i, j) + P(i, j - 1)) * idy2));
+            }
+            isw = 3 - isw; /* start column alternates 1 <-> 2 from row to row */
+        }
+        jsw = 3 - jsw; /* the second pass visits the cells skipped by the first */
+    }
+}
+
+/* Store the residual rhs - laplace(p) (five-point stencil) of every interior
+ * cell in s->r[level] and return the sum of its squares divided by
+ * imax * jmax. Cells are visited in the same two-pass checkerboard order
+ * that smooth() uses. */
+static double calculateResidual(Solver* s, double* p, double* rhs, int level, int imax, int jmax)
+{
+    double idx2 = 1.0 / (s->grid->dx * s->grid->dx);
+    double idy2 = 1.0 / (s->grid->dy * s->grid->dy);
+    double* r   = s->r[level];
+    /* Start at zero: the former 1.0 added 1 / (imax * jmax) to every result. */
+    double res  = 0.0;
+    int jsw     = 1;
+
+    for (int pass = 0; pass < 2; pass++) {
+        int isw = jsw;
+
+        for (int j = 1; j < jmax + 1; j++) {
+            for (int i = isw; i < imax + 1; i += 2) {
+                R(i, j) = RHS(i, j) -
+                          ((P(i + 1, j) - 2.0 * P(i, j) + P(i - 1, j)) * idx2 +
+                              (P(i, j + 1) - 2.0 * P(i, j) + P(i, j - 1)) * idy2);
+
+                res += (R(i, j) * R(i, j));
+            }
+            isw = 3 - isw;
+        }
+        jsw = 3 - jsw;
+    }
+
+    /* mean of the squared residuals over the interior cells */
+    res = res / (double)(imax * jmax);
+    return res;
+}
+/* One recursive multigrid cycle on `level`; returns the residual measured after pre-smoothing (0.0 on the coarsest level). */
+static double multiGrid(Solver* s, double* p, double* rhs, int level, int imax, int jmax)
+{
+    double res = 0.0;
+
+    // coarsest level: five fixed smoothing sweeps, no residual is computed
+    if (level == COARSEST_LEVEL) {
+        for (int i = 0; i < 5; i++) {
+            smooth(s, p, rhs, level, imax, jmax);
+        }
+        return res;
+    }
+
+    // pre-smoothing; the ghost cells of p are refreshed on the finest level only
+    for (int i = 0; i < s->presmooth; i++) {
+        smooth(s, p, rhs, level, imax, jmax);
+        if (level == FINEST_LEVEL) setBoundaryCondition(p, imax, jmax);
+    }
+
+    res = calculateResidual(s, p, rhs, level, imax, jmax);
+
+    // restrict s->r[level] into s->r[level + 1]
+    restrictMG(s, level, imax, jmax);
+    // NOTE(review): s->e[level + 1] is zeroed only in initSolver, so this solve starts from the previous cycle's values - confirm.
+    // recurse on the halved grid: s->e[level + 1] is the unknown, s->r[level + 1] the right-hand side
+    multiGrid(s, s->e[level + 1], s->r[level + 1], level + 1, imax / 2, jmax / 2);
+
+    // prolongate
+    prolongate(s, level, imax, jmax);
+
+    // add s->e[level] to p (see correct)
+    correct(s, p, level, imax, jmax);
+    if (level == FINEST_LEVEL) setBoundaryCondition(p, imax, jmax);
+
+    // post-smoothing
+    for (int i = 0; i < s->postsmooth; i++) {
+        smooth(s, p, rhs, level, imax, jmax);
+        if (level == FINEST_LEVEL) setBoundaryCondition(p, imax, jmax);
+    }
+
+    return res;
+}
+
+/* Copy the solver settings from p, link s to the grid of d and allocate one
+ * zero-filled residual (s->r) and error (s->e) array per multigrid level,
+ * each with the (imax + 2) * (jmax + 2) size of the full grid. */
+void initSolver(Solver* s, Discretization* d, Parameter* p)
+{
+    s->eps        = p->eps;
+    s->omega      = p->omg;
+    s->itermax    = p->itermax;
+    s->levels     = p->levels;
+    s->grid       = &d->grid;
+    s->presmooth  = p->presmooth;
+    s->postsmooth = p->postsmooth;
+
+    int levels   = s->levels;
+    size_t cells = (size_t)(s->grid->imax + 2) * (size_t)(s->grid->jmax + 2); /* product formed in size_t, not int */
+    printf("Using Multigrid solver with %d levels\n", levels);
+    s->r = malloc((size_t)levels * sizeof(double*));
+    s->e = malloc((size_t)levels * sizeof(double*));
+    if (s->r == NULL || s->e == NULL) { /* previously dereferenced unchecked */
+        fprintf(stderr, "initSolver: cannot allocate %d multigrid levels\n", levels);
+        exit(EXIT_FAILURE);
+    }
+    for (int j = 0; j < levels; j++) {
+        s->r[j] = allocate(64, cells * sizeof(double));
+        s->e[j] = allocate(64, cells * sizeof(double));
+        for (size_t i = 0; i < cells; i++) {
+            s->r[j][i] = s->e[j][i] = 0.0;
+        }
+    }
+}
+/* Run one multiGrid() cycle on p starting at the finest level and return its result. */
+double solve(Solver* s, double* p, double* rhs)
+{
+    /* NOTE(review): s->eps and s->itermax are not consulted here - confirm. */
+    double res = multiGrid(s, p, rhs, FINEST_LEVEL, s->grid->imax, s->grid->jmax);
+#ifdef VERBOSE
+    printf("Residuum: %.6f\n", res);
+#endif
+
+    return res;
+}
--- a/BasicSolver/2D-seq/src/solver-rb.c
+++ b/BasicSolver/2D-seq/src/solver-rb.c
@@ -0,0 +1,78 @@
+/*
+ * Copyright (C)  NHR@FAU, University Erlangen-Nuremberg.
+ * All rights reserved. This file is part of nusif-solver.
+ * Use of this source code is governed by a MIT style
+ * license that can be found in the LICENSE file.
+ */
+#include "solver.h"
+#include "util.h"
+
+/*
+ * Prepare the red-black solver: take the stopping tolerance, relaxation
+ * factor and iteration limit from p and keep a pointer to the grid held
+ * by d. Nothing is allocated here.
+ */
+void initSolver(Solver* s, Discretization* d, Parameter* p)
+{
+    s->eps     = p->eps;
+    s->omega   = p->omg;
+    s->itermax = p->itermax;
+    s->grid    = &d->grid;
+}
+
+/*
+ * Red-black relaxation of the pressure field.
+ *
+ * Every sweep visits the interior cells (1..imax, 1..jmax) in two passes:
+ * first the cells whose i + j is even, then those whose i + j is odd. For
+ * each visited cell the five-point stencil residual r is evaluated, the
+ * pressure is corrected by the relaxation step and r * r is accumulated.
+ * After both passes the outermost interior values are copied into the
+ * neighbouring boundary cells on all four sides.
+ *
+ * Sweeping ends once the accumulated r * r divided by imax * jmax falls
+ * below eps * eps, or after itermax sweeps.
+ *
+ * solver  supplies grid (imax, jmax, dx, dy), eps, omega and itermax
+ * p       pressure array, updated in place (accessed through P())
+ * rhs     right-hand side array, only read (accessed through RHS())
+ *
+ * Returns the mean squared residual of the last sweep, or 1.0 if no sweep
+ * was executed.
+ */
+double solve(Solver* solver, double* p, double* rhs)
+{
+    const int imax       = solver->grid->imax;
+    const int jmax       = solver->grid->jmax;
+    const int sweepLimit = solver->itermax;
+    const double dxSq    = solver->grid->dx * solver->grid->dx;
+    const double dySq    = solver->grid->dy * solver->grid->dy;
+    const double idx2    = 1.0 / dxSq;
+    const double idy2    = 1.0 / dySq;
+    const double relax   = solver->omega * 0.5 * (dxSq * dySq) / (dxSq + dySq);
+    const double tolSq   = solver->eps * solver->eps;
+    double res           = 1.0;
+    int it;
+
+    for (it = 0; (it < sweepLimit) && (res >= tolSq); it++) {
+        res = 0.0;
+
+        for (int pass = 0; pass < 2; pass++) {
+            for (int j = 1; j <= jmax; j++) {
+                // First column of the current colour in row j: 1 when
+                // j + pass is odd, 2 otherwise.
+                const int first = 2 - ((j + pass) & 1);
+
+                for (int i = first; i <= imax; i += 2) {
+
+                    double r = RHS(i, j) -
+                               ((P(i + 1, j) - 2.0 * P(i, j) + P(i - 1, j)) * idx2 +
+                                   (P(i, j + 1) - 2.0 * P(i, j) + P(i, j - 1)) * idy2);
+
+                    P(i, j) -= (relax * r);
+                    res += (r * r);
+                }
+            }
+        }
+
+        // Copy the edge rows into the bottom and top boundary rows.
+        for (int i = 1; i <= imax; i++) {
+            P(i, 0)        = P(i, 1);
+            P(i, jmax + 1) = P(i, jmax);
+        }
+
+        // Copy the edge columns into the left and right boundary columns.
+        for (int j = 1; j <= jmax; j++) {
+            P(0, j)        = P(1, j);
+            P(imax + 1, j) = P(imax, j);
+        }
+
+        res /= (double)(imax * jmax);
+#ifdef DEBUG
+        printf("%d Residuum: %e\n", it, res);
+#endif
+    }
+
+#ifdef VERBOSE
+    printf("Solver took %d iterations to reach %f\n", it, sqrt(res));
+#endif
+
+    return res;
+}
--- a/BasicSolver/2D-seq/src/solver-sor.c
+++ b/BasicSolver/2D-seq/src/solver-sor.c
@@ -0,0 +1,70 @@
+/*
+ * Copyright (C)  NHR@FAU, University Erlangen-Nuremberg.
+ * All rights reserved. This file is part of nusif-solver.
+ * Use of this source code is governed by a MIT style
+ * license that can be found in the LICENSE file.
+ */
+#include "solver.h"
+#include "util.h"
+
+/*
+ * Prepare the SOR solver: take the stopping tolerance, relaxation factor
+ * and iteration limit from p and keep a pointer to the grid held by d.
+ * Nothing is allocated here.
+ */
+void initSolver(Solver* s, Discretization* d, Parameter* p)
+{
+    s->eps     = p->eps;
+    s->omega   = p->omg;
+    s->itermax = p->itermax;
+    s->grid    = &d->grid;
+}
+
+/*
+ * SOR relaxation of the pressure field.
+ *
+ * Every sweep walks the interior cells row by row (j = 1..jmax, and within
+ * a row i = 1..imax). For each cell the five-point stencil residual r is
+ * evaluated, the pressure is corrected by the relaxation step and r * r is
+ * accumulated. After the sweep the outermost interior values are copied
+ * into the neighbouring boundary cells on all four sides.
+ *
+ * Sweeping ends once the accumulated r * r divided by imax * jmax falls
+ * below eps * eps, or after itermax sweeps.
+ *
+ * solver  supplies grid (imax, jmax, dx, dy), eps, omega and itermax
+ * p       pressure array, updated in place (accessed through P())
+ * rhs     right-hand side array, only read (accessed through RHS())
+ *
+ * Returns the mean squared residual of the last sweep, or 1.0 if no sweep
+ * was executed.
+ */
+double solve(Solver* solver, double* p, double* rhs)
+{
+    const int imax       = solver->grid->imax;
+    const int jmax       = solver->grid->jmax;
+    const int sweepLimit = solver->itermax;
+    const double dxSq    = solver->grid->dx * solver->grid->dx;
+    const double dySq    = solver->grid->dy * solver->grid->dy;
+    const double idx2    = 1.0 / dxSq;
+    const double idy2    = 1.0 / dySq;
+    const double relax   = solver->omega * 0.5 * (dxSq * dySq) / (dxSq + dySq);
+    const double tolSq   = solver->eps * solver->eps;
+    double res           = 1.0;
+    int it;
+
+    for (it = 0; (it < sweepLimit) && (res >= tolSq); it++) {
+        res = 0.0;
+
+        for (int j = 1; j <= jmax; j++) {
+            for (int i = 1; i <= imax; i++) {
+
+                double r = RHS(i, j) -
+                           ((P(i + 1, j) - 2.0 * P(i, j) + P(i - 1, j)) * idx2 +
+                               (P(i, j + 1) - 2.0 * P(i, j) + P(i, j - 1)) * idy2);
+
+                P(i, j) -= (relax * r);
+                res += (r * r);
+            }
+        }
+
+        // Copy the edge rows into the bottom and top boundary rows.
+        for (int i = 1; i <= imax; i++) {
+            P(i, 0)        = P(i, 1);
+            P(i, jmax + 1) = P(i, jmax);
+        }
+
+        // Copy the edge columns into the left and right boundary columns.
+        for (int j = 1; j <= jmax; j++) {
+            P(0, j)        = P(1, j);
+            P(imax + 1, j) = P(imax, j);
+        }
+
+        res /= (double)(imax * jmax);
+#ifdef DEBUG
+        printf("%d Residuum: %e\n", it, res);
+#endif
+    }
+
+#ifdef VERBOSE
+    printf("Solver took %d iterations to reach %f\n", it, sqrt(res));
+#endif
+
+    return res;
+}
--- a/BasicSolver/2D-seq/src/solver.c
+++ b/BasicSolver/2D-seq/src/solver.c
@@ -1,564 +0,0 @@
-/*
- * Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
- * All rights reserved. This file is part of nusif-solver.
- * Use of this source code is governed by a MIT style
- * license that can be found in the LICENSE file.
- */
-#include <float.h>
-#include <math.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-#include "allocate.h"
-#include "parameter.h"
-#include "solver.h"
-#include "util.h"
-
-#define P(i, j)   p[(j) * (imax + 2) + (i)]
-#define F(i, j)   f[(j) * (imax + 2) + (i)]
-#define G(i, j)   g[(j) * (imax + 2) + (i)]
-#define U(i, j)   u[(j) * (imax + 2) + (i)]
-#define V(i, j)   v[(j) * (imax + 2) + (i)]
-#define RHS(i, j) rhs[(j) * (imax + 2) + (i)]
-
-static void print(Solver* solver, double* grid)
-{
-    int imax = solver->imax;
-
-    for (int j = 0; j < solver->jmax + 2; j++) {
-        printf("%02d: ", j);
-        for (int i = 0; i < solver->imax + 2; i++) {
-            printf("%12.8f  ", grid[j * (imax + 2) + i]);
-        }
-        printf("\n");
-    }
-    fflush(stdout);
-}
-
-static void printConfig(Solver* solver)
-{
-    printf("Parameters for #%s#\n", solver->problem);
-    printf("Boundary conditions Left:%d Right:%d Bottom:%d Top:%d\n",
-        solver->bcLeft,
-        solver->bcRight,
-        solver->bcBottom,
-        solver->bcTop);
-    printf("\tReynolds number: %.2f\n", solver->re);
-    printf("\tGx Gy: %.2f %.2f\n", solver->gx, solver->gy);
-    printf("Geometry data:\n");
-    printf("\tDomain box size (x, y): %.2f, %.2f\n", solver->xlength, solver->ylength);
-    printf("\tCells (x, y): %d, %d\n", solver->imax, solver->jmax);
-    printf("Timestep parameters:\n");
-    printf("\tDefault stepsize: %.2f, Final time %.2f\n", solver->dt, solver->te);
-    printf("\tdt bound: %.6f\n", solver->dtBound);
-    printf("\tTau factor: %.2f\n", solver->tau);
-    printf("Iterative solver parameters:\n");
-    printf("\tMax iterations: %d\n", solver->itermax);
-    printf("\tepsilon (stopping tolerance) : %f\n", solver->eps);
-    printf("\tgamma factor: %f\n", solver->gamma);
-    printf("\tomega (SOR relaxation): %f\n", solver->omega);
-}
-
-void initSolver(Solver* solver, Parameter* params)
-{
-    solver->problem  = params->name;
-    solver->bcLeft   = params->bcLeft;
-    solver->bcRight  = params->bcRight;
-    solver->bcBottom = params->bcBottom;
-    solver->bcTop    = params->bcTop;
-    solver->imax     = params->imax;
-    solver->jmax     = params->jmax;
-    solver->xlength  = params->xlength;
-    solver->ylength  = params->ylength;
-    solver->dx       = params->xlength / params->imax;
-    solver->dy       = params->ylength / params->jmax;
-    solver->eps      = params->eps;
-    solver->omega    = params->omg;
-    solver->itermax  = params->itermax;
-    solver->re       = params->re;
-    solver->gx       = params->gx;
-    solver->gy       = params->gy;
-    solver->dt       = params->dt;
-    solver->te       = params->te;
-    solver->tau      = params->tau;
-    solver->gamma    = params->gamma;
-
-    int imax    = solver->imax;
-    int jmax    = solver->jmax;
-    size_t size = (imax + 2) * (jmax + 2) * sizeof(double);
-    solver->u   = allocate(64, size);
-    solver->v   = allocate(64, size);
-    solver->p   = allocate(64, size);
-    solver->rhs = allocate(64, size);
-    solver->f   = allocate(64, size);
-    solver->g   = allocate(64, size);
-
-    for (int i = 0; i < (imax + 2) * (jmax + 2); i++) {
-        solver->u[i]   = params->u_init;
-        solver->v[i]   = params->v_init;
-        solver->p[i]   = params->p_init;
-        solver->rhs[i] = 0.0;
-        solver->f[i]   = 0.0;
-        solver->g[i]   = 0.0;
-    }
-
-    double dx        = solver->dx;
-    double dy        = solver->dy;
-    double invSqrSum = 1.0 / (dx * dx) + 1.0 / (dy * dy);
-    solver->dtBound  = 0.5 * solver->re * 1.0 / invSqrSum;
-#ifdef VERBOSE
-    printConfig(solver);
-#endif
-}
-
-void computeRHS(Solver* solver)
-{
-    int imax    = solver->imax;
-    int jmax    = solver->jmax;
-    double idx  = 1.0 / solver->dx;
-    double idy  = 1.0 / solver->dy;
-    double idt  = 1.0 / solver->dt;
-    double* rhs = solver->rhs;
-    double* f   = solver->f;
-    double* g   = solver->g;
-
-    for (int j = 1; j < jmax + 1; j++) {
-        for (int i = 1; i < imax + 1; i++) {
-            RHS(i, j) = idt *
-                        ((F(i, j) - F(i - 1, j)) * idx + (G(i, j) - G(i, j - 1)) * idy);
-        }
-    }
-}
-
-void solve(Solver* solver)
-{
-    int imax      = solver->imax;
-    int jmax      = solver->jmax;
-    double eps    = solver->eps;
-    int itermax   = solver->itermax;
-    double dx2    = solver->dx * solver->dx;
-    double dy2    = solver->dy * solver->dy;
-    double idx2   = 1.0 / dx2;
-    double idy2   = 1.0 / dy2;
-    double factor = solver->omega * 0.5 * (dx2 * dy2) / (dx2 + dy2);
-    double* p     = solver->p;
-    double* rhs   = solver->rhs;
-    double epssq  = eps * eps;
-    int it        = 0;
-    double res    = 1.0;
-
-    while ((res >= epssq) && (it < itermax)) {
-        res = 0.0;
-
-        for (int j = 1; j < jmax + 1; j++) {
-            for (int i = 1; i < imax + 1; i++) {
-
-                double r = RHS(i, j) -
-                           ((P(i + 1, j) - 2.0 * P(i, j) + P(i - 1, j)) * idx2 +
-                               (P(i, j + 1) - 2.0 * P(i, j) + P(i, j - 1)) * idy2);
-
-                P(i, j) -= (factor * r);
-                res += (r * r);
-            }
-        }
-
-        for (int i = 1; i < imax + 1; i++) {
-            P(i, 0)        = P(i, 1);
-            P(i, jmax + 1) = P(i, jmax);
-        }
-
-        for (int j = 1; j < jmax + 1; j++) {
-            P(0, j)        = P(1, j);
-            P(imax + 1, j) = P(imax, j);
-        }
-
-        res = res / (double)(imax * jmax);
-#ifdef DEBUG
-        printf("%d Residuum: %e\n", it, res);
-#endif
-        it++;
-    }
-
-#ifdef VERBOSE
-    printf("Solver took %d iterations to reach %f\n", it, sqrt(res));
-#endif
-}
-
-void solveRB(Solver* solver)
-{
-    int imax      = solver->imax;
-    int jmax      = solver->jmax;
-    double eps    = solver->eps;
-    int itermax   = solver->itermax;
-    double dx2    = solver->dx * solver->dx;
-    double dy2    = solver->dy * solver->dy;
-    double idx2   = 1.0 / dx2;
-    double idy2   = 1.0 / dy2;
-    double factor = solver->omega * 0.5 * (dx2 * dy2) / (dx2 + dy2);
-    double* p     = solver->p;
-    double* rhs   = solver->rhs;
-    double epssq  = eps * eps;
-    int it        = 0;
-    double res    = 1.0;
-    int pass, jsw, isw;
-
-    while ((res >= epssq) && (it < itermax)) {
-        res = 0.0;
-        jsw = 1;
-
-        for (pass = 0; pass < 2; pass++) {
-            isw = jsw;
-
-            for (int j = 1; j < jmax + 1; j++) {
-                for (int i = isw; i < imax + 1; i += 2) {
-
-                    double r = RHS(i, j) -
-                               ((P(i + 1, j) - 2.0 * P(i, j) + P(i - 1, j)) * idx2 +
-                                   (P(i, j + 1) - 2.0 * P(i, j) + P(i, j - 1)) * idy2);
-
-                    P(i, j) -= (factor * r);
-                    res += (r * r);
-                }
-                isw = 3 - isw;
-            }
-            jsw = 3 - jsw;
-        }
-
-        for (int i = 1; i < imax + 1; i++) {
-            P(i, 0)        = P(i, 1);
-            P(i, jmax + 1) = P(i, jmax);
-        }
-
-        for (int j = 1; j < jmax + 1; j++) {
-            P(0, j)        = P(1, j);
-            P(imax + 1, j) = P(imax, j);
-        }
-
-        res = res / (double)(imax * jmax);
-#ifdef DEBUG
-        printf("%d Residuum: %e\n", it, res);
-#endif
-        it++;
-    }
-
-#ifdef VERBOSE
-    printf("Solver took %d iterations to reach %f\n", it, sqrt(res));
-#endif
-}
-
-static double maxElement(Solver* solver, double* m)
-{
-    int size      = (solver->imax + 2) * (solver->jmax + 2);
-    double maxval = DBL_MIN;
-
-    for (int i = 0; i < size; i++) {
-        maxval = MAX(maxval, fabs(m[i]));
-    }
-
-    return maxval;
-}
-
-void normalizePressure(Solver* solver)
-{
-    int size    = (solver->imax + 2) * (solver->jmax + 2);
-    double* p   = solver->p;
-    double avgP = 0.0;
-
-    for (int i = 0; i < size; i++) {
-        avgP += p[i];
-    }
-    avgP /= size;
-
-    for (int i = 0; i < size; i++) {
-        p[i] = p[i] - avgP;
-    }
-}
-
-void computeTimestep(Solver* solver)
-{
-    double dt   = solver->dtBound;
-    double dx   = solver->dx;
-    double dy   = solver->dy;
-    double umax = maxElement(solver, solver->u);
-    double vmax = maxElement(solver, solver->v);
-
-    if (umax > 0) {
-        dt = (dt > dx / umax) ? dx / umax : dt;
-    }
-    if (vmax > 0) {
-        dt = (dt > dy / vmax) ? dy / vmax : dt;
-    }
-
-    solver->dt = dt * solver->tau;
-}
-
-void setBoundaryConditions(Solver* solver)
-{
-    int imax  = solver->imax;
-    int jmax  = solver->jmax;
-    double* u = solver->u;
-    double* v = solver->v;
-
-    // Left boundary
-    switch (solver->bcLeft) {
-    case NOSLIP:
-        for (int j = 1; j < jmax + 1; j++) {
-            U(0, j) = 0.0;
-            V(0, j) = -V(1, j);
-        }
-        break;
-    case SLIP:
-        for (int j = 1; j < jmax + 1; j++) {
-            U(0, j) = 0.0;
-            V(0, j) = V(1, j);
-        }
-        break;
-    case OUTFLOW:
-        for (int j = 1; j < jmax + 1; j++) {
-            U(0, j) = U(1, j);
-            V(0, j) = V(1, j);
-        }
-        break;
-    case PERIODIC:
-        break;
-    }
-
-    // Right boundary
-    switch (solver->bcRight) {
-    case NOSLIP:
-        for (int j = 1; j < jmax + 1; j++) {
-            U(imax, j)     = 0.0;
-            V(imax + 1, j) = -V(imax, j);
-        }
-        break;
-    case SLIP:
-        for (int j = 1; j < jmax + 1; j++) {
-            U(imax, j)     = 0.0;
-            V(imax + 1, j) = V(imax, j);
-        }
-        break;
-    case OUTFLOW:
-        for (int j = 1; j < jmax + 1; j++) {
-            U(imax, j)     = U(imax - 1, j);
-            V(imax + 1, j) = V(imax, j);
-        }
-        break;
-    case PERIODIC:
-        break;
-    }
-
-    // Bottom boundary
-    switch (solver->bcBottom) {
-    case NOSLIP:
-        for (int i = 1; i < imax + 1; i++) {
-            V(i, 0) = 0.0;
-            U(i, 0) = -U(i, 1);
-        }
-        break;
-    case SLIP:
-        for (int i = 1; i < imax + 1; i++) {
-            V(i, 0) = 0.0;
-            U(i, 0) = U(i, 1);
-        }
-        break;
-    case OUTFLOW:
-        for (int i = 1; i < imax + 1; i++) {
-            U(i, 0) = U(i, 1);
-            V(i, 0) = V(i, 1);
-        }
-        break;
-    case PERIODIC:
-        break;
-    }
-
-    // Top boundary
-    switch (solver->bcTop) {
-    case NOSLIP:
-        for (int i = 1; i < imax + 1; i++) {
-            V(i, jmax)     = 0.0;
-            U(i, jmax + 1) = -U(i, jmax);
-        }
-        break;
-    case SLIP:
-        for (int i = 1; i < imax + 1; i++) {
-            V(i, jmax)     = 0.0;
-            U(i, jmax + 1) = U(i, jmax);
-        }
-        break;
-    case OUTFLOW:
-        for (int i = 1; i < imax + 1; i++) {
-            U(i, jmax + 1) = U(i, jmax);
-            V(i, jmax)     = V(i, jmax - 1);
-        }
-        break;
-    case PERIODIC:
-        break;
-    }
-}
-
-void setSpecialBoundaryCondition(Solver* solver)
-{
-    int imax   = solver->imax;
-    int jmax   = solver->jmax;
-    double mDy = solver->dy;
-    double* u  = solver->u;
-
-    if (strcmp(solver->problem, "dcavity") == 0) {
-        for (int i = 1; i < imax; i++) {
-            U(i, jmax + 1) = 2.0 - U(i, jmax);
-        }
-    } else if (strcmp(solver->problem, "canal") == 0) {
-        double ylength = solver->ylength;
-        double y;
-
-        for (int j = 1; j < jmax + 1; j++) {
-            y       = mDy * (j - 0.5);
-            U(0, j) = y * (ylength - y) * 4.0 / (ylength * ylength);
-        }
-    }
-}
-
-void computeFG(Solver* solver)
-{
-    double* u        = solver->u;
-    double* v        = solver->v;
-    double* f        = solver->f;
-    double* g        = solver->g;
-    int imax         = solver->imax;
-    int jmax         = solver->jmax;
-    double gx        = solver->gx;
-    double gy        = solver->gy;
-    double gamma     = solver->gamma;
-    double dt        = solver->dt;
-    double inverseRe = 1.0 / solver->re;
-    double inverseDx = 1.0 / solver->dx;
-    double inverseDy = 1.0 / solver->dy;
-    double du2dx, dv2dy, duvdx, duvdy;
-    double du2dx2, du2dy2, dv2dx2, dv2dy2;
-
-    for (int j = 1; j < jmax + 1; j++) {
-        for (int i = 1; i < imax + 1; i++) {
-            du2dx = inverseDx * 0.25 *
-                        ((U(i, j) + U(i + 1, j)) * (U(i, j) + U(i + 1, j)) -
-                            (U(i, j) + U(i - 1, j)) * (U(i, j) + U(i - 1, j))) +
-                    gamma * inverseDx * 0.25 *
-                        (fabs(U(i, j) + U(i + 1, j)) * (U(i, j) - U(i + 1, j)) +
-                            fabs(U(i, j) + U(i - 1, j)) * (U(i, j) - U(i - 1, j)));
-
-            duvdy = inverseDy * 0.25 *
-                        ((V(i, j) + V(i + 1, j)) * (U(i, j) + U(i, j + 1)) -
-                            (V(i, j - 1) + V(i + 1, j - 1)) * (U(i, j) + U(i, j - 1))) +
-                    gamma * inverseDy * 0.25 *
-                        (fabs(V(i, j) + V(i + 1, j)) * (U(i, j) - U(i, j + 1)) +
-                            fabs(V(i, j - 1) + V(i + 1, j - 1)) *
-                                (U(i, j) - U(i, j - 1)));
-
-            du2dx2  = inverseDx * inverseDx * (U(i + 1, j) - 2.0 * U(i, j) + U(i - 1, j));
-            du2dy2  = inverseDy * inverseDy * (U(i, j + 1) - 2.0 * U(i, j) + U(i, j - 1));
-            F(i, j) = U(i, j) + dt * (inverseRe * (du2dx2 + du2dy2) - du2dx - duvdy + gx);
-
-            duvdx = inverseDx * 0.25 *
-                        ((U(i, j) + U(i, j + 1)) * (V(i, j) + V(i + 1, j)) -
-                            (U(i - 1, j) + U(i - 1, j + 1)) * (V(i, j) + V(i - 1, j))) +
-                    gamma * inverseDx * 0.25 *
-                        (fabs(U(i, j) + U(i, j + 1)) * (V(i, j) - V(i + 1, j)) +
-                            fabs(U(i - 1, j) + U(i - 1, j + 1)) *
-                                (V(i, j) - V(i - 1, j)));
-
-            dv2dy = inverseDy * 0.25 *
-                        ((V(i, j) + V(i, j + 1)) * (V(i, j) + V(i, j + 1)) -
-                            (V(i, j) + V(i, j - 1)) * (V(i, j) + V(i, j - 1))) +
-                    gamma * inverseDy * 0.25 *
-                        (fabs(V(i, j) + V(i, j + 1)) * (V(i, j) - V(i, j + 1)) +
-                            fabs(V(i, j) + V(i, j - 1)) * (V(i, j) - V(i, j - 1)));
-
-            dv2dx2  = inverseDx * inverseDx * (V(i + 1, j) - 2.0 * V(i, j) + V(i - 1, j));
-            dv2dy2  = inverseDy * inverseDy * (V(i, j + 1) - 2.0 * V(i, j) + V(i, j - 1));
-            G(i, j) = V(i, j) + dt * (inverseRe * (dv2dx2 + dv2dy2) - duvdx - dv2dy + gy);
-        }
-    }
-
-    /* ---------------------- boundary of F --------------------------- */
-    for (int j = 1; j < jmax + 1; j++) {
-        F(0, j)    = U(0, j);
-        F(imax, j) = U(imax, j);
-    }
-
-    /* ---------------------- boundary of G --------------------------- */
-    for (int i = 1; i < imax + 1; i++) {
-        G(i, 0)    = V(i, 0);
-        G(i, jmax) = V(i, jmax);
-    }
-}
-
-void adaptUV(Solver* solver)
-{
-    int imax       = solver->imax;
-    int jmax       = solver->jmax;
-    double* p      = solver->p;
-    double* u      = solver->u;
-    double* v      = solver->v;
-    double* f      = solver->f;
-    double* g      = solver->g;
-    double factorX = solver->dt / solver->dx;
-    double factorY = solver->dt / solver->dy;
-
-    for (int j = 1; j < jmax + 1; j++) {
-        for (int i = 1; i < imax + 1; i++) {
-            U(i, j) = F(i, j) - (P(i + 1, j) - P(i, j)) * factorX;
-            V(i, j) = G(i, j) - (P(i, j + 1) - P(i, j)) * factorY;
-        }
-    }
-}
-
-void writeResult(Solver* solver)
-{
-    int imax  = solver->imax;
-    int jmax  = solver->jmax;
-    double dx = solver->dx;
-    double dy = solver->dy;
-    double* p = solver->p;
-    double* u = solver->u;
-    double* v = solver->v;
-    double x = 0.0, y = 0.0;
-
-    FILE* fp;
-    fp = fopen("pressure.dat", "w");
-
-    if (fp == NULL) {
-        printf("Error!\n");
-        exit(EXIT_FAILURE);
-    }
-
-    for (int j = 1; j < jmax + 1; j++) {
-        y = (double)(j - 0.5) * dy;
-        for (int i = 1; i < imax + 1; i++) {
-            x = (double)(i - 0.5) * dx;
-            fprintf(fp, "%.2f %.2f %f\n", x, y, P(i, j));
-        }
-        fprintf(fp, "\n");
-    }
-
-    fclose(fp);
-
-    fp = fopen("velocity.dat", "w");
-
-    if (fp == NULL) {
-        printf("Error!\n");
-        exit(EXIT_FAILURE);
-    }
-
-    for (int j = 1; j < jmax + 1; j++) {
-        y = dy * (j - 0.5);
-        for (int i = 1; i < imax + 1; i++) {
-            x            = dx * (i - 0.5);
-            double vel_u = (U(i, j) + U(i - 1, j)) / 2.0;
-            double vel_v = (V(i, j) + V(i, j - 1)) / 2.0;
-            double len   = sqrt((vel_u * vel_u) + (vel_v * vel_v));
-            fprintf(fp, "%.2f %.2f %f %f %f\n", x, y, vel_u, vel_v, len);
-        }
-    }
-
-    fclose(fp);
-}
--- a/BasicSolver/2D-seq/src/solver.h
+++ b/BasicSolver/2D-seq/src/solver.h
@@ -1,46 +1,27 @@
 /*
- * Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
+ * Copyright (C)  NHR@FAU, University Erlangen-Nuremberg.
 * All rights reserved. This file is part of nusif-solver.
 * Use of this source code is governed by a MIT style
 * license that can be found in the LICENSE file.
 */
 #ifndef __SOLVER_H_
 #define __SOLVER_H_
+#include "discretization.h"
+#include "grid.h"
 #include "parameter.h"

-enum BC { NOSLIP = 1, SLIP, OUTFLOW, PERIODIC };
-
+/*
+ * State of the pressure solver. The grid itself is not stored here, only a
+ * pointer to it; the remaining members are iteration controls and, for the
+ * multigrid variant, the per-level work arrays.
+ */
 typedef struct {
    /* geometry and grid information */
-    double dx, dy;
-    int imax, jmax;
-    double xlength, ylength;
-    /* arrays */
-    double *p, *rhs;
-    double *f, *g;
-    double *u, *v;
+    Grid* grid; /* set by initSolver to the grid inside the Discretization */
    /* parameters */
-    double eps, omega;
-    double re, tau, gamma;
-    double gx, gy;
-    /* time stepping */
+    /* eps: stopping tolerance (compared squared against the residual);
+     * omega: relaxation factor;
+     * rho: NOTE(review): not assigned by any initSolver visible in this
+     * change - confirm where it is set before relying on it. */
+    double eps, omega, rho;
    int itermax;
-    double dt, te;
-    double dtBound;
-    char* problem;
-    int bcLeft, bcRight, bcBottom, bcTop;
+    int levels; /* multigrid only: number of levels */
+    double **r, **e; /* multigrid only: one array per level, allocated in initSolver */
+        int presmooth, postsmooth; /* multigrid only: smoothing sweeps before / after the coarse-level call */
 } Solver;

-extern void initSolver(Solver*, Parameter*);
-extern void computeRHS(Solver*);
-extern void solve(Solver*);
-extern void solveRB(Solver*);
-extern void solveRBA(Solver*);
-extern void normalizePressure(Solver*);
-extern void computeTimestep(Solver*);
-extern void setBoundaryConditions(Solver*);
-extern void setSpecialBoundaryCondition(Solver*);
-extern void computeFG(Solver*);
-extern void adaptUV(Solver*);
-extern void writeResult(Solver*);
+/* Fill the Solver from the Parameter set and attach the grid of the
+ * Discretization. Each solver variant (SOR, red-black, multigrid) provides
+ * its own definition. */
+extern void initSolver(Solver*, Discretization*, Parameter*);
+/* Run the pressure solver. Second argument: pressure array, updated in
+ * place. Third argument: right-hand side array. Returns the residual value
+ * reported by the selected solver variant. */
+extern double solve(Solver*, double*, double*);
+
 #endif
--- a/Show More
+++ b/Show More