From 312af6f66331fd99ffdbf54399b005093ca7d87b Mon Sep 17 00:00:00 2001
From: Aditya ujeniya <aditya.ujeniya@fau.de>
Date: Wed, 24 Jul 2024 12:04:14 +0200
Subject: [PATCH] 2D MPI enhanced solver

---
 BasicSolver/2D-mpi/src/comm-v2.c              |   2 -
 BasicSolver/2D-mpi/src/solver-mg.c            |   2 +-
 EnhancedSolver/2D-mpi/Makefile                |  87 +++
 EnhancedSolver/2D-mpi/README.md               |  48 ++
 EnhancedSolver/2D-mpi/backstep.par            |  79 ++
 EnhancedSolver/2D-mpi/canal.par               |  78 ++
 EnhancedSolver/2D-mpi/config.mk               |  17 +
 EnhancedSolver/2D-mpi/dcavity.par             |  79 ++
 EnhancedSolver/2D-mpi/include_CLANG.mk        |  21 +
 EnhancedSolver/2D-mpi/include_GCC.mk          |  20 +
 EnhancedSolver/2D-mpi/include_ICC.mk          |  20 +
 EnhancedSolver/2D-mpi/karman.par              |  79 ++
 EnhancedSolver/2D-mpi/src/affinity.c          |  61 ++
 EnhancedSolver/2D-mpi/src/affinity.h          |  14 +
 EnhancedSolver/2D-mpi/src/allocate.c          |  38 +
 EnhancedSolver/2D-mpi/src/allocate.h          |  13 +
 EnhancedSolver/2D-mpi/src/comm-v1.c           | 204 +++++
 EnhancedSolver/2D-mpi/src/comm-v2.c           | 335 ++++++++
 EnhancedSolver/2D-mpi/src/comm-v3.c           | 316 ++++++++
 EnhancedSolver/2D-mpi/src/comm.c              | 129 ++++
 EnhancedSolver/2D-mpi/src/comm.h              |  57 ++
 EnhancedSolver/2D-mpi/src/discretization.c    | 718 ++++++++++++++++++
 EnhancedSolver/2D-mpi/src/discretization.h    |  66 ++
 EnhancedSolver/2D-mpi/src/grid.h              |  17 +
 EnhancedSolver/2D-mpi/src/likwid-marker.h     |  54 ++
 EnhancedSolver/2D-mpi/src/main.c              | 120 +++
 EnhancedSolver/2D-mpi/src/parameter.c         | 143 ++++
 EnhancedSolver/2D-mpi/src/parameter.h         |  38 +
 EnhancedSolver/2D-mpi/src/particletracing.c   | 612 +++++++++++++++
 EnhancedSolver/2D-mpi/src/particletracing.h   |  64 ++
 EnhancedSolver/2D-mpi/src/progress.c          |  51 ++
 EnhancedSolver/2D-mpi/src/progress.h          |  14 +
 EnhancedSolver/2D-mpi/src/solver-mg.c         | 324 ++++++++
 EnhancedSolver/2D-mpi/src/solver-rb.c         | 129 ++++
 EnhancedSolver/2D-mpi/src/solver.h            |  30 +
 EnhancedSolver/2D-mpi/src/timing.c            |  22 +
 EnhancedSolver/2D-mpi/src/timing.h            |  13 +
 EnhancedSolver/2D-mpi/src/util.h              |  30 +
 EnhancedSolver/2D-mpi/surface.plot            |   7 +
 EnhancedSolver/2D-mpi/vector.plot             |   6 +
 EnhancedSolver/2D-mpi/vis_files/animate.plot  |  10 +
 .../2D-mpi/vis_files/backstep_animate.plot    |  14 +
 .../vis_files/canal_animate.plot}             |   0
 .../2D-mpi/vis_files/karman_animate.plot      |  13 +
 EnhancedSolver/2D-seq/Makefile                |  12 +
 EnhancedSolver/2D-seq/backstep.par            |  11 +-
 EnhancedSolver/2D-seq/canal.par               |   8 +-
 EnhancedSolver/2D-seq/config.mk               |   2 +-
 EnhancedSolver/2D-seq/dcavity.par             |   8 +-
 EnhancedSolver/2D-seq/karman.par              |  10 +-
 EnhancedSolver/2D-seq/src/particletracing.c   |   2 +-
 .../2D-seq/vis_files/canal_animate.plot       |  10 +
 52 files changed, 4246 insertions(+), 11 deletions(-)
 create mode 100644 EnhancedSolver/2D-mpi/Makefile
 create mode 100644 EnhancedSolver/2D-mpi/README.md
 create mode 100644 EnhancedSolver/2D-mpi/backstep.par
 create mode 100644 EnhancedSolver/2D-mpi/canal.par
 create mode 100644 EnhancedSolver/2D-mpi/config.mk
 create mode 100644 EnhancedSolver/2D-mpi/dcavity.par
 create mode 100644 EnhancedSolver/2D-mpi/include_CLANG.mk
 create mode 100644 EnhancedSolver/2D-mpi/include_GCC.mk
 create mode 100644 EnhancedSolver/2D-mpi/include_ICC.mk
 create mode 100644 EnhancedSolver/2D-mpi/karman.par
 create mode 100644 EnhancedSolver/2D-mpi/src/affinity.c
 create mode 100644 EnhancedSolver/2D-mpi/src/affinity.h
 create mode 100644 EnhancedSolver/2D-mpi/src/allocate.c
 create mode 100644 EnhancedSolver/2D-mpi/src/allocate.h
 create mode 100644 EnhancedSolver/2D-mpi/src/comm-v1.c
 create mode 100644 EnhancedSolver/2D-mpi/src/comm-v2.c
 create mode 100644 EnhancedSolver/2D-mpi/src/comm-v3.c
 create mode 100644 EnhancedSolver/2D-mpi/src/comm.c
 create mode 100644 EnhancedSolver/2D-mpi/src/comm.h
 create mode 100644 EnhancedSolver/2D-mpi/src/discretization.c
 create mode 100644 EnhancedSolver/2D-mpi/src/discretization.h
 create mode 100644 EnhancedSolver/2D-mpi/src/grid.h
 create mode 100644 EnhancedSolver/2D-mpi/src/likwid-marker.h
 create mode 100644 EnhancedSolver/2D-mpi/src/main.c
 create mode 100644 EnhancedSolver/2D-mpi/src/parameter.c
 create mode 100644 EnhancedSolver/2D-mpi/src/parameter.h
 create mode 100644 EnhancedSolver/2D-mpi/src/particletracing.c
 create mode 100644 EnhancedSolver/2D-mpi/src/particletracing.h
 create mode 100644 EnhancedSolver/2D-mpi/src/progress.c
 create mode 100644 EnhancedSolver/2D-mpi/src/progress.h
 create mode 100644 EnhancedSolver/2D-mpi/src/solver-mg.c
 create mode 100644 EnhancedSolver/2D-mpi/src/solver-rb.c
 create mode 100644 EnhancedSolver/2D-mpi/src/solver.h
 create mode 100644 EnhancedSolver/2D-mpi/src/timing.c
 create mode 100644 EnhancedSolver/2D-mpi/src/timing.h
 create mode 100644 EnhancedSolver/2D-mpi/src/util.h
 create mode 100644 EnhancedSolver/2D-mpi/surface.plot
 create mode 100644 EnhancedSolver/2D-mpi/vector.plot
 create mode 100644 EnhancedSolver/2D-mpi/vis_files/animate.plot
 create mode 100644 EnhancedSolver/2D-mpi/vis_files/backstep_animate.plot
 rename EnhancedSolver/{2D-seq/vis_files/canal.plot => 2D-mpi/vis_files/canal_animate.plot} (100%)
 create mode 100644 EnhancedSolver/2D-mpi/vis_files/karman_animate.plot
 create mode 100644 EnhancedSolver/2D-seq/vis_files/canal_animate.plot

diff --git a/BasicSolver/2D-mpi/src/comm-v2.c b/BasicSolver/2D-mpi/src/comm-v2.c
index c0285ca..90d8e43 100644
--- a/BasicSolver/2D-mpi/src/comm-v2.c
+++ b/BasicSolver/2D-mpi/src/comm-v2.c
@@ -288,8 +288,6 @@ void commPartition(Comm* c, int jmax, int imax)
 
 void commUpdateDatatypes(Comm* oldcomm, Comm* newcomm, int imaxLocal, int jmaxLocal)
 {
-    Comm* newcomm;
-
 #if defined _MPI
     newcomm->comm = MPI_COMM_NULL;
     int result = MPI_Comm_dup(oldcomm->comm, &newcomm->comm);
diff --git a/BasicSolver/2D-mpi/src/solver-mg.c b/BasicSolver/2D-mpi/src/solver-mg.c
index b165214..44cd039 100644
--- a/BasicSolver/2D-mpi/src/solver-mg.c
+++ b/BasicSolver/2D-mpi/src/solver-mg.c
@@ -13,7 +13,7 @@
 
 #define FINEST_LEVEL   0
 #define COARSEST_LEVEL (s->levels - 1)
-#define S(i, j)        s[(j) * (imaxLocal + 2) + (i)]
+// #define S(i, j)        s[(j) * (imaxLocal + 2) + (i)]
 #define E(i, j)        e[(j) * (imaxLocal + 2) + (i)]
 #define R(i, j)        r[(j) * (imaxLocal + 2) + (i)]
 #define OLD(i, j)      old[(j) * (imaxLocal + 2) + (i)]
diff --git a/EnhancedSolver/2D-mpi/Makefile b/EnhancedSolver/2D-mpi/Makefile
new file mode 100644
index 0000000..2888d9b
--- /dev/null
+++ b/EnhancedSolver/2D-mpi/Makefile
@@ -0,0 +1,87 @@
+#=======================================================================================
+# Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
+# All rights reserved.
+# Use of this source code is governed by a MIT-style
+# license that can be found in the LICENSE file.
+#=======================================================================================
+
+#CONFIGURE BUILD SYSTEM
+TARGET	   = exe-$(TAG)
+BUILD_DIR  = ./$(TAG)
+SRC_DIR    = ./src
+MAKE_DIR   = ./
+Q         ?= @
+
+#DO NOT EDIT BELOW
+include $(MAKE_DIR)/config.mk
+include $(MAKE_DIR)/include_$(TAG).mk
+INCLUDES  += -I$(SRC_DIR) -I$(BUILD_DIR)
+
+VPATH     = $(SRC_DIR)
+SRC       = $(filter-out $(wildcard $(SRC_DIR)/*-*.c),$(wildcard $(SRC_DIR)/*.c))
+ASM       = $(patsubst $(SRC_DIR)/%.c, $(BUILD_DIR)/%.s, $(SRC))
+OBJ       = $(patsubst $(SRC_DIR)/%.c, $(BUILD_DIR)/%.o, $(SRC))
+OBJ      += $(BUILD_DIR)/comm-$(COMM_TYPE).o
+OBJ      += $(BUILD_DIR)/solver-$(SOLVER).o
+SOURCES   = $(SRC) $(wildcard $(SRC_DIR)/*.h)
+CPPFLAGS := $(CPPFLAGS) $(DEFINES) $(OPTIONS) $(INCLUDES)
+
+${TARGET}: $(BUILD_DIR) $(OBJ)
+	$(info ===>  LINKING  $(TARGET))
+	$(Q)${LINKER} ${LFLAGS} -o $(TARGET) $(OBJ) $(LIBS)
+
+$(BUILD_DIR)/%.o:  %.c $(MAKE_DIR)/include_$(TAG).mk $(MAKE_DIR)/config.mk
+	$(info ===>  COMPILE  $@)
+	$(CC) -c $(CPPFLAGS) $(CFLAGS) $< -o $@
+	$(Q)$(GCC) $(CPPFLAGS) -MT $(@:.d=.o) -MM  $< > $(BUILD_DIR)/$*.d
+
+$(BUILD_DIR)/%.s:  %.c
+	$(info ===>  GENERATE ASM  $@)
+	$(CC) -S $(CPPFLAGS) $(CFLAGS) $< -o $@
+
+.PHONY: clean distclean vis_clean vis tags info asm format
+
+vis:
+	$(info ===>  GENERATE VISUALIZATION)
+	@gnuplot -e "filename='pressure.dat'" ./surface.plot
+	@gnuplot -e "filename='velocity.dat'" ./vector.plot
+
+vis_clean:
+	$(info ===>  CLEAN VISUALIZATION)
+	@rm -f *.dat
+	@rm -f *.png
+	@rm -f ./vis_files/*.dat
+	@rm -f ./vis_files/*.gif
+
+clean: vis_clean
+	$(info ===>  CLEAN)
+	@rm -rf $(BUILD_DIR)
+	@rm -f tags
+
+distclean: clean
+	$(info ===>  DIST CLEAN)
+	@rm -f $(TARGET)
+	@rm -f *.dat
+	@rm -f *.png
+
+info:
+	$(info $(CFLAGS))
+	$(Q)$(CC) $(VERSION)
+
+asm:  $(BUILD_DIR) $(ASM)
+
+tags:
+	$(info ===>  GENERATE TAGS)
+	$(Q)ctags -R
+
+format:
+	@for src in $(SOURCES) ; do \
+		echo "Formatting $$src" ; \
+		clang-format -i $$src ; \
+	done
+	@echo "Done"
+
+$(BUILD_DIR):
+	@mkdir $(BUILD_DIR)
+
+-include $(OBJ:.o=.d)
diff --git a/EnhancedSolver/2D-mpi/README.md b/EnhancedSolver/2D-mpi/README.md
new file mode 100644
index 0000000..b0a80a6
--- /dev/null
+++ b/EnhancedSolver/2D-mpi/README.md
@@ -0,0 +1,48 @@
+# C source skeleton
+
+## Build
+
+1. Configure the toolchain and additional options in `config.mk`:
+```
+# Supported: GCC, CLANG, ICC
+TAG ?= GCC
+ENABLE_OPENMP ?= false
+
+OPTIONS +=  -DARRAY_ALIGNMENT=64
+#OPTIONS +=  -DVERBOSE_AFFINITY
+#OPTIONS +=  -DVERBOSE_DATASIZE
+#OPTIONS +=  -DVERBOSE_TIMER
+```
+
+The verbosity options enable detailed output about affinity settings, allocation sizes and timer resolution.
+
+
+2. Build with:
+```
+make
+```
+
+You can build multiple toolchains in the same directory, but notice that the Makefile is only acting on the one currently set.
+Intermediate build results are located in the `<TOOLCHAIN>` directory.
+
+To output the executed commands use:
+```
+make Q=
+```
+
+3. Clean up with:
+```
+make clean
+```
+to clean intermediate build results.
+
+```
+make distclean
+```
+to clean intermediate build results and binary.
+
+4. (Optional) Generate assembler:
+```
+make asm
+```
+The assembler files will also be located in the `<TOOLCHAIN>` directory.
diff --git a/EnhancedSolver/2D-mpi/backstep.par b/EnhancedSolver/2D-mpi/backstep.par
new file mode 100644
index 0000000..1e8b5fa
--- /dev/null
+++ b/EnhancedSolver/2D-mpi/backstep.par
@@ -0,0 +1,79 @@
+#==============================================================================
+#                            Laminar Canal Flow
+#==============================================================================
+
+# Problem specific Data:
+# ---------------------
+
+name backstep             # name of flow setup
+
+bcTop      1			#  flags for boundary conditions
+bcBottom   1			#  1 = no-slip      3 = outflow
+bcLeft     3			#  2 = free-slip    4 = periodic
+bcRight    3			#
+
+gx    0.0      # Body forces (e.g. gravity)
+gy    0.0      #
+
+re    36000.0	   # Reynolds number
+
+u_init    1.0      # initial value for velocity in x-direction
+v_init    0.0      # initial value for velocity in y-direction
+p_init    1.0      # initial value for pressure
+
+# Geometry Data:
+# -------------
+
+xlength    7.0     # domain size in x-direction
+ylength    1.5	   # domain size in y-direction
+imax       210     # number of interior cells in x-direction
+jmax       45	   # number of interior cells in y-direction
+
+# Time Data:
+# ---------
+
+te      60.0    # final time
+dt      0.02    # time stepsize
+tau     0.5     # safety factor for time stepsize control (<0 constant delt)
+
+# Pressure Iteration Data:
+# -----------------------
+
+itermax  500       # maximal number of pressure iteration in one time step
+eps      0.0001    # stopping tolerance for pressure iteration
+rho      0.52
+omg      1.8       # relaxation parameter for SOR iteration
+gamma    0.9       # upwind differencing factor gamma
+
+# Multigrid data:
+# ---------
+
+levels        3         # Multigrid levels
+presmooth     15        # Pre-smoothning iterations
+postsmooth    5         # Post-smoothning iterations
+
+# Particle Tracing Data:
+# -----------------------
+
+numberOfParticles   200
+startTime           0     #if you want to see particles trapped in recirculation zone, startTime should be set to 0 
+injectTimePeriod    1.0
+writeTimePeriod     0.5
+
+x1                  0.0
+y1                  0.5
+x2                  0.0
+y2                  1.5
+
+# Obstacle Geometry Data:
+# -----------------------
+# Shape 0 disable, 1 Rectangle/Square, 2 Circle
+
+shape               1 
+xCenter             0.0
+yCenter             0.0
+xRectLength         2.0
+yRectLength         1.0
+circleRadius        1.0
+
+#===============================================================================
diff --git a/EnhancedSolver/2D-mpi/canal.par b/EnhancedSolver/2D-mpi/canal.par
new file mode 100644
index 0000000..0feb7a9
--- /dev/null
+++ b/EnhancedSolver/2D-mpi/canal.par
@@ -0,0 +1,78 @@
+#==============================================================================
+#                            Laminar Canal Flow
+#==============================================================================
+
+# Problem specific Data:
+# ---------------------
+
+name canal             # name of flow setup
+
+bcTop      1			#  flags for boundary conditions
+bcBottom   1			#  1 = no-slip      3 = outflow
+bcLeft     3			#  2 = free-slip    4 = periodic
+bcRight    3			#
+
+gx     0.0      # Body forces (e.g. gravity)
+gy     0.0      #
+
+re            100.0	   # Reynolds number
+
+u_init        1.0      # initial value for velocity in x-direction
+v_init        0.0      # initial value for velocity in y-direction
+p_init        0.0      # initial value for pressure
+
+# Geometry Data:
+# -------------
+
+xlength       30.0     # domain size in x-direction
+ylength       4.0	   # domain size in y-direction
+imax          200      # number of interior cells in x-direction
+jmax          40	   # number of interior cells in y-direction
+
+# Time Data:
+# ---------
+
+te       60.0   # final time
+dt       0.02    # time stepsize
+tau      0.5     # safety factor for time stepsize control (<0 constant delt)
+
+# Multigrid data:
+# ---------
+
+levels        2         # Multigrid levels
+presmooth     5         # Pre-smoothning iterations
+postsmooth    5         # Post-smoothning iterations
+
+# Pressure Iteration Data:
+# -----------------------
+
+itermax       500       # maximal number of pressure iteration in one time step
+eps           0.00001   # stopping tolerance for pressure iteration
+omg           1.8       # relaxation parameter for SOR iteration
+gamma         0.9       # upwind differencing factor gamma
+
+# Particle Tracing Data:
+# -----------------------
+
+numberOfParticles   60
+startTime           10.0
+injectTimePeriod    4.0
+writeTimePeriod     1.0
+
+x1                  1.0
+y1                  0.0
+x2                  1.0
+y2                  4.0
+
+# Obstacle Geometry Data:
+# -----------------------
+# Shape 0 disable, 1 Rectangle/Square, 2 Circle
+
+shape               0 
+xCenter             10.0
+yCenter             2
+xRectLength         6.0
+yRectLength         1.0
+circleRadius        1.0
+
+#===============================================================================
diff --git a/EnhancedSolver/2D-mpi/config.mk b/EnhancedSolver/2D-mpi/config.mk
new file mode 100644
index 0000000..0b7938e
--- /dev/null
+++ b/EnhancedSolver/2D-mpi/config.mk
@@ -0,0 +1,17 @@
+# Supported: GCC, CLANG, ICC
+TAG ?= ICC
+# Supported: true, false
+ENABLE_MPI ?= true
+ENABLE_OPENMP ?= false
+# Supported: rb, mg
+SOLVER ?= mg
+# Run in debug settings ?= mg
+COMM_TYPE ?= v3
+
+#Feature options
+OPTIONS +=  -DARRAY_ALIGNMENT=64
+OPTIONS +=  -DVERBOSE
+# OPTIONS +=  -DTEST
+#OPTIONS +=  -DVERBOSE_AFFINITY
+#OPTIONS +=  -DVERBOSE_DATASIZE
+#OPTIONS +=  -DVERBOSE_TIMER
diff --git a/EnhancedSolver/2D-mpi/dcavity.par b/EnhancedSolver/2D-mpi/dcavity.par
new file mode 100644
index 0000000..dd092f5
--- /dev/null
+++ b/EnhancedSolver/2D-mpi/dcavity.par
@@ -0,0 +1,79 @@
+#==============================================================================
+#                              Driven Cavity
+#==============================================================================
+
+# Problem specific Data:
+# ---------------------
+
+name dcavity        # name of flow setup
+
+bcTop      1			#  flags for boundary conditions
+bcBottom   1			#  1 = no-slip      3 = outflow
+bcLeft     1			#  2 = free-slip    4 = periodic
+bcRight    1			#
+
+gx    0.0			# Body forces (e.g. gravity)
+gy    0.0			#
+
+re    10.0		    # Reynolds number
+
+u_init    1.0		# initial value for velocity in x-direction
+v_init    0.0		# initial value for velocity in y-direction
+p_init    0.0		# initial value for pressure
+
+# Geometry Data:
+# -------------
+
+xlength    1.0		# domain size in x-direction
+ylength    1.0		# domain size in y-direction
+imax       128		# number of interior cells in x-direction
+jmax       128		# number of interior cells in y-direction
+
+# Time Data:
+# ---------
+
+te      10.0		# final time
+dt      0.02	    # time stepsize
+tau     0.5	    	# safety factor for time stepsize control (<0 constant delt)
+
+# Pressure Iteration Data:
+# -----------------------
+
+itermax  1000		# maximal number of pressure iteration in one time step
+eps      0.001		# stopping tolerance for pressure iteration
+rho      0.5
+omg      1.8		# relaxation parameter for SOR iteration
+gamma    0.9		# upwind differencing factor gamma
+
+# Multigrid data:
+# ---------
+
+levels        3         # Multigrid levels
+presmooth     10        # Pre-smoothning iterations
+postsmooth    5         # Post-smoothning iterations
+
+# Particle Tracing Data:
+# -----------------------
+
+numberOfParticles   200
+startTime           2.0
+injectTimePeriod    0.5
+writeTimePeriod     0.2
+
+x1                  0.1
+y1                  0.9
+x2                  0.9
+y2                  0.9
+
+
+# Obstacle Geometry Data:
+# -----------------------
+# Shape 0 disable, 1 Rectangle/Square, 2 Circle
+
+shape               0 
+xCenter             0.5
+yCenter             0.5
+xRectLength         0.5
+yRectLength         0.5
+circleRadius        0.5
+#===============================================================================
diff --git a/EnhancedSolver/2D-mpi/include_CLANG.mk b/EnhancedSolver/2D-mpi/include_CLANG.mk
new file mode 100644
index 0000000..3641cad
--- /dev/null
+++ b/EnhancedSolver/2D-mpi/include_CLANG.mk
@@ -0,0 +1,21 @@
+ifeq ($(ENABLE_MPI),true)
+CC   = mpicc
+DEFINES  = -D_MPI
+else
+CC   = cc
+endif
+
+GCC  = cc
+LINKER = $(CC)
+
+ifeq ($(ENABLE_OPENMP),true)
+OPENMP   = -fopenmp
+#OPENMP   = -Xpreprocessor -fopenmp #required on Macos with homebrew libomp
+LIBS     = # -lomp
+endif
+
+VERSION  = --version
+CFLAGS   = -Ofast -std=c17
+LFLAGS   = $(OPENMP) -lm
+DEFINES  += -D_GNU_SOURCE# -DDEBUG
+INCLUDES = -I/opt/homebrew/include
diff --git a/EnhancedSolver/2D-mpi/include_GCC.mk b/EnhancedSolver/2D-mpi/include_GCC.mk
new file mode 100644
index 0000000..cf1e2aa
--- /dev/null
+++ b/EnhancedSolver/2D-mpi/include_GCC.mk
@@ -0,0 +1,20 @@
+ifeq ($(ENABLE_MPI),true)
+CC   = mpicc
+DEFINES  = -D_MPI
+else
+CC   = gcc
+endif
+
+GCC  = gcc
+LINKER = $(CC)
+
+ifeq ($(ENABLE_OPENMP),true)
+OPENMP   = -fopenmp
+endif
+
+VERSION  = --version
+CFLAGS   = -Ofast -ffreestanding -std=c99 $(OPENMP)
+LFLAGS   = $(OPENMP)
+DEFINES  += -D_GNU_SOURCE
+INCLUDES =
+LIBS     =
diff --git a/EnhancedSolver/2D-mpi/include_ICC.mk b/EnhancedSolver/2D-mpi/include_ICC.mk
new file mode 100644
index 0000000..6bedf55
--- /dev/null
+++ b/EnhancedSolver/2D-mpi/include_ICC.mk
@@ -0,0 +1,20 @@
+ifeq ($(ENABLE_MPI),true)
+CC   = mpiicc
+DEFINES  = -D_MPI
+else
+CC = icc
+endif
+
+GCC  = gcc
+LINKER = $(CC)
+
+ifeq ($(ENABLE_OPENMP),true)
+OPENMP   = -qopenmp
+endif
+
+VERSION  = --version
+CFLAGS   =  -O3 -xHost -qopt-zmm-usage=high -std=c99 $(OPENMP)
+LFLAGS   = $(OPENMP)
+DEFINES  += -D_GNU_SOURCE# -DDEBUG
+INCLUDES =
+LIBS     =
diff --git a/EnhancedSolver/2D-mpi/karman.par b/EnhancedSolver/2D-mpi/karman.par
new file mode 100644
index 0000000..a22eb34
--- /dev/null
+++ b/EnhancedSolver/2D-mpi/karman.par
@@ -0,0 +1,79 @@
+#==============================================================================
+#                            Laminar Canal Flow
+#==============================================================================
+
+# Problem specific Data:
+# ---------------------
+
+name karman             # name of flow setup
+
+bcTop      1			#  flags for boundary conditions
+bcBottom   1			#  1 = no-slip      3 = outflow
+bcLeft     3			#  2 = free-slip    4 = periodic
+bcRight    3			#
+
+gx    0.0      # Body forces (e.g. gravity)
+gy    0.0      #
+
+re    5050.0	   # Reynolds number
+
+u_init    1.0      # initial value for velocity in x-direction
+v_init    0.0      # initial value for velocity in y-direction
+p_init    0.0      # initial value for pressure
+
+# Geometry Data:
+# -------------
+
+xlength    30.0     # domain size in x-direction
+ylength    8.0	   # domain size in y-direction
+imax       400      # number of interior cells in x-direction
+jmax       200	   # number of interior cells in y-direction
+
+# Time Data:
+# ---------
+
+te      150.0   # final time
+dt      0.02    # time stepsize
+tau     0.5     # safety factor for time stepsize control (<0 constant delt)
+
+# Pressure Iteration Data:
+# -----------------------
+
+itermax  200       # maximal number of pressure iteration in one time step
+eps      0.001   # stopping tolerance for pressure iteration
+rho      0.52
+omg      1.75      # relaxation parameter for SOR iteration
+gamma    0.9       # upwind differencing factor gamma
+
+# Multigrid data:
+# ---------
+
+levels        3         # Multigrid levels
+presmooth     15         # Pre-smoothning iterations
+postsmooth    5         # Post-smoothning iterations
+
+# Particle Tracing Data:
+# -----------------------
+
+numberOfParticles   200
+startTime           50
+injectTimePeriod    1.0
+writeTimePeriod     0.5
+
+x1                  0.0
+y1                  3.8
+x2                  0.0
+y2                  4.1
+
+# Obstacle Geometry Data:
+# -----------------------
+# Shape 0 disable, 1 Rectangle/Square, 2 Circle
+
+shape               2 
+xCenter             5.0
+yCenter             4.0
+xRectLength         2.0
+yRectLength         1.0
+circleRadius        1.0
+
+#===============================================================================
diff --git a/EnhancedSolver/2D-mpi/src/affinity.c b/EnhancedSolver/2D-mpi/src/affinity.c
new file mode 100644
index 0000000..ef8355a
--- /dev/null
+++ b/EnhancedSolver/2D-mpi/src/affinity.c
@@ -0,0 +1,61 @@
+/*
+ * Copyright (C)  NHR@FAU, University Erlangen-Nuremberg.
+ * All rights reserved.
+ * Use of this source code is governed by a MIT-style
+ * license that can be found in the LICENSE file.
+ */
+#ifdef __linux__
+#ifdef _OPENMP
+#include <pthread.h>
+#include <sched.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/syscall.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#define MAX_NUM_THREADS 128
+#define gettid()        syscall(SYS_gettid)
+
+static int getProcessorID(cpu_set_t* cpu_set)
+{
+    int processorId;
+
+    for (processorId = 0; processorId < MAX_NUM_THREADS; processorId++) {
+        if (CPU_ISSET(processorId, cpu_set)) {
+            break;
+        }
+    }
+    return processorId;
+}
+
+int affinity_getProcessorId()
+{
+    cpu_set_t cpu_set;
+    CPU_ZERO(&cpu_set);
+    sched_getaffinity(gettid(), sizeof(cpu_set_t), &cpu_set);
+
+    return getProcessorID(&cpu_set);
+}
+
+void affinity_pinThread(int processorId)
+{
+    cpu_set_t cpuset;
+    pthread_t thread;
+
+    thread = pthread_self();
+    CPU_ZERO(&cpuset);
+    CPU_SET(processorId, &cpuset);
+    pthread_setaffinity_np(thread, sizeof(cpu_set_t), &cpuset);
+}
+
+void affinity_pinProcess(int processorId)
+{
+    cpu_set_t cpuset;
+
+    CPU_ZERO(&cpuset);
+    CPU_SET(processorId, &cpuset);
+    sched_setaffinity(0, sizeof(cpu_set_t), &cpuset);
+}
+#endif /*_OPENMP*/
+#endif /*__linux__*/
diff --git a/EnhancedSolver/2D-mpi/src/affinity.h b/EnhancedSolver/2D-mpi/src/affinity.h
new file mode 100644
index 0000000..84bf733
--- /dev/null
+++ b/EnhancedSolver/2D-mpi/src/affinity.h
@@ -0,0 +1,14 @@
+/*
+ * Copyright (C)  NHR@FAU, University Erlangen-Nuremberg.
+ * All rights reserved.
+ * Use of this source code is governed by a MIT-style
+ * license that can be found in the LICENSE file.
+ */
+#ifndef AFFINITY_H
+#define AFFINITY_H
+
+extern int affinity_getProcessorId();
+extern void affinity_pinProcess(int);
+extern void affinity_pinThread(int);
+
+#endif /*AFFINITY_H*/
diff --git a/EnhancedSolver/2D-mpi/src/allocate.c b/EnhancedSolver/2D-mpi/src/allocate.c
new file mode 100644
index 0000000..cf2efd6
--- /dev/null
+++ b/EnhancedSolver/2D-mpi/src/allocate.c
@@ -0,0 +1,38 @@
+/*
+ * Copyright (C)  NHR@FAU, University Erlangen-Nuremberg.
+ * All rights reserved.
+ * Use of this source code is governed by a MIT-style
+ * license that can be found in the LICENSE file.
+ */
+#include <errno.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "allocate.h"
+
+void* allocate(size_t alignment, size_t bytesize)
+{
+    int errorCode;
+    void* ptr;
+
+    errorCode = posix_memalign(&ptr, alignment, bytesize);
+
+    if (errorCode) {
+        if (errorCode == EINVAL) {
+            fprintf(stderr, "Error: Alignment parameter is not a power of two\n");
+            exit(EXIT_FAILURE);
+        }
+        if (errorCode == ENOMEM) {
+            fprintf(stderr, "Error: Insufficient memory to fulfill the request\n");
+            exit(EXIT_FAILURE);
+        }
+    }
+
+    if (ptr == NULL) {
+        fprintf(stderr, "Error: posix_memalign failed!\n");
+        exit(EXIT_FAILURE);
+    }
+
+    return ptr;
+}
diff --git a/EnhancedSolver/2D-mpi/src/allocate.h b/EnhancedSolver/2D-mpi/src/allocate.h
new file mode 100644
index 0000000..77f4ba0
--- /dev/null
+++ b/EnhancedSolver/2D-mpi/src/allocate.h
@@ -0,0 +1,13 @@
+/*
+ * Copyright (C)  NHR@FAU, University Erlangen-Nuremberg.
+ * All rights reserved.
+ * Use of this source code is governed by a MIT-style
+ * license that can be found in the LICENSE file.
+ */
+#ifndef __ALLOCATE_H_
+#define __ALLOCATE_H_
+#include <stdlib.h>
+
+extern void* allocate(size_t alignment, size_t bytesize);
+
+#endif
diff --git a/EnhancedSolver/2D-mpi/src/comm-v1.c b/EnhancedSolver/2D-mpi/src/comm-v1.c
new file mode 100644
index 0000000..a3cdaef
--- /dev/null
+++ b/EnhancedSolver/2D-mpi/src/comm-v1.c
@@ -0,0 +1,204 @@
+/*
+ * Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
+ * All rights reserved. This file is part of nusif-solver.
+ * Use of this source code is governed by a MIT style
+ * license that can be found in the LICENSE file.
+ */
+#include <stdlib.h>
+
+#include "comm.h"
+
+#ifdef _MPI
+// subroutines local to this module
+static int sum(int* sizes, int position)
+{
+    int sum = 0;
+
+    for (int i = 0; i < position; i++) {
+        sum += sizes[i];
+    }
+
+    return sum;
+}
+
+static void gatherArray(
+    Comm* c, int cnt, int* rcvCounts, int* displs, double* src, double* dst)
+{
+    double* sendbuffer = src + (c->imaxLocal + 2);
+
+    if (c->rank == 0) {
+        sendbuffer = src;
+    }
+
+    MPI_Gatherv(sendbuffer,
+        cnt,
+        MPI_DOUBLE,
+        dst,
+        rcvCounts,
+        displs,
+        MPI_DOUBLE,
+        0,
+        MPI_COMM_WORLD);
+}
+#endif // defined _MPI
+
+// exported subroutines
+int commIsBoundary(Comm* c, int direction)
+{
+#ifdef _MPI
+    switch (direction) {
+    case L:
+        return 1;
+        break;
+    case R:
+        return 1;
+        break;
+    case B:
+        return c->rank == 0;
+        break;
+    case T:
+        return c->rank == (c->size - 1);
+        break;
+    }
+#endif
+
+    return 1;
+}
+
+void commExchange(Comm* c, double* grid)
+{
+#ifdef _MPI
+    MPI_Request requests[4] = { MPI_REQUEST_NULL,
+        MPI_REQUEST_NULL,
+        MPI_REQUEST_NULL,
+        MPI_REQUEST_NULL };
+
+    /* exchange ghost cells with top neighbor */
+    if (c->rank + 1 < c->size) {
+        int top     = c->rank + 1;
+        double* src = grid + (c->jmaxLocal) * (c->imaxLocal + 2) + 1;
+        double* dst = grid + (c->jmaxLocal + 1) * (c->imaxLocal + 2) + 1;
+
+        MPI_Isend(src, c->imaxLocal, MPI_DOUBLE, top, 1, MPI_COMM_WORLD, &requests[0]);
+        MPI_Irecv(dst, c->imaxLocal, MPI_DOUBLE, top, 2, MPI_COMM_WORLD, &requests[1]);
+    }
+
+    /* exchange ghost cells with bottom neighbor */
+    if (c->rank > 0) {
+        int bottom  = c->rank - 1;
+        double* src = grid + (c->imaxLocal + 2) + 1;
+        double* dst = grid + 1;
+
+        MPI_Isend(src, c->imaxLocal, MPI_DOUBLE, bottom, 2, MPI_COMM_WORLD, &requests[2]);
+        MPI_Irecv(dst, c->imaxLocal, MPI_DOUBLE, bottom, 1, MPI_COMM_WORLD, &requests[3]);
+    }
+
+    MPI_Waitall(4, requests, MPI_STATUSES_IGNORE);
+#endif
+}
+
+void commShift(Comm* c, double* f, double* g)
+{
+#ifdef _MPI
+    MPI_Request requests[2] = { MPI_REQUEST_NULL, MPI_REQUEST_NULL };
+
+    /* shift G */
+    /* receive ghost cells from bottom neighbor */
+    if (c->rank > 0) {
+        int bottom = c->rank - 1;
+        MPI_Irecv(g + 1,
+            c->imaxLocal,
+            MPI_DOUBLE,
+            bottom,
+            0,
+            MPI_COMM_WORLD,
+            &requests[0]);
+    }
+
+    if (c->rank + 1 < c->size) {
+        int top     = c->rank + 1;
+        double* buf = g + (c->jmaxLocal) * (c->imaxLocal + 2) + 1;
+        /* send ghost cells to top neighbor */
+        MPI_Isend(buf, c->imaxLocal, MPI_DOUBLE, top, 0, MPI_COMM_WORLD, &requests[1]);
+    }
+
+    MPI_Waitall(2, requests, MPI_STATUSES_IGNORE);
+#endif
+}
+
+void commCollectResult(Comm* c,
+    double* ug,
+    double* vg,
+    double* pg,
+    double* u,
+    double* v,
+    double* p,
+    int jmax,
+    int imax)
+{
+#ifdef _MPI
+    int *rcvCounts, *displs;
+    int cnt = c->jmaxLocal * (imax + 2);
+
+    if (c->rank == 0) {
+        rcvCounts = (int*)malloc(c->size * sizeof(int));
+        displs    = (int*)malloc(c->size * sizeof(int));
+    }
+
+    if (c->rank == 0 && c->size == 1) {
+        cnt = (c->jmaxLocal + 2) * (imax + 2);
+    } else if (c->rank == 0 || c->rank == (c->size - 1)) {
+        cnt = (c->jmaxLocal + 1) * (imax + 2);
+    }
+
+    MPI_Gather(&cnt, 1, MPI_INTEGER, rcvCounts, 1, MPI_INTEGER, 0, MPI_COMM_WORLD);
+
+    if (c->rank == 0) {
+        displs[0]  = 0;
+        int cursor = rcvCounts[0];
+
+        for (int i = 1; i < c->size; i++) {
+            displs[i] = cursor;
+            cursor += rcvCounts[i];
+        }
+    }
+
+    gatherArray(c, cnt, rcvCounts, displs, p, pg);
+    gatherArray(c, cnt, rcvCounts, displs, u, ug);
+    gatherArray(c, cnt, rcvCounts, displs, v, vg);
+#endif
+}
+
+void commPartition(Comm* c, int jmax, int imax)
+{
+#ifdef _MPI
+    c->imaxLocal = imax;
+    c->jmaxLocal = sizeOfRank(c->coords[JDIM], c->size, jmax);
+#else
+    c->imaxLocal = imax;
+    c->jmaxLocal = jmax;
+#endif
+}
+
+void commUpdateDatatypes(Comm* oldcomm, Comm* newcomm, int imaxLocal, int jmaxLocal)
+{
+    Comm* newcomm;
+
+#if defined _MPI
+    newcomm->comm = MPI_COMM_NULL;
+    int result    = MPI_Comm_dup(oldcomm->comm, &newcomm->comm);
+
+    if (result == MPI_ERR_COMM) {
+        printf("\nNull communicator. Duplication failed !!\n");
+    }
+#endif
+    newcomm->imaxLocal = imaxLocal;
+    newcomm->jmaxLocal = jmaxLocal;
+}
+
+void commFreeCommunicator(Comm* comm)
+{
+#ifdef _MPI
+    MPI_Comm_free(&comm->comm);
+#endif
+}
\ No newline at end of file
diff --git a/EnhancedSolver/2D-mpi/src/comm-v2.c b/EnhancedSolver/2D-mpi/src/comm-v2.c
new file mode 100644
index 0000000..4800c18
--- /dev/null
+++ b/EnhancedSolver/2D-mpi/src/comm-v2.c
@@ -0,0 +1,335 @@
+/*
+ * Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
+ * All rights reserved. This file is part of nusif-solver.
+ * Use of this source code is governed by a MIT style
+ * license that can be found in the LICENSE file.
+ */
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "comm.h"
+
+#ifdef _MPI
+// subroutines local to this module
+static int sum(int* sizes, int position)
+{
+    int sum = 0;
+
+    for (int i = 0; i < position; i++) {
+        sum += sizes[i];
+    }
+
+    return sum;
+}
+
+static void assembleResult(Comm* c, double* src, double* dst, int imax, int jmax)
+{
+    MPI_Request* requests;
+    int numRequests = 1;
+
+    if (c->rank == 0) {
+        numRequests = c->size + 1;
+    } else {
+        numRequests = 1;
+    }
+
+    requests = (MPI_Request*)malloc(numRequests * sizeof(MPI_Request));
+
+    /* all ranks send their bulk array, including the external boundary layer */
+    MPI_Datatype bulkType;
+    int oldSizes[NDIMS] = { c->jmaxLocal + 2, c->imaxLocal + 2 };
+    int newSizes[NDIMS] = { c->jmaxLocal, c->imaxLocal };
+    int starts[NDIMS]   = { 1, 1 };
+
+    if (commIsBoundary(c, L)) {
+        newSizes[CIDIM] += 1;
+        starts[CIDIM] = 0;
+    }
+    if (commIsBoundary(c, R)) {
+        newSizes[CIDIM] += 1;
+    }
+    if (commIsBoundary(c, B)) {
+        newSizes[CJDIM] += 1;
+        starts[CJDIM] = 0;
+    }
+    if (commIsBoundary(c, T)) {
+        newSizes[CJDIM] += 1;
+    }
+
+    MPI_Type_create_subarray(NDIMS,
+        oldSizes,
+        newSizes,
+        starts,
+        MPI_ORDER_C,
+        MPI_DOUBLE,
+        &bulkType);
+    MPI_Type_commit(&bulkType);
+    MPI_Isend(src, 1, bulkType, 0, 0, c->comm, &requests[0]);
+
+    int newSizesI[c->size];
+    int newSizesJ[c->size];
+    MPI_Gather(&newSizes[CIDIM], 1, MPI_INT, newSizesI, 1, MPI_INT, 0, MPI_COMM_WORLD);
+    MPI_Gather(&newSizes[CJDIM], 1, MPI_INT, newSizesJ, 1, MPI_INT, 0, MPI_COMM_WORLD);
+
+    /* rank 0 assembles the subdomains */
+    if (c->rank == 0) {
+        for (int i = 0; i < c->size; i++) {
+            MPI_Datatype domainType;
+            int oldSizes[NDIMS] = { jmax + 2, imax + 2 };
+            int newSizes[NDIMS] = { newSizesJ[i], newSizesI[i] };
+            int coords[NDIMS];
+            MPI_Cart_coords(c->comm, i, NDIMS, coords);
+            int starts[NDIMS] = { sum(newSizesJ, coords[JDIM]),
+                sum(newSizesI, coords[IDIM]) };
+            printf(
+                "Rank: %d, Coords(i,j): %d %d, Size(i,j): %d %d, Target Size(i,j): %d %d "
+                "Starts(i,j): %d %d\n",
+                i,
+                coords[IDIM],
+                coords[JDIM],
+                oldSizes[CIDIM],
+                oldSizes[CJDIM],
+                newSizes[CIDIM],
+                newSizes[CJDIM],
+                starts[CIDIM],
+                starts[CJDIM]);
+
+            MPI_Type_create_subarray(NDIMS,
+                oldSizes,
+                newSizes,
+                starts,
+                MPI_ORDER_C,
+                MPI_DOUBLE,
+                &domainType);
+            MPI_Type_commit(&domainType);
+
+            MPI_Irecv(dst, 1, domainType, i, 0, c->comm, &requests[i + 1]);
+            MPI_Type_free(&domainType);
+        }
+    }
+
+    MPI_Waitall(numRequests, requests, MPI_STATUSES_IGNORE);
+}
+#endif // defined _MPI
+
+// exported subroutines
+int commIsBoundary(Comm* c, int direction)
+{
+#ifdef _MPI
+    switch (direction) {
+    case L:
+        return c->coords[IDIM] == 0;
+        break;
+    case R:
+        return c->coords[IDIM] == (c->dims[IDIM] - 1);
+        break;
+    case B:
+        return c->coords[JDIM] == 0;
+        break;
+    case T:
+        return c->coords[JDIM] == (c->dims[JDIM] - 1);
+        break;
+    }
+#endif
+
+    return 1;
+}
+
+void commExchange(Comm* c, double* grid)
+{
+#ifdef _MPI
+    MPI_Request requests[8];
+    for (int i = 0; i < 8; i++)
+        requests[i] = MPI_REQUEST_NULL;
+
+    for (int i = 0; i < NDIRS; i++) {
+        double* sbuf = grid + c->sdispls[i];
+        double* rbuf = grid + c->rdispls[i];
+
+        int tag = 0;
+        if (c->neighbours[i] != MPI_PROC_NULL) {
+            // printf("DEBUG: Rank %d - SendRecv with %d\n", c->rank, c->neighbours[i]);
+            tag = c->neighbours[i];
+        }
+        MPI_Irecv(rbuf,
+            1,
+            c->bufferTypes[i],
+            c->neighbours[i],
+            tag,
+            c->comm,
+            &requests[i * 2]);
+        MPI_Isend(sbuf,
+            1,
+            c->bufferTypes[i],
+            c->neighbours[i],
+            c->rank,
+            c->comm,
+            &requests[i * 2 + 1]);
+    }
+
+    MPI_Waitall(8, requests, MPI_STATUSES_IGNORE);
+#endif
+}
+
+void commShift(Comm* c, double* f, double* g)
+{
+#ifdef _MPI
+    MPI_Request requests[4] = { MPI_REQUEST_NULL,
+        MPI_REQUEST_NULL,
+        MPI_REQUEST_NULL,
+        MPI_REQUEST_NULL };
+
+    /* shift G */
+    /* receive ghost cells from bottom neighbor */
+    double* buf = g + 1;
+    MPI_Irecv(buf,
+        1,
+        c->bufferTypes[B],
+        c->neighbours[B],
+        0,
+        c->comm,
+        &requests[0]);
+
+    /* send ghost cells to top neighbor */
+    buf = g + (c->jmaxLocal) * (c->imaxLocal + 2) + 1;
+    MPI_Isend(buf, 1, c->bufferTypes[T], c->neighbours[T], 0, c->comm, &requests[1]);
+
+    /* shift F */
+    /* receive ghost cells from left neighbor */
+    buf = f + (c->imaxLocal + 2);
+    MPI_Irecv(buf,
+        1,
+        c->bufferTypes[L],
+        c->neighbours[L],
+        1,
+        c->comm,
+        &requests[2]);
+
+    /* send ghost cells to right neighbor */
+    buf = f + (c->imaxLocal + 2) + (c->imaxLocal);
+    MPI_Isend(buf,
+        1,
+        c->bufferTypes[R],
+        c->neighbours[R],
+        1,
+        c->comm,
+        &requests[3]);
+
+    MPI_Waitall(4, requests, MPI_STATUSES_IGNORE);
+#endif
+}
+
+void commCollectResult(Comm* c,
+    double* ug,
+    double* vg,
+    double* pg,
+    double* u,
+    double* v,
+    double* p,
+    int imax,
+    int jmax)
+{
+#ifdef _MPI
+    /* collect P */
+    assembleResult(c, p, pg, imax, jmax);
+
+    /* collect U */
+    assembleResult(c, u, ug, imax, jmax);
+
+    /* collect V */
+    assembleResult(c, v, vg, imax, jmax);
+#endif
+}
+
+void commPartition(Comm* c, int jmax, int imax)
+{
+#ifdef _MPI
+    int dims[NDIMS]    = { 0, 0 };
+    int periods[NDIMS] = { 0, 0 };
+    MPI_Dims_create(c->size, NDIMS, dims);
+    MPI_Cart_create(MPI_COMM_WORLD, NDIMS, dims, periods, 0, &c->comm);
+    MPI_Cart_shift(c->comm, IDIM, 1, &c->neighbours[L], &c->neighbours[R]);
+    MPI_Cart_shift(c->comm, JDIM, 1, &c->neighbours[B], &c->neighbours[T]);
+    MPI_Cart_get(c->comm, NDIMS, c->dims, periods, c->coords);
+
+    int imaxLocal = sizeOfRank(c->coords[IDIM], dims[IDIM], imax);
+    int jmaxLocal = sizeOfRank(c->coords[JDIM], dims[JDIM], jmax);
+
+    c->imaxLocal = imaxLocal;
+    c->jmaxLocal = jmaxLocal;
+
+    MPI_Datatype jBufferType;
+    MPI_Type_contiguous(imaxLocal, MPI_DOUBLE, &jBufferType);
+    MPI_Type_commit(&jBufferType);
+
+    MPI_Datatype iBufferType;
+    MPI_Type_vector(jmaxLocal, 1, imaxLocal + 2, MPI_DOUBLE, &iBufferType);
+    MPI_Type_commit(&iBufferType);
+
+    c->bufferTypes[L]   = iBufferType;
+    c->bufferTypes[R]  = iBufferType;
+    c->bufferTypes[B] = jBufferType;
+    c->bufferTypes[T]    = jBufferType;
+
+    c->sdispls[L]   = (imaxLocal + 2) + 1;
+    c->sdispls[R]  = (imaxLocal + 2) + imaxLocal;
+    c->sdispls[B] = (imaxLocal + 2) + 1;
+    c->sdispls[T]    = jmaxLocal * (imaxLocal + 2) + 1;
+
+    c->rdispls[L]   = (imaxLocal + 2);
+    c->rdispls[R]  = (imaxLocal + 2) + (imaxLocal + 1);
+    c->rdispls[B] = 1;
+    c->rdispls[T]    = (jmaxLocal + 1) * (imaxLocal + 2) + 1;
+#else
+    c->imaxLocal = imax;
+    c->jmaxLocal = jmax;
+#endif
+}
+
+void commUpdateDatatypes(Comm* oldcomm, Comm* newcomm, int imaxLocal, int jmaxLocal)
+{
+#if defined _MPI
+    newcomm->comm = MPI_COMM_NULL;
+    int result = MPI_Comm_dup(oldcomm->comm, &newcomm->comm);
+
+    if (result == MPI_ERR_COMM) {
+        printf("\nNull communicator. Duplication failed !!\n");
+    }
+    
+    newcomm->imaxLocal = imaxLocal;
+    newcomm->jmaxLocal = jmaxLocal;
+
+    MPI_Datatype jBufferType;
+    MPI_Type_contiguous(imaxLocal, MPI_DOUBLE, &jBufferType);
+    MPI_Type_commit(&jBufferType);
+
+    MPI_Datatype iBufferType;
+    MPI_Type_vector(jmaxLocal, 1, imaxLocal + 2, MPI_DOUBLE, &iBufferType);
+    MPI_Type_commit(&iBufferType);
+
+    newcomm->bufferTypes[L]   = iBufferType;
+    newcomm->bufferTypes[R]  = iBufferType;
+    newcomm->bufferTypes[B] = jBufferType;
+    newcomm->bufferTypes[T]    = jBufferType;
+
+    newcomm->sdispls[L]   = (imaxLocal + 2) + 1;
+    newcomm->sdispls[R]  = (imaxLocal + 2) + imaxLocal;
+    newcomm->sdispls[B] = (imaxLocal + 2) + 1;
+    newcomm->sdispls[T]    = jmaxLocal * (imaxLocal + 2) + 1;
+
+    newcomm->rdispls[L]   = (imaxLocal + 2);
+    newcomm->rdispls[R]  = (imaxLocal + 2) + (imaxLocal + 1);
+    newcomm->rdispls[B] = 1;
+    newcomm->rdispls[T]    = (jmaxLocal + 1) * (imaxLocal + 2) + 1;
+#else
+    newcomm->imaxLocal = imaxLocal;
+    newcomm->jmaxLocal = jmaxLocal;
+#endif
+}
+
+void commFreeCommunicator(Comm* comm)
+{
+    #ifdef _MPI
+        MPI_Comm_free(&comm->comm);
+    #endif
+}
\ No newline at end of file
diff --git a/EnhancedSolver/2D-mpi/src/comm-v3.c b/EnhancedSolver/2D-mpi/src/comm-v3.c
new file mode 100644
index 0000000..a7834b2
--- /dev/null
+++ b/EnhancedSolver/2D-mpi/src/comm-v3.c
@@ -0,0 +1,316 @@
+/*
+ * Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
+ * All rights reserved. This file is part of nusif-solver.
+ * Use of this source code is governed by a MIT style
+ * license that can be found in the LICENSE file.
+ */
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "comm.h"
+
+#ifdef _MPI
+// subroutines local to this module
+static int sum(int* sizes, int position)
+{
+    int sum = 0;
+
+    for (int i = 0; i < position; i++) {
+        sum += sizes[i];
+    }
+
+    return sum;
+}
+
+static void assembleResult(Comm* c, double* src, double* dst, int imax, int jmax)
+{
+    MPI_Request* requests;
+    int numRequests = 1;
+
+    if (c->rank == 0) {
+        numRequests = c->size + 1;
+    } else {
+        numRequests = 1;
+    }
+
+    requests = (MPI_Request*)malloc(numRequests * sizeof(MPI_Request));
+
+    /* all ranks send their bulk array, including the external boundary layer */
+    MPI_Datatype bulkType;
+    int oldSizes[NDIMS] = { c->jmaxLocal + 2, c->imaxLocal + 2 };
+    int newSizes[NDIMS] = { c->jmaxLocal, c->imaxLocal };
+    int starts[NDIMS]   = { 1, 1 };
+
+    if (commIsBoundary(c, L)) {
+        newSizes[CIDIM] += 1;
+        starts[CIDIM] = 0;
+    }
+    if (commIsBoundary(c, R)) {
+        newSizes[CIDIM] += 1;
+    }
+    if (commIsBoundary(c, B)) {
+        newSizes[CJDIM] += 1;
+        starts[CJDIM] = 0;
+    }
+    if (commIsBoundary(c, T)) {
+        newSizes[CJDIM] += 1;
+    }
+
+    MPI_Type_create_subarray(NDIMS,
+        oldSizes,
+        newSizes,
+        starts,
+        MPI_ORDER_C,
+        MPI_DOUBLE,
+        &bulkType);
+    MPI_Type_commit(&bulkType);
+    MPI_Isend(src, 1, bulkType, 0, 0, c->comm, &requests[0]);
+
+    int newSizesI[c->size];
+    int newSizesJ[c->size];
+    MPI_Gather(&newSizes[CIDIM], 1, MPI_INT, newSizesI, 1, MPI_INT, 0, MPI_COMM_WORLD);
+    MPI_Gather(&newSizes[CJDIM], 1, MPI_INT, newSizesJ, 1, MPI_INT, 0, MPI_COMM_WORLD);
+
+    /* rank 0 assembles the subdomains */
+    if (c->rank == 0) {
+        for (int i = 0; i < c->size; i++) {
+            MPI_Datatype domainType;
+            int oldSizes[NDIMS] = { jmax + 2, imax + 2 };
+            int newSizes[NDIMS] = { newSizesJ[i], newSizesI[i] };
+            int coords[NDIMS];
+            MPI_Cart_coords(c->comm, i, NDIMS, coords);
+            int starts[NDIMS] = { sum(newSizesJ, coords[JDIM]),
+                sum(newSizesI, coords[IDIM]) };
+            printf(
+                "Rank: %d, Coords(i,j): %d %d, Size(i,j): %d %d, Target Size(i,j): %d %d "
+                "Starts(i,j): %d %d\n",
+                i,
+                coords[IDIM],
+                coords[JDIM],
+                oldSizes[CIDIM],
+                oldSizes[CJDIM],
+                newSizes[CIDIM],
+                newSizes[CJDIM],
+                starts[CIDIM],
+                starts[CJDIM]);
+
+            MPI_Type_create_subarray(NDIMS,
+                oldSizes,
+                newSizes,
+                starts,
+                MPI_ORDER_C,
+                MPI_DOUBLE,
+                &domainType);
+            MPI_Type_commit(&domainType);
+
+            MPI_Irecv(dst, 1, domainType, i, 0, c->comm, &requests[i + 1]);
+            MPI_Type_free(&domainType);
+        }
+    }
+
+    MPI_Waitall(numRequests, requests, MPI_STATUSES_IGNORE);
+}
+#endif // defined _MPI
+
+// exported subroutines
+int commIsBoundary(Comm* c, int direction)
+{
+#ifdef _MPI
+    switch (direction) {
+    case L:
+        return c->coords[IDIM] == 0;
+        break;
+    case R:
+        return c->coords[IDIM] == (c->dims[IDIM] - 1);
+        break;
+    case B:
+        return c->coords[JDIM] == 0;
+        break;
+    case T:
+        return c->coords[JDIM] == (c->dims[JDIM] - 1);
+        break;
+    }
+#endif
+
+    return 1;
+}
+
+void commExchange(Comm* c, double* grid)
+{
+#ifdef _MPI
+    int counts[NDIRS] = { 1, 1, 1, 1 };
+    MPI_Neighbor_alltoallw(grid,
+        counts,
+        c->sdispls,
+        c->bufferTypes,
+        grid,
+        counts,
+        c->rdispls,
+        c->bufferTypes,
+        c->comm);
+#endif
+}
+
+void commShift(Comm* c, double* f, double* g)
+{
+#ifdef _MPI
+    MPI_Request requests[4] = { MPI_REQUEST_NULL,
+        MPI_REQUEST_NULL,
+        MPI_REQUEST_NULL,
+        MPI_REQUEST_NULL };
+
+    /* shift G */
+    /* receive ghost cells from bottom neighbor */
+    double* buf = g + 1;
+    MPI_Irecv(buf,
+        1,
+        c->bufferTypes[B],
+        c->neighbours[B],
+        0,
+        c->comm,
+        &requests[0]);
+
+    /* send ghost cells to top neighbor */
+    buf = g + (c->jmaxLocal) * (c->imaxLocal + 2) + 1;
+    MPI_Isend(buf, 1, c->bufferTypes[T], c->neighbours[T], 0, c->comm, &requests[1]);
+
+    /* shift F */
+    /* receive ghost cells from left neighbor */
+    buf = f + (c->imaxLocal + 2);
+    MPI_Irecv(buf,
+        1,
+        c->bufferTypes[L],
+        c->neighbours[L],
+        1,
+        c->comm,
+        &requests[2]);
+
+    /* send ghost cells to right neighbor */
+    buf = f + (c->imaxLocal + 2) + (c->imaxLocal);
+    MPI_Isend(buf,
+        1,
+        c->bufferTypes[R],
+        c->neighbours[R],
+        1,
+        c->comm,
+        &requests[3]);
+
+    MPI_Waitall(4, requests, MPI_STATUSES_IGNORE);
+#endif
+}
+
+void commCollectResult(Comm* c,
+    double* ug,
+    double* vg,
+    double* pg,
+    double* u,
+    double* v,
+    double* p,
+    int imax,
+    int jmax)
+{
+#ifdef _MPI
+    /* collect P */
+    assembleResult(c, p, pg, imax, jmax);
+
+    /* collect U */
+    assembleResult(c, u, ug, imax, jmax);
+
+    /* collect V */
+    assembleResult(c, v, vg, imax, jmax);
+#endif
+}
+
+void commPartition(Comm* c, int jmax, int imax)
+{
+#ifdef _MPI
+    int dims[NDIMS]    = { 0, 0 };
+    int periods[NDIMS] = { 0, 0 };
+    MPI_Dims_create(c->size, NDIMS, dims);
+    MPI_Cart_create(MPI_COMM_WORLD, NDIMS, dims, periods, 0, &c->comm);
+    MPI_Cart_shift(c->comm, IDIM, 1, &c->neighbours[L], &c->neighbours[R]);
+    MPI_Cart_shift(c->comm, JDIM, 1, &c->neighbours[B], &c->neighbours[T]);
+    MPI_Cart_get(c->comm, NDIMS, c->dims, periods, c->coords);
+
+    int imaxLocal = sizeOfRank(c->coords[IDIM], dims[IDIM], imax);
+    int jmaxLocal = sizeOfRank(c->coords[JDIM], dims[JDIM], jmax);
+
+    c->imaxLocal = imaxLocal;
+    c->jmaxLocal = jmaxLocal;
+
+    MPI_Datatype jBufferType;
+    MPI_Type_contiguous(imaxLocal, MPI_DOUBLE, &jBufferType);
+    MPI_Type_commit(&jBufferType);
+
+    MPI_Datatype iBufferType;
+    MPI_Type_vector(jmaxLocal, 1, imaxLocal + 2, MPI_DOUBLE, &iBufferType);
+    MPI_Type_commit(&iBufferType);
+
+    c->bufferTypes[L]   = iBufferType;
+    c->bufferTypes[R]  = iBufferType;
+    c->bufferTypes[B] = jBufferType;
+    c->bufferTypes[T]    = jBufferType;
+
+    size_t dblsize     = sizeof(double);
+    c->sdispls[L]   = ((imaxLocal + 2) + 1) * dblsize;
+    c->sdispls[R]  = ((imaxLocal + 2) + imaxLocal) * dblsize;
+    c->sdispls[B] = ((imaxLocal + 2) + 1) * dblsize;
+    c->sdispls[T]    = (jmaxLocal * (imaxLocal + 2) + 1) * dblsize;
+
+    c->rdispls[L]   = (imaxLocal + 2) * dblsize;
+    c->rdispls[R]  = ((imaxLocal + 2) + (imaxLocal + 1)) * dblsize;
+    c->rdispls[B] = 1 * dblsize;
+    c->rdispls[T]    = ((jmaxLocal + 1) * (imaxLocal + 2) + 1) * dblsize;
+#else
+    c->imaxLocal = imax;
+    c->jmaxLocal = jmax;
+#endif
+}
+
+void commUpdateDatatypes(Comm* oldcomm, Comm* newcomm, int imaxLocal, int jmaxLocal)
+{
+#if defined _MPI
+
+    int result    = MPI_Comm_dup(oldcomm->comm, &newcomm->comm);
+
+    if (result == MPI_ERR_COMM) {
+        printf("\nNull communicator. Duplication failed !!\n");
+    }
+
+    newcomm->imaxLocal = imaxLocal;
+    newcomm->jmaxLocal = jmaxLocal;
+
+    MPI_Datatype jBufferType;
+    MPI_Type_contiguous(imaxLocal, MPI_DOUBLE, &jBufferType);
+    MPI_Type_commit(&jBufferType);
+
+    MPI_Datatype iBufferType;
+    MPI_Type_vector(jmaxLocal, 1, imaxLocal + 2, MPI_DOUBLE, &iBufferType);
+    MPI_Type_commit(&iBufferType);
+
+    newcomm->bufferTypes[L]   = iBufferType;
+    newcomm->bufferTypes[R]  = iBufferType;
+    newcomm->bufferTypes[B] = jBufferType;
+    newcomm->bufferTypes[T]    = jBufferType;
+
+    newcomm->sdispls[L]   = (imaxLocal + 2) + 1;
+    newcomm->sdispls[R]  = (imaxLocal + 2) + imaxLocal;
+    newcomm->sdispls[B] = (imaxLocal + 2) + 1;
+    newcomm->sdispls[T]    = jmaxLocal * (imaxLocal + 2) + 1;
+
+    newcomm->rdispls[L]   = (imaxLocal + 2);
+    newcomm->rdispls[R]  = (imaxLocal + 2) + (imaxLocal + 1);
+    newcomm->rdispls[B] = 1;
+    newcomm->rdispls[T]    = (jmaxLocal + 1) * (imaxLocal + 2) + 1;
+#else
+    newcomm->imaxLocal = imaxLocal;
+    newcomm->jmaxLocal = jmaxLocal;
+#endif
+}
+
+void commFreeCommunicator(Comm* comm)
+{
+    #ifdef _MPI
+        MPI_Comm_free(&comm->comm);
+    #endif
+}
\ No newline at end of file
diff --git a/EnhancedSolver/2D-mpi/src/comm.c b/EnhancedSolver/2D-mpi/src/comm.c
new file mode 100644
index 0000000..7904a9b
--- /dev/null
+++ b/EnhancedSolver/2D-mpi/src/comm.c
@@ -0,0 +1,129 @@
+/*
+ * Copyright (C)  NHR@FAU, University Erlangen-Nuremberg.
+ * All rights reserved. This file is part of nusif-solver.
+ * Use of this source code is governed by a MIT style
+ * license that can be found in the LICENSE file.
+ */
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "comm.h"
+
+// subroutines local to this module
+int sizeOfRank(int rank, int size, int N)
+{
+    return N / size + ((N % size > rank) ? 1 : 0);
+}
+
+void commReduction(double* v, int op)
+{
+#ifdef _MPI
+    if (op == MAX) {
+        MPI_Allreduce(MPI_IN_PLACE, v, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD);
+    } else if (op == SUM) {
+        MPI_Allreduce(MPI_IN_PLACE, v, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
+    }
+#endif
+}
+
+void commPrintConfig(Comm* c)
+{
+#ifdef _MPI
+    fflush(stdout);
+    MPI_Barrier(MPI_COMM_WORLD);
+    if (commIsMaster(c)) {
+        printf("Communication setup:\n");
+    }
+
+    for (int i = 0; i < c->size; i++) {
+        if (i == c->rank) {
+            printf("\tRank %d of %d\n", c->rank, c->size);
+            printf("\tNeighbours (bottom, top, left, right): %d %d, %d, %d\n",
+                c->neighbours[B],
+                c->neighbours[T],
+                c->neighbours[L],
+                c->neighbours[R]);
+            printf("\tIs boundary:\n");
+            printf("\t\tLEFT: %d\n", commIsBoundary(c, L));
+            printf("\t\tRIGHT: %d\n", commIsBoundary(c, R));
+            printf("\t\tBOTTOM: %d\n", commIsBoundary(c, B));
+            printf("\t\tTOP: %d\n", commIsBoundary(c, T));
+            printf("\tCoordinates (i,j) %d %d\n", c->coords[IDIM], c->coords[JDIM]);
+            printf("\tDims (i,j) %d %d\n", c->dims[IDIM], c->dims[JDIM]);
+            printf("\tLocal domain size (i,j) %dx%d\n", c->imaxLocal, c->jmaxLocal);
+            fflush(stdout);
+        }
+        MPI_Barrier(MPI_COMM_WORLD);
+    }
+#endif
+}
+
+void commInit(Comm* c, int argc, char** argv)
+{
+#ifdef _MPI
+    MPI_Init(&argc, &argv);
+    MPI_Comm_rank(MPI_COMM_WORLD, &(c->rank));
+    MPI_Comm_size(MPI_COMM_WORLD, &(c->size));
+#else
+    c->rank = 0;
+    c->size = 1;
+#endif
+}
+
+void commTestInit(Comm* c, double* p, double* f, double* g)
+{
+    int imax = c->imaxLocal;
+    int jmax = c->jmaxLocal;
+    int rank = c->rank;
+
+    for (int j = 0; j < jmax + 2; j++) {
+        for (int i = 0; i < imax + 2; i++) {
+            p[j * (imax + 2) + i] = rank;
+            f[j * (imax + 2) + i] = rank;
+            g[j * (imax + 2) + i] = rank;
+        }
+    }
+}
+
+static void testWriteFile(char* filename, double* grid, int imax, int jmax)
+{
+    FILE* fp = fopen(filename, "w");
+
+    if (fp == NULL) {
+        printf("Error!\n");
+        exit(EXIT_FAILURE);
+    }
+
+    for (int j = 0; j < jmax + 2; j++) {
+        for (int i = 0; i < imax + 2; i++) {
+            fprintf(fp, "%.2f ", grid[j * (imax + 2) + i]);
+        }
+        fprintf(fp, "\n");
+    }
+
+    fclose(fp);
+}
+
+void commTestWrite(Comm* c, double* p, double* f, double* g)
+{
+    int imax = c->imaxLocal;
+    int jmax = c->jmaxLocal;
+    int rank = c->rank;
+
+    char filename[30];
+    snprintf(filename, 30, "ptest-%d.dat", rank);
+    testWriteFile(filename, p, imax, jmax);
+
+    snprintf(filename, 30, "ftest-%d.dat", rank);
+    testWriteFile(filename, f, imax, jmax);
+
+    snprintf(filename, 30, "gtest-%d.dat", rank);
+    testWriteFile(filename, g, imax, jmax);
+}
+
+void commFinalize(Comm* c)
+{
+#ifdef _MPI
+    MPI_Finalize();
+#endif
+}
diff --git a/EnhancedSolver/2D-mpi/src/comm.h b/EnhancedSolver/2D-mpi/src/comm.h
new file mode 100644
index 0000000..04d2a8a
--- /dev/null
+++ b/EnhancedSolver/2D-mpi/src/comm.h
@@ -0,0 +1,57 @@
+/*
+ * Copyright (C)  NHR@FAU, University Erlangen-Nuremberg.
+ * All rights reserved. This file is part of nusif-solver.
+ * Use of this source code is governed by a MIT style
+ * license that can be found in the LICENSE file.
+ */
+#ifndef __COMM_H_
+#define __COMM_H_
+#if defined(_MPI)
+#include <mpi.h>
+#endif
+
+enum direction { L = 0, R, B, T, NDIRS }; // L = Left, R = Right, B = Bottom, T =Top
+enum dimension { IDIM = 0, JDIM, NDIMS };
+enum cdimension { CJDIM = 0, CIDIM };
+enum layer { HALO = 0, BULK };
+enum op { MAX = 0, SUM };
+
+typedef struct {
+    int rank;
+    int size;
+#if defined(_MPI)
+    MPI_Comm comm;
+    MPI_Datatype bufferTypes[NDIRS];
+    MPI_Aint sdispls[NDIRS];
+    MPI_Aint rdispls[NDIRS];
+#endif
+    int neighbours[NDIRS];
+    int coords[NDIMS], dims[NDIMS];
+    int imaxLocal, jmaxLocal;
+} Comm;
+
+extern int sizeOfRank(int rank, int size, int N);
+extern void commInit(Comm* c, int argc, char** argv);
+extern void commTestInit(Comm* c, double* p, double* f, double* g);
+extern void commTestWrite(Comm* c, double* p, double* f, double* g);
+extern void commFinalize(Comm* c);
+extern void commPartition(Comm* c, int jmax, int imax);
+extern void commPrintConfig(Comm*);
+extern void commExchange(Comm*, double*);
+extern void commShift(Comm* c, double* f, double* g);
+extern void commReduction(double* v, int op);
+extern int commIsBoundary(Comm* c, int direction);
+extern void commUpdateDatatypes(Comm*, Comm*, int, int);
+extern void commFreeCommunicator(Comm*);
+extern void commCollectResult(Comm* c,
+    double* ug,
+    double* vg,
+    double* pg,
+    double* u,
+    double* v,
+    double* p,
+    int jmax,
+    int imax);
+
+static inline int commIsMaster(Comm* c) { return c->rank == 0; }
+#endif // __COMM_H_
diff --git a/EnhancedSolver/2D-mpi/src/discretization.c b/EnhancedSolver/2D-mpi/src/discretization.c
new file mode 100644
index 0000000..4b59553
--- /dev/null
+++ b/EnhancedSolver/2D-mpi/src/discretization.c
@@ -0,0 +1,718 @@
+/*
+ * Copyright (C)  NHR@FAU, University Erlangen-Nuremberg.
+ * All rights reserved. This file is part of nusif-solver.
+ * Use of this source code is governed by a MIT style
+ * license that can be found in the LICENSE file.
+ */
+#include <float.h>
+#include <math.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "allocate.h"
+#include "comm.h"
+#include "discretization.h"
+#include "parameter.h"
+#include "util.h"
+
+static double distance(double i, double j, double iCenter, double jCenter)
+{
+    return sqrt(pow(iCenter - i, 2) + pow(jCenter - j, 2) * 1.0);
+}
+
+double sumOffset(double* sizes, int init, int offset, int coord)
+{
+    double sum = 0;
+
+    for (int i = init - offset; coord > 0; i -= offset, --coord) {
+        sum += sizes[i];
+    }
+
+    return sum;
+}
+
+void print(Discretization* d, double* grid)
+{
+    int imaxLocal = d->comm.imaxLocal;
+
+    for (int i = 0; i < d->comm.size; i++) {
+        if (i == d->comm.rank) {
+            sleep(1 * d->comm.rank);
+            printf("### RANK %d LVL "
+                   "###################################################### #\n ",
+                d->comm.rank);
+            for (int j = 0; j < d->comm.jmaxLocal + 2; j++) {
+                printf("%02d: ", j);
+                for (int i = 0; i < d->comm.imaxLocal + 2; i++) {
+                    printf("%2.2f  ", grid[j * (imaxLocal + 2) + i]);
+                }
+                printf("\n");
+            }
+            fflush(stdout);
+        }
+    }
+}
+
+static void printConfig(Discretization* d)
+{
+    if (commIsMaster(&d->comm)) {
+        printf("Parameters for #%s#\n", d->problem);
+        printf("BC Left:%d Right:%d Bottom:%d Top:%d\n",
+            d->bcLeft,
+            d->bcRight,
+            d->bcBottom,
+            d->bcTop);
+        printf("\tReynolds number: %.2f\n", d->re);
+        printf("\tGx Gy: %.2f %.2f\n", d->gx, d->gy);
+        printf("Geometry data:\n");
+        printf("\tDomain box size (x, y): %.2f, %.2f\n",
+            d->grid.xlength,
+            d->grid.ylength);
+        printf("\tCells (x, y): %d, %d\n", d->grid.imax, d->grid.jmax);
+        printf("\tCell size (dx, dy): %f, %f\n", d->grid.dx, d->grid.dy);
+        printf("Timestep parameters:\n");
+        printf("\tDefault stepsize: %.2f, Final time %.2f\n", d->dt, d->te);
+        printf("\tdt bound: %.6f\n", d->dtBound);
+        printf("\tTau factor: %.2f\n", d->tau);
+        printf("Iterative s parameters:\n");
+        printf("\tgamma factor: %f\n", d->gamma);
+    }
+    commPrintConfig(&d->comm);
+}
+
+void initDiscretiztion(Discretization* d, Parameter* params)
+{
+    d->problem      = params->name;
+    d->bcLeft       = params->bcLeft;
+    d->bcRight      = params->bcRight;
+    d->bcBottom     = params->bcBottom;
+    d->bcTop        = params->bcTop;
+    d->grid.imax    = params->imax;
+    d->grid.jmax    = params->jmax;
+    d->grid.xlength = params->xlength;
+    d->grid.ylength = params->ylength;
+    d->grid.dx      = params->xlength / params->imax;
+    d->grid.dy      = params->ylength / params->jmax;
+    d->re           = params->re;
+    d->gx           = params->gx;
+    d->gy           = params->gy;
+    d->dt           = params->dt;
+    d->te           = params->te;
+    d->tau          = params->tau;
+    d->gamma        = params->gamma;
+
+    /* allocate arrays */
+    int imaxLocal = d->comm.imaxLocal;
+    int jmaxLocal = d->comm.jmaxLocal;
+    size_t size   = (imaxLocal + 2) * (jmaxLocal + 2);
+
+    d->u      = allocate(64, size * sizeof(double));
+    d->v      = allocate(64, size * sizeof(double));
+    d->p      = allocate(64, size * sizeof(double));
+    d->rhs    = allocate(64, size * sizeof(double));
+    d->f      = allocate(64, size * sizeof(double));
+    d->g      = allocate(64, size * sizeof(double));
+    d->grid.s = allocate(64, size * sizeof(double));
+
+    for (int i = 0; i < size; i++) {
+        d->u[i]      = params->u_init;
+        d->v[i]      = params->v_init;
+        d->p[i]      = params->p_init;
+        d->rhs[i]    = 0.0;
+        d->f[i]      = 0.0;
+        d->g[i]      = 0.0;
+        d->grid.s[i] = FLUID;
+    }
+
+    double dx = d->grid.dx;
+    double dy = d->grid.dy;
+
+    double invSqrSum = 1.0 / (dx * dx) + 1.0 / (dy * dy);
+    d->dtBound       = 0.5 * d->re * 1.0 / invSqrSum;
+
+    d->xLocal = d->comm.imaxLocal * d->grid.dx;
+    d->yLocal = d->comm.jmaxLocal * d->grid.dy;
+
+    double xLocal[d->comm.size];
+    double yLocal[d->comm.size];
+
+#ifdef _MPI
+    MPI_Allgather(&d->xLocal, 1, MPI_DOUBLE, xLocal, 1, MPI_DOUBLE, d->comm.comm);
+    MPI_Allgather(&d->yLocal, 1, MPI_DOUBLE, yLocal, 1, MPI_DOUBLE, d->comm.comm);
+#endif
+
+    d->xOffset    = sumOffset(xLocal,
+        d->comm.rank,
+        d->comm.dims[JDIM],
+        d->comm.coords[IDIM]);
+    d->yOffset    = sumOffset(yLocal, d->comm.rank, 1, d->comm.coords[JDIM]);
+    d->xOffsetEnd = d->xOffset + d->xLocal;
+    d->yOffsetEnd = d->yOffset + d->yLocal;
+
+    printf("Rank : %d, xOffset : %.2f, yOffset : %.2f, xOffsetEnd : %.2f, yOffsetEnd : "
+           "%.2f\n",
+        d->comm.rank,
+        d->xOffset,
+        d->yOffset,
+        d->xOffsetEnd,
+        d->yOffsetEnd);
+
+    double* s   = d->grid.s;
+    int iOffset = 0, jOffset = 0;
+
+    double xCenter = 0, yCenter = 0, radius = 0;
+    double x1 = 0, x2 = 0, y1 = 0, y2 = 0;
+
+    switch (params->shape) {
+    case NOSHAPE:
+        break;
+    case RECT:
+        x1 = params->xCenter - params->xRectLength / 2;
+        x2 = params->xCenter + params->xRectLength / 2;
+        y1 = params->yCenter - params->yRectLength / 2;
+        y2 = params->yCenter + params->yRectLength / 2;
+
+        iOffset = d->xOffset / dx;
+        jOffset = d->yOffset / dy;
+
+        for (int j = 1; j < jmaxLocal + 1; ++j) {
+            for (int i = 1; i < imaxLocal + 1; ++i) {
+                if ((x1 <= ((i + iOffset) * dx)) && (((i + iOffset) * dx) <= x2) &&
+                    (y1 <= ((j + jOffset) * dy)) && (((j + jOffset) * dy) <= y2)) {
+                    S(i, j) = OBSTACLE;
+                }
+            }
+        }
+
+        break;
+    case CIRCLE:
+        xCenter = params->xCenter;
+        yCenter = params->yCenter;
+        radius  = params->circleRadius;
+
+        iOffset = d->xOffset / dx;
+        jOffset = d->yOffset / dy;
+
+        for (int j = 1; j < jmaxLocal + 1; j++) {
+            for (int i = 1; i < imaxLocal + 1; i++) {
+                if (distance(((i + iOffset) * dx),
+                        ((j + jOffset) * dy),
+                        xCenter,
+                        yCenter) <= radius) {
+                    S(i, j) = OBSTACLE;
+                }
+            }
+        }
+
+        break;
+    default:
+        break;
+    }
+
+#ifdef _MPI
+    commExchange(&d->comm, s);
+#endif
+
+    for (int j = 1; j < jmaxLocal + 1; j++) {
+        for (int i = 1; i < imaxLocal + 1; i++) {
+            if (S(i, j - 1) == FLUID && S(i, j + 1) == OBSTACLE && S(i, j) == OBSTACLE)
+                S(i, j) = BOTTOM; // BOTTOM
+            if (S(i - 1, j) == FLUID && S(i + 1, j) == OBSTACLE && S(i, j) == OBSTACLE)
+                S(i, j) = LEFT; // LEFT
+            if (S(i + 1, j) == FLUID && S(i - 1, j) == OBSTACLE && S(i, j) == OBSTACLE)
+                S(i, j) = RIGHT; // RIGHT
+            if (S(i, j + 1) == FLUID && S(i, j - 1) == OBSTACLE && S(i, j) == OBSTACLE)
+                S(i, j) = TOP; // TOP
+            if (S(i - 1, j - 1) == FLUID && S(i, j - 1) == FLUID &&
+                S(i - 1, j) == FLUID && S(i + 1, j + 1) == OBSTACLE &&
+                (S(i, j) == OBSTACLE || S(i, j) == LEFT || S(i, j) == BOTTOM))
+                S(i, j) = BOTTOMLEFT; // BOTTOMLEFT
+            if (S(i + 1, j - 1) == FLUID && S(i, j - 1) == FLUID &&
+                S(i + 1, j) == FLUID && S(i - 1, j + 1) == OBSTACLE &&
+                (S(i, j) == OBSTACLE || S(i, j) == RIGHT || S(i, j) == BOTTOM))
+                S(i, j) = BOTTOMRIGHT; // BOTTOMRIGHT
+            if (S(i - 1, j + 1) == FLUID && S(i - 1, j) == FLUID &&
+                S(i, j + 1) == FLUID && S(i + 1, j - 1) == OBSTACLE &&
+                (S(i, j) == OBSTACLE || S(i, j) == LEFT || S(i, j) == TOP))
+                S(i, j) = TOPLEFT; // TOPLEFT
+            if (S(i + 1, j + 1) == FLUID && S(i + 1, j) == FLUID &&
+                S(i, j + 1) == FLUID && S(i - 1, j - 1) == OBSTACLE &&
+                (S(i, j) == OBSTACLE || S(i, j) == RIGHT || S(i, j) == TOP))
+                S(i, j) = TOPRIGHT; // TOPRIGHT
+        }
+    }
+
+#ifdef VERBOSE
+    printConfig(d);
+#endif
+}
+
+void computeRHS(Discretization* d)
+{
+    int imaxLocal = d->comm.imaxLocal;
+    int jmaxLocal = d->comm.jmaxLocal;
+    double idx    = 1.0 / d->grid.dx;
+    double idy    = 1.0 / d->grid.dy;
+    double idt    = 1.0 / d->dt;
+    double* rhs   = d->rhs;
+    double* f     = d->f;
+    double* g     = d->g;
+
+    commShift(&d->comm, f, g);
+
+    for (int j = 1; j < jmaxLocal + 1; j++) {
+        for (int i = 1; i < imaxLocal + 1; i++) {
+            RHS(i, j) = ((F(i, j) - F(i - 1, j)) * idx + (G(i, j) - G(i, j - 1)) * idy) *
+                        idt;
+        }
+    }
+}
+
+static double maxElement(Discretization* d, double* m)
+{
+    int imaxLocal = d->comm.imaxLocal;
+    int jmaxLocal = d->comm.jmaxLocal;
+    int size      = (imaxLocal + 2) * (jmaxLocal + 2);
+    double maxval = DBL_MIN;
+
+    for (int i = 0; i < size; i++) {
+        maxval = MAX(maxval, fabs(m[i]));
+    }
+
+    commReduction(&maxval, MAX);
+    return maxval;
+}
+
+void computeTimestep(Discretization* d)
+{
+    double dt   = d->dtBound;
+    double dx   = d->grid.dx;
+    double dy   = d->grid.dy;
+    double umax = maxElement(d, d->u);
+    double vmax = maxElement(d, d->v);
+
+    if (umax > 0) {
+        dt = (dt > dx / umax) ? dx / umax : dt;
+    }
+    if (vmax > 0) {
+        dt = (dt > dy / vmax) ? dy / vmax : dt;
+    }
+
+    d->dt = dt * d->tau;
+}
+
+void setBoundaryConditions(Discretization* d)
+{
+    int imaxLocal = d->comm.imaxLocal;
+    int jmaxLocal = d->comm.jmaxLocal;
+    double* u     = d->u;
+    double* v     = d->v;
+
+    if (commIsBoundary(&d->comm, T)) {
+        switch (d->bcTop) {
+        case NOSLIP:
+            for (int i = 1; i < imaxLocal + 1; i++) {
+                V(i, jmaxLocal)     = 0.0;
+                U(i, jmaxLocal + 1) = -U(i, jmaxLocal);
+            }
+            break;
+        case SLIP:
+            for (int i = 1; i < imaxLocal + 1; i++) {
+                V(i, jmaxLocal)     = 0.0;
+                U(i, jmaxLocal + 1) = U(i, jmaxLocal);
+            }
+            break;
+        case OUTFLOW:
+            for (int i = 1; i < imaxLocal + 1; i++) {
+                U(i, jmaxLocal + 1) = U(i, jmaxLocal);
+                V(i, jmaxLocal)     = V(i, jmaxLocal - 1);
+            }
+            break;
+        case PERIODIC:
+            break;
+        }
+    }
+
+    if (commIsBoundary(&d->comm, B)) {
+        switch (d->bcBottom) {
+        case NOSLIP:
+            for (int i = 1; i < imaxLocal + 1; i++) {
+                V(i, 0) = 0.0;
+                U(i, 0) = -U(i, 1);
+            }
+            break;
+        case SLIP:
+            for (int i = 1; i < imaxLocal + 1; i++) {
+                V(i, 0) = 0.0;
+                U(i, 0) = U(i, 1);
+            }
+            break;
+        case OUTFLOW:
+            for (int i = 1; i < imaxLocal + 1; i++) {
+                U(i, 0) = U(i, 1);
+                V(i, 0) = V(i, 1);
+            }
+            break;
+        case PERIODIC:
+            break;
+        }
+    }
+
+    if (commIsBoundary(&d->comm, R)) {
+        switch (d->bcRight) {
+        case NOSLIP:
+            for (int j = 1; j < jmaxLocal + 1; j++) {
+                U(imaxLocal, j)     = 0.0;
+                V(imaxLocal + 1, j) = -V(imaxLocal, j);
+            }
+            break;
+        case SLIP:
+            for (int j = 1; j < jmaxLocal + 1; j++) {
+                U(imaxLocal, j)     = 0.0;
+                V(imaxLocal + 1, j) = V(imaxLocal, j);
+            }
+            break;
+        case OUTFLOW:
+            for (int j = 1; j < jmaxLocal + 1; j++) {
+                U(imaxLocal, j)     = U(imaxLocal - 1, j);
+                V(imaxLocal + 1, j) = V(imaxLocal, j);
+            }
+            break;
+        case PERIODIC:
+            break;
+        }
+    }
+
+    if (commIsBoundary(&d->comm, L)) {
+        switch (d->bcLeft) {
+        case NOSLIP:
+            for (int j = 1; j < jmaxLocal + 1; j++) {
+                U(0, j) = 0.0;
+                V(0, j) = -V(1, j);
+            }
+            break;
+        case SLIP:
+            for (int j = 1; j < jmaxLocal + 1; j++) {
+                U(0, j) = 0.0;
+                V(0, j) = V(1, j);
+            }
+            break;
+        case OUTFLOW:
+            for (int j = 1; j < jmaxLocal + 1; j++) {
+                U(0, j) = U(1, j);
+                V(0, j) = V(1, j);
+            }
+            break;
+        case PERIODIC:
+            break;
+        }
+    }
+}
+
+void setSpecialBoundaryCondition(Discretization* d)
+{
+    int imaxLocal = d->comm.imaxLocal;
+    int jmaxLocal = d->comm.jmaxLocal;
+    double* u     = d->u;
+    double* s     = d->grid.s;
+
+    if (strcmp(d->problem, "dcavity") == 0) {
+        if (commIsBoundary(&d->comm, T)) {
+            for (int i = 1; i < imaxLocal + 1; i++) {
+                U(i, jmaxLocal + 1) = 2.0 - U(i, jmaxLocal);
+            }
+        }
+    } else if (strcmp(d->problem, "canal") == 0) {
+        if (commIsBoundary(&d->comm, L)) {
+
+            double ylength = d->grid.ylength;
+            double dy      = d->grid.dy;
+            int rest       = d->grid.jmax % d->comm.dims[JDIM];
+            int yc         = d->comm.rank * (d->grid.jmax / d->comm.dims[JDIM]) +
+                     MIN(rest, d->comm.rank);
+            double ys = dy * (yc + 0.5);
+            double y;
+
+            // printf("RANK %d yc: %d ys: %f\n",d->comm.rank, yc, ys);
+
+            for (int j = 1; j < jmaxLocal + 1; j++) {
+                y       = ys + dy * (j - 0.5);
+                U(0, j) = y * (ylength - y) * 4.0 / (ylength * ylength);
+            }
+        }
+    } else if (strcmp(d->problem, "backstep") == 0) {
+        for (int j = 1; j < jmaxLocal + 1; j++) {
+            if (S(0, j) == FLUID) U(0, j) = 1.0;
+        }
+    } else if (strcmp(d->problem, "karman") == 0) {
+        for (int j = 1; j < jmaxLocal + 1; j++) {
+            U(0, j) = 1.0;
+        }
+    }
+    /* print(solver, solver->u); */
+}
+
+void setObjectBoundaryCondition(Discretization* d)
+{
+    int imaxLocal = d->comm.imaxLocal;
+    int jmaxLocal = d->comm.jmaxLocal;
+    double* u     = d->u;
+    double* v     = d->v;
+    double* s     = d->grid.s;
+
+    for (int j = 1; j < jmaxLocal + 1; j++) {
+        for (int i = 1; i < imaxLocal + 1; i++) {
+            switch ((int)S(i, j)) {
+            case TOP:
+                U(i, j)     = -U(i, j + 1);
+                U(i - 1, j) = -U(i - 1, j + 1);
+                V(i, j)     = 0.0;
+                break;
+            case BOTTOM:
+                U(i, j)     = -U(i, j - 1);
+                U(i - 1, j) = -U(i - 1, j - 1);
+                V(i, j)     = 0.0;
+                break;
+            case LEFT:
+                U(i - 1, j) = 0.0;
+                V(i, j)     = -V(i - 1, j);
+                V(i, j - 1) = -V(i - 1, j - 1);
+                break;
+            case RIGHT:
+                U(i, j)     = 0.0;
+                V(i, j)     = -V(i + 1, j);
+                V(i, j - 1) = -V(i + 1, j - 1);
+                break;
+            case TOPLEFT:
+                U(i, j)     = -U(i, j + 1);
+                U(i - 1, j) = 0.0;
+                V(i, j)     = 0.0;
+                V(i, j - 1) = -V(i - 1, j - 1);
+                break;
+            case TOPRIGHT:
+                U(i, j)     = 0.0;
+                U(i - 1, j) = -U(i - 1, j + 1);
+                V(i, j)     = 0.0;
+                V(i, j - 1) = -V(i + 1, j - 1);
+                break;
+            case BOTTOMLEFT:
+                U(i, j)     = -U(i, j - 1);
+                U(i - 1, j) = 0.0;
+                V(i, j)     = -V(i - 1, j);
+                V(i, j - 1) = 0.0;
+                break;
+            case BOTTOMRIGHT:
+                U(i, j)     = 0.0;
+                U(i - 1, j) = -U(i - 1, j - 1);
+                V(i, j)     = -V(i, j + 1);
+                V(i, j - 1) = 0.0;
+                break;
+            }
+        }
+    }
+}
+
+void computeFG(Discretization* d)
+{
+    double* u = d->u;
+    double* v = d->v;
+    double* f = d->f;
+    double* g = d->g;
+    double* s = d->grid.s;
+
+    int imaxLocal = d->comm.imaxLocal;
+    int jmaxLocal = d->comm.jmaxLocal;
+
+    double gx        = d->gx;
+    double gy        = d->gy;
+    double gamma     = d->gamma;
+    double dt        = d->dt;
+    double inverseRe = 1.0 / d->re;
+    double inverseDx = 1.0 / d->grid.dx;
+    double inverseDy = 1.0 / d->grid.dy;
+    double du2dx, dv2dy, duvdx, duvdy;
+    double du2dx2, du2dy2, dv2dx2, dv2dy2;
+
+    commExchange(&d->comm, u);
+    commExchange(&d->comm, v);
+
+    for (int j = 1; j < jmaxLocal + 1; j++) {
+        for (int i = 1; i < imaxLocal + 1; i++) {
+            if (S(i, j) == FLUID) {
+                du2dx = inverseDx * 0.25 *
+                            ((U(i, j) + U(i + 1, j)) * (U(i, j) + U(i + 1, j)) -
+                                (U(i, j) + U(i - 1, j)) * (U(i, j) + U(i - 1, j))) +
+                        gamma * inverseDx * 0.25 *
+                            (fabs(U(i, j) + U(i + 1, j)) * (U(i, j) - U(i + 1, j)) +
+                                fabs(U(i, j) + U(i - 1, j)) * (U(i, j) - U(i - 1, j)));
+
+                duvdy = inverseDy * 0.25 *
+                            ((V(i, j) + V(i + 1, j)) * (U(i, j) + U(i, j + 1)) -
+                                (V(i, j - 1) + V(i + 1, j - 1)) *
+                                    (U(i, j) + U(i, j - 1))) +
+                        gamma * inverseDy * 0.25 *
+                            (fabs(V(i, j) + V(i + 1, j)) * (U(i, j) - U(i, j + 1)) +
+                                fabs(V(i, j - 1) + V(i + 1, j - 1)) *
+                                    (U(i, j) - U(i, j - 1)));
+
+                du2dx2 = inverseDx * inverseDx *
+                         (U(i + 1, j) - 2.0 * U(i, j) + U(i - 1, j));
+                du2dy2 = inverseDy * inverseDy *
+                         (U(i, j + 1) - 2.0 * U(i, j) + U(i, j - 1));
+                F(i, j) = U(i, j) +
+                          dt * (inverseRe * (du2dx2 + du2dy2) - du2dx - duvdy + gx);
+
+                duvdx = inverseDx * 0.25 *
+                            ((U(i, j) + U(i, j + 1)) * (V(i, j) + V(i + 1, j)) -
+                                (U(i - 1, j) + U(i - 1, j + 1)) *
+                                    (V(i, j) + V(i - 1, j))) +
+                        gamma * inverseDx * 0.25 *
+                            (fabs(U(i, j) + U(i, j + 1)) * (V(i, j) - V(i + 1, j)) +
+                                fabs(U(i - 1, j) + U(i - 1, j + 1)) *
+                                    (V(i, j) - V(i - 1, j)));
+
+                dv2dy = inverseDy * 0.25 *
+                            ((V(i, j) + V(i, j + 1)) * (V(i, j) + V(i, j + 1)) -
+                                (V(i, j) + V(i, j - 1)) * (V(i, j) + V(i, j - 1))) +
+                        gamma * inverseDy * 0.25 *
+                            (fabs(V(i, j) + V(i, j + 1)) * (V(i, j) - V(i, j + 1)) +
+                                fabs(V(i, j) + V(i, j - 1)) * (V(i, j) - V(i, j - 1)));
+
+                dv2dx2 = inverseDx * inverseDx *
+                         (V(i + 1, j) - 2.0 * V(i, j) + V(i - 1, j));
+                dv2dy2 = inverseDy * inverseDy *
+                         (V(i, j + 1) - 2.0 * V(i, j) + V(i, j - 1));
+                G(i, j) = V(i, j) +
+                          dt * (inverseRe * (dv2dx2 + dv2dy2) - duvdx - dv2dy + gy);
+            } else {
+                switch ((int)S(i, j)) {
+                case TOP:
+                    G(i, j) = V(i, j);
+                    break;
+                case BOTTOM:
+                    G(i, j - 1) = V(i, j - 1);
+                    break;
+                case LEFT:
+                    F(i - 1, j) = U(i - 1, j);
+                    break;
+                case RIGHT:
+                    F(i, j) = U(i, j);
+                    break;
+                case TOPLEFT:
+                    F(i - 1, j) = U(i - 1, j);
+                    G(i, j)     = V(i, j);
+                    break;
+                case TOPRIGHT:
+                    F(i, j) = U(i, j);
+                    G(i, j) = V(i, j);
+                    break;
+                case BOTTOMLEFT:
+                    F(i - 1, j) = U(i - 1, j);
+                    G(i, j - 1) = V(i, j - 1);
+                    break;
+                case BOTTOMRIGHT:
+                    F(i, j)     = U(i, j);
+                    G(i, j - 1) = V(i, j - 1);
+                    break;
+                }
+            }
+        }
+    }
+
+    /* ----------------------------- boundary of F --------------------------- */
+    if (commIsBoundary(&d->comm, L)) {
+        for (int j = 1; j < jmaxLocal + 1; j++) {
+            F(0, j) = U(0, j);
+        }
+    }
+
+    if (commIsBoundary(&d->comm, R)) {
+        for (int j = 1; j < jmaxLocal + 1; j++) {
+            F(imaxLocal, j) = U(imaxLocal, j);
+        }
+    }
+
+    /* ----------------------------- boundary of G --------------------------- */
+    if (commIsBoundary(&d->comm, B)) {
+        for (int i = 1; i < imaxLocal + 1; i++) {
+            G(i, 0) = V(i, 0);
+        }
+    }
+
+    if (commIsBoundary(&d->comm, T)) {
+        for (int i = 1; i < imaxLocal + 1; i++) {
+            G(i, jmaxLocal) = V(i, jmaxLocal);
+        }
+    }
+}
+
+void adaptUV(Discretization* d)
+{
+    int imaxLocal = d->comm.imaxLocal;
+    int jmaxLocal = d->comm.jmaxLocal;
+
+    double* p = d->p;
+    double* u = d->u;
+    double* v = d->v;
+    double* f = d->f;
+    double* g = d->g;
+
+    double factorX = d->dt / d->grid.dx;
+    double factorY = d->dt / d->grid.dy;
+
+    for (int j = 1; j < jmaxLocal + 1; j++) {
+        for (int i = 1; i < imaxLocal + 1; i++) {
+            U(i, j) = F(i, j) - (P(i + 1, j) - P(i, j)) * factorX;
+            V(i, j) = G(i, j) - (P(i, j + 1) - P(i, j)) * factorY;
+        }
+    }
+}
+
+void writeResult(Discretization* d, double* u, double* v, double* p)
+{
+    int imax  = d->grid.imax;
+    int jmax  = d->grid.jmax;
+    double dx = d->grid.dx;
+    double dy = d->grid.dy;
+    double x = 0.0, y = 0.0;
+
+    FILE* fp;
+    fp = fopen("pressure.dat", "w");
+
+    if (fp == NULL) {
+        printf("Error!\n");
+        exit(EXIT_FAILURE);
+    }
+
+    for (int j = 1; j <= jmax; j++) {
+        y = (double)(j - 0.5) * dy;
+        for (int i = 1; i <= imax; i++) {
+            x = (double)(i - 0.5) * dx;
+            fprintf(fp, "%.2f %.2f %f\n", x, y, p[j * (imax + 2) + i]);
+        }
+        fprintf(fp, "\n");
+    }
+
+    fclose(fp);
+
+    fp = fopen("velocity.dat", "w");
+
+    if (fp == NULL) {
+        printf("Error!\n");
+        exit(EXIT_FAILURE);
+    }
+
+    for (int j = 1; j <= jmax; j++) {
+        y = dy * (j - 0.5);
+        for (int i = 1; i <= imax; i++) {
+            x           = dx * (i - 0.5);
+            double velU = (u[j * (imax + 2) + i] + u[j * (imax + 2) + (i - 1)]) / 2.0;
+            double velV = (v[j * (imax + 2) + i] + v[(j - 1) * (imax + 2) + i]) / 2.0;
+            double len  = sqrt((velU * velU) + (velV * velV));
+            fprintf(fp, "%.2f %.2f %f %f %f\n", x, y, velU, velV, len);
+        }
+    }
+
+    fclose(fp);
+}
diff --git a/EnhancedSolver/2D-mpi/src/discretization.h b/EnhancedSolver/2D-mpi/src/discretization.h
new file mode 100644
index 0000000..5765a2e
--- /dev/null
+++ b/EnhancedSolver/2D-mpi/src/discretization.h
@@ -0,0 +1,66 @@
+/*
+ * Copyright (C)  NHR@FAU, University Erlangen-Nuremberg.
+ * All rights reserved. This file is part of nusif-solver.
+ * Use of this source code is governed by a MIT style
+ * license that can be found in the LICENSE file.
+ */
+#ifndef __DISCRETIZATION_H_
+#define __DISCRETIZATION_H_
+#include "comm.h"
+#include "grid.h"
+#include "parameter.h"
+#include<unistd.h>
+
+enum BC { NOSLIP = 1, SLIP, OUTFLOW, PERIODIC };
+
+enum OBJECTBOUNDARY {
+    FLUID = 0,
+    TOP,
+    BOTTOM,
+    LEFT,
+    RIGHT,
+    TOPLEFT,
+    BOTTOMLEFT,
+    TOPRIGHT,
+    BOTTOMRIGHT,
+    OBSTACLE
+};
+
+enum SHAPE { NOSHAPE = 0, RECT, CIRCLE };
+
+typedef struct {
+    /* geometry and grid information */
+    Grid grid;
+    /* arrays */
+    double *p, *rhs;
+    double *f, *g;
+    double *u, *v;
+    /* parameters */
+    double re, tau, gamma;
+    double gx, gy;
+    /* time stepping */
+    double dt, te;
+    double dtBound;
+    char* problem;
+
+    double xLocal, yLocal, xOffset, yOffset, xOffsetEnd, yOffsetEnd;
+
+
+    int bcLeft, bcRight, bcBottom, bcTop;
+    /* communication */
+    Comm comm;
+} Discretization;
+
+extern void initDiscretiztion(Discretization*, Parameter*);
+extern void computeRHS(Discretization*);
+extern void normalizePressure(Discretization*);
+extern void computeTimestep(Discretization*);
+extern void setBoundaryConditions(Discretization*);
+extern void setSpecialBoundaryCondition(Discretization*);
+extern void setObjectBoundaryCondition(Discretization*);
+extern void computeFG(Discretization*);
+extern void adaptUV(Discretization*);
+extern void writeResult(Discretization* s, double* u, double* v, double* p);
+extern double sumOffset(double* , int , int , int );
+extern void print(Discretization* , double* );
+#endif
diff --git a/EnhancedSolver/2D-mpi/src/grid.h b/EnhancedSolver/2D-mpi/src/grid.h
new file mode 100644
index 0000000..d326641
--- /dev/null
+++ b/EnhancedSolver/2D-mpi/src/grid.h
@@ -0,0 +1,17 @@
+/*
+ * Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
+ * All rights reserved. This file is part of nusif-solver.
+ * Use of this source code is governed by a MIT style
+ * license that can be found in the LICENSE file.
+ */
+#ifndef __GRID_H_
+#define __GRID_H_
+
+typedef struct {
+    double dx, dy;
+    int imax, jmax;
+    double xlength, ylength;
+    double* s;
+} Grid;
+
+#endif // __GRID_H_
diff --git a/EnhancedSolver/2D-mpi/src/likwid-marker.h b/EnhancedSolver/2D-mpi/src/likwid-marker.h
new file mode 100644
index 0000000..c3770c0
--- /dev/null
+++ b/EnhancedSolver/2D-mpi/src/likwid-marker.h
@@ -0,0 +1,54 @@
+/*
+ * =======================================================================================
+ *
+ *      Author:   Jan Eitzinger (je), jan.eitzinger@fau.de
+ *      Copyright (c) 2020 RRZE, University Erlangen-Nuremberg
+ *
+ *      Permission is hereby granted, free of charge, to any person obtaining a copy
+ *      of this software and associated documentation files (the "Software"), to deal
+ *      in the Software without restriction, including without limitation the rights
+ *      to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ *      copies of the Software, and to permit persons to whom the Software is
+ *      furnished to do so, subject to the following conditions:
+ *
+ *      The above copyright notice and this permission notice shall be included in all
+ *      copies or substantial portions of the Software.
+ *
+ *      THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ *      IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ *      FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ *      AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ *      LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ *      OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ *      SOFTWARE.
+ *
+ * =======================================================================================
+ */
+#ifndef LIKWID_MARKERS_H
+#define LIKWID_MARKERS_H
+
+#ifdef LIKWID_PERFMON
+#include <likwid.h>
+#define LIKWID_MARKER_INIT                likwid_markerInit()
+#define LIKWID_MARKER_THREADINIT          likwid_markerThreadInit()
+#define LIKWID_MARKER_SWITCH              likwid_markerNextGroup()
+#define LIKWID_MARKER_REGISTER(regionTag) likwid_markerRegisterRegion(regionTag)
+#define LIKWID_MARKER_START(regionTag)    likwid_markerStartRegion(regionTag)
+#define LIKWID_MARKER_STOP(regionTag)     likwid_markerStopRegion(regionTag)
+#define LIKWID_MARKER_CLOSE               likwid_markerClose()
+#define LIKWID_MARKER_RESET(regionTag)    likwid_markerResetRegion(regionTag)
+#define LIKWID_MARKER_GET(regionTag, nevents, events, time, count)                       \
+    likwid_markerGetRegion(regionTag, nevents, events, time, count)
+#else /* LIKWID_PERFMON */
+#define LIKWID_MARKER_INIT
+#define LIKWID_MARKER_THREADINIT
+#define LIKWID_MARKER_SWITCH
+#define LIKWID_MARKER_REGISTER(regionTag)
+#define LIKWID_MARKER_START(regionTag)
+#define LIKWID_MARKER_STOP(regionTag)
+#define LIKWID_MARKER_CLOSE
+#define LIKWID_MARKER_GET(regionTag, nevents, events, time, count)
+#define LIKWID_MARKER_RESET(regionTag)
+#endif /* LIKWID_PERFMON */
+
+#endif /*LIKWID_MARKERS_H*/
diff --git a/EnhancedSolver/2D-mpi/src/main.c b/EnhancedSolver/2D-mpi/src/main.c
new file mode 100644
index 0000000..91b853a
--- /dev/null
+++ b/EnhancedSolver/2D-mpi/src/main.c
@@ -0,0 +1,120 @@
+/*
+ * Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
+ * All rights reserved.
+ * Use of this source code is governed by a MIT-style
+ * license that can be found in the LICENSE file.
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include "allocate.h"
+#include "comm.h"
+#include "discretization.h"
+#include "grid.h"
+#include "parameter.h"
+#include "particletracing.h"
+#include "progress.h"
+#include "timing.h"
+
+static void writeResults(Discretization* s)
+{
+#ifdef _MPI
+    size_t bytesize = (s->grid.imax + 2) * (s->grid.jmax + 2) * sizeof(double);
+
+    double* ug = allocate(64, bytesize);
+    double* vg = allocate(64, bytesize);
+    double* pg = allocate(64, bytesize);
+
+    commCollectResult(&s->comm, ug, vg, pg, s->u, s->v, s->p, s->grid.imax, s->grid.jmax);
+    if (commIsMaster(&s->comm)) {
+        writeResult(s, ug, vg, pg);
+    }
+
+    free(ug);
+    free(vg);
+    free(pg);
+#else
+    writeResult(s, s->u, s->v, s->p);
+#endif
+}
+
+int main(int argc, char** argv)
+{
+    int rank;
+    double timeStart, timeStop;
+    Parameter p;
+    Discretization d;
+    Solver s;
+    ParticleTracer particletracer;
+
+    commInit(&d.comm, argc, argv);
+    initParameter(&p);
+
+    if (argc != 2) {
+        printf("Usage: %s <configFile>\n", argv[0]);
+        exit(EXIT_SUCCESS);
+    }
+
+    readParameter(&p, argv[1]);
+    commPartition(&d.comm, p.jmax, p.imax);
+    if (commIsMaster(&d.comm)) {
+        printParameter(&p);
+    }
+
+    initDiscretiztion(&d, &p);
+    initSolver(&s, &d, &p);
+    initParticleTracer(&particletracer, &d, &p);
+
+#ifdef TEST
+    commPrintConfig(&d.comm);
+    commTestInit(&d.comm, d.p, d.f, d.g);
+    commExchange(&d.comm, d.p);
+    commShift(&d.comm, d.f, d.g);
+    commTestWrite(&d.comm, d.p, d.f, d.g);
+    writeResults(&d);
+    commFinalize(&d.comm);
+    exit(EXIT_SUCCESS);
+#endif
+#ifndef VERBOSE
+    initProgress(d.te);
+#endif
+
+    double tau = d.tau;
+    double te  = d.te;
+    double t   = 0.0;
+
+    timeStart = getTimeStamp();
+    while (t <= te) {
+        if (tau > 0.0) computeTimestep(&d);
+        setBoundaryConditions(&d);
+        setSpecialBoundaryCondition(&d);
+        setObjectBoundaryCondition(&d);
+        computeFG(&d);
+        computeRHS(&d);
+        solve(&s, d.p, d.rhs);
+        adaptUV(&d);
+        trace(&particletracer, &d, t);
+        t += d.dt;
+
+#ifdef VERBOSE
+        if (commIsMaster(s.comm)) {
+            printf("TIME %f , TIMESTEP %f\n", t, d.dt);
+        }
+#else
+        printProgress(t);
+#endif
+    }
+    timeStop = getTimeStamp();
+#ifndef VERBOSE
+    stopProgress();
+#endif
+    if (commIsMaster(s.comm)) {
+        printf("Solution took %.2fs\n", timeStop - timeStart);
+    }
+
+    freeParticles(&particletracer);
+    writeResults(&d);
+    commFinalize(s.comm);
+    return EXIT_SUCCESS;
+}
diff --git a/EnhancedSolver/2D-mpi/src/parameter.c b/EnhancedSolver/2D-mpi/src/parameter.c
new file mode 100644
index 0000000..3f388c4
--- /dev/null
+++ b/EnhancedSolver/2D-mpi/src/parameter.c
@@ -0,0 +1,143 @@
+/*
+ * Copyright (C)  NHR@FAU, University Erlangen-Nuremberg.
+ * All rights reserved. This file is part of nusif-solver.
+ * Use of this source code is governed by a MIT style
+ * license that can be found in the LICENSE file.
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "parameter.h"
+#include "util.h"
+#define MAXLINE 4096
+
+void initParameter(Parameter* param)
+{
+    param->xlength    = 1.0;
+    param->ylength    = 1.0;
+    param->imax       = 100;
+    param->jmax       = 100;
+    param->itermax    = 1000;
+    param->eps        = 0.0001;
+    param->omg        = 1.8;
+    param->levels     = 5;
+    param->presmooth  = 5;
+    param->postsmooth = 5;
+}
+
+void readParameter(Parameter* param, const char* filename)
+{
+    FILE* fp = fopen(filename, "r");
+    char line[MAXLINE];
+    int i;
+
+    if (!fp) {
+        fprintf(stderr, "Could not open parameter file: %s\n", filename);
+        exit(EXIT_FAILURE);
+    }
+
+    while (!feof(fp)) {
+        line[0] = '\0';
+        fgets(line, MAXLINE, fp);
+        for (i = 0; line[i] != '\0' && line[i] != '#'; i++)
+            ;
+        line[i] = '\0';
+
+        char* tok = strtok(line, " ");
+        char* val = strtok(NULL, " ");
+
+#define PARSE_PARAM(p, f)                                                                \
+    if (strncmp(tok, #p, sizeof(#p) / sizeof(#p[0]) - 1) == 0) {                         \
+        param->p = f(val);                                                               \
+    }
+#define PARSE_STRING(p) PARSE_PARAM(p, strdup)
+#define PARSE_INT(p)    PARSE_PARAM(p, atoi)
+#define PARSE_REAL(p)   PARSE_PARAM(p, atof)
+
+        if (tok != NULL && val != NULL) {
+            PARSE_REAL(xlength);
+            PARSE_REAL(ylength);
+            PARSE_INT(imax);
+            PARSE_INT(jmax);
+            PARSE_INT(itermax);
+            PARSE_REAL(eps);
+            PARSE_REAL(omg);
+            PARSE_REAL(re);
+            PARSE_REAL(tau);
+            PARSE_REAL(gamma);
+            PARSE_REAL(dt);
+            PARSE_REAL(te);
+            PARSE_REAL(gx);
+            PARSE_REAL(gy);
+            PARSE_STRING(name);
+            PARSE_INT(bcLeft);
+            PARSE_INT(bcRight);
+            PARSE_INT(bcBottom);
+            PARSE_INT(bcTop);
+            PARSE_INT(levels);
+            PARSE_INT(presmooth);
+            PARSE_INT(postsmooth);
+            PARSE_REAL(u_init);
+            PARSE_REAL(v_init);
+            PARSE_REAL(p_init);
+
+            /* Added new particle tracing parameters */
+            PARSE_INT(numberOfParticles);
+            PARSE_REAL(startTime);
+            PARSE_REAL(injectTimePeriod);
+            PARSE_REAL(writeTimePeriod);
+            PARSE_REAL(x1);
+            PARSE_REAL(y1);
+            PARSE_REAL(x2);
+            PARSE_REAL(y2);
+
+            /* Added obstacle geometry parameters */
+            PARSE_INT(shape);
+            PARSE_REAL(xCenter);
+            PARSE_REAL(yCenter);
+            PARSE_REAL(xRectLength);
+            PARSE_REAL(yRectLength);
+            PARSE_REAL(circleRadius);
+        }
+    }
+
+    fclose(fp);
+}
+
+void printParameter(Parameter* param)
+{
+    printf("Parameters for %s\n", param->name);
+    printf("Boundary conditions Left:%d Right:%d Bottom:%d Top:%d\n",
+        param->bcLeft,
+        param->bcRight,
+        param->bcBottom,
+        param->bcTop);
+    printf("\tReynolds number: %.2f\n", param->re);
+    printf("\tInit arrays: U:%.2f V:%.2f P:%.2f\n",
+        param->u_init,
+        param->v_init,
+        param->p_init);
+    printf("Geometry data:\n");
+    printf("\tDomain box size (x, y): %.2f, %.2f\n", param->xlength, param->ylength);
+    printf("\tCells (x, y): %d, %d\n", param->imax, param->jmax);
+    printf("Timestep parameters:\n");
+    printf("\tDefault stepsize: %.2f, Final time %.2f\n", param->dt, param->te);
+    printf("\tTau factor: %.2f\n", param->tau);
+    printf("Iterative solver parameters:\n");
+    printf("\tMax iterations: %d\n", param->itermax);
+    printf("\tepsilon (stopping tolerance) : %f\n", param->eps);
+    printf("\tgamma (stopping tolerance) : %f\n", param->gamma);
+    printf("\tomega (SOR relaxation): %f\n", param->omg);
+    printf("Particle tracer parameters:\n");
+    printf("\tNumber of particles: %d\n", param->numberOfParticles);
+    printf("\tstartTime : %f\n", param->startTime);
+    printf("\tinjecTimePeriod : %f\n", param->injectTimePeriod);
+    printf("\twriteTimePeriod: %f\n", param->writeTimePeriod);
+    printf("\tx1 : %f, x2 : %f, y1 : %f, y2 : %f\n",
+        param->x1,
+        param->x2,
+        param->y1,
+        param->y2);
+    printf("\tShape : %d\n", param->shape);
+}
diff --git a/EnhancedSolver/2D-mpi/src/parameter.h b/EnhancedSolver/2D-mpi/src/parameter.h
new file mode 100644
index 0000000..3085db3
--- /dev/null
+++ b/EnhancedSolver/2D-mpi/src/parameter.h
@@ -0,0 +1,38 @@
+/*
+ * Copyright (C)  NHR@FAU, University Erlangen-Nuremberg.
+ * All rights reserved. This file is part of nusif-solver.
+ * Use of this source code is governed by a MIT style
+ * license that can be found in the LICENSE file.
+ */
+#ifndef __PARAMETER_H_
+#define __PARAMETER_H_
+
+typedef struct {
+    double xlength, ylength;
+    int imax, jmax;
+    int itermax;
+    double eps, omg;
+    double re, tau, gamma;
+    double te, dt;
+    double gx, gy;
+    char* name;
+    int bcLeft, bcRight, bcBottom, bcTop;
+    double u_init, v_init, p_init;
+    
+    int levels, presmooth, postsmooth;
+
+    int numberOfParticles;
+    double startTime, injectTimePeriod, writeTimePeriod;
+
+    double x1, y1, x2, y2;
+
+    int shape;
+    double xCenter, yCenter, xRectLength, yRectLength, circleRadius;
+
+
+} Parameter;
+
+void initParameter(Parameter*);
+void readParameter(Parameter*, const char*);
+void printParameter(Parameter*);
+#endif
diff --git a/EnhancedSolver/2D-mpi/src/particletracing.c b/EnhancedSolver/2D-mpi/src/particletracing.c
new file mode 100644
index 0000000..980bd88
--- /dev/null
+++ b/EnhancedSolver/2D-mpi/src/particletracing.c
@@ -0,0 +1,612 @@
+/*
+ * Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
+ * All rights reserved. This file is part of nusif-solver.
+ * Use of this source code is governed by a MIT style
+ * license that can be found in the LICENSE file.
+ */
+#include <float.h>
+#include <math.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "particletracing.h"
+
+#define U(i, j) u[(j) * (imaxLocal + 2) + (i)]
+#define V(i, j) v[(j) * (imaxLocal + 2) + (i)]
+#define S(i, j) s[(j) * (imaxLocal + 2) + (i)]
+
+static int ts = 0;
+
+#define IDIM 0
+#define JDIM 1
+
+#define XOFFSET    0
+#define YOFFSET    1
+#define XOFFSETEND 2
+#define YOFFSETEND 3
+
+static double sum(int* sizes, int size)
+{
+    double sum = 0;
+
+    for (int i = 0; i < size; ++i) {
+        sum += sizes[i];
+    }
+
+    return sum;
+}
+
+void printUV(ParticleTracer* particletracer, double* u, double* v)
+{
+    int imaxLocal = particletracer->imaxLocal;
+
+    for (int i = 0; i < particletracer->size; i++) {
+        if (i == particletracer->rank) {
+            printf(
+                "\n### RANK %d #######################################################\n",
+                particletracer->rank);
+            printf("\nGrid U : \n");
+            for (int j = 0; j < particletracer->jmaxLocal + 2; j++) {
+                printf("%02d: ", j);
+                for (int i = 0; i < particletracer->imaxLocal + 2; i++) {
+                    printf("%4.2f  ", u[j * (imaxLocal + 2) + i]);
+                }
+                printf("\n");
+            }
+            fflush(stdout);
+            printf("\nGrid V : \n");
+            for (int j = 0; j < particletracer->jmaxLocal + 2; j++) {
+                printf("%02d: ", j);
+                for (int i = 0; i < particletracer->imaxLocal + 2; i++) {
+                    printf("%4.2f  ", v[j * (imaxLocal + 2) + i]);
+                }
+                printf("\n");
+            }
+            fflush(stdout);
+        }
+
+#ifdef _MPI
+        MPI_Barrier(MPI_COMM_WORLD);
+#endif
+    }
+}
+
+void initParticleTracer(
+    ParticleTracer* particletracer, Discretization* d, Parameter* params)
+{
+    int dims[NDIMS]    = { 0, 0 };
+    int periods[NDIMS] = { 0, 0 };
+
+    /* initializing local properties from params */
+    particletracer->rank = d->comm.rank;
+    particletracer->size = d->comm.size;
+
+    particletracer->numberOfParticles = params->numberOfParticles;
+    particletracer->startTime         = params->startTime;
+    particletracer->injectTimePeriod  = params->injectTimePeriod;
+    particletracer->writeTimePeriod   = params->writeTimePeriod;
+
+    particletracer->dt = params->dt;
+    particletracer->dx = params->xlength / params->imax;
+    particletracer->dy = params->ylength / params->jmax;
+
+    particletracer->xlength = params->xlength;
+    particletracer->ylength = params->ylength;
+
+    particletracer->x1 = params->x1;
+    particletracer->y1 = params->y1;
+    particletracer->x2 = params->x2;
+    particletracer->y2 = params->y2;
+
+    particletracer->lastInjectTime = params->startTime;
+    particletracer->lastUpdateTime = params->startTime;
+    particletracer->lastWriteTime  = params->startTime;
+
+    particletracer->pointer        = 0;
+    particletracer->totalParticles = 0;
+
+    particletracer->imax = params->imax;
+    particletracer->jmax = params->jmax;
+
+    particletracer->imaxLocal        = d->comm.imaxLocal;
+    particletracer->jmaxLocal        = d->comm.jmaxLocal;
+    particletracer->removedParticles = 0;
+
+    // Estimating the number of particles over the number of timesteps that could be
+    // required.
+    particletracer->estimatedNumParticles = (particletracer->imaxLocal *
+                                             particletracer->jmaxLocal);
+
+    // Allocating memory for the estimated particles over the timesteps.
+    particletracer->particlePool = malloc(
+        sizeof(Particle) * particletracer->estimatedNumParticles);
+
+    // Initializing the number of particles to 0 and turning OFF all of the particles.
+    // Contain information in x and y length metric, not i and j discretization metrics.
+    for (int i = 0; i < particletracer->estimatedNumParticles; ++i) {
+        particletracer->particlePool[i].x    = 0.0;
+        particletracer->particlePool[i].y    = 0.0;
+        particletracer->particlePool[i].flag = false;
+    }
+
+    // Creating a linearly spaced particle line.
+    particletracer->linSpaceLine = malloc(
+        sizeof(Particle) * particletracer->numberOfParticles);
+
+    // Creating an array for each rank that will hold information about
+    // offsets from other ranks. Holds each ranks x and y length metrics.
+    particletracer->offset = (double*)malloc(sizeof(double) * 4 * particletracer->size);
+
+    // Calculating each ranks local x and y length metrics.
+
+    double offset[4][particletracer->size];
+
+    // Calculating each ranks x and y local lengths.
+    particletracer->xLocal = d->xLocal;
+    particletracer->yLocal = d->yLocal;
+
+    double xLocal[particletracer->size];
+    double yLocal[particletracer->size];
+
+    // Calculate own x and y length metric offset based on other ranks offset data.
+    particletracer->xOffset    = d->xOffset;
+    particletracer->yOffset    = d->yOffset;
+    particletracer->xOffsetEnd = d->xOffsetEnd;
+    particletracer->yOffsetEnd = d->yOffsetEnd;
+
+    printf("Rank : %d, xOffset : %.2f, yOffset : %.2f, xOffsetEnd : %.2f, yOffsetEnd : "
+           "%.2f\n",
+        particletracer->rank,
+        particletracer->xOffset,
+        particletracer->yOffset,
+        particletracer->xOffsetEnd,
+        particletracer->yOffsetEnd);
+
+#ifdef _MPI
+    // Gather each ranks x and y length metric that marks each ranks own territory.
+    // Once the boundary leaves local domain, then it needs to know which ranks to send.
+    // And to know whos boundary it is, we need to know the rank.
+    MPI_Allgather(&particletracer->xOffset,
+        1,
+        MPI_DOUBLE,
+        offset[0],
+        1,
+        MPI_DOUBLE,
+        d->comm.comm);
+    MPI_Allgather(&particletracer->yOffset,
+        1,
+        MPI_DOUBLE,
+        offset[1],
+        1,
+        MPI_DOUBLE,
+        d->comm.comm);
+    MPI_Allgather(&particletracer->xOffsetEnd,
+        1,
+        MPI_DOUBLE,
+        offset[2],
+        1,
+        MPI_DOUBLE,
+        d->comm.comm);
+    MPI_Allgather(&particletracer->yOffsetEnd,
+        1,
+        MPI_DOUBLE,
+        offset[3],
+        1,
+        MPI_DOUBLE,
+        d->comm.comm);
+#endif
+
+    memcpy(particletracer->offset, offset, sizeof(offset));
+
+    for (int i = 0; i < particletracer->numberOfParticles; ++i) {
+        double spacing = (double)i / (double)(particletracer->numberOfParticles - 1);
+        particletracer->linSpaceLine[i].x = spacing * particletracer->x1 +
+                                            (1.0 - spacing) * particletracer->x2;
+        particletracer->linSpaceLine[i].y = spacing * particletracer->y1 +
+                                            (1.0 - spacing) * particletracer->y2;
+        particletracer->linSpaceLine[i].flag = true;
+    }
+
+#ifdef _MPI
+    // Create the mpi_particle datatype
+    MPI_Datatype mpi_particle;
+    int lengths[3] = { 1, 1, 1 };
+
+    MPI_Aint displacements[3];
+    Particle dummy_particle;
+    MPI_Aint base_address;
+    MPI_Get_address(&dummy_particle, &base_address);
+    MPI_Get_address(&dummy_particle.x, &displacements[0]);
+    MPI_Get_address(&dummy_particle.y, &displacements[1]);
+    MPI_Get_address(&dummy_particle.flag, &displacements[2]);
+    displacements[0] = MPI_Aint_diff(displacements[0], base_address);
+    displacements[1] = MPI_Aint_diff(displacements[1], base_address);
+    displacements[2] = MPI_Aint_diff(displacements[2], base_address);
+
+    MPI_Datatype types[3] = { MPI_DOUBLE, MPI_DOUBLE, MPI_C_BOOL };
+    MPI_Type_create_struct(3,
+        lengths,
+        displacements,
+        types,
+        &particletracer->mpi_particle);
+    MPI_Type_commit(&particletracer->mpi_particle);
+#endif
+}
+
+void printParticles(ParticleTracer* particletracer)
+{
+    for (int i = 0; i < particletracer->totalParticles; ++i) {
+        printf("Rank : %d Particle position X : %.2f, Y : %.2f, flag : %d, total pt : "
+               "%d, pointer : %d, xOffset : %.2f, yOffset : %.2f, xOffsetEnd : %.2f, "
+               "yOffsetEnd : %.2f\n",
+            particletracer->rank,
+            particletracer->particlePool[i].x,
+            particletracer->particlePool[i].y,
+            particletracer->particlePool[i].flag,
+            particletracer->totalParticles,
+            particletracer->pointer,
+            particletracer->xOffset,
+            particletracer->yOffset,
+            particletracer->xOffsetEnd,
+            particletracer->yOffsetEnd);
+    }
+}
+void injectParticles(ParticleTracer* particletracer)
+{
+    double x, y;
+    for (int i = 0; i < particletracer->numberOfParticles; ++i) {
+        x = particletracer->linSpaceLine[i].x;
+        y = particletracer->linSpaceLine[i].y;
+        if (x >= particletracer->xOffset && y >= particletracer->yOffset &&
+            x <= particletracer->xOffsetEnd && y <= particletracer->yOffsetEnd) {
+
+            particletracer->particlePool[particletracer->pointer].x    = x;
+            particletracer->particlePool[particletracer->pointer].y    = y;
+            particletracer->particlePool[particletracer->pointer].flag = true;
+            ++(particletracer->pointer);
+            ++(particletracer->totalParticles);
+        }
+    }
+}
+
+void advanceParticles(ParticleTracer* particletracer,
+    double* restrict u,
+    double* restrict v,
+    double* restrict s,
+    Comm* comm,
+    double time)
+{
+
+    int imax      = particletracer->imax;
+    int jmax      = particletracer->jmax;
+    int imaxLocal = particletracer->imaxLocal;
+    int jmaxLocal = particletracer->jmaxLocal;
+
+    double dx = particletracer->dx;
+    double dy = particletracer->dy;
+
+    double xlength = particletracer->xlength;
+    double ylength = particletracer->ylength;
+
+    Particle buff[particletracer->size][100];
+
+    for (int i = 0; i < particletracer->size; ++i) {
+        for (int j = 0; j < 100; ++j) {
+            buff[i][j].x    = 0.0;
+            buff[i][j].y    = 0.0;
+            buff[i][j].flag = false;
+        }
+    }
+    int particleBufIndex[particletracer->size];
+
+    memset(particleBufIndex, 0, sizeof(particleBufIndex));
+
+    for (int i = 0; i < particletracer->totalParticles; ++i) {
+        if (particletracer->particlePool[i].flag == true) {
+            double xTemp = particletracer->particlePool[i].x;
+            double yTemp = particletracer->particlePool[i].y;
+
+            double x = xTemp - particletracer->xOffset;
+            double y = yTemp - particletracer->yOffset;
+
+            int iCoord = (int)(x / dx) + 1;
+            int jCoord = (int)((y + 0.5 * dy) / dy) + 1;
+
+            double x1 = (double)(iCoord - 1) * dx;
+            double y1 = ((double)(jCoord - 1) - 0.5) * dy;
+            double x2 = (double)iCoord * dx;
+            double y2 = ((double)jCoord - 0.5) * dy;
+
+            double u_n = (1.0 / (dx * dy)) *
+                         ((x2 - x) * (y2 - y) * U(iCoord - 1, jCoord - 1) +
+                             (x - x1) * (y2 - y) * U(iCoord, jCoord - 1) +
+                             (x2 - x) * (y - y1) * U(iCoord - 1, jCoord) +
+                             (x - x1) * (y - y1) * U(iCoord, jCoord));
+
+            double new_x = (x + particletracer->xOffset) + particletracer->dt * u_n;
+            particletracer->particlePool[i].x = new_x;
+
+            iCoord = (int)((x + 0.5 * dx) / dx) + 1;
+            jCoord = (int)(y / dy) + 1;
+
+            x1 = ((double)(iCoord - 1) - 0.5) * dx;
+            y1 = (double)(jCoord - 1) * dy;
+            x2 = ((double)iCoord - 0.5) * dx;
+            y2 = (double)jCoord * dy;
+
+            double v_n = (1.0 / (dx * dy)) *
+                         ((x2 - x) * (y2 - y) * V(iCoord - 1, jCoord - 1) +
+                             (x - x1) * (y2 - y) * V(iCoord, jCoord - 1) +
+                             (x2 - x) * (y - y1) * V(iCoord - 1, jCoord) +
+                             (x - x1) * (y - y1) * V(iCoord, jCoord));
+
+            double new_y = (y + particletracer->yOffset) + particletracer->dt * v_n;
+            particletracer->particlePool[i].y = new_y;
+
+            if (((new_x < particletracer->xOffset) ||
+                    (new_x >= particletracer->xOffsetEnd) ||
+                    (new_y < particletracer->yOffset) ||
+                    (new_y >= particletracer->yOffsetEnd))) {
+                // New logic to transfer particles to neighbouring ranks or discard the
+                // particle.
+
+                for (int i = 0; i < particletracer->size; ++i) {
+                    if ((new_x >=
+                            particletracer->offset[i + particletracer->size * XOFFSET]) &&
+                        (new_x <= particletracer
+                                      ->offset[i + particletracer->size * XOFFSETEND]) &&
+                        (new_y >=
+                            particletracer->offset[i + particletracer->size * YOFFSET]) &&
+                        (new_y <= particletracer
+                                      ->offset[i + particletracer->size * YOFFSETEND]) &&
+                        i != particletracer->rank) {
+                        buff[i][particleBufIndex[i]].x    = new_x;
+                        buff[i][particleBufIndex[i]].y    = new_y;
+                        buff[i][particleBufIndex[i]].flag = true;
+                        ++particleBufIndex[i];
+                    }
+                }
+                particletracer->particlePool[i].flag = false;
+                particletracer->removedParticles++;
+            }
+
+            int i_new = new_x / dx, j_new = new_y / dy;
+            int iOffset = particletracer->xOffset / dx,
+                jOffset = particletracer->yOffset / dy;
+
+            if (S(i_new - iOffset, j_new - jOffset) != FLUID) {
+                particletracer->particlePool[i].flag = false;
+                particletracer->removedParticles++;
+            }
+        }
+    }
+
+#ifdef _MPI
+    for (int i = 0; i < particletracer->size; ++i) {
+        if (i != particletracer->rank) {
+            MPI_Send(buff[i], 100, particletracer->mpi_particle, i, 0, comm->comm);
+        }
+    }
+    for (int i = 0; i < particletracer->size; ++i) {
+        if (i != particletracer->rank) {
+            MPI_Recv(buff[i],
+                100,
+                particletracer->mpi_particle,
+                i,
+                0,
+                comm->comm,
+                MPI_STATUS_IGNORE);
+        }
+    }
+    for (int i = 0; i < particletracer->size; ++i) {
+        if (i != particletracer->rank) {
+            for (int j = 0; j < 100; ++j) {
+                if (buff[i][j].flag == true) {
+                    particletracer->particlePool[particletracer->pointer].x = buff[i][j]
+                                                                                  .x;
+                    particletracer->particlePool[particletracer->pointer].y = buff[i][j]
+                                                                                  .y;
+                    particletracer->particlePool[particletracer->pointer].flag = true;
+                    ++(particletracer->pointer);
+                    ++(particletracer->totalParticles);
+                }
+            }
+        }
+    }
+#endif
+}
+
+void freeParticles(ParticleTracer* particletracer)
+{
+    free(particletracer->particlePool);
+    free(particletracer->linSpaceLine);
+    free(particletracer->offset);
+}
+
+void writeParticles(ParticleTracer* particletracer, Comm* comm)
+{
+    int collectedBuffIndex[particletracer->size];
+
+#ifdef _MPI
+
+    MPI_Gather(&particletracer->totalParticles,
+        1,
+        MPI_INT,
+        collectedBuffIndex,
+        1,
+        MPI_INT,
+        0,
+        comm->comm);
+
+    if (particletracer->rank != 0) {
+        Particle buff[particletracer->totalParticles];
+        for (int i = 0; i < particletracer->totalParticles; ++i) {
+            buff[i].x    = particletracer->particlePool[i].x;
+            buff[i].y    = particletracer->particlePool[i].y;
+            buff[i].flag = particletracer->particlePool[i].flag;
+            // printf("Rank : %d sending to rank 0 X : %.2f, Y : %.2f with totalpt :
+            // %d\n", particletracer->rank, buff[i].x, buff[i].y,
+            // particletracer->totalParticles);
+        }
+        MPI_Send(buff,
+            particletracer->totalParticles,
+            particletracer->mpi_particle,
+            0,
+            1,
+            comm->comm);
+    }
+#endif
+
+    if (particletracer->rank == 0) {
+        char filename[50];
+        FILE* fp;
+
+        snprintf(filename, 50, "vis_files/particles_%d.dat", ts);
+        fp = fopen(filename, "w");
+
+        if (fp == NULL) {
+            printf("Error!\n");
+            exit(EXIT_FAILURE);
+        }
+        // fprintf(fp, "# vtk DataFile Version 3.0\n");
+        // fprintf(fp, "PAMPI cfd solver particle tracing file\n");
+        // fprintf(fp, "ASCII\n");
+
+        // fprintf(fp, "DATASET UNSTRUCTURED_GRID\n");
+        // fprintf(fp, "FIELD FieldData 2\n");
+        // fprintf(fp, "TIME 1 1 double\n");
+        // fprintf(fp, "%d\n", ts);
+        // fprintf(fp, "CYCLE 1 1 int\n");
+        // fprintf(fp, "1\n");
+
+        int overallTotalParticles = sum(collectedBuffIndex, particletracer->size);
+
+        // fprintf(fp, "POINTS %d float\n", overallTotalParticles);
+
+        // printf("Total particles : %d\n", overallTotalParticles);
+
+#ifdef _MPI
+        for (int i = 1; i < particletracer->size; ++i) {
+            Particle recvBuff[collectedBuffIndex[i]];
+            MPI_Recv(&recvBuff,
+                collectedBuffIndex[i],
+                particletracer->mpi_particle,
+                i,
+                1,
+                comm->comm,
+                MPI_STATUS_IGNORE);
+
+            for (int j = 0; j < collectedBuffIndex[i]; ++j) {
+                double x = recvBuff[j].x;
+                double y = recvBuff[j].y;
+                fprintf(fp, "%f %f\n", x, y);
+                // printf("Rank : 0 receiving from rank %d X : %.2f, Y : %.2f with totalpt
+                // : %d\n", i, x, y, particletracer->totalParticles);
+            }
+        }
+#endif
+        for (int i = 0; i < particletracer->totalParticles; ++i) {
+            double x = particletracer->particlePool[i].x;
+            double y = particletracer->particlePool[i].y;
+            fprintf(fp, "%f %f\n", x, y);
+        }
+
+        // fprintf(fp, "CELLS %d %d\n", overallTotalParticles, 2 * overallTotalParticles);
+
+        // for (int i = 0; i < overallTotalParticles; ++i)
+        // {
+        //     fprintf(fp, "1 %d\n", i);
+        // }
+
+        // fprintf(fp, "CELL_TYPES %d\n", overallTotalParticles);
+
+        // for (int i = 0; i < overallTotalParticles; ++i)
+        // {
+        //     fprintf(fp, "1\n");
+        // }
+
+        fclose(fp);
+    }
+
+    ++ts;
+}
+
+void printParticleTracerParameters(ParticleTracer* particletracer)
+{
+    printf("Particle Tracing data:\n");
+    printf("Rank : %d\n", particletracer->rank);
+    printf("\tNumber of particles : %d being injected for every period of %.2f\n",
+        particletracer->numberOfParticles,
+        particletracer->injectTimePeriod);
+    printf("\tstartTime : %.2f\n", particletracer->startTime);
+    printf("\t(Line along which the particles are to be injected) \n\tx1 : %.2f, y1 : "
+           "%.2f, x2 : %.2f, y2 : %.2f\n",
+        particletracer->x1,
+        particletracer->y1,
+        particletracer->x2,
+        particletracer->y2);
+    printf("\tPointer : %d, TotalParticles : %d\n",
+        particletracer->pointer,
+        particletracer->totalParticles);
+    printf("\tdt : %.2f, dx : %.2f, dy : %.2f\n",
+        particletracer->dt,
+        particletracer->dx,
+        particletracer->dy);
+    printf("\txOffset : %.2f, yOffset : %.2f\n",
+        particletracer->xOffset,
+        particletracer->yOffset);
+    printf("\txOffsetEnd : %.2f, yOffsetEnd : %.2f\n",
+        particletracer->xOffsetEnd,
+        particletracer->yOffsetEnd);
+    printf("\txLocal : %.2f, yLocal : %.2f\n",
+        particletracer->xLocal,
+        particletracer->yLocal);
+}
+
+void trace(ParticleTracer* particletracer, Discretization* d, double time)
+{
+    if (time >= particletracer->startTime) {
+        if ((time - particletracer->lastInjectTime) >= particletracer->injectTimePeriod) {
+            injectParticles(particletracer);
+            particletracer->lastInjectTime = time;
+        }
+        if ((time - particletracer->lastWriteTime) >= particletracer->writeTimePeriod) {
+            writeParticles(particletracer, &d->comm);
+            particletracer->lastWriteTime = time;
+        }
+        advanceParticles(particletracer, d->u, d->v, d->grid.s, &d->comm, time);
+
+        if (particletracer->removedParticles > (particletracer->totalParticles * 0.2)) {
+            compress(particletracer);
+        }
+
+        particletracer->lastUpdateTime = time;
+    }
+}
+
+void compress(ParticleTracer* particletracer)
+{
+    Particle* memPool = particletracer->particlePool;
+    Particle tempPool[particletracer->totalParticles];
+    int totalParticles = 0;
+
+    printf("Performing compression ...");
+
+    for (int i = 0; i < particletracer->totalParticles; ++i) {
+        if (memPool[i].flag == true) {
+            tempPool[totalParticles].x    = memPool[i].x;
+            tempPool[totalParticles].y    = memPool[i].y;
+            tempPool[totalParticles].flag = memPool[i].flag;
+            ++totalParticles;
+        }
+    }
+
+    printf(" remove %d particles\n", particletracer->totalParticles - totalParticles);
+
+    particletracer->totalParticles   = totalParticles;
+    particletracer->removedParticles = 0;
+    particletracer->pointer          = totalParticles + 1;
+
+    memcpy(particletracer->particlePool, tempPool, totalParticles * sizeof(Particle));
+}
\ No newline at end of file
diff --git a/EnhancedSolver/2D-mpi/src/particletracing.h b/EnhancedSolver/2D-mpi/src/particletracing.h
new file mode 100644
index 0000000..cc47fb0
--- /dev/null
+++ b/EnhancedSolver/2D-mpi/src/particletracing.h
@@ -0,0 +1,64 @@
+/*
+ * Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
+ * All rights reserved. This file is part of nusif-solver.
+ * Use of this source code is governed by a MIT style
+ * license that can be found in the LICENSE file.
+ */
+#ifndef __PARTICLETRACING_H_
+#define __PARTICLETRACING_H_
+#include "allocate.h"
+#include "parameter.h"
+
+#include "solver.h"
+#include <mpi.h>
+#include <stdbool.h>
+
+#define NDIMS 2
+
+typedef enum COORD { X = 0, Y, NCOORD } COORD;
+
+typedef struct {
+    double x, y;
+    bool flag;
+} Particle;
+
+typedef struct {
+    int numberOfParticles, removedParticles, totalParticles;
+    double startTime, injectTimePeriod, writeTimePeriod, lastInjectTime, lastUpdateTime,
+        lastWriteTime;
+
+    int estimatedNumParticles;
+
+    double dx, dy, dt;
+    Particle* linSpaceLine;
+    Particle* particlePool;
+
+    int pointer;
+
+    double imax, jmax, xlength, ylength, imaxLocal, jmaxLocal;
+
+    double x1, y1, x2, y2;
+
+    int size, rank;
+    
+#ifdef _MPI
+    MPI_Datatype mpi_particle;
+#endif
+
+    double xLocal, yLocal, xOffset, yOffset, xOffsetEnd, yOffsetEnd;
+
+    double* offset;
+
+} ParticleTracer;
+
+extern void initParticleTracer(ParticleTracer*, Discretization*, Parameter*);
+extern void injectParticles(ParticleTracer*);
+extern void advanceParticles(ParticleTracer*, double*, double*, double*, Comm*, double);
+extern void freeParticles(ParticleTracer*);
+extern void writeParticles(ParticleTracer*, Comm*);
+extern void printParticleTracerParameters(ParticleTracer*);
+extern void printParticles(ParticleTracer*);
+extern void trace(ParticleTracer*, Discretization*, double);
+extern void compress(ParticleTracer*);
+
+#endif
\ No newline at end of file
diff --git a/EnhancedSolver/2D-mpi/src/progress.c b/EnhancedSolver/2D-mpi/src/progress.c
new file mode 100644
index 0000000..be98fce
--- /dev/null
+++ b/EnhancedSolver/2D-mpi/src/progress.c
@@ -0,0 +1,51 @@
+/*
+ * Copyright (C)  NHR@FAU, University Erlangen-Nuremberg.
+ * All rights reserved. This file is part of nusif-solver.
+ * Use of this source code is governed by a MIT style
+ * license that can be found in the LICENSE file.
+ */
+#include <math.h>
+#include <mpi.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "progress.h"
+
+static double _end;
+static int _current;
+
+void initProgress(double end)
+{
+    _end     = end;
+    _current = 0;
+
+    printf("[          ]");
+    fflush(stdout);
+}
+
+void printProgress(double current)
+{
+    int new = (int)rint((current / _end) * 10.0);
+
+    if (new > _current) {
+        char progress[11];
+        _current    = new;
+        progress[0] = 0;
+
+        for (int i = 0; i < 10; i++) {
+            if (i < _current) {
+                sprintf(progress + strlen(progress), "#");
+            } else {
+                sprintf(progress + strlen(progress), " ");
+            }
+        }
+        printf("\r[%s]", progress);
+    }
+    fflush(stdout);
+}
+
+void stopProgress()
+{
+    printf("\n");
+    fflush(stdout);
+}
diff --git a/EnhancedSolver/2D-mpi/src/progress.h b/EnhancedSolver/2D-mpi/src/progress.h
new file mode 100644
index 0000000..1cdb9a3
--- /dev/null
+++ b/EnhancedSolver/2D-mpi/src/progress.h
@@ -0,0 +1,14 @@
+/*
+ * Copyright (C)  NHR@FAU, University Erlangen-Nuremberg.
+ * All rights reserved.
+ * Use of this source code is governed by a MIT-style
+ * license that can be found in the LICENSE file.
+ */
+#ifndef __PROGRESS_H_
+#define __PROGRESS_H_
+
+extern void initProgress(double);
+extern void printProgress(double);
+extern void stopProgress();
+
+#endif
diff --git a/EnhancedSolver/2D-mpi/src/solver-mg.c b/EnhancedSolver/2D-mpi/src/solver-mg.c
new file mode 100644
index 0000000..08a39fd
--- /dev/null
+++ b/EnhancedSolver/2D-mpi/src/solver-mg.c
@@ -0,0 +1,324 @@
+/*
+ * Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
+ * All rights reserved. This file is part of nusif-solver.
+ * Use of this source code is governed by a MIT style
+ * license that can be found in the LICENSE file.
+ */
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "allocate.h"
+#include "solver.h"
+#include "util.h"
+
+#define FINEST_LEVEL   0
+#define COARSEST_LEVEL (s->levels - 1)
+
+#define E(i, j)   e[(j) * (imaxLocal + 2) + (i)]
+#define R(i, j)   r[(j) * (imaxLocal + 2) + (i)]
+#define OLD(i, j) old[(j) * (imaxLocal + 2) + (i)]
+
+void printSolver(Solver* s, double* grid, char* gridname)
+{
+    if (0 == s->comm->rank) printf("Grid name : %s", gridname);
+    int imaxLocal = s->comm->imaxLocal;
+    int jmaxLocal = s->comm->jmaxLocal;
+
+    for (int i = 0; i < s->comm->size; i++) {
+        if (i == s->comm->rank) {
+            sleep(1 * s->comm->rank);
+            printf("### RANK %d LVL "
+                   "###################################################### #\n ",
+                s->comm->rank);
+            for (int j = 0; j < jmaxLocal + 2; j++) {
+                printf("%02d: ", j);
+                for (int i = 0; i < imaxLocal + 2; i++) {
+                    printf("%2.2f  ", grid[j * (imaxLocal + 2) + i]);
+                }
+                printf("\n");
+            }
+            fflush(stdout);
+        }
+    }
+}
+
+static void restrictMG(Solver* s, int level, Comm* comm)
+{
+    int imaxLocal = comm->imaxLocal;
+    int jmaxLocal = comm->jmaxLocal;
+
+    double* r   = s->r[level + 1];
+    double* old = s->r[level];
+
+#ifdef _MPI
+    commExchange(comm, old);
+#endif
+
+    for (int j = 1; j < (jmaxLocal / 2) + 1; j++) {
+        for (int i = 1; i < (imaxLocal / 2) + 1; i++) {
+            R(i, j) = (OLD(2 * i - 1, 2 * j - 1) + OLD(2 * i, 2 * j - 1) * 2 +
+                          OLD(2 * i + 1, 2 * j - 1) + OLD(2 * i - 1, 2 * j) * 2 +
+                          OLD(2 * i, 2 * j) * 4 + OLD(2 * i + 1, 2 * j) * 2 +
+                          OLD(2 * i - 1, 2 * j + 1) + OLD(2 * i, 2 * j + 1) * 2 +
+                          OLD(2 * i + 1, 2 * j + 1)) /
+                      16.0;
+        }
+    }
+}
+
+static void prolongate(Solver* s, int level, Comm* comm)
+{
+    int imaxLocal = comm->imaxLocal;
+    int jmaxLocal = comm->jmaxLocal;
+
+    double* old = s->r[level + 1];
+    double* e   = s->r[level];
+
+    for (int j = 2; j < jmaxLocal + 1; j += 2) {
+        for (int i = 2; i < imaxLocal + 1; i += 2) {
+            E(i, j) = OLD(i / 2, j / 2);
+        }
+    }
+}
+
+static void correct(Solver* s, double* p, int level, Comm* comm)
+{
+    double* e     = s->e[level];
+    int imaxLocal = comm->imaxLocal;
+    int jmaxLocal = comm->jmaxLocal;
+
+    for (int j = 1; j < jmaxLocal + 1; ++j) {
+        for (int i = 1; i < imaxLocal + 1; ++i) {
+            P(i, j) += E(i, j);
+        }
+    }
+}
+
+static void setBoundaryCondition(Solver* s, double* p, int imaxLocal, int jmaxLocal)
+{
+#ifdef _MPI
+    if (commIsBoundary(s->comm, B)) { // set bottom bc
+        for (int i = 1; i < imaxLocal + 1; i++) {
+            P(i, 0) = P(i, 1);
+        }
+    }
+
+    if (commIsBoundary(s->comm, T)) { // set top bc
+        for (int i = 1; i < imaxLocal + 1; i++) {
+            P(i, jmaxLocal + 1) = P(i, jmaxLocal);
+        }
+    }
+
+    if (commIsBoundary(s->comm, L)) { // set left bc
+        for (int j = 1; j < jmaxLocal + 1; j++) {
+            P(0, j) = P(1, j);
+        }
+    }
+
+    if (commIsBoundary(s->comm, R)) { // set right bc
+        for (int j = 1; j < jmaxLocal + 1; j++) {
+            P(imaxLocal + 1, j) = P(imaxLocal, j);
+        }
+    }
+#else
+    for (int i = 1; i < imaxLocal + 1; i++) {
+        P(i, 0)             = P(i, 1);
+        P(i, jmaxLocal + 1) = P(i, jmaxLocal);
+    }
+
+    for (int j = 1; j < jmaxLocal + 1; j++) {
+        P(0, j)             = P(1, j);
+        P(imaxLocal + 1, j) = P(imaxLocal, j);
+    }
+#endif
+}
+
+static double smooth(Solver* s, double* p, double* rhs, int level, Comm* comm)
+{
+    int imaxLocal = comm->imaxLocal;
+    int jmaxLocal = comm->jmaxLocal;
+
+    int imax = s->grid->imax;
+    int jmax = s->grid->jmax;
+
+    double dx2  = s->grid->dx * s->grid->dx;
+    double dy2  = s->grid->dy * s->grid->dy;
+    double idx2 = 1.0 / dx2;
+    double idy2 = 1.0 / dy2;
+
+    double factor = s->omega * 0.5 * (dx2 * dy2) / (dx2 + dy2);
+    double* r     = s->r[level];
+
+    double res = 1.0;
+    int pass, jsw, isw;
+
+    jsw = 1;
+
+    for (pass = 0; pass < 2; pass++) {
+        isw = jsw;
+
+#ifdef _MPI
+        commExchange(comm, p);
+#endif
+
+        for (int j = 1; j < jmaxLocal + 1; j++) {
+            for (int i = isw; i < imaxLocal + 1; i += 2) {
+
+                P(i, j) -= factor *
+                           (RHS(i, j) -
+                               ((P(i + 1, j) - 2.0 * P(i, j) + P(i - 1, j)) * idx2 +
+                                   (P(i, j + 1) - 2.0 * P(i, j) + P(i, j - 1)) * idy2));
+            }
+            isw = 3 - isw;
+        }
+        jsw = 3 - jsw;
+    }
+}
+
+static double calculateResidual(Solver* s, double* p, double* rhs, int level, Comm* comm)
+{
+    int imax      = s->grid->imax;
+    int jmax      = s->grid->jmax;
+    int imaxLocal = comm->imaxLocal;
+    int jmaxLocal = comm->jmaxLocal;
+
+    double dx2    = s->grid->dx * s->grid->dx;
+    double dy2    = s->grid->dy * s->grid->dy;
+    double idx2   = 1.0 / dx2;
+    double idy2   = 1.0 / dy2;
+    double factor = s->omega * 0.5 * (dx2 * dy2) / (dx2 + dy2);
+    double* r     = s->r[level];
+    double res    = 1.0;
+    int pass, jsw, isw;
+
+    jsw = 1;
+
+    for (pass = 0; pass < 2; pass++) {
+        isw = jsw;
+
+#ifdef _MPI
+        commExchange(comm, p);
+#endif
+
+        for (int j = 1; j < jmaxLocal + 1; j++) {
+            for (int i = isw; i < imaxLocal + 1; i += 2) {
+
+                R(i, j) = RHS(i, j) -
+                          ((P(i + 1, j) - 2.0 * P(i, j) + P(i - 1, j)) * idx2 +
+                              (P(i, j + 1) - 2.0 * P(i, j) + P(i, j - 1)) * idy2);
+
+                res += (R(i, j) * R(i, j));
+            }
+            isw = 3 - isw;
+        }
+        jsw = 3 - jsw;
+    }
+
+#ifdef _MPI
+    commReduction(&res, SUM);
+#endif
+
+    res = res / (double)(imax * jmax);
+#ifdef DEBUG
+    if (commIsMaster(s->comm)) {
+        printf("%d Residuum: %e\n", it, res);
+    }
+#endif
+    return res;
+}
+
+static double multiGrid(Solver* s, double* p, double* rhs, int level, Comm* comm)
+{
+    double res = 0.0;
+
+    // coarsest level
+    if (level == COARSEST_LEVEL) {
+        for (int i = 0; i < 5; i++) {
+            smooth(s, p, rhs, level, comm);
+        }
+        return res;
+    }
+
+    // pre-smoothing
+    for (int i = 0; i < s->presmooth; i++) {
+        smooth(s, p, rhs, level, comm);
+
+        if (level == FINEST_LEVEL)
+            setBoundaryCondition(s, p, comm->imaxLocal, comm->jmaxLocal);
+    }
+
+    // calculate residuals
+    res = calculateResidual(s, p, rhs, level, comm);
+
+    // restrict
+    restrictMG(s, level, comm);
+
+    Comm newcomm;
+    commUpdateDatatypes(s->comm, &newcomm, comm->imaxLocal / 2, comm->jmaxLocal / 2);
+
+    // MGSolver on residual and error.
+    multiGrid(s, s->e[level + 1], s->r[level + 1], level + 1, &newcomm);
+
+    commFreeCommunicator(&newcomm);
+
+    // prolongate
+    prolongate(s, level, comm);
+
+    // correct p on finer level using residual
+    correct(s, p, level, comm);
+
+    if (level == FINEST_LEVEL)
+        setBoundaryCondition(s, p, comm->imaxLocal, comm->jmaxLocal);
+
+    // post-smoothing
+    for (int i = 0; i < s->postsmooth; i++) {
+        smooth(s, p, rhs, level, comm);
+        if (level == FINEST_LEVEL)
+            setBoundaryCondition(s, p, comm->imaxLocal, comm->jmaxLocal);
+    }
+
+    return res;
+}
+
+void initSolver(Solver* s, Discretization* d, Parameter* p)
+{
+    s->eps        = p->eps;
+    s->omega      = p->omg;
+    s->itermax    = p->itermax;
+    s->levels     = p->levels;
+    s->grid       = &d->grid;
+    s->comm       = &d->comm;
+    s->presmooth  = p->presmooth;
+    s->postsmooth = p->postsmooth;
+
+    int imax   = s->grid->imax;
+    int jmax   = s->grid->jmax;
+    int levels = s->levels;
+    printf("Using Multigrid solver with %d levels\n", levels);
+
+    s->r = malloc(levels * sizeof(double*));
+    s->e = malloc(levels * sizeof(double*));
+
+    size_t size = (imax + 2) * (jmax + 2) * sizeof(double);
+
+    for (int j = 0; j < levels; j++) {
+        s->r[j] = allocate(64, size);
+        s->e[j] = allocate(64, size);
+
+        for (int i = 0; i < (imax + 2) * (jmax + 2); i++) {
+            s->r[j][i] = 0.0;
+            s->e[j][i] = 0.0;
+        }
+    }
+}
+
+void solve(Solver* s, double* p, double* rhs)
+{
+    double res = multiGrid(s, p, rhs, 0, s->comm);
+
+#ifdef VERBOSE
+    if (commIsMaster(s->comm)) {
+        printf("Residuum: %.6f\n", res);
+    }
+#endif
+}
diff --git a/EnhancedSolver/2D-mpi/src/solver-rb.c b/EnhancedSolver/2D-mpi/src/solver-rb.c
new file mode 100644
index 0000000..4257381
--- /dev/null
+++ b/EnhancedSolver/2D-mpi/src/solver-rb.c
@@ -0,0 +1,129 @@
+/*
+ * Copyright (C)  NHR@FAU, University Erlangen-Nuremberg.
+ * All rights reserved. This file is part of nusif-solver.
+ * Use of this source code is governed by a MIT style
+ * license that can be found in the LICENSE file.
+ */
+#include <math.h>
+#include <stdio.h>
+
+#include "allocate.h"
+#include "comm.h"
+#include "discretization.h"
+#include "parameter.h"
+#include "solver.h"
+#include "util.h"
+
+void printSolver(Solver* s, double* grid, char* gridname)
+{
+    printf("Grid name : %s", gridname);
+    int imaxLocal = s->comm->imaxLocal;
+    int jmaxLocal = s->comm->jmaxLocal;
+
+    for (int i = 0; i < s->comm->size; i++) {
+        if (i == s->comm->rank) {
+            sleep(1 * s->comm->rank);
+            printf("### RANK %d LVL "
+                   "###################################################### #\n ",
+                s->comm->rank);
+            for (int j = 0; j < jmaxLocal + 2; j++) {
+                printf("%02d: ", j);
+                for (int i = 0; i < imaxLocal + 2; i++) {
+                    printf("%2.2f  ", grid[j * (imaxLocal + 2) + i]);
+                }
+                printf("\n");
+            }
+            fflush(stdout);
+        }
+    }
+}
+
+
+void initSolver(Solver* s, Discretization* d, Parameter* p)
+{
+    s->grid    = &d->grid;
+    s->eps     = p->eps;
+    s->omega   = p->omg;
+    s->itermax = p->itermax;
+    s->comm    = &d->comm;
+}
+
+void solve(Solver* s, double* p, double* rhs)
+{
+    int imax      = s->grid->imax;
+    int jmax      = s->grid->jmax;
+    int imaxLocal = s->comm->imaxLocal;
+    int jmaxLocal = s->comm->jmaxLocal;
+    double eps    = s->eps;
+    int itermax   = s->itermax;
+    double dx2    = s->grid->dx * s->grid->dx;
+    double dy2    = s->grid->dy * s->grid->dy;
+    double idx2   = 1.0 / dx2;
+    double idy2   = 1.0 / dy2;
+    double factor = s->omega * 0.5 * (dx2 * dy2) / (dx2 + dy2);
+    double epssq  = eps * eps;
+    int pass, jsw, isw;
+    int it     = 0;
+    double res = 1.0;
+
+    while ((res >= epssq) && (it < itermax)) {
+        jsw = 1;
+        for (pass = 0; pass < 2; pass++) {
+            isw = jsw;
+            commExchange(s->comm, p);
+
+            for (int j = 1; j < jmaxLocal + 1; j++) {
+                for (int i = isw; i < imaxLocal + 1; i += 2) {
+
+                    double r = RHS(i, j) -
+                               ((P(i + 1, j) - 2.0 * P(i, j) + P(i - 1, j)) * idx2 +
+                                   (P(i, j + 1) - 2.0 * P(i, j) + P(i, j - 1)) * idy2);
+
+                    P(i, j) -= (factor * r);
+                    res += (r * r);
+                }
+                isw = 3 - isw;
+            }
+            jsw = 3 - jsw;
+        }
+
+        if (commIsBoundary(s->comm, B)) { // set bottom bc
+            for (int i = 1; i < imaxLocal + 1; i++) {
+                P(i, 0) = P(i, 1);
+            }
+        }
+
+        if (commIsBoundary(s->comm, T)) { // set top bc
+            for (int i = 1; i < imaxLocal + 1; i++) {
+                P(i, jmaxLocal + 1) = P(i, jmaxLocal);
+            }
+        }
+
+        if (commIsBoundary(s->comm, L)) { // set left bc
+            for (int j = 1; j < jmaxLocal + 1; j++) {
+                P(0, j) = P(1, j);
+            }
+        }
+
+        if (commIsBoundary(s->comm, R)) { // set right bc
+            for (int j = 1; j < jmaxLocal + 1; j++) {
+                P(imaxLocal + 1, j) = P(imaxLocal, j);
+            }
+        }
+
+        commReduction(&res, SUM);
+        res = res / (double)(imax * jmax);
+#ifdef DEBUG
+        if (commIsMaster(s->comm)) {
+            printf("%d Residuum: %e\n", it, res);
+        }
+#endif
+        it++;
+    }
+
+#ifdef VERBOSE
+    if (commIsMaster(s->comm)) {
+        printf("Solver took %d iterations to reach %f\n", it, sqrt(res));
+    }
+#endif
+}
diff --git a/EnhancedSolver/2D-mpi/src/solver.h b/EnhancedSolver/2D-mpi/src/solver.h
new file mode 100644
index 0000000..2ca94e6
--- /dev/null
+++ b/EnhancedSolver/2D-mpi/src/solver.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright (C)  NHR@FAU, University Erlangen-Nuremberg.
+ * All rights reserved. This file is part of nusif-solver.
+ * Use of this source code is governed by a MIT style
+ * license that can be found in the LICENSE file.
+ */
+#ifndef __SOLVER_H_
+#define __SOLVER_H_
+#include "comm.h"
+#include "discretization.h"
+#include "grid.h"
+#include "mpi.h"
+#include "parameter.h"
+
+typedef struct {
+    /* geometry and grid information */
+    Grid* grid;
+    /* parameters */
+    double eps, omega;
+    int itermax;
+    int levels, presmooth, postsmooth;
+    double **r, **e;
+    /* communication */
+    Comm* comm;
+} Solver;
+
+void initSolver(Solver*, Discretization*, Parameter*);
+void solve(Solver*, double*, double*);
+void printSolver(Solver* , double*, char*);
+#endif
diff --git a/EnhancedSolver/2D-mpi/src/timing.c b/EnhancedSolver/2D-mpi/src/timing.c
new file mode 100644
index 0000000..78b01c4
--- /dev/null
+++ b/EnhancedSolver/2D-mpi/src/timing.c
@@ -0,0 +1,22 @@
+/*
+ * Copyright (C)  NHR@FAU, University Erlangen-Nuremberg.
+ * All rights reserved.
+ * Use of this source code is governed by a MIT-style
+ * license that can be found in the LICENSE file.
+ */
+#include <stdlib.h>
+#include <time.h>
+
+double getTimeStamp(void)
+{
+    struct timespec ts;
+    clock_gettime(CLOCK_MONOTONIC, &ts);
+    return (double)ts.tv_sec + (double)ts.tv_nsec * 1.e-9;
+}
+
+double getTimeResolution(void)
+{
+    struct timespec ts;
+    clock_getres(CLOCK_MONOTONIC, &ts);
+    return (double)ts.tv_sec + (double)ts.tv_nsec * 1.e-9;
+}
diff --git a/EnhancedSolver/2D-mpi/src/timing.h b/EnhancedSolver/2D-mpi/src/timing.h
new file mode 100644
index 0000000..ed05a8c
--- /dev/null
+++ b/EnhancedSolver/2D-mpi/src/timing.h
@@ -0,0 +1,13 @@
+/*
+ * Copyright (C)  NHR@FAU, University Erlangen-Nuremberg.
+ * All rights reserved.
+ * Use of this source code is governed by a MIT-style
+ * license that can be found in the LICENSE file.
+ */
+#ifndef __TIMING_H_
+#define __TIMING_H_
+
+extern double getTimeStamp(void);
+extern double getTimeResolution(void);
+
+#endif // __TIMING_H_
diff --git a/EnhancedSolver/2D-mpi/src/util.h b/EnhancedSolver/2D-mpi/src/util.h
new file mode 100644
index 0000000..ae396bd
--- /dev/null
+++ b/EnhancedSolver/2D-mpi/src/util.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright (C)  NHR@FAU, University Erlangen-Nuremberg.
+ * All rights reserved.
+ * Use of this source code is governed by a MIT-style
+ * license that can be found in the LICENSE file.
+ */
+#ifndef __UTIL_H_
+#define __UTIL_H_
+#define HLINE                                                                            \
+    "----------------------------------------------------------------------------\n"
+
+#ifndef MIN
+#define MIN(x, y) ((x) < (y) ? (x) : (y))
+#endif
+#ifndef MAX
+#define MAX(x, y) ((x) > (y) ? (x) : (y))
+#endif
+#ifndef ABS
+#define ABS(a) ((a) >= 0 ? (a) : -(a))
+#endif
+
+#define P(i, j)   p[(j) * (imaxLocal + 2) + (i)]
+#define F(i, j)   f[(j) * (imaxLocal + 2) + (i)]
+#define G(i, j)   g[(j) * (imaxLocal + 2) + (i)]
+#define U(i, j)   u[(j) * (imaxLocal + 2) + (i)]
+#define V(i, j)   v[(j) * (imaxLocal + 2) + (i)]
+#define S(i, j)   s[(j) * (imaxLocal + 2) + (i)]
+#define RHS(i, j) rhs[(j) * (imaxLocal + 2) + (i)]
+
+#endif // __UTIL_H_
diff --git a/EnhancedSolver/2D-mpi/surface.plot b/EnhancedSolver/2D-mpi/surface.plot
new file mode 100644
index 0000000..4f7ccd9
--- /dev/null
+++ b/EnhancedSolver/2D-mpi/surface.plot
@@ -0,0 +1,7 @@
+set terminal png size 1024,768 enhanced font ,12
+set output 'p.png'
+set datafile separator whitespace
+
+set grid
+set hidden3d
+splot 'pressure.dat' using 1:2:3 with lines
diff --git a/EnhancedSolver/2D-mpi/vector.plot b/EnhancedSolver/2D-mpi/vector.plot
new file mode 100644
index 0000000..e24b2b2
--- /dev/null
+++ b/EnhancedSolver/2D-mpi/vector.plot
@@ -0,0 +1,6 @@
+set terminal png size 3600,1400 enhanced font ,12
+set output 'velocity.png'
+set datafile separator whitespace
+set size ratio -1
+
+plot 'velocity.dat' using 1:2:3:4:5 with vectors filled head size 0.01,20,60 lc palette
diff --git a/EnhancedSolver/2D-mpi/vis_files/animate.plot b/EnhancedSolver/2D-mpi/vis_files/animate.plot
new file mode 100644
index 0000000..d54ba32
--- /dev/null
+++ b/EnhancedSolver/2D-mpi/vis_files/animate.plot
@@ -0,0 +1,10 @@
+unset border; unset tics; unset key;
+set term gif animate delay 30
+set output "trace.gif"
+set xrange [0:1]
+set yrange [0:1]
+
+do for [ts=0:120] {
+    plot "particles_".ts.".dat" with points pointtype 7
+}
+unset output
diff --git a/EnhancedSolver/2D-mpi/vis_files/backstep_animate.plot b/EnhancedSolver/2D-mpi/vis_files/backstep_animate.plot
new file mode 100644
index 0000000..2888e5d
--- /dev/null
+++ b/EnhancedSolver/2D-mpi/vis_files/backstep_animate.plot
@@ -0,0 +1,14 @@
+unset border; unset tics; unset key;
+set term gif animate delay 10
+set output "trace.gif"
+set xrange [0:7]
+set yrange [0:1.5]
+set size ratio -1
+
+set object 1 rect from 0.0,0.0 to 1.0,0.5 lw 5
+
+
+do for [ts=0:300] {
+    plot "particles_".ts.".dat" with points pointtype 7
+}
+unset output
diff --git a/EnhancedSolver/2D-seq/vis_files/canal.plot b/EnhancedSolver/2D-mpi/vis_files/canal_animate.plot
similarity index 100%
rename from EnhancedSolver/2D-seq/vis_files/canal.plot
rename to EnhancedSolver/2D-mpi/vis_files/canal_animate.plot
diff --git a/EnhancedSolver/2D-mpi/vis_files/karman_animate.plot b/EnhancedSolver/2D-mpi/vis_files/karman_animate.plot
new file mode 100644
index 0000000..d78f0c3
--- /dev/null
+++ b/EnhancedSolver/2D-mpi/vis_files/karman_animate.plot
@@ -0,0 +1,13 @@
+unset border; unset tics; unset key;
+set term gif animate delay 10
+set output "trace.gif"
+set xrange [0:30]
+set yrange [0:8]
+set size ratio -1
+set object 1 circle front at 5.0,4.0 size 1.0 fillcolor rgb "black" lw 2
+
+
+do for [ts=0:500] {
+    plot "particles_".ts.".dat" with points pointtype 7 pointsize 0.3
+}
+unset output
diff --git a/EnhancedSolver/2D-seq/Makefile b/EnhancedSolver/2D-seq/Makefile
index dc8a79d..c253d05 100644
--- a/EnhancedSolver/2D-seq/Makefile
+++ b/EnhancedSolver/2D-seq/Makefile
@@ -70,6 +70,18 @@ format:
 	done
 	@echo "Done"
 
+vis:
+	$(info ===>  GENERATE VISUALIZATION)
+	@gnuplot -e "filename='pressure.dat'" ./surface.plot
+	@gnuplot -e "filename='velocity.dat'" ./vector.plot
+
+vis_clean:
+	$(info ===>  CLEAN VISUALIZATION)
+	@rm -f *.dat
+	@rm -f *.png
+	@rm -f ./vis_files/*.dat
+	@rm -f ./vis_files/*.gif
+
 $(BUILD_DIR):
 	@mkdir $(BUILD_DIR)
 
diff --git a/EnhancedSolver/2D-seq/backstep.par b/EnhancedSolver/2D-seq/backstep.par
index 656ac31..3e70238 100644
--- a/EnhancedSolver/2D-seq/backstep.par
+++ b/EnhancedSolver/2D-seq/backstep.par
@@ -15,7 +15,7 @@ bcRight    3			#
 gx    0.0      # Body forces (e.g. gravity)
 gy    0.0      #
 
-re    36500.0	   # Reynolds number
+re    20000.0	   # Reynolds number
 
 u_init    1.0      # initial value for velocity in x-direction
 v_init    0.0      # initial value for velocity in y-direction
@@ -45,11 +45,18 @@ rho      0.52
 omg      1.8       # relaxation parameter for SOR iteration
 gamma    0.9       # upwind differencing factor gamma
 
+# Multigrid data:
+# ---------
+
+levels        3         # Multigrid levels
+presmooth     5         # Pre-smoothning iterations
+postsmooth    5         # Post-smoothning iterations
+
 # Particle Tracing Data:
 # -----------------------
 
 numberOfParticles   200
-startTime           100
+startTime           10
 injectTimePeriod    1.0
 writeTimePeriod     0.5
 
diff --git a/EnhancedSolver/2D-seq/canal.par b/EnhancedSolver/2D-seq/canal.par
index 1661db6..1813820 100644
--- a/EnhancedSolver/2D-seq/canal.par
+++ b/EnhancedSolver/2D-seq/canal.par
@@ -44,7 +44,13 @@ eps      0.0001   # stopping tolerance for pressure iteration
 rho      0.52
 omg      1.8       # relaxation parameter for SOR iteration
 gamma    0.9       # upwind differencing factor gamma
-levels   5         # Multigrid levels
+
+# Multigrid data:
+# ---------
+
+levels        3         # Multigrid levels
+presmooth     5         # Pre-smoothning iterations
+postsmooth    5         # Post-smoothning iterations
 
 # Particle Tracing Data:
 # -----------------------
diff --git a/EnhancedSolver/2D-seq/config.mk b/EnhancedSolver/2D-seq/config.mk
index ceed6bf..af8912f 100644
--- a/EnhancedSolver/2D-seq/config.mk
+++ b/EnhancedSolver/2D-seq/config.mk
@@ -1,5 +1,5 @@
 # Supported: GCC, CLANG, ICC
-TAG ?= CLANG
+TAG ?= ICC
 ENABLE_OPENMP ?= false
 # Supported: sor, rb, mg
 SOLVER ?= mg
diff --git a/EnhancedSolver/2D-seq/dcavity.par b/EnhancedSolver/2D-seq/dcavity.par
index 8ff64b6..dd092f5 100644
--- a/EnhancedSolver/2D-seq/dcavity.par
+++ b/EnhancedSolver/2D-seq/dcavity.par
@@ -44,7 +44,13 @@ eps      0.001		# stopping tolerance for pressure iteration
 rho      0.5
 omg      1.8		# relaxation parameter for SOR iteration
 gamma    0.9		# upwind differencing factor gamma
-levels   5         # Multigrid levels
+
+# Multigrid data:
+# ---------
+
+levels        3         # Multigrid levels
+presmooth     10        # Pre-smoothning iterations
+postsmooth    5         # Post-smoothning iterations
 
 # Particle Tracing Data:
 # -----------------------
diff --git a/EnhancedSolver/2D-seq/karman.par b/EnhancedSolver/2D-seq/karman.par
index 60664b6..fb61961 100644
--- a/EnhancedSolver/2D-seq/karman.par
+++ b/EnhancedSolver/2D-seq/karman.par
@@ -44,13 +44,19 @@ eps      0.001   # stopping tolerance for pressure iteration
 rho      0.52
 omg      1.75      # relaxation parameter for SOR iteration
 gamma    0.9       # upwind differencing factor gamma
-levels   5         # Multigrid levels
+
+# Multigrid data:
+# ---------
+
+levels        3         # Multigrid levels
+presmooth     5         # Pre-smoothning iterations
+postsmooth    5         # Post-smoothning iterations
 
 # Particle Tracing Data:
 # -----------------------
 
 numberOfParticles   200
-startTime           201
+startTime           50
 injectTimePeriod    1.0
 writeTimePeriod     0.5
 
diff --git a/EnhancedSolver/2D-seq/src/particletracing.c b/EnhancedSolver/2D-seq/src/particletracing.c
index 07d575d..54e6eed 100644
--- a/EnhancedSolver/2D-seq/src/particletracing.c
+++ b/EnhancedSolver/2D-seq/src/particletracing.c
@@ -98,7 +98,7 @@ static void advanceParticles(
             if (!gridIsFluid(p->grid, newI, newJ)) {
                 p->particlePool[i].flag = false;
                 p->removedParticles++;
-                printf("Forbidden movement of particle into obstacle!\n");
+                // printf("Forbidden movement of particle into obstacle!\n");
             }
         }
     }
diff --git a/EnhancedSolver/2D-seq/vis_files/canal_animate.plot b/EnhancedSolver/2D-seq/vis_files/canal_animate.plot
new file mode 100644
index 0000000..bf9ce60
--- /dev/null
+++ b/EnhancedSolver/2D-seq/vis_files/canal_animate.plot
@@ -0,0 +1,10 @@
+unset border; unset tics; unset key;
+set term gif animate delay 30
+set output "trace.gif"
+set xrange [0:30]
+set yrange [0:4]
+
+do for [ts=0:120] {
+    plot "particles_".ts.".dat" with points pointtype 7
+}
+unset output