forked from moebiusband/NuSiF-Solver
Compare commits
1 Commits
main
...
poisson/2D
Author | SHA1 | Date | |
---|---|---|---|
356c4fe4fa |
@ -1,62 +0,0 @@
|
|||||||
#=======================================================================================
|
|
||||||
# Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
|
|
||||||
# All rights reserved.
|
|
||||||
# Use of this source code is governed by a MIT-style
|
|
||||||
# license that can be found in the LICENSE file.
|
|
||||||
#=======================================================================================
|
|
||||||
|
|
||||||
#CONFIGURE BUILD SYSTEM
|
|
||||||
TARGET = exe-$(TAG)
|
|
||||||
BUILD_DIR = ./$(TAG)
|
|
||||||
SRC_DIR = ./src
|
|
||||||
MAKE_DIR = ./
|
|
||||||
Q ?= @
|
|
||||||
|
|
||||||
#DO NOT EDIT BELOW
|
|
||||||
include $(MAKE_DIR)/config.mk
|
|
||||||
include $(MAKE_DIR)/include_$(TAG).mk
|
|
||||||
INCLUDES += -I$(SRC_DIR)/includes -I$(BUILD_DIR)
|
|
||||||
|
|
||||||
VPATH = $(SRC_DIR)
|
|
||||||
ASM = $(patsubst $(SRC_DIR)/%.c, $(BUILD_DIR)/%.s,$(wildcard $(SRC_DIR)/*.c))
|
|
||||||
OBJ = $(patsubst $(SRC_DIR)/%.c, $(BUILD_DIR)/%.o,$(wildcard $(SRC_DIR)/*.c))
|
|
||||||
CPPFLAGS := $(CPPFLAGS) $(DEFINES) $(OPTIONS) $(INCLUDES)
|
|
||||||
|
|
||||||
${TARGET}: $(BUILD_DIR) $(OBJ)
|
|
||||||
$(info ===> LINKING $(TARGET))
|
|
||||||
$(Q)${LINKER} ${LFLAGS} -o $(TARGET) $(OBJ) $(LIBS)
|
|
||||||
|
|
||||||
$(BUILD_DIR)/%.o: %.c $(MAKE_DIR)/include_$(TAG).mk
|
|
||||||
$(info ===> COMPILE $@)
|
|
||||||
$(CC) -c $(CPPFLAGS) $(CFLAGS) $< -o $@
|
|
||||||
$(Q)$(GCC) $(CPPFLAGS) -MT $(@:.d=.o) -MM $< > $(BUILD_DIR)/$*.d
|
|
||||||
|
|
||||||
$(BUILD_DIR)/%.s: %.c
|
|
||||||
$(info ===> GENERATE ASM $@)
|
|
||||||
$(CC) -S $(CPPFLAGS) $(CFLAGS) $< -o $@
|
|
||||||
|
|
||||||
.PHONY: clean distclean tags info asm
|
|
||||||
|
|
||||||
clean:
|
|
||||||
$(info ===> CLEAN)
|
|
||||||
@rm -rf $(BUILD_DIR)
|
|
||||||
@rm -f tags
|
|
||||||
|
|
||||||
distclean: clean
|
|
||||||
$(info ===> DIST CLEAN)
|
|
||||||
@rm -f $(TARGET)
|
|
||||||
|
|
||||||
info:
|
|
||||||
$(info $(CFLAGS))
|
|
||||||
$(Q)$(CC) $(VERSION)
|
|
||||||
|
|
||||||
asm: $(BUILD_DIR) $(ASM)
|
|
||||||
|
|
||||||
tags:
|
|
||||||
$(info ===> GENERATE TAGS)
|
|
||||||
$(Q)ctags -R
|
|
||||||
|
|
||||||
$(BUILD_DIR):
|
|
||||||
@mkdir $(BUILD_DIR)
|
|
||||||
|
|
||||||
-include $(OBJ:.o=.d)
|
|
@ -1,48 +0,0 @@
|
|||||||
# C source skeleton
|
|
||||||
|
|
||||||
## Build
|
|
||||||
|
|
||||||
1. Configure the toolchain and additional options in `config.mk`:
|
|
||||||
```
|
|
||||||
# Supported: GCC, CLANG, ICC
|
|
||||||
TAG ?= GCC
|
|
||||||
ENABLE_OPENMP ?= false
|
|
||||||
|
|
||||||
OPTIONS += -DARRAY_ALIGNMENT=64
|
|
||||||
#OPTIONS += -DVERBOSE_AFFINITY
|
|
||||||
#OPTIONS += -DVERBOSE_DATASIZE
|
|
||||||
#OPTIONS += -DVERBOSE_TIMER
|
|
||||||
```
|
|
||||||
|
|
||||||
The verbosity options enable detailed output about affinity settings, allocation sizes and timer resolution.
|
|
||||||
|
|
||||||
|
|
||||||
2. Build with:
|
|
||||||
```
|
|
||||||
make
|
|
||||||
```
|
|
||||||
|
|
||||||
You can build multiple toolchains in the same directory, but notice that the Makefile is only acting on the one currently set.
|
|
||||||
Intermediate build results are located in the `<TOOLCHAIN>` directory.
|
|
||||||
|
|
||||||
To output the executed commands use:
|
|
||||||
```
|
|
||||||
make Q=
|
|
||||||
```
|
|
||||||
|
|
||||||
3. Clean up with:
|
|
||||||
```
|
|
||||||
make clean
|
|
||||||
```
|
|
||||||
to clean intermediate build results.
|
|
||||||
|
|
||||||
```
|
|
||||||
make distclean
|
|
||||||
```
|
|
||||||
to clean intermediate build results and binary.
|
|
||||||
|
|
||||||
4. (Optional) Generate assembler:
|
|
||||||
```
|
|
||||||
make asm
|
|
||||||
```
|
|
||||||
The assembler files will also be located in the `<TOOLCHAIN>` directory.
|
|
@ -1,15 +0,0 @@
|
|||||||
set term png size 1024,768 enhanced font ,12
|
|
||||||
set datafile separator whitespace
|
|
||||||
set grid
|
|
||||||
set hidden3d
|
|
||||||
set xrange [0:40]
|
|
||||||
set yrange [0:40]
|
|
||||||
set zrange [-2:2]
|
|
||||||
|
|
||||||
input(n) = sprintf("p-%d.dat", n)
|
|
||||||
output(n) = sprintf("%03d.png", n)
|
|
||||||
|
|
||||||
do for [i=1:50] {
|
|
||||||
set output output(i)
|
|
||||||
splot input(i) matrix using 1:2:3 with lines
|
|
||||||
}
|
|
@ -1,8 +0,0 @@
|
|||||||
# Supported: GCC, CLANG, ICC
|
|
||||||
TAG ?= CLANG
|
|
||||||
|
|
||||||
#Feature options
|
|
||||||
OPTIONS += -DARRAY_ALIGNMENT=64
|
|
||||||
#OPTIONS += -DVERBOSE_AFFINITY
|
|
||||||
#OPTIONS += -DVERBOSE_DATASIZE
|
|
||||||
#OPTIONS += -DVERBOSE_TIMER
|
|
@ -1,18 +0,0 @@
|
|||||||
CC = mpicc
|
|
||||||
GCC = cc
|
|
||||||
LINKER = $(CC)
|
|
||||||
|
|
||||||
ifeq ($(ENABLE_OPENMP),true)
|
|
||||||
OPENMP = -fopenmp
|
|
||||||
#OPENMP = -Xpreprocessor -fopenmp #required on Macos with homebrew libomp
|
|
||||||
LIBS = # -lomp
|
|
||||||
endif
|
|
||||||
|
|
||||||
VERSION = --version
|
|
||||||
CFLAGS = -Ofast -std=c99 $(OPENMP)
|
|
||||||
#CFLAGS = -Ofast -fnt-store=aggressive -std=c99 $(OPENMP) #AMD CLANG
|
|
||||||
LFLAGS = $(OPENMP) -lm
|
|
||||||
DEFINES = -D_GNU_SOURCE
|
|
||||||
DEFINES += -DANIMATE
|
|
||||||
# DEFINES += -DDEBUG
|
|
||||||
INCLUDES =
|
|
@ -1,14 +0,0 @@
|
|||||||
CC = gcc
|
|
||||||
GCC = gcc
|
|
||||||
LINKER = $(CC)
|
|
||||||
|
|
||||||
ifeq ($(ENABLE_OPENMP),true)
|
|
||||||
OPENMP = -fopenmp
|
|
||||||
endif
|
|
||||||
|
|
||||||
VERSION = --version
|
|
||||||
CFLAGS = -Ofast -ffreestanding -std=c99 $(OPENMP)
|
|
||||||
LFLAGS = $(OPENMP)
|
|
||||||
DEFINES = -D_GNU_SOURCE
|
|
||||||
INCLUDES =
|
|
||||||
LIBS =
|
|
@ -1,14 +0,0 @@
|
|||||||
CC = icc
|
|
||||||
GCC = gcc
|
|
||||||
LINKER = $(CC)
|
|
||||||
|
|
||||||
ifeq ($(ENABLE_OPENMP),true)
|
|
||||||
OPENMP = -qopenmp
|
|
||||||
endif
|
|
||||||
|
|
||||||
VERSION = --version
|
|
||||||
CFLAGS = -O3 -xHost -qopt-zmm-usage=high -std=c99 $(OPENMP)
|
|
||||||
LFLAGS = $(OPENMP)
|
|
||||||
DEFINES = -D_GNU_SOURCE
|
|
||||||
INCLUDES =
|
|
||||||
LIBS =
|
|
@ -1,22 +0,0 @@
|
|||||||
# Problem specific Data:
|
|
||||||
# ---------------------
|
|
||||||
|
|
||||||
name poisson
|
|
||||||
|
|
||||||
# Geometry Data:
|
|
||||||
# -------------
|
|
||||||
|
|
||||||
xlength 1.0 # domain size in x-direction
|
|
||||||
ylength 1.0 # domain size in y-direction
|
|
||||||
imax 100 # number of interior cells in x-direction
|
|
||||||
jmax 100 # number of interior cells in y-direction
|
|
||||||
|
|
||||||
# Pressure Iteration Data:
|
|
||||||
# -----------------------
|
|
||||||
|
|
||||||
itermax 1000000 # maximal number of pressure iteration in one time step
|
|
||||||
eps 0.000001 # stopping tolerance for pressure iteration
|
|
||||||
rho 0.99999 # relaxation parameter for SOR iteration
|
|
||||||
omg 1.2 # relaxation parameter for SOR iteration
|
|
||||||
|
|
||||||
#===============================================================================
|
|
@ -1,37 +0,0 @@
|
|||||||
/*
|
|
||||||
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
|
|
||||||
* All rights reserved.
|
|
||||||
* Use of this source code is governed by a MIT-style
|
|
||||||
* license that can be found in the LICENSE file.
|
|
||||||
*/
|
|
||||||
#include <stdlib.h>
|
|
||||||
#include <stdio.h>
|
|
||||||
#include <errno.h>
|
|
||||||
|
|
||||||
void* allocate (int alignment, size_t bytesize)
|
|
||||||
{
|
|
||||||
int errorCode;
|
|
||||||
void* ptr;
|
|
||||||
|
|
||||||
errorCode = posix_memalign(&ptr, alignment, bytesize);
|
|
||||||
|
|
||||||
if (errorCode) {
|
|
||||||
if (errorCode == EINVAL) {
|
|
||||||
fprintf(stderr,
|
|
||||||
"Error: Alignment parameter is not a power of two\n");
|
|
||||||
exit(EXIT_FAILURE);
|
|
||||||
}
|
|
||||||
if (errorCode == ENOMEM) {
|
|
||||||
fprintf(stderr,
|
|
||||||
"Error: Insufficient memory to fulfill the request\n");
|
|
||||||
exit(EXIT_FAILURE);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (ptr == NULL) {
|
|
||||||
fprintf(stderr, "Error: posix_memalign failed!\n");
|
|
||||||
exit(EXIT_FAILURE);
|
|
||||||
}
|
|
||||||
|
|
||||||
return ptr;
|
|
||||||
}
|
|
@ -1,11 +0,0 @@
|
|||||||
/* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
|
||||||
* All rights reserved.
|
|
||||||
* Use of this source code is governed by a MIT-style
|
|
||||||
* license that can be found in the LICENSE file. */
|
|
||||||
#ifndef __ALLOCATE_H_
|
|
||||||
#define __ALLOCATE_H_
|
|
||||||
#include <stdlib.h>
|
|
||||||
|
|
||||||
extern void* allocate(int alignment, size_t bytesize);
|
|
||||||
|
|
||||||
#endif
|
|
@ -1,53 +0,0 @@
|
|||||||
/*
|
|
||||||
* =======================================================================================
|
|
||||||
*
|
|
||||||
* Author: Jan Eitzinger (je), jan.eitzinger@fau.de
|
|
||||||
* Copyright (c) 2020 RRZE, University Erlangen-Nuremberg
|
|
||||||
*
|
|
||||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
||||||
* of this software and associated documentation files (the "Software"), to deal
|
|
||||||
* in the Software without restriction, including without limitation the rights
|
|
||||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
||||||
* copies of the Software, and to permit persons to whom the Software is
|
|
||||||
* furnished to do so, subject to the following conditions:
|
|
||||||
*
|
|
||||||
* The above copyright notice and this permission notice shall be included in all
|
|
||||||
* copies or substantial portions of the Software.
|
|
||||||
*
|
|
||||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
||||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
||||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
||||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
||||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
||||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
||||||
* SOFTWARE.
|
|
||||||
*
|
|
||||||
* =======================================================================================
|
|
||||||
*/
|
|
||||||
#ifndef LIKWID_MARKERS_H
|
|
||||||
#define LIKWID_MARKERS_H
|
|
||||||
|
|
||||||
#ifdef LIKWID_PERFMON
|
|
||||||
#include <likwid.h>
|
|
||||||
#define LIKWID_MARKER_INIT likwid_markerInit()
|
|
||||||
#define LIKWID_MARKER_THREADINIT likwid_markerThreadInit()
|
|
||||||
#define LIKWID_MARKER_SWITCH likwid_markerNextGroup()
|
|
||||||
#define LIKWID_MARKER_REGISTER(regionTag) likwid_markerRegisterRegion(regionTag)
|
|
||||||
#define LIKWID_MARKER_START(regionTag) likwid_markerStartRegion(regionTag)
|
|
||||||
#define LIKWID_MARKER_STOP(regionTag) likwid_markerStopRegion(regionTag)
|
|
||||||
#define LIKWID_MARKER_CLOSE likwid_markerClose()
|
|
||||||
#define LIKWID_MARKER_RESET(regionTag) likwid_markerResetRegion(regionTag)
|
|
||||||
#define LIKWID_MARKER_GET(regionTag, nevents, events, time, count) likwid_markerGetRegion(regionTag, nevents, events, time, count)
|
|
||||||
#else /* LIKWID_PERFMON */
|
|
||||||
#define LIKWID_MARKER_INIT
|
|
||||||
#define LIKWID_MARKER_THREADINIT
|
|
||||||
#define LIKWID_MARKER_SWITCH
|
|
||||||
#define LIKWID_MARKER_REGISTER(regionTag)
|
|
||||||
#define LIKWID_MARKER_START(regionTag)
|
|
||||||
#define LIKWID_MARKER_STOP(regionTag)
|
|
||||||
#define LIKWID_MARKER_CLOSE
|
|
||||||
#define LIKWID_MARKER_GET(regionTag, nevents, events, time, count)
|
|
||||||
#define LIKWID_MARKER_RESET(regionTag)
|
|
||||||
#endif /* LIKWID_PERFMON */
|
|
||||||
|
|
||||||
#endif /*LIKWID_MARKERS_H*/
|
|
@ -1,39 +0,0 @@
|
|||||||
/* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.ke
|
|
||||||
* All rights reserved.
|
|
||||||
* Use of this source code is governed by a MIT-style
|
|
||||||
* license that can be found in the LICENSE file. */
|
|
||||||
#include <mpi.h>
|
|
||||||
#include <stdio.h>
|
|
||||||
#include <stdlib.h>
|
|
||||||
|
|
||||||
#include "parameter.h"
|
|
||||||
#include "solver.h"
|
|
||||||
#include "timing.h"
|
|
||||||
|
|
||||||
int main(int argc, char** argv)
|
|
||||||
{
|
|
||||||
|
|
||||||
MPI_Init(&argc, &argv);
|
|
||||||
|
|
||||||
double startTime, endTime;
|
|
||||||
Parameter params;
|
|
||||||
Solver solver;
|
|
||||||
|
|
||||||
initParameter(¶ms);
|
|
||||||
|
|
||||||
if (argc < 2) {
|
|
||||||
printf("Usage: %s <configFile>\n", argv[0]);
|
|
||||||
exit(EXIT_SUCCESS);
|
|
||||||
}
|
|
||||||
|
|
||||||
readParameter(¶ms, argv[1]);
|
|
||||||
initSolver(&solver, ¶ms, 2);
|
|
||||||
startTime = getTimeStamp();
|
|
||||||
solveRB(&solver);
|
|
||||||
endTime = getTimeStamp();
|
|
||||||
writeResult(&solver, "p.dat");
|
|
||||||
if(solver.rank == 0) printf(" %.2fs\n", endTime - startTime);
|
|
||||||
|
|
||||||
MPI_Finalize();
|
|
||||||
return EXIT_SUCCESS;
|
|
||||||
}
|
|
@ -1,79 +0,0 @@
|
|||||||
/* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
|
||||||
* All rights reserved.
|
|
||||||
* Use of this source code is governed by a MIT-style
|
|
||||||
* license that can be found in the LICENSE file. */
|
|
||||||
#include <stdio.h>
|
|
||||||
#include <stdlib.h>
|
|
||||||
#include <string.h>
|
|
||||||
//---
|
|
||||||
#include "parameter.h"
|
|
||||||
#include "util.h"
|
|
||||||
#define MAXLINE 4096
|
|
||||||
|
|
||||||
void initParameter(Parameter* param)
|
|
||||||
{
|
|
||||||
param->xlength = 1.0;
|
|
||||||
param->ylength = 1.0;
|
|
||||||
param->imax = 100;
|
|
||||||
param->jmax = 100;
|
|
||||||
param->itermax = 1000;
|
|
||||||
param->eps = 0.0001;
|
|
||||||
param->omg = 1.8;
|
|
||||||
param->rho = 0.99;
|
|
||||||
}
|
|
||||||
|
|
||||||
void readParameter(Parameter* param, const char* filename)
|
|
||||||
{
|
|
||||||
FILE* fp = fopen(filename, "r");
|
|
||||||
char line[MAXLINE];
|
|
||||||
int i;
|
|
||||||
|
|
||||||
if (!fp) {
|
|
||||||
fprintf(stderr, "Could not open parameter file: %s\n", filename);
|
|
||||||
exit(EXIT_FAILURE);
|
|
||||||
}
|
|
||||||
|
|
||||||
while (!feof(fp)) {
|
|
||||||
line[0] = '\0';
|
|
||||||
fgets(line, MAXLINE, fp);
|
|
||||||
for (i = 0; line[i] != '\0' && line[i] != '#'; i++)
|
|
||||||
;
|
|
||||||
line[i] = '\0';
|
|
||||||
|
|
||||||
char* tok = strtok(line, " ");
|
|
||||||
char* val = strtok(NULL, " ");
|
|
||||||
|
|
||||||
#define PARSE_PARAM(p, f) \
|
|
||||||
if (strncmp(tok, #p, sizeof(#p) / sizeof(#p[0]) - 1) == 0) { \
|
|
||||||
param->p = f(val); \
|
|
||||||
}
|
|
||||||
#define PARSE_STRING(p) PARSE_PARAM(p, strdup)
|
|
||||||
#define PARSE_INT(p) PARSE_PARAM(p, atoi)
|
|
||||||
#define PARSE_REAL(p) PARSE_PARAM(p, atof)
|
|
||||||
|
|
||||||
if (tok != NULL && val != NULL) {
|
|
||||||
PARSE_REAL(xlength);
|
|
||||||
PARSE_REAL(ylength);
|
|
||||||
PARSE_INT(imax);
|
|
||||||
PARSE_INT(jmax);
|
|
||||||
PARSE_INT(itermax);
|
|
||||||
PARSE_REAL(eps);
|
|
||||||
PARSE_REAL(omg);
|
|
||||||
PARSE_REAL(rho);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fclose(fp);
|
|
||||||
}
|
|
||||||
|
|
||||||
void printParameter(Parameter* param)
|
|
||||||
{
|
|
||||||
printf("Parameters:\n");
|
|
||||||
printf("Geometry data:\n");
|
|
||||||
printf("\tDomain box size (x, y): %e, %e\n", param->xlength, param->ylength);
|
|
||||||
printf("\tCells (x, y): %d, %d\n", param->imax, param->jmax);
|
|
||||||
printf("Iterative solver parameters:\n");
|
|
||||||
printf("\tMax iterations: %d\n", param->itermax);
|
|
||||||
printf("\tepsilon (stopping tolerance) : %e\n", param->eps);
|
|
||||||
printf("\tomega (SOR relaxation): %e\n", param->omg);
|
|
||||||
}
|
|
@ -1,18 +0,0 @@
|
|||||||
/* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
|
||||||
* All rights reserved.
|
|
||||||
* Use of this source code is governed by a MIT-style
|
|
||||||
* license that can be found in the LICENSE file. */
|
|
||||||
#ifndef __PARAMETER_H_
|
|
||||||
#define __PARAMETER_H_
|
|
||||||
|
|
||||||
typedef struct {
|
|
||||||
double xlength, ylength;
|
|
||||||
int imax, jmax;
|
|
||||||
int itermax;
|
|
||||||
double eps, omg, rho, gamma;
|
|
||||||
} Parameter;
|
|
||||||
|
|
||||||
void initParameter(Parameter*);
|
|
||||||
void readParameter(Parameter*, const char*);
|
|
||||||
void printParameter(Parameter*);
|
|
||||||
#endif
|
|
@ -1,322 +0,0 @@
|
|||||||
/* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
|
||||||
* All rights reserved.
|
|
||||||
* Use of this source code is governed by a MIT-style
|
|
||||||
* license that can be found in the LICENSE file. */
|
|
||||||
#include "math.h"
|
|
||||||
#include "stdio.h"
|
|
||||||
#include "stdlib.h"
|
|
||||||
|
|
||||||
#include "allocate.h"
|
|
||||||
#include "parameter.h"
|
|
||||||
#include "solver.h"
|
|
||||||
#include <alloca.h>
|
|
||||||
#include <mpi.h>
|
|
||||||
#include <stddef.h>
|
|
||||||
#include <stdlib.h>
|
|
||||||
|
|
||||||
#define PI 3.14159265358979323846
|
|
||||||
#define P(i, j) p[(i) * (solver->ljmax + 2) + (j)]
|
|
||||||
#define RHS(i, j) rhs[(i) * (solver->ljmax + 2) + (j)]
|
|
||||||
|
|
||||||
int distribute_dim(int dim_rank, int dim_comm_size, int dim_size)
|
|
||||||
{
|
|
||||||
int base_size = dim_size / dim_comm_size;
|
|
||||||
return dim_rank < (dim_size % dim_comm_size) ? base_size + 1 : base_size;
|
|
||||||
}
|
|
||||||
|
|
||||||
int get_dim_start(int dim_rank, int dim_comm_size, int dim_size)
|
|
||||||
{
|
|
||||||
int dim_start = 0;
|
|
||||||
for (int i = 0; i < dim_rank; i++)
|
|
||||||
dim_start += distribute_dim(i, dim_comm_size, dim_size);
|
|
||||||
return dim_start;
|
|
||||||
}
|
|
||||||
|
|
||||||
void write_matrix_to_file(
|
|
||||||
const char* _Nonnull filename, double* matrix, size_t rows, size_t cols)
|
|
||||||
{
|
|
||||||
FILE* fp;
|
|
||||||
fp = fopen(filename, "w");
|
|
||||||
|
|
||||||
if (fp == NULL) {
|
|
||||||
printf("Error!\n");
|
|
||||||
exit(EXIT_FAILURE);
|
|
||||||
}
|
|
||||||
|
|
||||||
for (int i = 0; i < rows; i++) {
|
|
||||||
for (int j = 0; j < cols; j++) {
|
|
||||||
fprintf(fp, "%f ", matrix[i * (cols) + j]);
|
|
||||||
}
|
|
||||||
fprintf(fp, "\n");
|
|
||||||
}
|
|
||||||
|
|
||||||
fclose(fp);
|
|
||||||
}
|
|
||||||
|
|
||||||
void initSolver(Solver* solver, Parameter* params, int problem)
|
|
||||||
{
|
|
||||||
int x_low, x_high, y_low, y_high;
|
|
||||||
int rank, size;
|
|
||||||
MPI_Datatype contiguos_boundary, vector_boundary;
|
|
||||||
|
|
||||||
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
|
|
||||||
MPI_Comm_size(MPI_COMM_WORLD, &size);
|
|
||||||
|
|
||||||
solver->imax = params->imax;
|
|
||||||
solver->jmax = params->jmax;
|
|
||||||
solver->dx = params->xlength / params->imax;
|
|
||||||
solver->dy = params->ylength / params->jmax;
|
|
||||||
solver->eps = params->eps;
|
|
||||||
solver->omega = params->omg;
|
|
||||||
solver->rho = params->rho;
|
|
||||||
solver->itermax = params->itermax;
|
|
||||||
solver->dims[0] = 0;
|
|
||||||
solver->dims[1] = 0;
|
|
||||||
solver->rank = 0;
|
|
||||||
solver->size = 0;
|
|
||||||
solver->rank = rank;
|
|
||||||
solver->size = size;
|
|
||||||
|
|
||||||
MPI_Dims_create(size, 2, solver->dims);
|
|
||||||
MPI_Cart_create(MPI_COMM_WORLD,
|
|
||||||
2,
|
|
||||||
solver->dims,
|
|
||||||
(int[2]) { 0, 0 },
|
|
||||||
0,
|
|
||||||
&solver->cart_comm);
|
|
||||||
MPI_Cart_coords(solver->cart_comm, rank, 2, solver->coords);
|
|
||||||
solver->boundary = NB;
|
|
||||||
MPI_Cart_shift(solver->cart_comm, 0, 1, &x_low, &x_high);
|
|
||||||
MPI_Cart_shift(solver->cart_comm, 1, 1, &y_low, &y_high);
|
|
||||||
|
|
||||||
if (x_low == MPI_PROC_NULL) solver->boundary += BOTTOM;
|
|
||||||
if (y_low == MPI_PROC_NULL) solver->boundary += LEFT;
|
|
||||||
if (x_high == MPI_PROC_NULL) solver->boundary += TOP;
|
|
||||||
if (y_high == MPI_PROC_NULL) solver->boundary += RIGHT;
|
|
||||||
|
|
||||||
solver->limax = distribute_dim(solver->coords[0], solver->dims[0], solver->imax);
|
|
||||||
solver->ljmax = distribute_dim(solver->coords[1], solver->dims[1], solver->jmax);
|
|
||||||
|
|
||||||
MPI_Type_contiguous(solver->ljmax, MPI_DOUBLE, &contiguos_boundary);
|
|
||||||
MPI_Type_vector(solver->limax, 1, solver->ljmax + 2, MPI_DOUBLE, &vector_boundary);
|
|
||||||
MPI_Type_commit(&contiguos_boundary);
|
|
||||||
MPI_Type_commit(&vector_boundary);
|
|
||||||
solver->types[0] = contiguos_boundary;
|
|
||||||
solver->types[1] = contiguos_boundary;
|
|
||||||
solver->types[2] = vector_boundary;
|
|
||||||
solver->types[3] = vector_boundary;
|
|
||||||
|
|
||||||
solver->snd_displacements[0] = ((solver->ljmax + 2) + 1) * sizeof(double);
|
|
||||||
solver->snd_displacements[1] = ((solver->ljmax + 2) * (solver->limax) + 1) *
|
|
||||||
sizeof(double);
|
|
||||||
solver->snd_displacements[2] = ((solver->ljmax + 2) + 1) * sizeof(double);
|
|
||||||
solver->snd_displacements[3] = ((solver->ljmax + 2) + solver->ljmax) * sizeof(double);
|
|
||||||
|
|
||||||
solver->rcv_displacements[0] = (1) * sizeof(double);
|
|
||||||
solver->rcv_displacements[1] = ((solver->ljmax + 2) * (solver->limax + 1) + 1) *
|
|
||||||
sizeof(double);
|
|
||||||
solver->rcv_displacements[2] = ((solver->ljmax + 2)) * sizeof(double);
|
|
||||||
solver->rcv_displacements[3] = ((solver->ljmax + 2) + solver->ljmax + 1) *
|
|
||||||
sizeof(double);
|
|
||||||
|
|
||||||
size_t bytesize = (solver->limax + 2) * (solver->ljmax + 2) * sizeof(double);
|
|
||||||
solver->p = allocate(64, bytesize);
|
|
||||||
solver->rhs = allocate(64, bytesize);
|
|
||||||
double dx = solver->dx;
|
|
||||||
double dy = solver->dy;
|
|
||||||
double* p = solver->p;
|
|
||||||
double* rhs = solver->rhs;
|
|
||||||
solver->i_start = get_dim_start(solver->coords[0], solver->dims[0], solver->imax);
|
|
||||||
solver->j_start = get_dim_start(solver->coords[1], solver->dims[1], solver->jmax);
|
|
||||||
|
|
||||||
#ifdef DEBUG
|
|
||||||
printf("Prc %d; {%d,%d}, {%d,%d}, {%d,%d}\n",
|
|
||||||
rank,
|
|
||||||
solver->coords[0],
|
|
||||||
solver->coords[1],
|
|
||||||
solver->limax,
|
|
||||||
solver->ljmax,
|
|
||||||
solver->i_start,
|
|
||||||
solver->j_start);
|
|
||||||
fflush(stdout);
|
|
||||||
#endif /* ifdef DEBUG */
|
|
||||||
|
|
||||||
for (int i = 0; i < solver->limax + 2; i++) {
|
|
||||||
for (int j = 0; j < solver->ljmax + 2; j++) {
|
|
||||||
P(i, j) = sin(2.0 * PI * (solver->i_start + i) * dx * 2.0) +
|
|
||||||
sin(2.0 * PI * (solver->j_start + j) * dy * 2.0);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (problem == 2) {
|
|
||||||
for (int i = 0; i < solver->limax + 2; i++) {
|
|
||||||
for (int j = 0; j < solver->ljmax + 2; j++) {
|
|
||||||
RHS(i, j) = sin(2.0 * PI * (solver->i_start + i) * dx);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
for (int i = 0; i < solver->limax + 2; i++) {
|
|
||||||
for (int j = 0; j < solver->ljmax + 2; j++) {
|
|
||||||
RHS(i, j) = 0.0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void solveRB(Solver* solver)
|
|
||||||
{
|
|
||||||
int imax = solver->imax;
|
|
||||||
int jmax = solver->jmax;
|
|
||||||
double eps = solver->eps;
|
|
||||||
int itermax = solver->itermax;
|
|
||||||
double dx2 = solver->dx * solver->dx;
|
|
||||||
double dy2 = solver->dy * solver->dy;
|
|
||||||
double idx2 = 1.0 / dx2;
|
|
||||||
double idy2 = 1.0 / dy2;
|
|
||||||
double factor = solver->omega * 0.5 * (dx2 * dy2) / (dx2 + dy2);
|
|
||||||
double* p = solver->p;
|
|
||||||
double* rhs = solver->rhs;
|
|
||||||
double epssq = eps * eps;
|
|
||||||
int it = 0;
|
|
||||||
double res = 1.0;
|
|
||||||
int pass, jsw, isw;
|
|
||||||
|
|
||||||
while ((res >= epssq) && (it < itermax)) {
|
|
||||||
res = 0.0;
|
|
||||||
jsw = solver->j_start % 2 ? 1 : 2;
|
|
||||||
|
|
||||||
MPI_Neighbor_alltoallw(solver->p,
|
|
||||||
(int[4]) { 1, 1, 1, 1 },
|
|
||||||
solver->snd_displacements,
|
|
||||||
solver->types,
|
|
||||||
solver->p,
|
|
||||||
(int[4]) { 1, 1, 1, 1 },
|
|
||||||
solver->rcv_displacements,
|
|
||||||
solver->types,
|
|
||||||
solver->cart_comm);
|
|
||||||
|
|
||||||
for (pass = 0; pass < 2; pass++) { // why ??
|
|
||||||
isw = jsw;
|
|
||||||
for (int i = 1; i < solver->limax + 1; i++) {
|
|
||||||
for (int j = isw; j < solver->ljmax + 1; j += 2) {
|
|
||||||
double r = RHS(i, j) -
|
|
||||||
((P(i + 1, j) - 2.0 * P(i, j) + P(i - 1, j)) * idx2 +
|
|
||||||
(P(i, j + 1) - 2.0 * P(i, j) + P(i, j - 1)) * idy2);
|
|
||||||
|
|
||||||
P(i, j) -= (factor * r);
|
|
||||||
res += (r * r);
|
|
||||||
}
|
|
||||||
isw = 3 - isw;
|
|
||||||
}
|
|
||||||
jsw = 3 - jsw;
|
|
||||||
}
|
|
||||||
|
|
||||||
MPI_Allreduce(MPI_IN_PLACE, &res, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
|
|
||||||
|
|
||||||
if (solver->boundary & BOTTOM)
|
|
||||||
for (int j = 1; j < solver->ljmax + 1; j++)
|
|
||||||
P(0, j) = P(1, j);
|
|
||||||
if (solver->boundary & TOP)
|
|
||||||
for (int j = 1; j < solver->ljmax + 1; j++)
|
|
||||||
P(solver->limax + 1, j) = P(solver->limax, j);
|
|
||||||
if (solver->boundary & LEFT)
|
|
||||||
for (int i = 1; i < solver->limax + 1; i++)
|
|
||||||
P(i, 0) = P(i, 1);
|
|
||||||
if (solver->boundary & RIGHT)
|
|
||||||
for (int i = 1; i < solver->limax + 1; i++)
|
|
||||||
P(i, solver->ljmax + 1) = P(i, solver->ljmax);
|
|
||||||
|
|
||||||
res = res / (double)(imax * jmax);
|
|
||||||
|
|
||||||
#ifdef DEBUG
|
|
||||||
printf("%d Residuum: %e\n", it, res);
|
|
||||||
#endif
|
|
||||||
it++;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (solver->rank == 0)
|
|
||||||
printf("Solver took %d iterations to reach %f\n", it, sqrt(res));
|
|
||||||
}
|
|
||||||
|
|
||||||
void writeResult(Solver* solver, char* filename)
|
|
||||||
{
|
|
||||||
int imax = solver->imax;
|
|
||||||
int jmax = solver->jmax;
|
|
||||||
double* p = solver->p;
|
|
||||||
int* rcv_count = NULL;
|
|
||||||
int* displacements = NULL;
|
|
||||||
double* p_total = NULL;
|
|
||||||
double* send_buffer = NULL;
|
|
||||||
int local_size = solver->limax * solver->ljmax;
|
|
||||||
|
|
||||||
if (solver->rank == 0) {
|
|
||||||
|
|
||||||
rcv_count = allocate(64, solver->size * sizeof(int));
|
|
||||||
displacements = allocate(64, solver->size * sizeof(int));
|
|
||||||
p_total = allocate(64, sizeof(double) * solver->imax * solver->jmax);
|
|
||||||
double* p_total_ordered = allocate(64,
|
|
||||||
sizeof(double) * solver->imax * solver->jmax);
|
|
||||||
|
|
||||||
MPI_Gather(&local_size, 1, MPI_INT, rcv_count, 1, MPI_INT, 0, MPI_COMM_WORLD);
|
|
||||||
|
|
||||||
displacements[0] = 0;
|
|
||||||
|
|
||||||
for (int i = 0; i < solver->limax; i++)
|
|
||||||
for (int j = 0; j < solver->ljmax; j++)
|
|
||||||
p_total_ordered[i * (solver->ljmax) + j] =
|
|
||||||
solver->p[(i + 1) * (solver->ljmax + 2) + (j + 1)];
|
|
||||||
|
|
||||||
for (int i = 1; i < solver->size; i++)
|
|
||||||
displacements[i] = displacements[i - 1] + rcv_count[i - 1];
|
|
||||||
|
|
||||||
MPI_Gatherv(p_total_ordered,
|
|
||||||
local_size,
|
|
||||||
MPI_DOUBLE,
|
|
||||||
p_total,
|
|
||||||
rcv_count,
|
|
||||||
displacements,
|
|
||||||
MPI_DOUBLE,
|
|
||||||
0,
|
|
||||||
MPI_COMM_WORLD);
|
|
||||||
|
|
||||||
for (int process = 0; process < solver->size; process++) {
|
|
||||||
|
|
||||||
int coords[2] = { 0 };
|
|
||||||
|
|
||||||
MPI_Cart_coords(solver->cart_comm, process, 2, coords);
|
|
||||||
|
|
||||||
int i_max_local = distribute_dim(coords[0], solver->dims[0], solver->imax);
|
|
||||||
int j_max_local = distribute_dim(coords[1], solver->dims[1], solver->jmax);
|
|
||||||
int i_start_local = get_dim_start(coords[0], solver->dims[0], solver->imax);
|
|
||||||
int j_start_local = get_dim_start(coords[1], solver->dims[1], solver->jmax);
|
|
||||||
|
|
||||||
for (int i = 0; i < i_max_local; i++)
|
|
||||||
for (int j = 0; j < j_max_local; j++)
|
|
||||||
p_total_ordered[(i + i_start_local) * (solver->jmax) +
|
|
||||||
(j + j_start_local)] =
|
|
||||||
p_total[displacements[process] + i * (j_max_local) + j];
|
|
||||||
}
|
|
||||||
|
|
||||||
write_matrix_to_file(filename, p_total_ordered, solver->imax, solver->jmax);
|
|
||||||
|
|
||||||
} else {
|
|
||||||
|
|
||||||
send_buffer = allocate(64, local_size * sizeof(double));
|
|
||||||
for (int i = 0; i < solver->limax; i++)
|
|
||||||
for (int j = 0; j < solver->ljmax; j++)
|
|
||||||
send_buffer[i * (solver->ljmax) + j] =
|
|
||||||
solver->p[(i + 1) * (solver->ljmax + 2) + (j + 1)];
|
|
||||||
|
|
||||||
MPI_Gather(&local_size, 1, MPI_INT, rcv_count, 1, MPI_INT, 0, MPI_COMM_WORLD);
|
|
||||||
|
|
||||||
MPI_Gatherv(send_buffer,
|
|
||||||
local_size,
|
|
||||||
MPI_DOUBLE,
|
|
||||||
p_total,
|
|
||||||
rcv_count,
|
|
||||||
displacements,
|
|
||||||
MPI_DOUBLE,
|
|
||||||
0,
|
|
||||||
MPI_COMM_WORLD);
|
|
||||||
}
|
|
||||||
}
|
|
@ -1,44 +0,0 @@
|
|||||||
/* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
|
||||||
* All rights reserved.
|
|
||||||
* Use of this source code is governed by a MIT-style
|
|
||||||
* license that can be found in the LICENSE file. */
|
|
||||||
#ifndef __SOLVER_H_
|
|
||||||
#define __SOLVER_H_
|
|
||||||
#include "parameter.h"
|
|
||||||
#include <mpi.h>
|
|
||||||
typedef enum {
|
|
||||||
NB = 0b0000,
|
|
||||||
LEFT = 0b0001,
|
|
||||||
RIGHT = 0b0010,
|
|
||||||
TOP = 0b0100,
|
|
||||||
BOTTOM = 0b1000,
|
|
||||||
LFTO = LEFT + TOP,
|
|
||||||
LFBO = LEFT + BOTTOM,
|
|
||||||
RITO = RIGHT + TOP,
|
|
||||||
ROBO = RIGHT + BOTTOM
|
|
||||||
} boundary_t;
|
|
||||||
|
|
||||||
typedef struct {
|
|
||||||
|
|
||||||
double dx, dy;
|
|
||||||
int imax, jmax;
|
|
||||||
int limax, ljmax;
|
|
||||||
int i_start, j_start;
|
|
||||||
double *p, *rhs;
|
|
||||||
double eps, omega, rho;
|
|
||||||
int itermax;
|
|
||||||
MPI_Comm cart_comm;
|
|
||||||
MPI_Datatype types[4];
|
|
||||||
MPI_Aint rcv_displacements[4];
|
|
||||||
MPI_Aint snd_displacements[4];
|
|
||||||
int coords[2];
|
|
||||||
int dims[2];
|
|
||||||
int rank, size;
|
|
||||||
boundary_t boundary;
|
|
||||||
|
|
||||||
} Solver;
|
|
||||||
|
|
||||||
extern void initSolver(Solver*, Parameter*, int problem);
|
|
||||||
extern void writeResult(Solver*, char*);
|
|
||||||
extern void solveRB(Solver*);
|
|
||||||
#endif
|
|
@ -1,22 +0,0 @@
|
|||||||
/* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
|
||||||
* All rights reserved.
|
|
||||||
* Use of this source code is governed by a MIT-style
|
|
||||||
* license that can be found in the LICENSE file. */
|
|
||||||
#include <stdlib.h>
|
|
||||||
#include <time.h>
|
|
||||||
|
|
||||||
double getTimeStamp()
|
|
||||||
{
|
|
||||||
struct timespec ts;
|
|
||||||
clock_gettime(CLOCK_MONOTONIC, &ts);
|
|
||||||
return (double)ts.tv_sec + (double)ts.tv_nsec * 1.e-9;
|
|
||||||
}
|
|
||||||
|
|
||||||
double getTimeResolution()
|
|
||||||
{
|
|
||||||
struct timespec ts;
|
|
||||||
clock_getres(CLOCK_MONOTONIC, &ts);
|
|
||||||
return (double)ts.tv_sec + (double)ts.tv_nsec * 1.e-9;
|
|
||||||
}
|
|
||||||
|
|
||||||
double getTimeStamp_() { return getTimeStamp(); }
|
|
@ -1,11 +0,0 @@
|
|||||||
/* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
|
||||||
* All rights reserved.
|
|
||||||
* Use of this source code is governed by a MIT-style
|
|
||||||
* license that can be found in the LICENSE file. */
|
|
||||||
#ifndef __TIMING_H_
|
|
||||||
#define __TIMING_H_
|
|
||||||
|
|
||||||
extern double getTimeStamp();
|
|
||||||
extern double getTimeResolution();
|
|
||||||
|
|
||||||
#endif // __TIMING_H_
|
|
@ -1,20 +0,0 @@
|
|||||||
/* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
|
||||||
* All rights reserved.
|
|
||||||
* Use of this source code is governed by a MIT-style
|
|
||||||
* license that can be found in the LICENSE file. */
|
|
||||||
#ifndef __UTIL_H_
|
|
||||||
#define __UTIL_H_
|
|
||||||
#define HLINE \
|
|
||||||
"----------------------------------------------------------------------------\n"
|
|
||||||
|
|
||||||
#ifndef MIN
|
|
||||||
#define MIN(x, y) ((x) < (y) ? (x) : (y))
|
|
||||||
#endif
|
|
||||||
#ifndef MAX
|
|
||||||
#define MAX(x, y) ((x) > (y) ? (x) : (y))
|
|
||||||
#endif
|
|
||||||
#ifndef ABS
|
|
||||||
#define ABS(a) ((a) >= 0 ? (a) : -(a))
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#endif // __UTIL_H_
|
|
@ -1,7 +0,0 @@
|
|||||||
set terminal png size 1024,768 enhanced font ,12
|
|
||||||
set output 'p.png'
|
|
||||||
set datafile separator whitespace
|
|
||||||
|
|
||||||
set grid
|
|
||||||
set hidden3d
|
|
||||||
splot 'p.dat' matrix using 1:2:3 with lines
|
|
@ -35,7 +35,7 @@ int main(int argc, char** argv)
|
|||||||
}
|
}
|
||||||
|
|
||||||
initSolver(&solver, ¶ms, 2);
|
initSolver(&solver, ¶ms, 2);
|
||||||
solve(&solver);
|
solveRB(&solver);
|
||||||
getResult(&solver);
|
getResult(&solver);
|
||||||
|
|
||||||
MPI_Finalize();
|
MPI_Finalize();
|
||||||
|
@ -310,12 +310,17 @@ int solveRB(Solver *solver) {
|
|||||||
}
|
}
|
||||||
jsw = 3 - jsw;
|
jsw = 3 - jsw;
|
||||||
}
|
}
|
||||||
|
if(solver->rank==0)
|
||||||
for (int i = 1; i < imax + 1; i++) {
|
for(int i = 1 ; i < imax+1 ;i++)P(i,0)=P(i,1);
|
||||||
P(i, 0) = P(i, 1);
|
|
||||||
P(i, jmaxLocal + 1) = P(i, jmaxLocal);
|
if(solver->rank== solver->size-1)
|
||||||
}
|
for(int i = 1; i < imax +1; i++)P(i, jmaxLocal + 1) = P(i, jmaxLocal);
|
||||||
|
|
||||||
|
// for (int i = 1; i < imax + 1; i++) {
|
||||||
|
// P(i, 0) = P(i, 1);
|
||||||
|
// P(i, jmaxLocal + 1) = P(i, jmaxLocal);
|
||||||
|
// }
|
||||||
|
|
||||||
for (int j = 1; j < jmaxLocal + 1; j++) {
|
for (int j = 1; j < jmaxLocal + 1; j++) {
|
||||||
P(0, j) = P(1, j);
|
P(0, j) = P(1, j);
|
||||||
P(imax + 1, j) = P(imax, j);
|
P(imax + 1, j) = P(imax, j);
|
||||||
|
@ -25,5 +25,5 @@ extern void debug(Solver*);
|
|||||||
extern void initSolver(Solver*, Parameter*, int problem);
|
extern void initSolver(Solver*, Parameter*, int problem);
|
||||||
extern void getResult(Solver*);
|
extern void getResult(Solver*);
|
||||||
extern void writeResult(Solver*, double*, char*);
|
extern void writeResult(Solver*, double*, char*);
|
||||||
extern int solve(Solver*);
|
extern int solveRB(Solver*);
|
||||||
#endif
|
#endif
|
||||||
|
@ -6,10 +6,10 @@
|
|||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
|
|
||||||
#include "likwid-marker.h"
|
#include "likwid-marker.h"
|
||||||
#include "omp.h"
|
|
||||||
#include "parameter.h"
|
#include "parameter.h"
|
||||||
#include "solver.h"
|
#include "solver.h"
|
||||||
#include "timing.h"
|
#include "timing.h"
|
||||||
|
#include "omp.h"
|
||||||
|
|
||||||
#define LIKWID_PROFILE(tag, call) \
|
#define LIKWID_PROFILE(tag, call) \
|
||||||
startTime = getTimeStamp(); \
|
startTime = getTimeStamp(); \
|
||||||
@ -18,17 +18,21 @@
|
|||||||
LIKWID_MARKER_STOP(#tag); \
|
LIKWID_MARKER_STOP(#tag); \
|
||||||
endTime = getTimeStamp();
|
endTime = getTimeStamp();
|
||||||
|
|
||||||
|
enum VARIANT { SOR = 1, RB, RBA };
|
||||||
|
|
||||||
int main(int argc, char** argv)
|
int main(int argc, char** argv)
|
||||||
{
|
{
|
||||||
int volatile dummy = 0;
|
int volatile dummy = 0;
|
||||||
|
int variant = RB;
|
||||||
double startTime, endTime;
|
double startTime, endTime;
|
||||||
Parameter params;
|
Parameter params;
|
||||||
Solver solver;
|
Solver solver;
|
||||||
initParameter(¶ms);
|
initParameter(¶ms);
|
||||||
#pragma omp parallel
|
LIKWID_MARKER_INIT;
|
||||||
|
#pragma omp parallel
|
||||||
{
|
{
|
||||||
if (dummy == 1 || omp_get_thread_num() == 0)
|
if(dummy==1 || omp_get_thread_num()==0)
|
||||||
printf("OMP_THREADS_DETECTED: %d\n", omp_get_num_threads());
|
printf("OMP_THREADS_DETECTED: %d\n",omp_get_num_threads());
|
||||||
}
|
}
|
||||||
if (argc < 2) {
|
if (argc < 2) {
|
||||||
printf("Usage: %s <configFile>\n", argv[0]);
|
printf("Usage: %s <configFile>\n", argv[0]);
|
||||||
@ -36,13 +40,37 @@ int main(int argc, char** argv)
|
|||||||
}
|
}
|
||||||
|
|
||||||
readParameter(¶ms, argv[1]);
|
readParameter(¶ms, argv[1]);
|
||||||
|
// printParameter(¶ms);
|
||||||
|
if (argc == 3) {
|
||||||
|
variant = atoi(argv[2]);
|
||||||
|
}
|
||||||
|
if (argc == 4) {
|
||||||
|
sscanf("%lf", argv[3], ¶ms.omg);
|
||||||
|
}
|
||||||
|
|
||||||
initSolver(&solver, ¶ms, 2);
|
initSolver(&solver, ¶ms, 2);
|
||||||
startTime = getTimeStamp();
|
writeResult(&solver, "p-0.dat");
|
||||||
solveRB(&solver);
|
|
||||||
endTime = getTimeStamp();
|
|
||||||
printf(" %.2fs\n", endTime - startTime);
|
|
||||||
writeResult(&solver, "p.dat");
|
|
||||||
|
|
||||||
|
switch (variant) {
|
||||||
|
case SOR:
|
||||||
|
printf("Plain SOR\n");
|
||||||
|
fflush(stdout);
|
||||||
|
LIKWID_PROFILE("SOR", solve);
|
||||||
|
break;
|
||||||
|
case RB:
|
||||||
|
printf("Red-black SOR\n");
|
||||||
|
fflush(stdout);
|
||||||
|
LIKWID_PROFILE("RB", solveRB);
|
||||||
|
break;
|
||||||
|
case RBA:
|
||||||
|
printf("Red-black SOR with acceleration\n");
|
||||||
|
fflush(stdout);
|
||||||
|
LIKWID_PROFILE("RBA", solveRBA);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
printf(" %.2fs\n", endTime - startTime);
|
||||||
|
writeResult(&solver, "p-final.dat");
|
||||||
|
|
||||||
|
LIKWID_MARKER_CLOSE;
|
||||||
return EXIT_SUCCESS;
|
return EXIT_SUCCESS;
|
||||||
}
|
}
|
||||||
|
@ -9,42 +9,11 @@
|
|||||||
#include "allocate.h"
|
#include "allocate.h"
|
||||||
#include "parameter.h"
|
#include "parameter.h"
|
||||||
#include "solver.h"
|
#include "solver.h"
|
||||||
#include <omp.h>
|
|
||||||
#include <stddef.h>
|
|
||||||
|
|
||||||
#define PI 3.14159265358979323846
|
#define PI 3.14159265358979323846
|
||||||
#define P(i, j) p[(i) * (jmax + 2) + (j)]
|
#define P(i, j) p[(j) * (imax + 2) + (i)]
|
||||||
#define RHS(i, j) rhs[(i) * (jmax + 2) + (j)]
|
#define RHS(i, j) rhs[(j) * (imax + 2) + (i)]
|
||||||
|
|
||||||
void omp_create_dim(int num_threads, int* dim)
|
|
||||||
{
|
|
||||||
int center_int = (int)sqrt(num_threads);
|
|
||||||
dim[0] = num_threads;
|
|
||||||
dim[1] = 1;
|
|
||||||
for (int num = center_int; num != 1; num--)
|
|
||||||
if (num_threads % num == 0) {
|
|
||||||
dim[0] = num;
|
|
||||||
dim[1] = num_threads / num;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
int distribute_dim(int dim_rank, int dim_comm_size, int dim_size)
|
|
||||||
{
|
|
||||||
int base_size = dim_size / dim_comm_size;
|
|
||||||
return dim_rank < (dim_size % dim_comm_size) ? base_size + 1 : base_size;
|
|
||||||
}
|
|
||||||
|
|
||||||
int get_dim_start(int dim_rank, int dim_comm_size, int dim_size)
|
|
||||||
{
|
|
||||||
int dim_start = 0;
|
|
||||||
for (int i = 0; i < dim_rank; i++)
|
|
||||||
dim_start += distribute_dim(i, dim_comm_size, dim_size);
|
|
||||||
return dim_start;
|
|
||||||
}
|
|
||||||
|
|
||||||
int get_x_choord(const int proc_num, const int const* dims) { return proc_num / dims[1]; }
|
|
||||||
int get_y_choord(const int proc_num, const int const* dims) { return proc_num % dims[1]; }
|
|
||||||
void initSolver(Solver* solver, Parameter* params, int problem)
|
void initSolver(Solver* solver, Parameter* params, int problem)
|
||||||
{
|
{
|
||||||
solver->imax = params->imax;
|
solver->imax = params->imax;
|
||||||
@ -62,149 +31,224 @@ void initSolver(Solver* solver, Parameter* params, int problem)
|
|||||||
solver->p = allocate(64, bytesize);
|
solver->p = allocate(64, bytesize);
|
||||||
solver->rhs = allocate(64, bytesize);
|
solver->rhs = allocate(64, bytesize);
|
||||||
|
|
||||||
double dx = solver->dx;
|
double dx = solver->dx;
|
||||||
double dy = solver->dy;
|
double dy = solver->dy;
|
||||||
double* p = solver->p;
|
double* p = solver->p;
|
||||||
double* rhs = solver->rhs;
|
double* rhs = solver->rhs;
|
||||||
int dim[2] = { 0 };
|
#pragma omp parallel for collapse(2)
|
||||||
int num_threads = 1;
|
for (int j = 0; j < jmax + 2; j++) {
|
||||||
#pragma omp parallel
|
for (int i = 0; i < imax + 2; i++) {
|
||||||
{
|
P(i, j) = sin(2.0 * PI * i * dx * 2.0) + sin(2.0 * PI * j * dy * 2.0);
|
||||||
#pragma omp critical
|
}
|
||||||
num_threads = omp_get_num_threads();
|
|
||||||
}
|
}
|
||||||
omp_create_dim(num_threads, dim);
|
|
||||||
printf("%d: { %d, %d}\n", num_threads, dim[0], dim[1]);
|
|
||||||
#pragma omp parallel
|
|
||||||
{
|
|
||||||
int jsw, isw;
|
|
||||||
double local_res = 0.0;
|
|
||||||
int li_start = get_dim_start(get_x_choord(omp_get_thread_num(), dim),
|
|
||||||
dim[0],
|
|
||||||
solver->imax);
|
|
||||||
int lj_start = get_dim_start(get_y_choord(omp_get_thread_num(), dim),
|
|
||||||
dim[1],
|
|
||||||
solver->jmax);
|
|
||||||
int limax = li_start + distribute_dim(get_x_choord(omp_get_thread_num(), dim),
|
|
||||||
dim[0],
|
|
||||||
solver->imax);
|
|
||||||
int ljmax = lj_start + distribute_dim(get_y_choord(omp_get_thread_num(), dim),
|
|
||||||
dim[1],
|
|
||||||
solver->jmax);
|
|
||||||
|
|
||||||
for (int i = li_start; i < limax + 2; i++) {
|
if (problem == 2) {
|
||||||
for (int j = lj_start; j < ljmax + 2; j++) {
|
#pragma omp parallel for collapse(2)
|
||||||
P(i, j) = sin(2.0 * PI * i * dx * 2.0) + sin(2.0 * PI * j * dy * 2.0);
|
for (int j = 0; j < jmax + 2; j++) {
|
||||||
|
for (int i = 0; i < imax + 2; i++) {
|
||||||
|
RHS(i, j) = sin(2.0 * PI * i * dx);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
} else {
|
||||||
if (problem == 2) {
|
#pragma omp parallel for collapse(2)
|
||||||
for (int i = li_start; i < limax + 2; i++) {
|
for (int j = 0; j < jmax + 2; j++) {
|
||||||
for (int j = lj_start; j < ljmax + 2; j++) {
|
for (int i = 0; i < imax + 2; i++) {
|
||||||
RHS(i, j) = sin(2.0 * PI * i * dx);
|
RHS(i, j) = 0.0;
|
||||||
}
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
for (int i = li_start; i < limax + 2; i++) {
|
|
||||||
for (int j = lj_start; j < ljmax + 2; j++) {
|
|
||||||
RHS(i, j) = 0.0;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void solveRB(Solver* solver)
|
void solve(Solver* solver)
|
||||||
{
|
{
|
||||||
|
int imax = solver->imax;
|
||||||
|
int jmax = solver->jmax;
|
||||||
|
double eps = solver->eps;
|
||||||
|
int itermax = solver->itermax;
|
||||||
|
double dx2 = solver->dx * solver->dx;
|
||||||
|
double dy2 = solver->dy * solver->dy;
|
||||||
|
double idx2 = 1.0 / dx2;
|
||||||
|
double idy2 = 1.0 / dy2;
|
||||||
|
double factor = solver->omega * 0.5 * (dx2 * dy2) / (dx2 + dy2);
|
||||||
|
double* p = solver->p;
|
||||||
|
double* rhs = solver->rhs;
|
||||||
|
double epssq = eps * eps;
|
||||||
|
int it = 0;
|
||||||
|
double res = 1.0;
|
||||||
|
char filename[20];
|
||||||
|
|
||||||
const int imax = solver->imax;
|
while ((res >= epssq) && (it < itermax)) {
|
||||||
const int jmax = solver->jmax;
|
res = 0.0;
|
||||||
const int itermax = solver->itermax;
|
|
||||||
const double epssq = solver->eps * solver->eps;
|
|
||||||
|
|
||||||
const double dx2 = solver->dx * solver->dx;
|
for (int j = 1; j < jmax + 1; j++) {
|
||||||
const double dy2 = solver->dy * solver->dy;
|
for (int i = 1; i < imax + 1; i++) {
|
||||||
const double idx2 = 1.0 / dx2;
|
|
||||||
const double idy2 = 1.0 / dy2;
|
|
||||||
const double factor = solver->omega * 0.5 * (dx2 * dy2) / (dx2 + dy2);
|
|
||||||
|
|
||||||
double* __restrict p = solver->p;
|
double r = RHS(i, j) -
|
||||||
double* __restrict rhs = solver->rhs;
|
((P(i - 1, j) - 2.0 * P(i, j) + P(i + 1, j)) * idx2 +
|
||||||
|
(P(i, j - 1) - 2.0 * P(i, j) + P(i, j + 1)) * idy2);
|
||||||
|
|
||||||
int dim[2] = { 0 };
|
P(i, j) -= (factor * r);
|
||||||
#pragma omp parallel
|
res += (r * r);
|
||||||
#pragma omp single
|
|
||||||
{
|
|
||||||
omp_create_dim(omp_get_num_threads(), dim);
|
|
||||||
}
|
|
||||||
|
|
||||||
double res = 0.0;
|
|
||||||
|
|
||||||
for (int it = 0; it < itermax; ++it) {
|
|
||||||
|
|
||||||
res = 0.0;
|
|
||||||
|
|
||||||
#pragma omp parallel reduction(+ : res)
|
|
||||||
{
|
|
||||||
const int tid = omp_get_thread_num();
|
|
||||||
|
|
||||||
const int li_start = get_dim_start(get_x_choord(tid, dim), dim[0], imax);
|
|
||||||
const int lj_start = get_dim_start(get_y_choord(tid, dim), dim[1], jmax);
|
|
||||||
|
|
||||||
const int limax = li_start +
|
|
||||||
distribute_dim(get_x_choord(tid, dim), dim[0], imax);
|
|
||||||
const int ljmax = lj_start +
|
|
||||||
distribute_dim(get_y_choord(tid, dim), dim[1], jmax);
|
|
||||||
|
|
||||||
|
|
||||||
int jsw = ((li_start) % 2 == 0) == ((lj_start) % 2 == 0) ? 1 : 2;
|
|
||||||
|
|
||||||
for (int pass = 0; pass < 2; ++pass) {
|
|
||||||
int isw = jsw;
|
|
||||||
for (int i = li_start + 1; i < limax + 1; ++i) {
|
|
||||||
for (int j = lj_start + isw; j < ljmax + 1; j += 2) {
|
|
||||||
|
|
||||||
double r = RHS(i, j) -
|
|
||||||
((P(i + 1, j) - 2.0 * P(i, j) + P(i - 1, j)) * idx2 +
|
|
||||||
(P(i, j + 1) - 2.0 * P(i, j) + P(i, j - 1)) *
|
|
||||||
idy2);
|
|
||||||
|
|
||||||
P(i, j) -= factor * r;
|
|
||||||
res += r * r; /* reduction variable */
|
|
||||||
}
|
|
||||||
isw = 3 - isw;
|
|
||||||
}
|
|
||||||
#pragma omp barrier
|
|
||||||
jsw = 3 - jsw;
|
|
||||||
}
|
}
|
||||||
if (lj_start == 0)
|
}
|
||||||
for (int i = li_start + 1; i < limax + 1; i++)
|
|
||||||
P(i, 0) = P(i, 1);
|
|
||||||
if (ljmax == jmax)
|
|
||||||
for (int i = li_start + 1; i < limax + 1; i++)
|
|
||||||
P(i, ljmax + 1) = P(i, ljmax);
|
|
||||||
if (li_start == 0)
|
|
||||||
for (int j = lj_start + 1; j < ljmax + 1; j++)
|
|
||||||
P(0, j) = P(1, j);
|
|
||||||
if (limax == imax)
|
|
||||||
for (int j = lj_start + 1; j < ljmax + 1; j++)
|
|
||||||
P(limax + 1, j) = P(limax, j);
|
|
||||||
|
|
||||||
}
|
for (int i = 1; i < imax + 1; i++) {
|
||||||
|
P(i, 0) = P(i, 1);
|
||||||
|
P(i, jmax + 1) = P(i, jmax);
|
||||||
|
}
|
||||||
|
|
||||||
res /= (double)(imax * jmax);
|
for (int j = 1; j < jmax + 1; j++) {
|
||||||
|
P(0, j) = P(1, j);
|
||||||
|
P(imax + 1, j) = P(imax, j);
|
||||||
|
}
|
||||||
|
|
||||||
|
res = res / (double)(imax * jmax);
|
||||||
#ifdef DEBUG
|
#ifdef DEBUG
|
||||||
printf("%d Residuum: %e\n", it, res);
|
printf("%d Residuum: %e\n", it, res);
|
||||||
#endif
|
#endif
|
||||||
|
#ifdef ANIMATE
|
||||||
if (res < epssq) {
|
sprintf(filename, "p-%d.dat", it);
|
||||||
printf("Solver took %d iterations to reach %e\n", it + 1, sqrt(res));
|
writeResult(solver, filename);
|
||||||
return;
|
#endif
|
||||||
}
|
it++;
|
||||||
}
|
}
|
||||||
|
|
||||||
printf("Solver reached itermax (%d) with residual %e\n", itermax, sqrt(res));
|
printf("%d, %f\n", it, solver->omega);
|
||||||
|
}
|
||||||
|
|
||||||
|
void solveRB(Solver* solver)
|
||||||
|
{
|
||||||
|
int imax = solver->imax;
|
||||||
|
int jmax = solver->jmax;
|
||||||
|
double eps = solver->eps;
|
||||||
|
int itermax = solver->itermax;
|
||||||
|
double dx2 = solver->dx * solver->dx;
|
||||||
|
double dy2 = solver->dy * solver->dy;
|
||||||
|
double idx2 = 1.0 / dx2;
|
||||||
|
double idy2 = 1.0 / dy2;
|
||||||
|
double factor = solver->omega * 0.5 * (dx2 * dy2) / (dx2 + dy2);
|
||||||
|
double* p = solver->p;
|
||||||
|
double* rhs = solver->rhs;
|
||||||
|
double epssq = eps * eps;
|
||||||
|
int it = 0;
|
||||||
|
double res = 1.0;
|
||||||
|
int pass, jsw, isw;
|
||||||
|
|
||||||
|
while ((res >= epssq) && (it < itermax)) {
|
||||||
|
res = 0.0;
|
||||||
|
jsw = 1;
|
||||||
|
|
||||||
|
for (pass = 0; pass < 2; pass++) {
|
||||||
|
isw = jsw;
|
||||||
|
#pragma omp parallel for firstprivate(isw)
|
||||||
|
for (int j = 1; j < jmax + 1; j++) {
|
||||||
|
for (int i = isw; i < imax + 1; i += 2) {
|
||||||
|
|
||||||
|
double r = RHS(i, j) -
|
||||||
|
((P(i + 1, j) - 2.0 * P(i, j) + P(i - 1, j)) * idx2 +
|
||||||
|
(P(i, j + 1) - 2.0 * P(i, j) + P(i, j - 1)) * idy2);
|
||||||
|
|
||||||
|
P(i, j) -= (factor * r);
|
||||||
|
res += (r * r);
|
||||||
|
}
|
||||||
|
isw = 3 - isw;
|
||||||
|
}
|
||||||
|
jsw = 3 - jsw;
|
||||||
|
}
|
||||||
|
#pragma omp parallel for
|
||||||
|
for (int i = 1; i < imax + 1; i++) {
|
||||||
|
P(i, 0) = P(i, 1);
|
||||||
|
P(i, jmax + 1) = P(i, jmax);
|
||||||
|
}
|
||||||
|
#pragma omp parallel for
|
||||||
|
for (int j = 1; j < jmax + 1; j++) {
|
||||||
|
P(0, j) = P(1, j);
|
||||||
|
P(imax + 1, j) = P(imax, j);
|
||||||
|
}
|
||||||
|
|
||||||
|
res = res / (double)(imax * jmax);
|
||||||
|
#ifdef DEBUG
|
||||||
|
printf("%d Residuum: %e\n", it, res);
|
||||||
|
#endif
|
||||||
|
// #ifdef ANIMATE
|
||||||
|
// sprintf(filename, "p-%d.dat", it);
|
||||||
|
// writeResult(solver, filename);
|
||||||
|
// #endif
|
||||||
|
// it++;
|
||||||
|
}
|
||||||
|
|
||||||
|
printf("Solver took %d iterations to reach %f\n", it, sqrt(res));
|
||||||
|
printf("%d, %f\n", it, solver->omega);
|
||||||
|
}
|
||||||
|
|
||||||
|
void solveRBA(Solver* solver)
|
||||||
|
{
|
||||||
|
int imax = solver->imax;
|
||||||
|
int jmax = solver->jmax;
|
||||||
|
double eps = solver->eps;
|
||||||
|
int itermax = solver->itermax;
|
||||||
|
double dx2 = solver->dx * solver->dx;
|
||||||
|
double dy2 = solver->dy * solver->dy;
|
||||||
|
double idx2 = 1.0 / dx2;
|
||||||
|
double idy2 = 1.0 / dy2;
|
||||||
|
double factor = 0.5 * (dx2 * dy2) / (dx2 + dy2);
|
||||||
|
double rho = solver->rho;
|
||||||
|
double* p = solver->p;
|
||||||
|
double* rhs = solver->rhs;
|
||||||
|
double epssq = eps * eps;
|
||||||
|
int it = 0;
|
||||||
|
double res = 1.0;
|
||||||
|
int pass, jsw, isw;
|
||||||
|
double omega = 1.0;
|
||||||
|
|
||||||
|
while ((res >= epssq) && (it < itermax)) {
|
||||||
|
res = 0.0;
|
||||||
|
jsw = 1;
|
||||||
|
|
||||||
|
for (pass = 0; pass < 2; pass++) {
|
||||||
|
isw = jsw;
|
||||||
|
|
||||||
|
for (int j = 1; j < jmax + 1; j++) {
|
||||||
|
for (int i = isw; i < imax + 1; i += 2) {
|
||||||
|
|
||||||
|
double r = RHS(i, j) -
|
||||||
|
((P(i + 1, j) - 2.0 * P(i, j) + P(i - 1, j)) * idx2 +
|
||||||
|
(P(i, j + 1) - 2.0 * P(i, j) + P(i, j - 1)) * idy2);
|
||||||
|
|
||||||
|
P(i, j) -= (omega * factor * r);
|
||||||
|
res += (r * r);
|
||||||
|
}
|
||||||
|
isw = 3 - isw;
|
||||||
|
}
|
||||||
|
jsw = 3 - jsw;
|
||||||
|
omega = (it == 0 && pass == 0 ? 1.0 / (1.0 - 0.5 * rho * rho)
|
||||||
|
: 1.0 / (1.0 - 0.25 * rho * rho * omega));
|
||||||
|
}
|
||||||
|
|
||||||
|
for (int i = 1; i < imax + 1; i++) {
|
||||||
|
P(i, 0) = P(i, 1);
|
||||||
|
P(i, jmax + 1) = P(i, jmax);
|
||||||
|
}
|
||||||
|
|
||||||
|
for (int j = 1; j < jmax + 1; j++) {
|
||||||
|
P(0, j) = P(1, j);
|
||||||
|
P(imax + 1, j) = P(imax, j);
|
||||||
|
}
|
||||||
|
|
||||||
|
res = res / (double)(imax * jmax);
|
||||||
|
#ifdef DEBUG
|
||||||
|
printf("%d Residuum: %e Omega: %e\n", it, res, omega);
|
||||||
|
#endif
|
||||||
|
#ifdef ANIMATE
|
||||||
|
sprintf(filename, "p-%d.dat", it);
|
||||||
|
writeResult(solver, filename);
|
||||||
|
#endif
|
||||||
|
it++;
|
||||||
|
}
|
||||||
|
|
||||||
|
// printf("Final omega: %f\n", omega);
|
||||||
|
// printf("Solver took %d iterations to reach %f\n", it, sqrt(res));
|
||||||
|
printf("%d, %f\n", it, omega);
|
||||||
}
|
}
|
||||||
|
|
||||||
void writeResult(Solver* solver, char* filename)
|
void writeResult(Solver* solver, char* filename)
|
||||||
@ -221,8 +265,8 @@ void writeResult(Solver* solver, char* filename)
|
|||||||
exit(EXIT_FAILURE);
|
exit(EXIT_FAILURE);
|
||||||
}
|
}
|
||||||
|
|
||||||
for (int i = 1; i < imax + 1; i++) {
|
for (int j = 0; j < jmax + 2; j++) {
|
||||||
for (int j = 1; j < jmax + 1; j++) {
|
for (int i = 0; i < imax + 2; i++) {
|
||||||
fprintf(fp, "%f ", P(i, j));
|
fprintf(fp, "%f ", P(i, j));
|
||||||
}
|
}
|
||||||
fprintf(fp, "\n");
|
fprintf(fp, "\n");
|
||||||
|
Loading…
x
Reference in New Issue
Block a user