diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..5370032 --- /dev/null +++ b/Makefile @@ -0,0 +1,57 @@ +TAG = CLANG + +#CONFIGURE BUILD SYSTEM +TARGET = MDBench-$(TAG) +BUILD_DIR = ./$(TAG) +SRC_DIR = ./src +MAKE_DIR = ./ +Q ?= @ + +#DO NOT EDIT BELOW +include $(MAKE_DIR)/include_$(TAG).mk +INCLUDES += -I./src/includes + +VPATH = $(SRC_DIR) +ASM = $(patsubst $(SRC_DIR)/%.c, $(BUILD_DIR)/%.s,$(wildcard $(SRC_DIR)/*.c)) +OBJ = $(patsubst $(SRC_DIR)/%.c, $(BUILD_DIR)/%.o,$(wildcard $(SRC_DIR)/*.c)) +CPPFLAGS := $(CPPFLAGS) $(DEFINES) $(INCLUDES) + + +${TARGET}: $(BUILD_DIR) $(OBJ) + @echo "===> LINKING $(TARGET)" + $(Q)${LINKER} ${LFLAGS} -o $(TARGET) $(OBJ) $(LIBS) + +asm: $(BUILD_DIR) $(ASM) + +$(BUILD_DIR)/%.o: %.c + @echo "===> COMPILE $@" + $(Q)$(CC) -c $(CPPFLAGS) $(CFLAGS) $< -o $@ + $(Q)$(CC) $(CPPFLAGS) -MT $(@:.d=.o) -MM $< > $(BUILD_DIR)/$*.d + +$(BUILD_DIR)/%.s: %.c + @echo "===> GENERATE ASM $@" + $(Q)$(CC) -S $(ASFLAGS) $(CPPFLAGS) $(CFLAGS) $< -o $@ + +tags: + @echo "===> GENERATE TAGS" + $(Q)ctags -R + + +$(BUILD_DIR): + @mkdir $(BUILD_DIR) + +ifeq ($(findstring $(MAKECMDGOALS),clean),) +-include $(OBJ:.o=.d) +endif + +.PHONY: clean distclean + +clean: + @echo "===> CLEAN" + @rm -rf $(BUILD_DIR) + @rm -f tags + +distclean: clean + @echo "===> DIST CLEAN" + @rm -f $(TARGET) + @rm -f tags diff --git a/include_CLANG.mk b/include_CLANG.mk new file mode 100644 index 0000000..c5faaac --- /dev/null +++ b/include_CLANG.mk @@ -0,0 +1,17 @@ +CC = cc +CXX = cc +LINKER = $(CC) + +ANSI_CFLAGS = -ansi +ANSI_CFLAGS += -std=c99 +ANSI_CFLAGS += -pedantic +ANSI_CFLAGS += -Wextra + +CFLAGS = -O2 -g #$(ANSI_CFLAGS) +ASFLAGS = -masm=intel +CXXFLAGS = $(CFLAGS) +FCFLAGS = +LFLAGS = +DEFINES = -D_GNU_SOURCE +INCLUDES = +LIBS = diff --git a/include_GCC.mk b/include_GCC.mk new file mode 100644 index 0000000..83b5c5b --- /dev/null +++ b/include_GCC.mk @@ -0,0 +1,21 @@ +CC = gcc +CXX = g++ +FC = gfortran +LINKER = $(CXX) + +ANSI_CFLAGS = -ansi +ANSI_CFLAGS += -std=c99 +ANSI_CFLAGS += -pedantic +ANSI_CFLAGS += -Wextra + +# CFLAGS = -O0 -g -std=c99 -fargument-noalias +CFLAGS = -O3 -march=znver1 -ffast-math -funroll-loops -fopenmp +CXXFLAGS = $(CFLAGS) +ASFLAGS = -masm=intel +FCFLAGS = +LFLAGS = +DEFINES = -D_GNU_SOURCE +INCLUDES = +LIBS = + + diff --git a/include_ICC.mk b/include_ICC.mk new file mode 100644 index 0000000..13bc255 --- /dev/null +++ b/include_ICC.mk @@ -0,0 +1,20 @@ +CC = icc +CXX = mpiicpc +FC = ifort +LINKER = $(CXX) + +PROFILE = #-g -pg +# OPTS = -O3 -xCORE-AVX512 -qopt-zmm-usage=high $(PROFILE) +# OPTS = -O3 -xCORE-AVX2 $(PROFILE) +# OPTS = -O3 -xAVX $(PROFILE) +# OPTS = -O3 -xSSE4.2 $(PROFILE) +OPTS = -O3 -no-vec $(PROFILE) +# OPTS = -O3 -xHost $(PROFILE) +CFLAGS = $(PROFILE) -restrict $(OPTS) +CXXFLAGS = $(CFLAGS) +ASFLAGS = -masm=intel +FCFLAGS = +LFLAGS = $(PROFILE) $(OPTS) +DEFINES = -D_GNU_SOURCE -DNOCHUNK -DUSE_SIMD# -DLIKWID_PERFMON -DPRECISION=1 +INCLUDES = $(MPIINC) $(LIKWID_INC) +LIBS = $(LIKWID_LIB) -llikwid diff --git a/src/allocate.c b/src/allocate.c new file mode 100644 index 0000000..2ad9236 --- /dev/null +++ b/src/allocate.c @@ -0,0 +1,58 @@ +/* + * ======================================================================================= + * + * Author: Jan Eitzinger (je), jan.eitzinger@fau.de + * Copyright (c) 2019 RRZE, University Erlangen-Nuremberg + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * ======================================================================================= + */ + +#include +#include +#include + +void* allocate (int alignment, size_t bytesize) +{ + int errorCode; + void* ptr; + + errorCode = posix_memalign(&ptr, alignment, bytesize); + + if (errorCode) { + if (errorCode == EINVAL) { + fprintf(stderr, + "Error: Alignment parameter is not a power of two\n"); + exit(EXIT_FAILURE); + } + if (errorCode == ENOMEM) { + fprintf(stderr, + "Error: Insufficient memory to fulfill the request\n"); + exit(EXIT_FAILURE); + } + } + + if (ptr == NULL) { + fprintf(stderr, "Error: posix_memalign failed!\n"); + exit(EXIT_FAILURE); + } + + return ptr; +} diff --git a/src/includes/allocate.h b/src/includes/allocate.h new file mode 100644 index 0000000..1e83fe0 --- /dev/null +++ b/src/includes/allocate.h @@ -0,0 +1,33 @@ +/* + * ======================================================================================= + * + * Author: Jan Eitzinger (je), jan.eitzinger@fau.de + * Copyright (c) 2019 RRZE, University Erlangen-Nuremberg + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * ======================================================================================= + */ + +#ifndef __ALLOCATE_H_ +#define __ALLOCATE_H_ + +extern void* allocate (int alignment, size_t bytesize); + +#endif diff --git a/src/includes/timing.h b/src/includes/timing.h new file mode 100644 index 0000000..b7260cb --- /dev/null +++ b/src/includes/timing.h @@ -0,0 +1,35 @@ +/* + * ======================================================================================= + * + * Author: Jan Eitzinger (je), jan.eitzinger@fau.de + * Copyright (c) 2019 RRZE, University Erlangen-Nuremberg + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * ======================================================================================= + */ + +#ifndef __TIMING_H_ +#define __TIMING_H_ + +extern double getTimeStamp(); +extern double getTimeResolution(); +extern double getTimeStamp_(); + +#endif diff --git a/src/main.c b/src/main.c new file mode 100644 index 0000000..1ecee96 --- /dev/null +++ b/src/main.c @@ -0,0 +1,999 @@ +/* + * ======================================================================================= + * + * Author: Jan Eitzinger (je), jan.eitzinger@fau.de + * Copyright (c) 2019 RRZE, University Erlangen-Nuremberg + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * ======================================================================================= + */ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#define HLINE "----------------------------------------------------------------------------\n" + +#define FACTOR 0.999 +#define SMALL 1.0e-6 +#define DELTA 20000 + +#ifndef MIN +#define MIN(x,y) ((x)<(y)?(x):(y)) +#endif +#ifndef MAX +#define MAX(x,y) ((x)>(y)?(x):(y)) +#endif +#ifndef ABS +#define ABS(a) ((a) >= 0 ? (a) : -(a)) +#endif + +static int Natoms, Nlocal, Nghost, Nmax; +static double xprd, yprd, zprd; +static double xlo, xhi; +static double ylo, yhi; +static double zlo, zhi; +static double *x, *y, *z; +static double *vx, *vy, *vz; +static double *fx, *fy, *fz; +static double eng_vdwl; +static double virial; + +typedef enum { + LEFT = 0, + RIGHT, + FRONT, + BACK, + TOP, + BOTTOM, + TOPRIGHTBACK, + TOPLEFTBACK, + TOPRIGHTFRONT, + TOPLEFTFRONT, + BOTTOMRIGHTBACK, + BOTTOMLEFTBACK, + BOTTOMRIGHTFRONT, + BOTTOMLEFTFRONT, + DCOUNT +} direction; + +typedef struct { + int* numneigh; + int* neighbors; + int maxneighs; + int nbinx, nbiny, nbinz; + double cutneigh; // neighbor cutoff + double cutneighsq; // neighbor cutoff squared + int every; + int ncalls; + int max_totalneigh; + int *bincount; + int *bins; + int nmax; + int nstencil; // # of bins in stencil + int* stencil; // stencil list of bin offsets + int mbins; //total number of bins + int atoms_per_bin; // max atoms per bin + int mbinx, mbiny, mbinz; // n bins in x, y, z + int mbinxlo, mbinylo, mbinzlo; + double binsizex, binsizey, binsizez; + double bininvx, bininvy, bininvz; +} Neighbor; + +typedef struct { + double epsilon; + double sigma6; + double temp; + double rho; + double mass; + int ntimes; + int nstat; + double dt; + double dtforce; + double cutforce; + int nx, ny, nz; +} Parameter; + +typedef struct { + int *steparr; + double *tmparr; + double *engarr; + double *prsarr; + double mvv2e; + int dof_boltz; + double t_scale; + double p_scale; + double e_scale; + int mstat; +} Thermo; + +/* Park/Miller RNG w/out MASKING, so as to be like f90s version */ +#define IA 16807 +#define IM 2147483647 +#define AM (1.0/IM) +#define IQ 127773 +#define IR 2836 +#define MASK 123459876 + +double myrandom(int* idum) +{ + int k; + double ans; + + k = (*idum) / IQ; + *idum = IA * (*idum - k * IQ) - IR * k; + + if(*idum < 0) *idum += IM; + + ans = AM * (*idum); + return ans; +} + +// Neighbor list related subroutines + +// convert atom coordinates in bin index +int coord2bin(Neighbor* neighbor, double x, double y, double z) +{ + int ix, iy, iz; + double bininvx = neighbor->bininvx; + double bininvy = neighbor->bininvy; + double bininvz = neighbor->bininvz; + int mbinxlo = neighbor->mbinxlo; + int mbinylo = neighbor->mbinylo; + int mbinzlo = neighbor->mbinzlo; + + if(x >= xprd) { + ix = (int)((x - xprd) * bininvx) + neighbor->nbinx - mbinxlo; + } else if(x >= 0.0) { + ix = (int)(x * bininvx) - mbinxlo; + } else { + ix = (int)(x * bininvx) - mbinxlo - 1; + } + + if(y >= yprd) { + iy = (int)((y - yprd) * bininvy) + neighbor->nbiny - mbinylo; + } else if(y >= 0.0) { + iy = (int)(y * bininvy) - mbinylo; + } else { + iy = (int)(y * bininvy) - mbinylo - 1; + } + + if(z >= zprd) { + iz = (int)((z - zprd) * bininvz) + neighbor->nbinz - mbinzlo; + } else if(z >= 0.0) { + iz = (int)(z * bininvz) - mbinzlo; + } else { + iz = (int)(z * bininvz) - mbinzlo - 1; + } + + return (iz * neighbor->mbiny * neighbor->mbinx + iy * neighbor->mbinx + ix + 1); +} + + +// Sort atoms in bins. All bins can hold the same number of max atoms +// If one bin is exceeded reallocate and start over. +void binatoms(Neighbor *neighbor) +{ + int* bincount = neighbor->bincount; + int mbins = neighbor->mbins; + int nall = Nlocal + Nghost; + int resize = 1; + + while(resize > 0) { + resize = 0; + + for(int i = 0; i < mbins; i++) { + bincount[i] = 0; + } + + for(int i = 0; i < nall; i++) { + int ibin = coord2bin(neighbor, x[i], y[i], z[i]); + + if(bincount[ibin] < neighbor->atoms_per_bin) { + int ac = neighbor->bincount[ibin]++; + neighbor->bins[ibin * neighbor->atoms_per_bin + ac] = i; + } else { + resize = 1; + } + } + + if(resize) { + free(neighbor->bins); + neighbor->atoms_per_bin *= 2; + neighbor->bins = (int*) malloc(mbins * neighbor->atoms_per_bin * sizeof(int)); + } + } +} + +/* compute closest distance between central bin (0,0,0) and bin (i,j,k) */ +double bindist(Neighbor *neighbor, int i, int j, int k) +{ + double delx, dely, delz; + + if(i > 0) { + delx = (i - 1) * neighbor->binsizex; + } else if(i == 0) { + delx = 0.0; + } else { + delx = (i + 1) * neighbor->binsizex; + } + + if(j > 0) { + dely = (j - 1) * neighbor->binsizey; + } else if(j == 0) { + dely = 0.0; + } else { + dely = (j + 1) * neighbor->binsizey; + } + + if(k > 0) { + delz = (k - 1) * neighbor->binsizez; + } else if(k == 0) { + delz = 0.0; + } else { + delz = (k + 1) * neighbor->binsizez; + } + + return (delx * delx + dely * dely + delz * delz); +} + +void buildNeighborlist(Neighbor *neighbor) +{ + neighbor->ncalls++; + int nall = Nlocal + Nghost; + + /* extend atom arrays if necessary */ + if(nall > Nmax) { + Nmax = nall; + if(neighbor->numneigh) free(neighbor->numneigh); + if(neighbor->neighbors) free(neighbor->neighbors); + neighbor->numneigh = (int*) malloc(Nmax * sizeof(int)); + neighbor->neighbors = (int*) malloc(Nmax * neighbor->maxneighs * sizeof(int*)); + } + + /* bin local & ghost atoms */ + binatoms(neighbor); + int resize = 1; + + /* loop over each atom, storing neighbors */ + while(resize) { + int new_maxneighs = neighbor->maxneighs; + resize = 0; + + for(int i = 0; i < Nlocal; i++) { + int* neighptr = &neighbor->neighbors[i * neighbor->maxneighs]; + int n = 0; + double xtmp = x[i]; + double ytmp = y[i]; + double ztmp = z[i]; + int ibin = coord2bin(neighbor, xtmp, ytmp, ztmp); + + for(int k = 0; k < neighbor->nstencil; k++) { + int jbin = ibin + neighbor->stencil[k]; + int* loc_bin = &neighbor->bins[jbin * neighbor->atoms_per_bin]; + + for(int m = 0; m < neighbor->bincount[jbin]; m++) { + int j = loc_bin[m]; + double delx = xtmp - x[j]; + double dely = ytmp - y[j]; + double delz = ztmp - z[j]; + double rsq = delx * delx + dely * dely + delz * delz; + + if( rsq <= neighbor->cutneighsq ) { + neighptr[n++] = j; + } + } + } + + neighbor->numneigh[i] = n; + + if(n >= neighbor->maxneighs) { + resize = 1; + + if(n >= new_maxneighs) { + new_maxneighs = n; + } + } + } + + if(resize) { + neighbor->maxneighs = new_maxneighs * 1.2; + free(neighbor->neighbors); + neighbor->neighbors = (int*) malloc(Nmax* neighbor->maxneighs * sizeof(int)); + } + } +} + +void init(Neighbor *neighbor, Parameter *param) +{ + x = NULL; y = NULL; z = NULL; + vx = NULL; vy = NULL; vz = NULL; + fx = NULL; fy = NULL; fz = NULL; + + param->epsilon = 1.0; + param->sigma6 = 1.0; + param->rho = 0.8442; + param->ntimes = 200; + param->dt = 0.005; + param->nx = 32; + param->ny = 32; + param->nz = 32; + param->cutforce = 2.5; + param->temp = 1.44; + param->nstat = 100; + param->mass = 1.0; + param->dtforce = 0.5 * param->dt; + + double neighscale = 5.0 / 6.0; + neighbor->nbinx = neighscale * param->nx; + neighbor->nbiny = neighscale * param->ny; + neighbor->nbinz = neighscale * param->nz; + neighbor->every = 20; + neighbor->ncalls = 0; + neighbor->atoms_per_bin = 8; + neighbor->cutneigh = param->cutforce + 0.30; + neighbor->numneigh = NULL; + neighbor->neighbors = NULL; + neighbor->stencil = NULL; + neighbor->bins = NULL; + neighbor->bincount = NULL; +} + +void setup(Neighbor *neighbor, Parameter *param) +{ + double lattice = pow((4.0 / param->rho), (1.0 / 3.0)); + double coord; + int mbinxhi, mbinyhi, mbinzhi; + int nextx, nexty, nextz; + double cutneigh = neighbor->cutneigh; + + xprd = param->nx * lattice; + yprd = param->ny * lattice; + zprd = param->nz * lattice; + + xlo = 0.0; xhi = xprd; + ylo = 0.0; yhi = yprd; + zlo = 0.0; zhi = zprd; + + neighbor->cutneighsq = cutneigh * cutneigh; + neighbor->binsizex = xprd / neighbor->nbinx; + neighbor->binsizey = yprd / neighbor->nbiny; + neighbor->binsizez = zprd / neighbor->nbinz; + + neighbor->bininvx = 1.0 / neighbor->binsizex; + neighbor->bininvy = 1.0 / neighbor->binsizey; + neighbor->bininvz = 1.0 / neighbor->binsizez; + + // x coordinate + coord = xlo - cutneigh - SMALL * xprd; + neighbor->mbinxlo = (int) (coord * neighbor->bininvx); + + if (coord < 0.0) { + neighbor->mbinxlo = neighbor->mbinxlo - 1; + } + + coord = xhi + cutneigh + SMALL * xprd; + mbinxhi = (int) (coord * neighbor->bininvx); + + // y coordinate + coord = ylo - cutneigh - SMALL * yprd; + neighbor->mbinylo = (int) (coord * neighbor->bininvy); + + if (coord < 0.0) { + neighbor->mbinylo = neighbor->mbinylo - 1; + } + + coord = yhi + cutneigh + SMALL * yprd; + mbinyhi = (int) (coord * neighbor->bininvy); + + // z coordinate + coord = zlo - cutneigh - SMALL * zprd; + neighbor->mbinzlo = (int) (coord * neighbor->bininvz); + + if (coord < 0.0) { + neighbor->mbinzlo = neighbor->mbinzlo - 1; + } + + coord = zhi + cutneigh + SMALL * zprd; + mbinzhi = (int) (coord * neighbor->bininvz); + + + neighbor->mbinxlo = neighbor->mbinxlo - 1; + mbinxhi = mbinxhi + 1; + neighbor->mbinx = mbinxhi - neighbor->mbinxlo + 1; + + neighbor->mbinylo = neighbor->mbinylo - 1; + mbinyhi = mbinyhi + 1; + neighbor->mbiny = mbinyhi - neighbor->mbinylo + 1; + + neighbor->mbinzlo = neighbor->mbinzlo - 1; + mbinzhi = mbinzhi + 1; + neighbor->mbinz = mbinzhi - neighbor->mbinzlo + 1; + + nextx = (int) (cutneigh * neighbor->bininvx); + if(nextx * neighbor->binsizex < FACTOR * cutneigh) nextx++; + + nexty = (int) (cutneigh * neighbor->bininvy); + if(nexty * neighbor->binsizey < FACTOR * cutneigh) nexty++; + + nextz = (int) (cutneigh * neighbor->bininvz); + if(nextz * neighbor->binsizez < FACTOR * cutneigh) nextz++; + + if (neighbor->stencil) { + free(neighbor->stencil); + } + + neighbor->stencil = (int*) malloc( + (2 * nextz + 1) * (2 * nexty + 1) * (2 * nextx + 1) * sizeof(int)); + + neighbor->nstencil = 0; + int kstart = -nextz; + + for(int k = kstart; k <= nextz; k++) { + for(int j = -nexty; j <= nexty; j++) { + for(int i = -nextx; i <= nextx; i++) { + if(bindist(neighbor, i, j, k) < neighbor->cutneighsq) { + neighbor->stencil[neighbor->nstencil++] = + k * neighbor->mbiny * neighbor->mbinx + j * neighbor->mbinx + i; + } + } + } + } + +/* printf("STENCIL %d \n", neighbor->nstencil); */ +/* for (int i=0; instencil; i++) { */ +/* printf("%d ", neighbor->stencil[i]); */ +/* } */ +/* printf("\n"); */ + + neighbor->mbins = neighbor->mbinx * neighbor->mbiny * neighbor->mbinz; + + if (neighbor->bincount) { + free(neighbor->bincount); + } + neighbor->bincount = (int*) malloc(neighbor->mbins * sizeof(int)); + + if (neighbor->bins) { + free(neighbor->bins); + } + neighbor->bins = (int*) malloc(neighbor->mbins * neighbor->atoms_per_bin * sizeof(int)); +} + +double* myrealloc(double *ptr, int n, int nold) { + + double* newarray; + + newarray = (double*) malloc(n * sizeof(double)); + + if(nold) { + memcpy(newarray, ptr, nold * sizeof(double)); + } + + if(ptr) { + free(ptr); + } + + return newarray; +} + +void growarray() +{ + int nold = Nmax; + Nmax += DELTA; + + x = myrealloc(x, Nmax, nold); + y = myrealloc(y, Nmax, nold); + z = myrealloc(z, Nmax, nold); + vx = myrealloc(vx, Nmax, nold); + vy = myrealloc(vy, Nmax, nold); + vz = myrealloc(vz, Nmax, nold); + fx = myrealloc(fx, Nmax, nold); + fy = myrealloc(fy, Nmax, nold); + fz = myrealloc(fz, Nmax, nold); + + if(x == NULL || y == NULL || z == NULL || + vx == NULL || vy == NULL || vz == NULL || + fx == NULL || fy == NULL || fz == NULL ) { + printf("ERROR: No memory for atoms\n"); + } +} + +/* Enforce periodic boundary conditions + * Relocate atoms that have left domain for + * 6 planes (and 8 corners (diagonal)) + * Setup ghost atoms at boundaries*/ +void processBorders(double cutneigh) +{ + Nghost = 0; + int lastidx = Nlocal; + + for(int i = 0; i < Nlocal; i++) { + + /* Relocate atoms that have left the domain */ + if(x[i] < 0.0) { + x[i] += xprd; + } else if(x[i] >= xprd) { + x[i] -= xprd; + } + + if(y[i] < 0.0) { + y[i] += yprd; + } else if(y[i] >= yprd) { + y[i] -= yprd; + } + + if(z[i] < 0.0) { + z[i] += zprd; + } else if(z[i] >= zprd) { + z[i] -= zprd; + } + + if (lastidx+1 >= Nmax) { + growarray(); + } + + /* Setup ghost atoms */ + if (x[i] < cutneigh) { /* left plane */ + x[lastidx+1] = x[i] + xprd; + y[lastidx+1] = y[i]; + z[lastidx+1] = z[i]; + lastidx++; Nghost++; + + /* treat corners */ + if (y[i] < cutneigh && z[i] < cutneigh) { /* bottom left front corner */ + x[lastidx+1] = x[i] + xprd; + y[lastidx+1] = y[i] + yprd; + z[lastidx+1] = z[i] + zprd; + lastidx++; Nghost++; + } else if (y[i] >= (yprd-cutneigh) && z[i] < cutneigh) { /* bottom left back corner */ + x[lastidx+1] = x[i] + xprd; + y[lastidx+1] = y[i] - yprd; + z[lastidx+1] = z[i] + zprd; + lastidx++; Nghost++; + } else if (y[i] < cutneigh && z[i] >= (zprd-cutneigh)) { /* top left front corner */ + x[lastidx+1] = x[i] + xprd; + y[lastidx+1] = y[i] + yprd; + z[lastidx+1] = z[i] - zprd; + lastidx++; Nghost++; + } else if (y[i] >= (yprd-cutneigh) && z[i] >= (zprd-cutneigh)) { /* top left back corner */ + x[lastidx+1] = x[i] + xprd; + y[lastidx+1] = y[i] - yprd; + z[lastidx+1] = z[i] - zprd; + lastidx++; Nghost++; + } + } else if (x[i] >= (xprd-cutneigh)) { /* right plane */ + x[lastidx+1] = x[i] - xprd; + y[lastidx+1] = y[i]; + z[lastidx+1] = z[i]; + lastidx++; Nghost++; + + /* treat corners */ + if (y[i] < cutneigh && z[i] < cutneigh) { /* bottom right front corner */ + x[lastidx+1] = x[i] - xprd; + y[lastidx+1] = y[i] + yprd; + z[lastidx+1] = z[i] + zprd; + lastidx++; Nghost++; + } else if (y[i] >= (yprd-cutneigh) && z[i] < cutneigh) { /* bottom right back corner */ + x[lastidx+1] = x[i] - xprd; + y[lastidx+1] = y[i] - yprd; + z[lastidx+1] = z[i] + zprd; + lastidx++; Nghost++; + } else if (y[i] < cutneigh && z[i] >= (zprd-cutneigh)) { /* top right front corner */ + x[lastidx+1] = x[i] - xprd; + y[lastidx+1] = y[i] + yprd; + z[lastidx+1] = z[i] - zprd; + lastidx++; Nghost++; + } else if (y[i] >= (yprd-cutneigh) && z[i] >= (zprd-cutneigh)) { /* top right back corner */ + x[lastidx+1] = x[i] - xprd; + y[lastidx+1] = y[i] - yprd; + z[lastidx+1] = z[i] - zprd; + lastidx++; Nghost++; + } + } else if (y[i] < cutneigh) { /* front plane */ + x[lastidx+1] = x[i]; + y[lastidx+1] = y[i] + yprd; + z[lastidx+1] = z[i]; + lastidx++; Nghost++; + } else if (y[i] >= (yprd-cutneigh)) { /* back plane */ + x[lastidx+1] = x[i]; + y[lastidx+1] = y[i] - yprd; + z[lastidx+1] = z[i]; + lastidx++; Nghost++; + } else if (z[i] < cutneigh) { /* bottom plane */ + x[lastidx+1] = x[i]; + y[lastidx+1] = y[i]; + z[lastidx+1] = z[i] + zprd; + lastidx++; Nghost++; + } else if (z[i] >= (zprd-cutneigh)) { /* top plane */ + x[lastidx+1] = x[i]; + y[lastidx+1] = y[i]; + z[lastidx+1] = z[i] - zprd; + lastidx++; Nghost++; + } + } +} + +void addatom(double x_in, double y_in, double z_in, + double vx_in, double vy_in, double vz_in) +{ + if(Nlocal == Nmax) { + growarray(); + } + + x[Nlocal] = x_in; + y[Nlocal] = y_in; + z[Nlocal] = z_in; + vx[Nlocal] = vx_in; + vy[Nlocal] = vy_in; + vz[Nlocal] = vz_in; + + Nlocal++; +} + +/* place atoms in the same bin consecutive in memory */ +void sortAtoms(Neighbor *neighbor) +{ + binatoms(neighbor); + int* binpos = neighbor->bincount; + int* bins = neighbor->bins; + + int mbins = neighbor->mbins; + int atoms_per_bin = neighbor->atoms_per_bin; + + for(int i=1; i0?binpos[mybin-1]:0; + int count = binpos[mybin] - start; + + for(int k=0; knx * param->ny * param->nz; + Nlocal = 0; + + /* determine loop bounds of lattice subsection that overlaps my sub-box + insure loop bounds do not exceed nx,ny,nz */ + double alat = pow((4.0 / param->rho), (1.0 / 3.0)); + int ilo = (int) xlo / (0.5 * alat) - 1; + int ihi = (int) xhi / (0.5 * alat) + 1; + int jlo = (int) ylo / (0.5 * alat) - 1; + int jhi = (int) yhi / (0.5 * alat) + 1; + int klo = (int) zlo / (0.5 * alat) - 1; + int khi = (int) zhi / (0.5 * alat) + 1; + + ilo = MAX(ilo, 0); + ihi = MIN(ihi, 2 * param->nx - 1); + jlo = MAX(jlo, 0); + jhi = MIN(jhi, 2 * param->ny - 1); + klo = MAX(klo, 0); + khi = MIN(khi, 2 * param->nz - 1); + + /* each proc generates positions and velocities of atoms on fcc sublattice + that overlaps its box, only store atoms that fall in my box + use atom # (generated from lattice coords) as unique seed to generate a + unique velocity exercise RNG between calls to avoid correlations in adjacent atoms */ + + double xtmp, ytmp, ztmp, vx, vy, vz; + int i, j, k, m, n; + int sx = 0; int sy = 0; int sz = 0; + int ox = 0; int oy = 0; int oz = 0; + int subboxdim = 8; + + while(oz * subboxdim <= khi) { + + k = oz * subboxdim + sz; + j = oy * subboxdim + sy; + i = ox * subboxdim + sx; + + if(((i + j + k) % 2 == 0) && + (i >= ilo) && (i <= ihi) && + (j >= jlo) && (j <= jhi) && + (k >= klo) && (k <= khi)) { + + xtmp = 0.5 * alat * i; + ytmp = 0.5 * alat * j; + ztmp = 0.5 * alat * k; + + if( xtmp >= xlo && xtmp < xhi && + ytmp >= ylo && ytmp < yhi && + ztmp >= zlo && ztmp < zhi ) { + + n = k * (2 * param->ny) * (2 * param->nx) + j * (2 * param->nx) + i + 1; + + for(m = 0; m < 5; m++) { + myrandom(&n); + } + vx = myrandom(&n); + + for(m = 0; m < 5; m++){ + myrandom(&n); + } + vy = myrandom(&n); + + for(m = 0; m < 5; m++) { + myrandom(&n); + } + vz = myrandom(&n); + + addatom(xtmp, ytmp, ztmp, vx, vy, vz); + } + } + + sx++; + + if(sx == subboxdim) { + sx = 0; + sy++; + } + + if(sy == subboxdim) { + sy = 0; + sz++; + } + + if(sz == subboxdim) { + sz = 0; + ox++; + } + + if(ox * subboxdim > ihi) { + ox = 0; + oy++; + } + + if(oy * subboxdim > jhi) { + oy = 0; + oz++; + } + } +} + +void thermoSetup(Parameter *param, Thermo *thermo) +{ + int maxstat = param->ntimes / param->nstat + 2; + + thermo->steparr = (int*) malloc(maxstat * sizeof(int)); + thermo->tmparr = (double*) malloc(maxstat * sizeof(double)); + thermo->engarr = (double*) malloc(maxstat * sizeof(double)); + thermo->prsarr = (double*) malloc(maxstat * sizeof(double)); + + thermo->mvv2e = 1.0; + thermo->dof_boltz = (Natoms * 3 - 3); + thermo->t_scale = thermo->mvv2e / thermo->dof_boltz; + thermo->p_scale = 1.0 / 3 / xprd / yprd / zprd; + thermo->e_scale = 0.5; + + eng_vdwl = 0.0; + virial = 0.0; +} + + +void thermoCompute(int iflag, Parameter *param, Thermo *thermo) +{ + double t, eng, p; + + if(iflag > 0 && iflag % param->nstat){ + return; + } + + if(iflag == -1 && param->nstat > 0 && param->ntimes % param->nstat == 0) + { + return; + } + + t = 0.0; + + for(int i = 0; i < Nlocal; i++) { + t += (vx[i] * vx[i] + vy[i] * vy[i] + vz[i] * vz[i]) * param->mass; + } + + t = t * thermo->t_scale; + eng = eng_vdwl * thermo->e_scale / Natoms; + p = (t * thermo->dof_boltz) * thermo->p_scale; + + int istep = iflag; + + if(iflag == -1){ + istep = param->ntimes; + } + + if(iflag == 0){ + thermo->mstat = 0; + } + + thermo->steparr[thermo->mstat] = istep; + thermo->tmparr[thermo->mstat] = t; + thermo->engarr[thermo->mstat] = eng; + thermo->prsarr[thermo->mstat] = p; + + thermo->mstat++; + fprintf(stdout, "%i %e %e %e\n", istep, t, eng, p); +} + + +void initialIntegrate(Parameter *param) +{ + for(int i = 0; i < Nlocal; i++) { + vx[i] += param->dtforce * fx[i]; + vy[i] += param->dtforce * fy[i]; + vz[i] += param->dtforce * fz[i]; + x[i] += param->dt * vx[i]; + y[i] += param->dt * vy[i]; + z[i] += param->dt * vz[i]; + } +} + +void finalIntegrate(Parameter *param) +{ + for(int i = 0; i < Nlocal; i++) { + vx[i] += param->dtforce * fx[i]; + vy[i] += param->dtforce * fy[i]; + vz[i] += param->dtforce * fz[i]; + } +} + +void computeForce(Neighbor *neighbor, Parameter *param) +{ + int* neighs; + double cutforcesq = param->cutforce * param->cutforce; + double sigma6 = param->sigma6; + double epsilon = param->epsilon; + double t_eng_vdwl = 0.0; + double t_virial = 0.0; + + for(int i = 0; i < Nlocal; i++) { + fx[i] = 0.0; + fy[i] = 0.0; + fz[i] = 0.0; + } + + // loop over all neighbors of my atoms + // store force on atom i + for(int i = 0; i < Nlocal; i++) { + neighs = &neighbor->neighbors[i * neighbor->maxneighs]; + int numneighs = neighbor->numneigh[i]; + double xtmp = x[i]; + double ytmp = y[i]; + double ztmp = z[i]; + + double fix = 0; + double fiy = 0; + double fiz = 0; + + for(int k = 0; k < numneighs; k++) { + int j = neighs[k]; + double delx = xtmp - x[j]; + double dely = ytmp - y[j]; + double delz = ztmp - z[j]; + double rsq = delx * delx + dely * dely + delz * delz; + + if(rsq < cutforcesq) { + double sr2 = 1.0 / rsq; + double sr6 = sr2 * sr2 * sr2 * sigma6; + double force = 48.0 * sr6 * (sr6 - 0.5) * sr2 * epsilon; + + fix += delx * force; + fiy += dely * force; + fiz += delz * force; + + t_eng_vdwl += sr6 * (sr6 - 1.0) * epsilon; + t_virial += (delx * delx + dely * dely + delz * delz) * force; + } + } + + fx[i] += fix; + fy[i] += fiy; + fz[i] += fiz; + } + + eng_vdwl += (4.0 * t_eng_vdwl); + virial += (0.5 * t_virial); +} + +void printState() +{ + printf("Atom counts: Natoms=%d Nlocal=%d Nghost=%d Nmax=%d\n", + Natoms, Nlocal, Nghost, Nmax); +} + +int main (int argc, char** argv) +{ + Neighbor neighbor; + Parameter param; + Thermo thermo; + + init(&neighbor, ¶m); + printState(); + setup(&neighbor, ¶m); + create_atoms(¶m); + printState(); + thermoSetup(¶m, &thermo); + processBorders(neighbor.cutneigh); + buildNeighborlist(&neighbor); + printState(); + exit(EXIT_SUCCESS); + + for(int n = 0; n < param.ntimes; n++) { + + initialIntegrate(¶m); + + if(!((n + 1) % neighbor.every)) { + processBorders(neighbor.cutneigh); + sortAtoms(&neighbor); + buildNeighborlist(&neighbor); + } + + computeForce(&neighbor, ¶m); + finalIntegrate(¶m); + + if(param.nstat) { + thermoCompute(n + 1, ¶m, &thermo); + } + } + + return EXIT_SUCCESS; +} diff --git a/src/timing.c b/src/timing.c new file mode 100644 index 0000000..2daf260 --- /dev/null +++ b/src/timing.c @@ -0,0 +1,48 @@ +/* + * ======================================================================================= + * + * Author: Jan Eitzinger (je), jan.eitzinger@fau.de + * Copyright (c) 2019 RRZE, University Erlangen-Nuremberg + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * ======================================================================================= + */ + +#include +#include + +double getTimeStamp() +{ + struct timespec ts; + clock_gettime(CLOCK_MONOTONIC, &ts); + return (double)ts.tv_sec + (double)ts.tv_nsec * 1.e-9; +} + +double getTimeResolution() +{ + struct timespec ts; + clock_getres(CLOCK_MONOTONIC, &ts); + return (double)ts.tv_sec + (double)ts.tv_nsec * 1.e-9; +} + +double getTimeStamp_() +{ + return getTimeStamp(); +}