diff --git a/include_CLANG.mk b/include_CLANG.mk index 4a862c8..68e0076 100644 --- a/include_CLANG.mk +++ b/include_CLANG.mk @@ -12,6 +12,6 @@ ASFLAGS = -masm=intel CXXFLAGS = $(CFLAGS) FCFLAGS = LFLAGS = -DEFINES = -D_GNU_SOURCE -DALIGNMENT=64 +DEFINES = -D_GNU_SOURCE -DALIGNMENT=64 -DPRECISION=2 INCLUDES = LIBS = -lomp diff --git a/include_ICC.mk b/include_ICC.mk index c501612..25207af 100644 --- a/include_ICC.mk +++ b/include_ICC.mk @@ -2,18 +2,19 @@ CC = icc CXX = icpc LINKER = $(CC) -PROFILE = #-g -pg -OPTS = -Ofast -xCORE-AVX512 -qopt-zmm-usage=high $(PROFILE) -#OPTS = -O3 -xCORE-AVX2 $(PROFILE) -#OPTS = -O3 -xAVX $(PROFILE) -#OPTS = -O3 -xSSE4.2 $(PROFILE) -#OPTS = -O3 -no-vec $(PROFILE) -#OPTS = -Ofast -xHost $(PROFILE) -CFLAGS = $(PROFILE) -restrict $(OPTS) +OPENMP = #-qopenmp +PROFILE = #-profile-functions -g -pg +# OPTS = -fast -xCORE-AVX512 -qopt-zmm-usage=high $(PROFILE) +#OPTS = -fast -xCORE-AVX2 $(PROFILE) +#OPTS = -fast -xAVX $(PROFILE) +#OPTS = -fast -xSSE4.2 $(PROFILE) +#OPTS = -fast -no-vec $(PROFILE) +OPTS = -fast -xHost $(PROFILE) +CFLAGS = $(PROFILE) -restrict $(OPENMP) $(OPTS) CXXFLAGS = $(CFLAGS) ASFLAGS = -masm=intel FCFLAGS = -LFLAGS = $(PROFILE) $(OPTS) +LFLAGS = $(PROFILE) $(OPTS) $(OPENMP) DEFINES = -D_GNU_SOURCE -DALIGNMENT=64 # -DLIKWID_PERFMON -DPRECISION=1 -INCLUDES = -LIBS = +INCLUDES = #$(LIKWID_INC) +LIBS = #$(LIKWID_LIB) -llikwid diff --git a/src/atom.c b/src/atom.c index 7573351..313ffdc 100644 --- a/src/atom.c +++ b/src/atom.c @@ -43,12 +43,12 @@ void initAtom(Atom *atom) void createAtom(Atom *atom, Parameter *param) { - double xlo = 0.0; double xhi = param->xprd; - double ylo = 0.0; double yhi = param->yprd; - double zlo = 0.0; double zhi = param->zprd; + MD_FLOAT xlo = 0.0; MD_FLOAT xhi = param->xprd; + MD_FLOAT ylo = 0.0; MD_FLOAT yhi = param->yprd; + MD_FLOAT zlo = 0.0; MD_FLOAT zhi = param->zprd; atom->Natoms = 4 * param->nx * param->ny * param->nz; atom->Nlocal = 0; - double alat = pow((4.0 / param->rho), (1.0 / 3.0)); + MD_FLOAT alat = pow((4.0 / param->rho), (1.0 / 3.0)); int ilo = (int) (xlo / (0.5 * alat) - 1); int ihi = (int) (xhi / (0.5 * alat) + 1); int jlo = (int) (ylo / (0.5 * alat) - 1); @@ -63,7 +63,7 @@ void createAtom(Atom *atom, Parameter *param) klo = MAX(klo, 0); khi = MIN(khi, 2 * param->nz - 1); - double xtmp, ytmp, ztmp, vxtmp, vytmp, vztmp; + MD_FLOAT xtmp, ytmp, ztmp, vxtmp, vytmp, vztmp; int i, j, k, m, n; int sx = 0; int sy = 0; int sz = 0; int ox = 0; int oy = 0; int oz = 0; @@ -136,15 +136,15 @@ void growAtom(Atom *atom) int nold = atom->Nmax; atom->Nmax += DELTA; - atom->x = (double*) reallocate(atom->x, ALIGNMENT, atom->Nmax * sizeof(double), nold * sizeof(double)); - atom->y = (double*) reallocate(atom->y, ALIGNMENT, atom->Nmax * sizeof(double), nold * sizeof(double)); - atom->z = (double*) reallocate(atom->z, ALIGNMENT, atom->Nmax * sizeof(double), nold * sizeof(double)); - atom->vx = (double*) reallocate(atom->vx, ALIGNMENT, atom->Nmax * sizeof(double), nold * sizeof(double)); - atom->vy = (double*) reallocate(atom->vy, ALIGNMENT, atom->Nmax * sizeof(double), nold * sizeof(double)); - atom->vz = (double*) reallocate(atom->vz, ALIGNMENT, atom->Nmax * sizeof(double), nold * sizeof(double)); - atom->fx = (double*) reallocate(atom->fx, ALIGNMENT, atom->Nmax * sizeof(double), nold * sizeof(double)); - atom->fy = (double*) reallocate(atom->fy, ALIGNMENT, atom->Nmax * sizeof(double), nold * sizeof(double)); - atom->fz = (double*) reallocate(atom->fz, ALIGNMENT, atom->Nmax * sizeof(double), nold * sizeof(double)); + atom->x = (MD_FLOAT*) reallocate(atom->x, ALIGNMENT, atom->Nmax * sizeof(MD_FLOAT), nold * sizeof(MD_FLOAT)); + atom->y = (MD_FLOAT*) reallocate(atom->y, ALIGNMENT, atom->Nmax * sizeof(MD_FLOAT), nold * sizeof(MD_FLOAT)); + atom->z = (MD_FLOAT*) reallocate(atom->z, ALIGNMENT, atom->Nmax * sizeof(MD_FLOAT), nold * sizeof(MD_FLOAT)); + atom->vx = (MD_FLOAT*) reallocate(atom->vx, ALIGNMENT, atom->Nmax * sizeof(MD_FLOAT), nold * sizeof(MD_FLOAT)); + atom->vy = (MD_FLOAT*) reallocate(atom->vy, ALIGNMENT, atom->Nmax * sizeof(MD_FLOAT), nold * sizeof(MD_FLOAT)); + atom->vz = (MD_FLOAT*) reallocate(atom->vz, ALIGNMENT, atom->Nmax * sizeof(MD_FLOAT), nold * sizeof(MD_FLOAT)); + atom->fx = (MD_FLOAT*) reallocate(atom->fx, ALIGNMENT, atom->Nmax * sizeof(MD_FLOAT), nold * sizeof(MD_FLOAT)); + atom->fy = (MD_FLOAT*) reallocate(atom->fy, ALIGNMENT, atom->Nmax * sizeof(MD_FLOAT), nold * sizeof(MD_FLOAT)); + atom->fz = (MD_FLOAT*) reallocate(atom->fz, ALIGNMENT, atom->Nmax * sizeof(MD_FLOAT), nold * sizeof(MD_FLOAT)); } diff --git a/src/includes/atom.h b/src/includes/atom.h index a3ed00b..97e0429 100644 --- a/src/includes/atom.h +++ b/src/includes/atom.h @@ -27,9 +27,9 @@ typedef struct { int Natoms, Nlocal, Nghost, Nmax; - double *x, *y, *z; - double *vx, *vy, *vz; - double *fx, *fy, *fz; + MD_FLOAT *x, *y, *z; + MD_FLOAT *vx, *vy, *vz; + MD_FLOAT *fx, *fy, *fz; } Atom; extern void initAtom(Atom*); diff --git a/src/includes/likwid-marker.h b/src/includes/likwid-marker.h new file mode 100644 index 0000000..ebf8b89 --- /dev/null +++ b/src/includes/likwid-marker.h @@ -0,0 +1,170 @@ +/* + * ======================================================================================= + * + * Filename: likwid-marker.h + * + * Description: Header File of likwid Marker API + * + * Version: + * Released: + * + * Authors: Thomas Gruber (tg), thomas.roehl@googlemail.com + * + * Project: likwid + * + * Copyright (C) 2016 RRZE, University Erlangen-Nuremberg + * + * This program is free software: you can redistribute it and/or modify it under + * the terms of the GNU General Public License as published by the Free Software + * Foundation, either version 3 of the License, or (at your option) any later + * version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A + * PARTICULAR PURPOSE. See the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see . + * + * ======================================================================================= + */ +#ifndef LIKWID_MARKER_H +#define LIKWID_MARKER_H + + +/** \addtogroup MarkerAPI Marker API module +* @{ +*/ +/*! +\def LIKWID_MARKER_INIT +Shortcut for likwid_markerInit() if compiled with -DLIKWID_PERFMON. Otherwise no operation is performed +*/ +/*! +\def LIKWID_MARKER_THREADINIT +Shortcut for likwid_markerThreadInit() if compiled with -DLIKWID_PERFMON. Otherwise no operation is performed +*/ +/*! +\def LIKWID_MARKER_REGISTER(regionTag) +Shortcut for likwid_markerRegisterRegion() with \a regionTag if compiled with -DLIKWID_PERFMON. Otherwise no operation is performed +*/ +/*! +\def LIKWID_MARKER_START(regionTag) +Shortcut for likwid_markerStartRegion() with \a regionTag if compiled with -DLIKWID_PERFMON. Otherwise no operation is performed +*/ +/*! +\def LIKWID_MARKER_STOP(regionTag) +Shortcut for likwid_markerStopRegion() with \a regionTag if compiled with -DLIKWID_PERFMON. Otherwise no operation is performed +*/ +/*! +\def LIKWID_MARKER_GET(regionTag, nevents, events, time, count) +Shortcut for likwid_markerGetResults() for \a regionTag if compiled with -DLIKWID_PERFMON. Otherwise no operation is performed +*/ +/*! +\def LIKWID_MARKER_SWITCH +Shortcut for likwid_markerNextGroup() if compiled with -DLIKWID_PERFMON. Otherwise no operation is performed +*/ +/*! +\def LIKWID_MARKER_RESET(regionTag) +Shortcut for likwid_markerResetRegion() if compiled with -DLIKWID_PERFMON. Otherwise no operation is performed +*/ +/*! +\def LIKWID_MARKER_CLOSE +Shortcut for likwid_markerClose() if compiled with -DLIKWID_PERFMON. Otherwise no operation is performed +*/ +/** @}*/ + +#ifdef LIKWID_PERFMON +#include +#define LIKWID_MARKER_INIT likwid_markerInit() +#define LIKWID_MARKER_THREADINIT likwid_markerThreadInit() +#define LIKWID_MARKER_SWITCH likwid_markerNextGroup() +#define LIKWID_MARKER_REGISTER(regionTag) likwid_markerRegisterRegion(regionTag) +#define LIKWID_MARKER_START(regionTag) likwid_markerStartRegion(regionTag) +#define LIKWID_MARKER_STOP(regionTag) likwid_markerStopRegion(regionTag) +#define LIKWID_MARKER_CLOSE likwid_markerClose() +#define LIKWID_MARKER_RESET(regionTag) likwid_markerResetRegion(regionTag) +#define LIKWID_MARKER_GET(regionTag, nevents, events, time, count) likwid_markerGetRegion(regionTag, nevents, events, time, count) +#else /* LIKWID_PERFMON */ +#define LIKWID_MARKER_INIT +#define LIKWID_MARKER_THREADINIT +#define LIKWID_MARKER_SWITCH +#define LIKWID_MARKER_REGISTER(regionTag) +#define LIKWID_MARKER_START(regionTag) +#define LIKWID_MARKER_STOP(regionTag) +#define LIKWID_MARKER_CLOSE +#define LIKWID_MARKER_GET(regionTag, nevents, events, time, count) +#define LIKWID_MARKER_RESET(regionTag) +#endif /* LIKWID_PERFMON */ + + +/** \addtogroup NvMarkerAPI NvMarker API module (MarkerAPI for Nvidia GPUs) +* @{ +*/ +/*! +\def LIKWID_NVMARKER_INIT +Shortcut for likwid_gpuMarkerInit() if compiled with -DLIKWID_PERFMON. Otherwise no operation is performed +*/ +/*! +\def LIKWID_NVMARKER_THREADINIT +Shortcut for likwid_gpuMarkerThreadInit() if compiled with -DLIKWID_PERFMON. Otherwise no operation is performed +*/ +/*! +\def LIKWID_NVMARKER_REGISTER(regionTag) +Shortcut for likwid_gpuMarkerRegisterRegion() with \a regionTag if compiled with -DLIKWID_NVMON. Otherwise no operation is performed +*/ +/*! +\def LIKWID_NVMARKER_START(regionTag) +Shortcut for likwid_gpuMarkerStartRegion() with \a regionTag if compiled with -DLIKWID_NVMON. Otherwise no operation is performed +*/ +/*! +\def LIKWID_NVMARKER_STOP(regionTag) +Shortcut for likwid_gpuMarkerStopRegion() with \a regionTag if compiled with -DLIKWID_NVMON. Otherwise no operation is performed +*/ +/*! +\def LIKWID_NVMARKER_GET(regionTag, ngpus, nevents, events, time, count) +Shortcut for likwid_gpuMarkerGetRegion() for \a regionTag if compiled with -DLIKWID_NVMON. Otherwise no operation is performed +*/ +/*! +\def LIKWID_NVMARKER_SWITCH +Shortcut for likwid_gpuMarkerNextGroup() if compiled with -DLIKWID_NVMON. Otherwise no operation is performed +*/ +/*! +\def LIKWID_NVMARKER_RESET(regionTag) +Shortcut for likwid_gpuMarkerResetRegion() if compiled with -DLIKWID_NVMON. Otherwise no operation is performed +*/ +/*! +\def LIKWID_NVMARKER_CLOSE +Shortcut for likwid_gpuMarkerClose() if compiled with -DLIKWID_NVMON. Otherwise no operation is performed +*/ +/** @}*/ + +#ifdef LIKWID_NVMON +#ifndef LIKWID_WITH_NVMON +#define LIKWID_WITH_NVMON +#endif +#include +#define LIKWID_NVMARKER_INIT likwid_gpuMarkerInit() +#define LIKWID_NVMARKER_THREADINIT likwid_gpuMarkerThreadInit() +#define LIKWID_NVMARKER_SWITCH likwid_gpuMarkerNextGroup() +#define LIKWID_NVMARKER_REGISTER(regionTag) likwid_gpuMarkerRegisterRegion(regionTag) +#define LIKWID_NVMARKER_START(regionTag) likwid_gpuMarkerStartRegion(regionTag) +#define LIKWID_NVMARKER_STOP(regionTag) likwid_gpuMarkerStopRegion(regionTag) +#define LIKWID_NVMARKER_CLOSE likwid_gpuMarkerClose() +#define LIKWID_NVMARKER_RESET(regionTag) likwid_gpuMarkerResetRegion(regionTag) +#define LIKWID_NVMARKER_GET(regionTag, ngpus, nevents, events, time, count) \ + likwid_gpuMarkerGetRegion(regionTag, ngpus, nevents, events, time, count) +#else /* LIKWID_NVMON */ +#define LIKWID_NVMARKER_INIT +#define LIKWID_NVMARKER_THREADINIT +#define LIKWID_NVMARKER_SWITCH +#define LIKWID_NVMARKER_REGISTER(regionTag) +#define LIKWID_NVMARKER_START(regionTag) +#define LIKWID_NVMARKER_STOP(regionTag) +#define LIKWID_NVMARKER_CLOSE +#define LIKWID_NVMARKER_GET(regionTag, nevents, events, time, count) +#define LIKWID_NVMARKER_RESET(regionTag) +#endif /* LIKWID_NVMON */ + + + +#endif /* LIKWID_MARKER_H */ diff --git a/src/includes/neighbor.h b/src/includes/neighbor.h index a722b7d..c9bad95 100644 --- a/src/includes/neighbor.h +++ b/src/includes/neighbor.h @@ -26,7 +26,6 @@ #ifndef __NEIGHBOR_H_ #define __NEIGHBOR_H_ typedef struct { - /* double cutneigh; // neighbor cutoff */ int every; int ncalls; int* neighbors; diff --git a/src/includes/parameter.h b/src/includes/parameter.h index 7a1574c..87da5f8 100644 --- a/src/includes/parameter.h +++ b/src/includes/parameter.h @@ -23,20 +23,29 @@ #ifndef __PARAMETER_H_ #define __PARAMETER_H_ +#ifndef PRECISION +#define PRECISION 2 +#endif +#if PRECISION == 1 +#define MD_FLOAT float +#else +#define MD_FLOAT double +#endif + typedef struct { - double epsilon; - double sigma6; - double temp; - double rho; - double mass; + MD_FLOAT epsilon; + MD_FLOAT sigma6; + MD_FLOAT temp; + MD_FLOAT rho; + MD_FLOAT mass; int ntimes; int nstat; int every; - double dt; - double dtforce; - double cutforce; - double cutneigh; + MD_FLOAT dt; + MD_FLOAT dtforce; + MD_FLOAT cutforce; + MD_FLOAT cutneigh; int nx, ny, nz; - double xprd, yprd, zprd; + MD_FLOAT xprd, yprd, zprd; } Parameter; #endif diff --git a/src/main.c b/src/main.c index b5200f1..7fb68d1 100644 --- a/src/main.c +++ b/src/main.c @@ -28,6 +28,8 @@ #include #include +#include + #include #include #include @@ -100,11 +102,13 @@ double reneighbour( double S, E; S = getTimeStamp(); + LIKWID_MARKER_START("reneighbour"); updateAtomsPbc(atom, param); setupPbc(atom, param); updatePbc(atom, param); /* sortAtom(); */ buildNeighbor(atom, neighbor); + LIKWID_MARKER_STOP("reneighbour"); E = getTimeStamp(); return E-S; @@ -112,9 +116,9 @@ double reneighbour( void initialIntegrate(Parameter *param, Atom *atom) { - double* x = atom->x; double* y = atom->y; double* z = atom->z; - double* fx = atom->fx; double* fy = atom->fy; double* fz = atom->fz; - double* vx = atom->vx; double* vy = atom->vy; double* vz = atom->vz; + MD_FLOAT* x = atom->x; MD_FLOAT* y = atom->y; MD_FLOAT* z = atom->z; + MD_FLOAT* fx = atom->fx; MD_FLOAT* fy = atom->fy; MD_FLOAT* fz = atom->fz; + MD_FLOAT* vx = atom->vx; MD_FLOAT* vy = atom->vy; MD_FLOAT* vz = atom->vz; for(int i = 0; i < atom->Nlocal; i++) { vx[i] += param->dtforce * fx[i]; @@ -128,8 +132,8 @@ void initialIntegrate(Parameter *param, Atom *atom) void finalIntegrate(Parameter *param, Atom *atom) { - double* fx = atom->fx; double* fy = atom->fy; double* fz = atom->fz; - double* vx = atom->vx; double* vy = atom->vy; double* vz = atom->vz; + MD_FLOAT* fx = atom->fx; MD_FLOAT* fy = atom->fy; MD_FLOAT* fz = atom->fz; + MD_FLOAT* vx = atom->vx; MD_FLOAT* vy = atom->vy; MD_FLOAT* vz = atom->vz; for(int i = 0; i < atom->Nlocal; i++) { vx[i] += param->dtforce * fx[i]; @@ -142,12 +146,12 @@ double computeForce(Parameter *param, Atom *atom, Neighbor *neighbor) { int Nlocal = atom->Nlocal; int* neighs; - double cutforcesq = param->cutforce * param->cutforce; - double sigma6 = param->sigma6; - double epsilon = param->epsilon; - double* x = atom->x; double* y = atom->y; double* z = atom->z; - double* fx = atom->fx; double* fy = atom->fy; double* fz = atom->fz; - double S, E; + MD_FLOAT cutforcesq = param->cutforce * param->cutforce; + MD_FLOAT sigma6 = param->sigma6; + MD_FLOAT epsilon = param->epsilon; + MD_FLOAT* x = atom->x; MD_FLOAT* y = atom->y; MD_FLOAT* z = atom->z; + MD_FLOAT* fx = atom->fx; MD_FLOAT* fy = atom->fy; MD_FLOAT* fz = atom->fz; + MD_FLOAT S, E; S = getTimeStamp(); for(int i = 0; i < Nlocal; i++) { @@ -156,29 +160,31 @@ double computeForce(Parameter *param, Atom *atom, Neighbor *neighbor) fz[i] = 0.0; } + LIKWID_MARKER_START("force"); + #pragma omp parallel for for(int i = 0; i < Nlocal; i++) { neighs = &neighbor->neighbors[i * neighbor->maxneighs]; int numneighs = neighbor->numneigh[i]; - double xtmp = x[i]; - double ytmp = y[i]; - double ztmp = z[i]; + MD_FLOAT xtmp = x[i]; + MD_FLOAT ytmp = y[i]; + MD_FLOAT ztmp = z[i]; - double fix = 0; - double fiy = 0; - double fiz = 0; + MD_FLOAT fix = 0; + MD_FLOAT fiy = 0; + MD_FLOAT fiz = 0; for(int k = 0; k < numneighs; k++) { int j = neighs[k]; - double delx = xtmp - x[j]; - double dely = ytmp - y[j]; - double delz = ztmp - z[j]; - double rsq = delx * delx + dely * dely + delz * delz; + MD_FLOAT delx = xtmp - x[j]; + MD_FLOAT dely = ytmp - y[j]; + MD_FLOAT delz = ztmp - z[j]; + MD_FLOAT rsq = delx * delx + dely * dely + delz * delz; if(rsq < cutforcesq) { - double sr2 = 1.0 / rsq; - double sr6 = sr2 * sr2 * sr2 * sigma6; - double force = 48.0 * sr6 * (sr6 - 0.5) * sr2 * epsilon; + MD_FLOAT sr2 = 1.0 / rsq; + MD_FLOAT sr6 = sr2 * sr2 * sr2 * sigma6; + MD_FLOAT force = 48.0 * sr6 * (sr6 - 0.5) * sr2 * epsilon; fix += delx * force; fiy += dely * force; fiz += delz * force; @@ -189,6 +195,7 @@ double computeForce(Parameter *param, Atom *atom, Neighbor *neighbor) fy[i] += fiy; fz[i] += fiz; } + LIKWID_MARKER_STOP("force"); E = getTimeStamp(); return E-S; @@ -214,6 +221,13 @@ int main (int argc, char** argv) Neighbor neighbor; Parameter param; + LIKWID_MARKER_INIT; +#pragma omp parallel + { + LIKWID_MARKER_REGISTER("force"); + LIKWID_MARKER_REGISTER("reneighbour"); + LIKWID_MARKER_REGISTER("pbc"); + } init(¶m); for(int i = 0; i < argc; i++) @@ -279,6 +293,12 @@ int main (int argc, char** argv) computeThermo(-1, ¶m, &atom); printf(HLINE); +#if PRECISION == 1 + printf("Using single precision floating point.\n"); +#else + printf("Using double precision floating point.\n"); +#endif + printf(HLINE); printf("System: %d atoms %d ghost atoms, Steps: %d\n", atom.Natoms, atom.Nghost, param.ntimes); printf("TOTAL %.2fs FORCE %.2fs NEIGH %.2fs REST %.2fs\n", timer[TOTAL], timer[FORCE], timer[NEIGH], timer[TOTAL]-timer[FORCE]-timer[NEIGH]); @@ -286,5 +306,6 @@ int main (int argc, char** argv) printf("Performance: %.2f million atom updates per second\n", 1e-6 * (double) atom.Natoms * param.ntimes / timer[TOTAL]); + LIKWID_MARKER_CLOSE; return EXIT_SUCCESS; } diff --git a/src/neighbor.c b/src/neighbor.c index d061431..7460efb 100644 --- a/src/neighbor.c +++ b/src/neighbor.c @@ -31,8 +31,8 @@ #define SMALL 1.0e-6 #define FACTOR 0.999 -static double xprd, yprd, zprd; -static double bininvx, bininvy, bininvz; +static MD_FLOAT xprd, yprd, zprd; +static MD_FLOAT bininvx, bininvy, bininvz; static int mbinxlo, mbinylo, mbinzlo; static int nbinx, nbiny, nbinz; static int mbinx, mbiny, mbinz; // n bins in x, y, z @@ -40,21 +40,21 @@ static int *bincount; static int *bins; static int mbins; //total number of bins static int atoms_per_bin; // max atoms per bin -static double cutneigh; -static double cutneighsq; // neighbor cutoff squared +static MD_FLOAT cutneigh; +static MD_FLOAT cutneighsq; // neighbor cutoff squared static int nmax; -static int nstencil; // # of bins in stencil -static int* stencil; // stencil list of bin offsets -static double binsizex, binsizey, binsizez; +static int nstencil; // # of bins in stencil +static int* stencil; // stencil list of bin offsets +static MD_FLOAT binsizex, binsizey, binsizez; -static int coord2bin(double, double , double); -static double bindist(int, int, int); +static int coord2bin(MD_FLOAT, MD_FLOAT , MD_FLOAT); +static MD_FLOAT bindist(int, int, int); /* exported subroutines */ void initNeighbor(Neighbor *neighbor, Parameter *param) { - double lattice = pow((4.0 / param->rho), (1.0 / 3.0)); - double neighscale = 5.0 / 6.0; + MD_FLOAT lattice = pow((4.0 / param->rho), (1.0 / 3.0)); + MD_FLOAT neighscale = 5.0 / 6.0; xprd = param->nx * lattice; yprd = param->ny * lattice; zprd = param->nz * lattice; @@ -74,12 +74,12 @@ void initNeighbor(Neighbor *neighbor, Parameter *param) void setupNeighbor() { - double coord; + MD_FLOAT coord; int mbinxhi, mbinyhi, mbinzhi; int nextx, nexty, nextz; - double xlo = 0.0; double xhi = xprd; - double ylo = 0.0; double yhi = yprd; - double zlo = 0.0; double zhi = zprd; + MD_FLOAT xlo = 0.0; MD_FLOAT xhi = xprd; + MD_FLOAT ylo = 0.0; MD_FLOAT yhi = yprd; + MD_FLOAT zlo = 0.0; MD_FLOAT zhi = zprd; cutneighsq = cutneigh * cutneigh; binsizex = xprd / nbinx; @@ -184,9 +184,9 @@ void buildNeighbor(Atom *atom, Neighbor *neighbor) /* bin local & ghost atoms */ binatoms(atom); int resize = 1; - double* x = atom->x; - double* y = atom->y; - double* z = atom->z; + MD_FLOAT* x = atom->x; + MD_FLOAT* y = atom->y; + MD_FLOAT* z = atom->z; /* loop over each atom, storing neighbors */ while(resize) { @@ -196,9 +196,9 @@ void buildNeighbor(Atom *atom, Neighbor *neighbor) for(int i = 0; i < atom->Nlocal; i++) { int* neighptr = &(neighbor->neighbors[i * neighbor->maxneighs]); int n = 0; - double xtmp = x[i]; - double ytmp = y[i]; - double ztmp = z[i]; + MD_FLOAT xtmp = x[i]; + MD_FLOAT ytmp = y[i]; + MD_FLOAT ztmp = z[i]; int ibin = coord2bin(xtmp, ytmp, ztmp); for(int k = 0; k < nstencil; k++) { @@ -212,10 +212,10 @@ void buildNeighbor(Atom *atom, Neighbor *neighbor) continue; } - double delx = xtmp - x[j]; - double dely = ytmp - y[j]; - double delz = ztmp - z[j]; - double rsq = delx * delx + dely * dely + delz * delz; + MD_FLOAT delx = xtmp - x[j]; + MD_FLOAT dely = ytmp - y[j]; + MD_FLOAT delz = ztmp - z[j]; + MD_FLOAT rsq = delx * delx + dely * dely + delz * delz; if( rsq <= cutneighsq ) { neighptr[n++] = j; @@ -244,9 +244,9 @@ void buildNeighbor(Atom *atom, Neighbor *neighbor) } /* internal subroutines */ -double bindist(int i, int j, int k) +MD_FLOAT bindist(int i, int j, int k) { - double delx, dely, delz; + MD_FLOAT delx, dely, delz; if(i > 0) { delx = (i - 1) * binsizex; @@ -275,7 +275,7 @@ double bindist(int i, int j, int k) return (delx * delx + dely * dely + delz * delz); } -int coord2bin(double xin, double yin, double zin) +int coord2bin(MD_FLOAT xin, MD_FLOAT yin, MD_FLOAT zin) { int ix, iy, iz; @@ -309,9 +309,9 @@ int coord2bin(double xin, double yin, double zin) void binatoms(Atom *atom) { int nall = atom->Nlocal + atom->Nghost; - double* x = atom->x; - double* y = atom->y; - double* z = atom->z; + MD_FLOAT* x = atom->x; + MD_FLOAT* y = atom->y; + MD_FLOAT* z = atom->z; int resize = 1; while(resize > 0) { diff --git a/src/pbc.c b/src/pbc.c index d1532c6..54ef7f8 100644 --- a/src/pbc.c +++ b/src/pbc.c @@ -48,12 +48,12 @@ void initPbc() void updatePbc(Atom *atom, Parameter *param) { int nlocal = atom->Nlocal; - double* x = atom->x; - double* y = atom->y; - double* z = atom->z; - double xprd = param->xprd; - double yprd = param->yprd; - double zprd = param->zprd; + MD_FLOAT* x = atom->x; + MD_FLOAT* y = atom->y; + MD_FLOAT* z = atom->z; + MD_FLOAT xprd = param->xprd; + MD_FLOAT yprd = param->yprd; + MD_FLOAT zprd = param->zprd; for(int i = 0; i < atom->Nghost; i++) { x[nlocal + i] = x[BorderMap[i]] + PBCx[i] * xprd; @@ -66,12 +66,12 @@ void updatePbc(Atom *atom, Parameter *param) * to periodic boundary conditions */ void updateAtomsPbc(Atom *atom, Parameter *param) { - double* x = atom->x; - double* y = atom->y; - double* z = atom->z; - double xprd = param->xprd; - double yprd = param->yprd; - double zprd = param->zprd; + MD_FLOAT* x = atom->x; + MD_FLOAT* y = atom->y; + MD_FLOAT* z = atom->z; + MD_FLOAT xprd = param->xprd; + MD_FLOAT yprd = param->yprd; + MD_FLOAT zprd = param->zprd; for(int i = 0; i < atom->Nlocal; i++) { @@ -102,11 +102,11 @@ void updateAtomsPbc(Atom *atom, Parameter *param) #define ADDGHOST(dx,dy,dz) Nghost++; BorderMap[Nghost] = i; PBCx[Nghost] = dx; PBCy[Nghost] = dy; PBCz[Nghost] = dz; void setupPbc(Atom *atom, Parameter *param) { - double* x = atom->x; double* y = atom->y; double* z = atom->z; - double xprd = param->xprd; - double yprd = param->yprd; - double zprd = param->zprd; - double Cutneigh = param->cutneigh; + MD_FLOAT* x = atom->x; MD_FLOAT* y = atom->y; MD_FLOAT* z = atom->z; + MD_FLOAT xprd = param->xprd; + MD_FLOAT yprd = param->yprd; + MD_FLOAT zprd = param->zprd; + MD_FLOAT Cutneigh = param->cutneigh; int Nghost = -1; for(int i = 0; i < atom->Nlocal; i++) { diff --git a/src/thermo.c b/src/thermo.c index 327cb99..786d0d4 100644 --- a/src/thermo.c +++ b/src/thermo.c @@ -27,17 +27,17 @@ #include static int *steparr; -static double *tmparr; -static double *engarr; -static double *prsarr; -static double mvv2e; +static MD_FLOAT *tmparr; +static MD_FLOAT *engarr; +static MD_FLOAT *prsarr; +static MD_FLOAT mvv2e; static int dof_boltz; -static double t_scale; -static double p_scale; -static double e_scale; -static double t_act; -static double p_act; -static double e_act; +static MD_FLOAT t_scale; +static MD_FLOAT p_scale; +static MD_FLOAT e_scale; +static MD_FLOAT t_act; +static MD_FLOAT p_act; +static MD_FLOAT e_act; static int mstat; /* exported subroutines */ @@ -46,9 +46,9 @@ void setupThermo(Parameter *param, int natoms) int maxstat = param->ntimes / param->nstat + 2; steparr = (int*) malloc(maxstat * sizeof(int)); - tmparr = (double*) malloc(maxstat * sizeof(double)); - engarr = (double*) malloc(maxstat * sizeof(double)); - prsarr = (double*) malloc(maxstat * sizeof(double)); + tmparr = (MD_FLOAT*) malloc(maxstat * sizeof(MD_FLOAT)); + engarr = (MD_FLOAT*) malloc(maxstat * sizeof(MD_FLOAT)); + prsarr = (MD_FLOAT*) malloc(maxstat * sizeof(MD_FLOAT)); mvv2e = 1.0; dof_boltz = (natoms * 3 - 3); @@ -61,10 +61,10 @@ void setupThermo(Parameter *param, int natoms) void computeThermo(int iflag, Parameter *param, Atom *atom) { - double t = 0.0, p; - double* vx = atom->vx; - double* vy = atom->vy; - double* vz = atom->vz; + MD_FLOAT t = 0.0, p; + MD_FLOAT* vx = atom->vx; + MD_FLOAT* vy = atom->vy; + MD_FLOAT* vz = atom->vz; for(int i = 0; i < atom->Nlocal; i++) { t += (vx[i] * vx[i] + vy[i] * vy[i] + vz[i] * vz[i]) * param->mass; @@ -92,8 +92,8 @@ void computeThermo(int iflag, Parameter *param, Atom *atom) void adjustThermo(Parameter *param, Atom *atom) { /* zero center-of-mass motion */ - double vxtot = 0.0; double vytot = 0.0; double vztot = 0.0; - double* vx = atom->vx; double* vy = atom->vy; double* vz = atom->vz; + MD_FLOAT vxtot = 0.0; MD_FLOAT vytot = 0.0; MD_FLOAT vztot = 0.0; + MD_FLOAT* vx = atom->vx; MD_FLOAT* vy = atom->vy; MD_FLOAT* vz = atom->vz; for(int i = 0; i < atom->Nlocal; i++) { vxtot += vx[i]; @@ -112,14 +112,14 @@ void adjustThermo(Parameter *param, Atom *atom) } t_act = 0; - double t = 0.0; + MD_FLOAT t = 0.0; for(int i = 0; i < atom->Nlocal; i++) { t += (vx[i] * vx[i] + vy[i] * vy[i] + vz[i] * vz[i]) * param->mass; } t *= t_scale; - double factor = sqrt(param->temp / t); + MD_FLOAT factor = sqrt(param->temp / t); for(int i = 0; i < atom->Nlocal; i++) { vx[i] *= factor;