diff --git a/gromacs/cuda/force_lj.cu b/gromacs/cuda/force_lj.cu index 75d6929..1dcbde5 100644 --- a/gromacs/cuda/force_lj.cu +++ b/gromacs/cuda/force_lj.cu @@ -56,8 +56,8 @@ void initDevice(Atom *atom, Neighbor *neighbor) { cuda_PBCz = (int *) allocateGPU(atom->Nclusters_max * sizeof(int)); cuda_numneigh = (int *) allocateGPU(atom->Nclusters_max * sizeof(int)); cuda_neighbors = (int *) allocateGPU(atom->Nclusters_max * neighbor->maxneighs * sizeof(int)); - natoms = (int *) malloc(atom->Nclusters_max); - ngatoms = (int *) malloc(atom->Nclusters_max); + natoms = (int *) malloc(atom->Nclusters_max * sizeof(int)); + ngatoms = (int *) malloc(atom->Nclusters_max * sizeof(int)); isReneighboured = 1; } diff --git a/gromacs/main.c b/gromacs/main.c index e724348..93be03b 100644 --- a/gromacs/main.c +++ b/gromacs/main.c @@ -65,9 +65,9 @@ double setup(Parameter *param, Eam *eam, Atom *atom, Neighbor *neighbor, Stats * buildClusters(atom); defineJClusters(atom); setupPbc(atom, param); - initDevice(atom, neighbor); binClusters(atom); buildNeighbor(atom, neighbor); + initDevice(atom, neighbor); E = getTimeStamp(); return E-S; } diff --git a/include_NVCC.mk b/include_NVCC.mk index 8a7fed7..de7a370 100644 --- a/include_NVCC.mk +++ b/include_NVCC.mk @@ -10,7 +10,7 @@ ANSI_CFLAGS += -Wextra # A100 + Native CFLAGS = -O3 -arch=sm_80 -march=native -ffast-math -funroll-loops --forward-unknown-to-host-compiler # -fopenmp # A40 + Native -#CFLAGS = -O3 -arch=sm_80 -march=native -ffast-math -funroll-loops --forward-unknown-to-host-compiler # -fopenmp +#CFLAGS = -O3 -arch=sm_86 -march=native -ffast-math -funroll-loops --forward-unknown-to-host-compiler # -fopenmp # Cascade Lake #CFLAGS = -O3 -march=cascadelake -ffast-math -funroll-loops --forward-unknown-to-host-compiler # -fopenmp # For GROMACS kernels, we need at least sm_61 due to atomicAdd with doubles