From 6eedf1776e6d960ca9afd76be41a95598d1dc383 Mon Sep 17 00:00:00 2001
From: Rafael Ravedutti
Date: Mon, 14 Nov 2022 18:21:14 +0100
Subject: [PATCH] Small fixes into GROMACS GPU code

Signed-off-by: Rafael Ravedutti
---
 gromacs/cuda/force_lj.cu | 4 ++--
 gromacs/main.c           | 2 +-
 include_NVCC.mk          | 2 +-
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/gromacs/cuda/force_lj.cu b/gromacs/cuda/force_lj.cu
index 75d6929..1dcbde5 100644
--- a/gromacs/cuda/force_lj.cu
+++ b/gromacs/cuda/force_lj.cu
@@ -56,8 +56,8 @@ void initDevice(Atom *atom, Neighbor *neighbor) {
     cuda_PBCz = (int *) allocateGPU(atom->Nclusters_max * sizeof(int));
     cuda_numneigh = (int *) allocateGPU(atom->Nclusters_max * sizeof(int));
     cuda_neighbors = (int *) allocateGPU(atom->Nclusters_max * neighbor->maxneighs * sizeof(int));
-    natoms = (int *) malloc(atom->Nclusters_max);
-    ngatoms = (int *) malloc(atom->Nclusters_max);
+    natoms = (int *) malloc(atom->Nclusters_max * sizeof(int));
+    ngatoms = (int *) malloc(atom->Nclusters_max * sizeof(int));
     isReneighboured = 1;
 }
 
diff --git a/gromacs/main.c b/gromacs/main.c
index e724348..93be03b 100644
--- a/gromacs/main.c
+++ b/gromacs/main.c
@@ -65,9 +65,9 @@ double setup(Parameter *param, Eam *eam, Atom *atom, Neighbor *neighbor, Stats *
     buildClusters(atom);
     defineJClusters(atom);
     setupPbc(atom, param);
-    initDevice(atom, neighbor);
     binClusters(atom);
     buildNeighbor(atom, neighbor);
+    initDevice(atom, neighbor);
     E = getTimeStamp();
     return E-S;
 }
diff --git a/include_NVCC.mk b/include_NVCC.mk
index 8a7fed7..de7a370 100644
--- a/include_NVCC.mk
+++ b/include_NVCC.mk
@@ -10,7 +10,7 @@ ANSI_CFLAGS += -Wextra
 # A100 + Native
 CFLAGS = -O3 -arch=sm_80 -march=native -ffast-math -funroll-loops --forward-unknown-to-host-compiler # -fopenmp
 # A40 + Native
-#CFLAGS = -O3 -arch=sm_80 -march=native -ffast-math -funroll-loops --forward-unknown-to-host-compiler # -fopenmp
+#CFLAGS = -O3 -arch=sm_86 -march=native -ffast-math -funroll-loops --forward-unknown-to-host-compiler # -fopenmp
 # Cascade Lake
 #CFLAGS = -O3 -march=cascadelake -ffast-math -funroll-loops --forward-unknown-to-host-compiler # -fopenmp
 # For GROMACS kernels, we need at least sm_61 due to atomicAdd with doubles