Small fixes to the GROMACS GPU code
Signed-off-by: Rafael Ravedutti <rafaelravedutti@gmail.com>
This commit is contained in:
parent
93188d1383
commit
6eedf1776e
@@ -56,8 +56,8 @@ void initDevice(Atom *atom, Neighbor *neighbor) {
|
||||
cuda_PBCz = (int *) allocateGPU(atom->Nclusters_max * sizeof(int));
|
||||
cuda_numneigh = (int *) allocateGPU(atom->Nclusters_max * sizeof(int));
|
||||
cuda_neighbors = (int *) allocateGPU(atom->Nclusters_max * neighbor->maxneighs * sizeof(int));
|
||||
- natoms = (int *) malloc(atom->Nclusters_max);
|
||||
- ngatoms = (int *) malloc(atom->Nclusters_max);
|
||||
+ natoms = (int *) malloc(atom->Nclusters_max * sizeof(int));
|
||||
+ ngatoms = (int *) malloc(atom->Nclusters_max * sizeof(int));
|
||||
isReneighboured = 1;
|
||||
}
|
||||
|
||||
|
@@ -65,9 +65,9 @@ double setup(Parameter *param, Eam *eam, Atom *atom, Neighbor *neighbor, Stats *
|
||||
buildClusters(atom);
|
||||
defineJClusters(atom);
|
||||
setupPbc(atom, param);
|
||||
- initDevice(atom, neighbor);
|
||||
binClusters(atom);
|
||||
buildNeighbor(atom, neighbor);
|
||||
+ initDevice(atom, neighbor);
|
||||
E = getTimeStamp();
|
||||
return E-S;
|
||||
}
|
||||
|
@@ -10,7 +10,7 @@ ANSI_CFLAGS += -Wextra
|
||||
# A100 + Native
|
||||
CFLAGS = -O3 -arch=sm_80 -march=native -ffast-math -funroll-loops --forward-unknown-to-host-compiler # -fopenmp
|
||||
# A40 + Native
|
||||
- #CFLAGS = -O3 -arch=sm_80 -march=native -ffast-math -funroll-loops --forward-unknown-to-host-compiler # -fopenmp
|
||||
+ #CFLAGS = -O3 -arch=sm_86 -march=native -ffast-math -funroll-loops --forward-unknown-to-host-compiler # -fopenmp
|
||||
# Cascade Lake
|
||||
#CFLAGS = -O3 -march=cascadelake -ffast-math -funroll-loops --forward-unknown-to-host-compiler # -fopenmp
|
||||
# For GROMACS kernels, we need at least sm_61 due to atomicAdd with doubles
|
||||
|
Loading…
Reference in New Issue
Block a user