Small fixes to the GROMACS GPU code
Signed-off-by: Rafael Ravedutti <rafaelravedutti@gmail.com>
This commit is contained in:
		@@ -56,8 +56,8 @@ void initDevice(Atom *atom, Neighbor *neighbor) {
 | 
			
		||||
    cuda_PBCz               =   (int *) allocateGPU(atom->Nclusters_max * sizeof(int));
 | 
			
		||||
    cuda_numneigh           =   (int *) allocateGPU(atom->Nclusters_max * sizeof(int));
 | 
			
		||||
    cuda_neighbors          =   (int *) allocateGPU(atom->Nclusters_max * neighbor->maxneighs * sizeof(int));
 | 
			
		||||
    natoms = (int *) malloc(atom->Nclusters_max);
 | 
			
		||||
    ngatoms = (int *) malloc(atom->Nclusters_max);
 | 
			
		||||
    natoms                  =   (int *) malloc(atom->Nclusters_max * sizeof(int));
 | 
			
		||||
    ngatoms                 =   (int *) malloc(atom->Nclusters_max * sizeof(int));
 | 
			
		||||
    isReneighboured = 1;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -65,9 +65,9 @@ double setup(Parameter *param, Eam *eam, Atom *atom, Neighbor *neighbor, Stats *
 | 
			
		||||
    buildClusters(atom);
 | 
			
		||||
    defineJClusters(atom);
 | 
			
		||||
    setupPbc(atom, param);
 | 
			
		||||
    initDevice(atom, neighbor);
 | 
			
		||||
    binClusters(atom);
 | 
			
		||||
    buildNeighbor(atom, neighbor);
 | 
			
		||||
    initDevice(atom, neighbor);
 | 
			
		||||
    E = getTimeStamp();
 | 
			
		||||
    return E-S;
 | 
			
		||||
}
 | 
			
		||||
 
 | 
			
		||||
@@ -10,7 +10,7 @@ ANSI_CFLAGS += -Wextra
 | 
			
		||||
# A100 + Native
 | 
			
		||||
CFLAGS   = -O3 -arch=sm_80 -march=native -ffast-math -funroll-loops --forward-unknown-to-host-compiler # -fopenmp
 | 
			
		||||
# A40 + Native
 | 
			
		||||
#CFLAGS   = -O3 -arch=sm_80 -march=native -ffast-math -funroll-loops --forward-unknown-to-host-compiler # -fopenmp
 | 
			
		||||
#CFLAGS   = -O3 -arch=sm_86 -march=native -ffast-math -funroll-loops --forward-unknown-to-host-compiler # -fopenmp
 | 
			
		||||
# Cascade Lake
 | 
			
		||||
#CFLAGS   = -O3 -march=cascadelake  -ffast-math -funroll-loops --forward-unknown-to-host-compiler # -fopenmp
 | 
			
		||||
# GROMACS kernels use atomicAdd on doubles, which requires compute capability 6.0 or higher (sm_60+)
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user