Small fixes to the GROMACS GPU code
Signed-off-by: Rafael Ravedutti <rafaelravedutti@gmail.com>
This commit is contained in:
		@@ -56,8 +56,8 @@ void initDevice(Atom *atom, Neighbor *neighbor) {
 | 
				
			|||||||
    cuda_PBCz               =   (int *) allocateGPU(atom->Nclusters_max * sizeof(int));
 | 
					    cuda_PBCz               =   (int *) allocateGPU(atom->Nclusters_max * sizeof(int));
 | 
				
			||||||
    cuda_numneigh           =   (int *) allocateGPU(atom->Nclusters_max * sizeof(int));
 | 
					    cuda_numneigh           =   (int *) allocateGPU(atom->Nclusters_max * sizeof(int));
 | 
				
			||||||
    cuda_neighbors          =   (int *) allocateGPU(atom->Nclusters_max * neighbor->maxneighs * sizeof(int));
 | 
					    cuda_neighbors          =   (int *) allocateGPU(atom->Nclusters_max * neighbor->maxneighs * sizeof(int));
 | 
				
			||||||
    natoms = (int *) malloc(atom->Nclusters_max);
 | 
					    natoms                  =   (int *) malloc(atom->Nclusters_max * sizeof(int));
 | 
				
			||||||
    ngatoms = (int *) malloc(atom->Nclusters_max);
 | 
					    ngatoms                 =   (int *) malloc(atom->Nclusters_max * sizeof(int));
 | 
				
			||||||
    isReneighboured = 1;
 | 
					    isReneighboured = 1;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -65,9 +65,9 @@ double setup(Parameter *param, Eam *eam, Atom *atom, Neighbor *neighbor, Stats *
 | 
				
			|||||||
    buildClusters(atom);
 | 
					    buildClusters(atom);
 | 
				
			||||||
    defineJClusters(atom);
 | 
					    defineJClusters(atom);
 | 
				
			||||||
    setupPbc(atom, param);
 | 
					    setupPbc(atom, param);
 | 
				
			||||||
    initDevice(atom, neighbor);
 | 
					 | 
				
			||||||
    binClusters(atom);
 | 
					    binClusters(atom);
 | 
				
			||||||
    buildNeighbor(atom, neighbor);
 | 
					    buildNeighbor(atom, neighbor);
 | 
				
			||||||
 | 
					    initDevice(atom, neighbor);
 | 
				
			||||||
    E = getTimeStamp();
 | 
					    E = getTimeStamp();
 | 
				
			||||||
    return E-S;
 | 
					    return E-S;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -10,7 +10,7 @@ ANSI_CFLAGS += -Wextra
 | 
				
			|||||||
# A100 + Native
 | 
					# A100 + Native
 | 
				
			||||||
CFLAGS   = -O3 -arch=sm_80 -march=native -ffast-math -funroll-loops --forward-unknown-to-host-compiler # -fopenmp
 | 
					CFLAGS   = -O3 -arch=sm_80 -march=native -ffast-math -funroll-loops --forward-unknown-to-host-compiler # -fopenmp
 | 
				
			||||||
# A40 + Native
 | 
					# A40 + Native
 | 
				
			||||||
#CFLAGS   = -O3 -arch=sm_80 -march=native -ffast-math -funroll-loops --forward-unknown-to-host-compiler # -fopenmp
 | 
					#CFLAGS   = -O3 -arch=sm_86 -march=native -ffast-math -funroll-loops --forward-unknown-to-host-compiler # -fopenmp
 | 
				
			||||||
# Cascade Lake
 | 
					# Cascade Lake
 | 
				
			||||||
#CFLAGS   = -O3 -march=cascadelake  -ffast-math -funroll-loops --forward-unknown-to-host-compiler # -fopenmp
 | 
					#CFLAGS   = -O3 -march=cascadelake  -ffast-math -funroll-loops --forward-unknown-to-host-compiler # -fopenmp
 | 
				
			||||||
# For GROMACS kernels, we need at least sm_61 due to atomicAdd with doubles
 | 
					# For GROMACS kernels, we need at least sm_61 due to atomicAdd with doubles
 | 
				
			||||||
 
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user