Implement Neighbour list SoA (transposed) memory layout, where neighbour k of atom i is stored at index Nlocal * k + i, + performance measurement

This commit is contained in:
Maximilian Gaul 2022-01-31 20:27:59 +01:00
parent b2a6574426
commit 696e6da01d
3 changed files with 8 additions and 7 deletions

View File

@ -7,7 +7,7 @@ ANSI_CFLAGS += -pedantic
ANSI_CFLAGS += -Wextra ANSI_CFLAGS += -Wextra
# CFLAGS = -O0 -g -std=c99 -fargument-noalias # CFLAGS = -O0 -g -std=c99 -fargument-noalias
CFLAGS = -O3 -arch=sm_61 # -fopenmp CFLAGS = -O3 -g -arch=sm_61 # -fopenmp
ASFLAGS = -masm=intel ASFLAGS = -masm=intel
LFLAGS = LFLAGS =
DEFINES = -D_GNU_SOURCE -DLIKWID_PERFMON DEFINES = -D_GNU_SOURCE -DLIKWID_PERFMON

View File

@ -51,8 +51,7 @@ __global__ void calc_force(
Atom *atom = &a; Atom *atom = &a;
int *neighs = &neigh_neighbors[i * neigh_maxneighs]; const int numneighs = neigh_numneigh[i];
int numneighs = neigh_numneigh[i];
MD_FLOAT xtmp = atom_x(i); MD_FLOAT xtmp = atom_x(i);
MD_FLOAT ytmp = atom_y(i); MD_FLOAT ytmp = atom_y(i);
@ -63,7 +62,7 @@ __global__ void calc_force(
MD_FLOAT fiz = 0; MD_FLOAT fiz = 0;
for(int k = 0; k < numneighs; k++) { for(int k = 0; k < numneighs; k++) {
int j = neighs[k]; int j = neigh_neighbors[atom->Nlocal * k + i];
MD_FLOAT delx = xtmp - atom_x(j); MD_FLOAT delx = xtmp - atom_x(j);
MD_FLOAT dely = ytmp - atom_y(j); MD_FLOAT dely = ytmp - atom_y(j);
MD_FLOAT delz = ztmp - atom_z(j); MD_FLOAT delz = ztmp - atom_z(j);

View File

@ -178,7 +178,7 @@ void buildNeighbor(Atom *atom, Neighbor *neighbor)
if(neighbor->numneigh) cudaFreeHost(neighbor->numneigh); if(neighbor->numneigh) cudaFreeHost(neighbor->numneigh);
if(neighbor->neighbors) cudaFreeHost(neighbor->neighbors); if(neighbor->neighbors) cudaFreeHost(neighbor->neighbors);
checkCUDAError( "buildNeighbor numneigh", cudaMallocHost((void**)&(neighbor->numneigh), nmax * sizeof(int)) ); checkCUDAError( "buildNeighbor numneigh", cudaMallocHost((void**)&(neighbor->numneigh), nmax * sizeof(int)) );
checkCUDAError( "buildNeighbor neighbors", cudaMallocHost((void**)&(neighbor->neighbors), nmax * neighbor->maxneighs * sizeof(int*)) ); checkCUDAError( "buildNeighbor neighbors", cudaMallocHost((void**)&(neighbor->neighbors), nmax * neighbor->maxneighs * sizeof(int)) );
// neighbor->numneigh = (int*) malloc(nmax * sizeof(int)); // neighbor->numneigh = (int*) malloc(nmax * sizeof(int));
// neighbor->neighbors = (int*) malloc(nmax * neighbor->maxneighs * sizeof(int*)); // neighbor->neighbors = (int*) malloc(nmax * neighbor->maxneighs * sizeof(int*));
} }
@ -193,7 +193,7 @@ void buildNeighbor(Atom *atom, Neighbor *neighbor)
resize = 0; resize = 0;
for(int i = 0; i < atom->Nlocal; i++) { for(int i = 0; i < atom->Nlocal; i++) {
int* neighptr = &(neighbor->neighbors[i * neighbor->maxneighs]); int* neighptr = &(neighbor->neighbors[i]);
int n = 0; int n = 0;
MD_FLOAT xtmp = atom_x(i); MD_FLOAT xtmp = atom_x(i);
MD_FLOAT ytmp = atom_y(i); MD_FLOAT ytmp = atom_y(i);
@ -226,7 +226,9 @@ void buildNeighbor(Atom *atom, Neighbor *neighbor)
#endif #endif
if( rsq <= cutoff ) { if( rsq <= cutoff ) {
neighptr[n++] = j; int idx = atom->Nlocal * n;
neighptr[idx] = j;
n += 1;
} }
} }
} }