Implement Neighbour list AoS memory layout + performance measurement
This commit is contained in:
parent
b2a6574426
commit
696e6da01d
@ -7,7 +7,7 @@ ANSI_CFLAGS += -pedantic
|
|||||||
ANSI_CFLAGS += -Wextra
|
ANSI_CFLAGS += -Wextra
|
||||||
|
|
||||||
# CFLAGS = -O0 -g -std=c99 -fargument-noalias
|
# CFLAGS = -O0 -g -std=c99 -fargument-noalias
|
||||||
CFLAGS = -O3 -arch=sm_61 # -fopenmp
|
CFLAGS = -O3 -g -arch=sm_61 # -fopenmp
|
||||||
ASFLAGS = -masm=intel
|
ASFLAGS = -masm=intel
|
||||||
LFLAGS =
|
LFLAGS =
|
||||||
DEFINES = -D_GNU_SOURCE -DLIKWID_PERFMON
|
DEFINES = -D_GNU_SOURCE -DLIKWID_PERFMON
|
||||||
|
@ -51,8 +51,7 @@ __global__ void calc_force(
|
|||||||
|
|
||||||
Atom *atom = &a;
|
Atom *atom = &a;
|
||||||
|
|
||||||
int *neighs = &neigh_neighbors[i * neigh_maxneighs];
|
const int numneighs = neigh_numneigh[i];
|
||||||
int numneighs = neigh_numneigh[i];
|
|
||||||
|
|
||||||
MD_FLOAT xtmp = atom_x(i);
|
MD_FLOAT xtmp = atom_x(i);
|
||||||
MD_FLOAT ytmp = atom_y(i);
|
MD_FLOAT ytmp = atom_y(i);
|
||||||
@ -63,7 +62,7 @@ __global__ void calc_force(
|
|||||||
MD_FLOAT fiz = 0;
|
MD_FLOAT fiz = 0;
|
||||||
|
|
||||||
for(int k = 0; k < numneighs; k++) {
|
for(int k = 0; k < numneighs; k++) {
|
||||||
int j = neighs[k];
|
int j = neigh_neighbors[atom->Nlocal * k + i];
|
||||||
MD_FLOAT delx = xtmp - atom_x(j);
|
MD_FLOAT delx = xtmp - atom_x(j);
|
||||||
MD_FLOAT dely = ytmp - atom_y(j);
|
MD_FLOAT dely = ytmp - atom_y(j);
|
||||||
MD_FLOAT delz = ztmp - atom_z(j);
|
MD_FLOAT delz = ztmp - atom_z(j);
|
||||||
|
@ -178,7 +178,7 @@ void buildNeighbor(Atom *atom, Neighbor *neighbor)
|
|||||||
if(neighbor->numneigh) cudaFreeHost(neighbor->numneigh);
|
if(neighbor->numneigh) cudaFreeHost(neighbor->numneigh);
|
||||||
if(neighbor->neighbors) cudaFreeHost(neighbor->neighbors);
|
if(neighbor->neighbors) cudaFreeHost(neighbor->neighbors);
|
||||||
checkCUDAError( "buildNeighbor numneigh", cudaMallocHost((void**)&(neighbor->numneigh), nmax * sizeof(int)) );
|
checkCUDAError( "buildNeighbor numneigh", cudaMallocHost((void**)&(neighbor->numneigh), nmax * sizeof(int)) );
|
||||||
checkCUDAError( "buildNeighbor neighbors", cudaMallocHost((void**)&(neighbor->neighbors), nmax * neighbor->maxneighs * sizeof(int*)) );
|
checkCUDAError( "buildNeighbor neighbors", cudaMallocHost((void**)&(neighbor->neighbors), nmax * neighbor->maxneighs * sizeof(int)) );
|
||||||
// neighbor->numneigh = (int*) malloc(nmax * sizeof(int));
|
// neighbor->numneigh = (int*) malloc(nmax * sizeof(int));
|
||||||
// neighbor->neighbors = (int*) malloc(nmax * neighbor->maxneighs * sizeof(int*));
|
// neighbor->neighbors = (int*) malloc(nmax * neighbor->maxneighs * sizeof(int));
|
||||||
}
|
}
|
||||||
@ -193,7 +193,7 @@ void buildNeighbor(Atom *atom, Neighbor *neighbor)
|
|||||||
resize = 0;
|
resize = 0;
|
||||||
|
|
||||||
for(int i = 0; i < atom->Nlocal; i++) {
|
for(int i = 0; i < atom->Nlocal; i++) {
|
||||||
int* neighptr = &(neighbor->neighbors[i * neighbor->maxneighs]);
|
int* neighptr = &(neighbor->neighbors[i]);
|
||||||
int n = 0;
|
int n = 0;
|
||||||
MD_FLOAT xtmp = atom_x(i);
|
MD_FLOAT xtmp = atom_x(i);
|
||||||
MD_FLOAT ytmp = atom_y(i);
|
MD_FLOAT ytmp = atom_y(i);
|
||||||
@ -226,7 +226,9 @@ void buildNeighbor(Atom *atom, Neighbor *neighbor)
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
if( rsq <= cutoff ) {
|
if( rsq <= cutoff ) {
|
||||||
neighptr[n++] = j;
|
int idx = atom->Nlocal * n;
|
||||||
|
neighptr[idx] = j;
|
||||||
|
n += 1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user