Adjust neighbor lists layout to keep neighbor ids contiguous in memory

Signed-off-by: Rafael Ravedutti <rafaelravedutti@gmail.com>
This commit is contained in:
Rafael Ravedutti 2023-03-30 01:57:26 +02:00
parent 3eb7170a65
commit 43259eb3cf
5 changed files with 51 additions and 53 deletions

View File

@ -45,7 +45,7 @@ static inline void gmx_load_simd_4xn_interactions(
double computeForceLJ_ref(Parameter *param, Atom *atom, Neighbor *neighbor, Stats *stats) {
DEBUG_MESSAGE("computeForceLJ begin\n");
int Nlocal = atom->Nlocal;
NeighborCluster* neighs;
int *neighs;
MD_FLOAT cutforcesq = param->cutforce * param->cutforce;
MD_FLOAT sigma6 = param->sigma6;
MD_FLOAT epsilon = param->epsilon;
@ -77,7 +77,7 @@ double computeForceLJ_ref(Parameter *param, Atom *atom, Neighbor *neighbor, Stat
int numneighs = neighbor->numneigh[ci];
for(int k = 0; k < numneighs; k++) {
int cj = neighs[k].cj;
int cj = neighs[k];
int cj_vec_base = CJ_VECTOR_BASE_INDEX(cj);
int any = 0;
MD_FLOAT *cj_x = &atom->cl_x[cj_vec_base];
@ -158,7 +158,7 @@ double computeForceLJ_ref(Parameter *param, Atom *atom, Neighbor *neighbor, Stat
double computeForceLJ_2xnn_half(Parameter *param, Atom *atom, Neighbor *neighbor, Stats *stats) {
DEBUG_MESSAGE("computeForceLJ_2xnn begin\n");
int Nlocal = atom->Nlocal;
NeighborCluster* neighs;
int *neighs;
MD_FLOAT cutforcesq = param->cutforce * param->cutforce;
MD_FLOAT sigma6 = param->sigma6;
MD_FLOAT epsilon = param->epsilon;
@ -240,9 +240,9 @@ double computeForceLJ_2xnn_half(Parameter *param, Atom *atom, Neighbor *neighbor
MD_SIMD_FLOAT fiz2 = simd_zero();
for(int k = 0; k < numneighs_masked; k++) {
int cj = neighs[k].cj;
int cj = neighs[k];
int cj_vec_base = CJ_VECTOR_BASE_INDEX(cj);
//int imask = neighs[k].imask;
//int imask = neighs_imask[k];
MD_FLOAT *cj_x = &atom->cl_x[cj_vec_base];
MD_FLOAT *cj_f = &atom->cl_f[cj_vec_base];
//MD_SIMD_MASK interact0;
@ -331,7 +331,7 @@ double computeForceLJ_2xnn_half(Parameter *param, Atom *atom, Neighbor *neighbor
}
for(int k = numneighs_masked; k < numneighs; k++) {
int cj = neighs[k].cj;
int cj = neighs[k];
int cj_vec_base = CJ_VECTOR_BASE_INDEX(cj);
MD_FLOAT *cj_x = &atom->cl_x[cj_vec_base];
MD_FLOAT *cj_f = &atom->cl_f[cj_vec_base];
@ -401,7 +401,7 @@ double computeForceLJ_2xnn_half(Parameter *param, Atom *atom, Neighbor *neighbor
double computeForceLJ_2xnn_full(Parameter *param, Atom *atom, Neighbor *neighbor, Stats *stats) {
DEBUG_MESSAGE("computeForceLJ_2xnn begin\n");
int Nlocal = atom->Nlocal;
NeighborCluster* neighs;
int *neighs;
MD_FLOAT cutforcesq = param->cutforce * param->cutforce;
MD_FLOAT sigma6 = param->sigma6;
MD_FLOAT epsilon = param->epsilon;
@ -454,9 +454,8 @@ double computeForceLJ_2xnn_full(Parameter *param, Atom *atom, Neighbor *neighbor
MD_SIMD_FLOAT fiz2 = simd_zero();
for(int k = 0; k < numneighs_masked; k++) {
int cj = neighs[k].cj;
int cj = neighs[k];
int cj_vec_base = CJ_VECTOR_BASE_INDEX(cj);
int imask = neighs[k].imask;
MD_FLOAT *cj_x = &atom->cl_x[cj_vec_base];
unsigned int mask0, mask1, mask2, mask3;
@ -507,7 +506,7 @@ double computeForceLJ_2xnn_full(Parameter *param, Atom *atom, Neighbor *neighbor
}
for(int k = numneighs_masked; k < numneighs; k++) {
int cj = neighs[k].cj;
int cj = neighs[k];
int cj_vec_base = CJ_VECTOR_BASE_INDEX(cj);
MD_FLOAT *cj_x = &atom->cl_x[cj_vec_base];
@ -570,7 +569,7 @@ double computeForceLJ_2xnn(Parameter *param, Atom *atom, Neighbor *neighbor, Sta
double computeForceLJ_4xn_half(Parameter *param, Atom *atom, Neighbor *neighbor, Stats *stats) {
DEBUG_MESSAGE("computeForceLJ_4xn begin\n");
int Nlocal = atom->Nlocal;
NeighborCluster* neighs;
int *neighs;
MD_FLOAT cutforcesq = param->cutforce * param->cutforce;
MD_FLOAT sigma6 = param->sigma6;
MD_FLOAT epsilon = param->epsilon;
@ -635,9 +634,8 @@ double computeForceLJ_4xn_half(Parameter *param, Atom *atom, Neighbor *neighbor,
MD_SIMD_FLOAT fiz3 = simd_zero();
for(int k = 0; k < numneighs_masked; k++) {
int cj = neighs[k].cj;
int cj = neighs[k];
int cj_vec_base = CJ_VECTOR_BASE_INDEX(cj);
int imask = neighs[k].imask;
MD_FLOAT *cj_x = &atom->cl_x[cj_vec_base];
MD_FLOAT *cj_f = &atom->cl_f[cj_vec_base];
MD_SIMD_FLOAT xj_tmp = simd_load(&cj_x[CL_X_OFFSET]);
@ -741,9 +739,8 @@ double computeForceLJ_4xn_half(Parameter *param, Atom *atom, Neighbor *neighbor,
}
for(int k = numneighs_masked; k < numneighs; k++) {
int cj = neighs[k].cj;
int cj = neighs[k];
int cj_vec_base = CJ_VECTOR_BASE_INDEX(cj);
int imask = neighs[k].imask;
MD_FLOAT *cj_x = &atom->cl_x[cj_vec_base];
MD_FLOAT *cj_f = &atom->cl_f[cj_vec_base];
MD_SIMD_FLOAT xj_tmp = simd_load(&cj_x[CL_X_OFFSET]);
@ -846,7 +843,7 @@ double computeForceLJ_4xn_half(Parameter *param, Atom *atom, Neighbor *neighbor,
double computeForceLJ_4xn_full(Parameter *param, Atom *atom, Neighbor *neighbor, Stats *stats) {
DEBUG_MESSAGE("computeForceLJ_4xn begin\n");
int Nlocal = atom->Nlocal;
NeighborCluster* neighs;
int *neighs;
MD_FLOAT cutforcesq = param->cutforce * param->cutforce;
MD_FLOAT sigma6 = param->sigma6;
MD_FLOAT epsilon = param->epsilon;
@ -911,9 +908,8 @@ double computeForceLJ_4xn_full(Parameter *param, Atom *atom, Neighbor *neighbor,
MD_SIMD_FLOAT fiz3 = simd_zero();
for(int k = 0; k < numneighs_masked; k++) {
int cj = neighs[k].cj;
int cj = neighs[k];
int cj_vec_base = CJ_VECTOR_BASE_INDEX(cj);
int imask = neighs[k].imask;
MD_FLOAT *cj_x = &atom->cl_x[cj_vec_base];
MD_SIMD_FLOAT xj_tmp = simd_load(&cj_x[CL_X_OFFSET]);
MD_SIMD_FLOAT yj_tmp = simd_load(&cj_x[CL_Y_OFFSET]);
@ -991,9 +987,8 @@ double computeForceLJ_4xn_full(Parameter *param, Atom *atom, Neighbor *neighbor,
}
for(int k = numneighs_masked; k < numneighs; k++) {
int cj = neighs[k].cj;
int cj = neighs[k];
int cj_vec_base = CJ_VECTOR_BASE_INDEX(cj);
int imask = neighs[k].imask;
MD_FLOAT *cj_x = &atom->cl_x[cj_vec_base];
MD_SIMD_FLOAT xj_tmp = simd_load(&cj_x[CL_X_OFFSET]);
MD_SIMD_FLOAT yj_tmp = simd_load(&cj_x[CL_Y_OFFSET]);

View File

@ -25,11 +25,6 @@
#define NBNXN_INTERACTION_MASK_DIAG_J8_0 0xf0f8fcfeU
#define NBNXN_INTERACTION_MASK_DIAG_J8_1 0x0080c0e0U
typedef struct {
int cj;
unsigned int imask;
} NeighborCluster;
typedef struct {
int every;
int ncalls;
@ -37,7 +32,8 @@ typedef struct {
int* numneigh;
int* numneigh_masked;
int half_neigh;
NeighborCluster* neighbors;
int* neighbors;
unsigned int* neighbors_imask;
} Neighbor;
extern void initNeighbor(Neighbor*, Parameter*);

View File

@ -72,7 +72,8 @@ void createNeighbors(Atom *atom, Neighbor *neighbor, int pattern, int nneighs, i
const int ncj = atom->Nclusters_local / jfac;
const unsigned int imask = NBNXN_INTERACTION_MASK_ALL;
neighbor->numneigh = (int*) malloc(atom->Nclusters_max * sizeof(int));
neighbor->neighbors = (NeighborCluster*) malloc(atom->Nclusters_max * maxneighs * sizeof(int));
neighbor->neighbors = (int*) malloc(atom->Nclusters_max * maxneighs * sizeof(int));
neighbor->neighbors_imask = (unsigned int*) malloc(atom->Nclusters_max * maxneighs * sizeof(unsigned int));
if(pattern == P_RAND && ncj <= nneighs) {
fprintf(stderr, "Error: P_RAND: Number of j-clusters should be higher than number of j-cluster neighbors per i-cluster!\n");
@ -80,7 +81,8 @@ void createNeighbors(Atom *atom, Neighbor *neighbor, int pattern, int nneighs, i
}
for(int ci = 0; ci < atom->Nclusters_local; ci++) {
NeighborCluster *neighptr = &(neighbor->neighbors[ci * neighbor->maxneighs]);
int *neighptr = &(neighbor->neighbors[ci * neighbor->maxneighs]);
unsigned int *neighptr_imask = &(neighbor->neighbors_imask[ci * neighbor->maxneighs]);
int j = (pattern == P_SEQ) ? CJ0_FROM_CI(ci) : 0;
int m = (pattern == P_SEQ) ? ncj : nneighs;
int k = 0;
@ -90,26 +92,26 @@ void createNeighbors(Atom *atom, Neighbor *neighbor, int pattern, int nneighs, i
int found = 0;
do {
int cj = rand() % ncj;
neighptr[k].cj = cj;
neighptr[k].imask = imask;
neighptr[k] = cj;
neighptr_imask[k] = imask;
found = 0;
for(int l = 0; l < k; l++) {
if(neighptr[l].cj == cj) {
if(neighptr[l] == cj) {
found = 1;
}
}
} while(found == 1);
} else {
neighptr[k].cj = j;
neighptr[k].imask = imask;
neighptr[k] = j;
neighptr_imask[k] = imask;
j = (j + 1) % m;
}
}
for(int r = 1; r < nreps; r++) {
for(int k = 0; k < nneighs; k++) {
neighptr[r * nneighs + k].cj = neighptr[k].cj;
neighptr[r * nneighs + k].imask = neighptr[k].imask;
neighptr[r * nneighs + k] = neighptr[k];
neighptr_imask[r * nneighs + k] = neighptr_imask[k];
}
}

View File

@ -232,7 +232,8 @@ void buildNeighbor(Atom *atom, Neighbor *neighbor) {
if(neighbor->neighbors) free(neighbor->neighbors);
neighbor->numneigh = (int*) malloc(nmax * sizeof(int));
neighbor->numneigh_masked = (int*) malloc(nmax * sizeof(int));
neighbor->neighbors = (NeighborCluster*) malloc(nmax * neighbor->maxneighs * sizeof(NeighborCluster));
neighbor->neighbors = (int*) malloc(nmax * neighbor->maxneighs * sizeof(int));
neighbor->neighbors_imask = (unsigned int*) malloc(nmax * neighbor->maxneighs * sizeof(unsigned int));
}
MD_FLOAT bbx = 0.5 * (binsizex + binsizex);
@ -248,7 +249,8 @@ void buildNeighbor(Atom *atom, Neighbor *neighbor) {
for(int ci = 0; ci < atom->Nclusters_local; ci++) {
int ci_cj1 = CJ1_FROM_CI(ci);
NeighborCluster *neighptr = &(neighbor->neighbors[ci * neighbor->maxneighs]);
int *neighptr = &(neighbor->neighbors[ci * neighbor->maxneighs]);
unsigned int *neighptr_imask = &(neighbor->neighbors_imask[ci * neighbor->maxneighs]);
int n = 0, nmasked = 0;
int ibin = atom->icluster_bin[ci];
MD_FLOAT ibb_xmin = atom->iclusters[ci].bbminx;
@ -325,13 +327,13 @@ void buildNeighbor(Atom *atom, Neighbor *neighbor) {
#endif
if(imask == NBNXN_INTERACTION_MASK_ALL) {
neighptr[n].cj = cj;
neighptr[n].imask = imask;
neighptr[n] = cj;
neighptr_imask[n] = imask;
} else {
neighptr[n].cj = neighptr[nmasked].cj;
neighptr[n].imask = neighptr[nmasked].imask;
neighptr[nmasked].cj = cj;
neighptr[nmasked].imask = imask;
neighptr[n] = neighptr[nmasked];
neighptr_imask[n] = neighptr_imask[nmasked];
neighptr[nmasked] = cj;
neighptr_imask[nmasked] = imask;
nmasked++;
}
@ -357,8 +359,8 @@ void buildNeighbor(Atom *atom, Neighbor *neighbor) {
// Fill neighbor list with dummy values to fit vector width
if(CLUSTER_N < VECTOR_WIDTH) {
while(n % (VECTOR_WIDTH / CLUSTER_N)) {
neighptr[n].cj = atom->dummy_cj; // Last cluster is always a dummy cluster
neighptr[n].imask = 0;
neighptr[n] = atom->dummy_cj; // Last cluster is always a dummy cluster
neighptr_imask[n] = 0;
n++;
}
}
@ -378,7 +380,8 @@ void buildNeighbor(Atom *atom, Neighbor *neighbor) {
fprintf(stdout, "RESIZE %d\n", neighbor->maxneighs);
neighbor->maxneighs = new_maxneighs * 1.2;
free(neighbor->neighbors);
neighbor->neighbors = (NeighborCluster*) malloc(atom->Nmax * neighbor->maxneighs * sizeof(int));
neighbor->neighbors = (int *) malloc(atom->Nmax * neighbor->maxneighs * sizeof(int));
neighbor->neighbors_imask = (unsigned int *) malloc(atom->Nmax * neighbor->maxneighs * sizeof(unsigned int));
}
}
@ -433,20 +436,21 @@ void pruneNeighbor(Parameter *param, Atom *atom, Neighbor *neighbor) {
MD_FLOAT cutsq = cutneighsq;
for(int ci = 0; ci < atom->Nclusters_local; ci++) {
NeighborCluster *neighs = &neighbor->neighbors[ci * neighbor->maxneighs];
int *neighs = &neighbor->neighbors[ci * neighbor->maxneighs];
unsigned int *neighs_imask = &neighbor->neighbors_imask[ci * neighbor->maxneighs];
int numneighs = neighbor->numneigh[ci];
int numneighs_masked = neighbor->numneigh_masked[ci];
int k = 0;
// Remove dummy clusters if necessary
if(CLUSTER_N < VECTOR_WIDTH) {
while(neighs[numneighs - 1].cj == atom->dummy_cj) {
while(neighs[numneighs - 1] == atom->dummy_cj) {
numneighs--;
}
}
while(k < numneighs) {
int cj = neighs[k].cj;
int cj = neighs[k];
if(atomDistanceInRange(atom, ci, cj, cutsq)) {
k++;
} else {
@ -461,8 +465,8 @@ void pruneNeighbor(Parameter *param, Atom *atom, Neighbor *neighbor) {
// Readd dummy clusters if necessary
if(CLUSTER_N < VECTOR_WIDTH) {
while(numneighs % (VECTOR_WIDTH / CLUSTER_N)) {
neighs[numneighs].cj = atom->dummy_cj; // Last cluster is always a dummy cluster
neighs[numneighs].imask = 0;
neighs[numneighs] = atom->dummy_cj; // Last cluster is always a dummy cluster
neighs_imask[numneighs] = 0;
numneighs++;
}
}

View File

@ -13,7 +13,8 @@ void traceAddresses(Parameter *param, Atom *atom, Neighbor *neighbor, int timest
MEM_TRACER_INIT;
INDEX_TRACER_INIT;
int Nlocal = atom->Nlocal;
NeighborCluster* neighs;
int *neighs;
unsigned int *neighs_imask;
//MD_FLOAT* fx = atom->fx; MD_FLOAT* fy = atom->fy; MD_FLOAT* fz = atom->fz;
INDEX_TRACE_NATOMS(Nlocal, atom->Nghost, neighbor->maxneighs);
@ -34,7 +35,7 @@ void traceAddresses(Parameter *param, Atom *atom, Neighbor *neighbor, int timest
DIST_TRACE(neighs, numneighs);
for(int k = 0; k < numneighs; k++) {
int j = neighs[k].cj;
int j = neighs[k];
MEM_TRACE(j, 'R');
MEM_TRACE(atom_x(j), 'R');
MEM_TRACE(atom_y(j), 'R');