Adjust neighbor lists layout to keep neighbor ids contiguous in memory
Signed-off-by: Rafael Ravedutti <rafaelravedutti@gmail.com>
This commit is contained in:
parent
3eb7170a65
commit
43259eb3cf
@ -45,7 +45,7 @@ static inline void gmx_load_simd_4xn_interactions(
|
||||
double computeForceLJ_ref(Parameter *param, Atom *atom, Neighbor *neighbor, Stats *stats) {
|
||||
DEBUG_MESSAGE("computeForceLJ begin\n");
|
||||
int Nlocal = atom->Nlocal;
|
||||
NeighborCluster* neighs;
|
||||
int *neighs;
|
||||
MD_FLOAT cutforcesq = param->cutforce * param->cutforce;
|
||||
MD_FLOAT sigma6 = param->sigma6;
|
||||
MD_FLOAT epsilon = param->epsilon;
|
||||
@ -77,7 +77,7 @@ double computeForceLJ_ref(Parameter *param, Atom *atom, Neighbor *neighbor, Stat
|
||||
int numneighs = neighbor->numneigh[ci];
|
||||
|
||||
for(int k = 0; k < numneighs; k++) {
|
||||
int cj = neighs[k].cj;
|
||||
int cj = neighs[k];
|
||||
int cj_vec_base = CJ_VECTOR_BASE_INDEX(cj);
|
||||
int any = 0;
|
||||
MD_FLOAT *cj_x = &atom->cl_x[cj_vec_base];
|
||||
@ -158,7 +158,7 @@ double computeForceLJ_ref(Parameter *param, Atom *atom, Neighbor *neighbor, Stat
|
||||
double computeForceLJ_2xnn_half(Parameter *param, Atom *atom, Neighbor *neighbor, Stats *stats) {
|
||||
DEBUG_MESSAGE("computeForceLJ_2xnn begin\n");
|
||||
int Nlocal = atom->Nlocal;
|
||||
NeighborCluster* neighs;
|
||||
int *neighs;
|
||||
MD_FLOAT cutforcesq = param->cutforce * param->cutforce;
|
||||
MD_FLOAT sigma6 = param->sigma6;
|
||||
MD_FLOAT epsilon = param->epsilon;
|
||||
@ -240,9 +240,9 @@ double computeForceLJ_2xnn_half(Parameter *param, Atom *atom, Neighbor *neighbor
|
||||
MD_SIMD_FLOAT fiz2 = simd_zero();
|
||||
|
||||
for(int k = 0; k < numneighs_masked; k++) {
|
||||
int cj = neighs[k].cj;
|
||||
int cj = neighs[k];
|
||||
int cj_vec_base = CJ_VECTOR_BASE_INDEX(cj);
|
||||
//int imask = neighs[k].imask;
|
||||
//int imask = neighs_imask[k];
|
||||
MD_FLOAT *cj_x = &atom->cl_x[cj_vec_base];
|
||||
MD_FLOAT *cj_f = &atom->cl_f[cj_vec_base];
|
||||
//MD_SIMD_MASK interact0;
|
||||
@ -331,7 +331,7 @@ double computeForceLJ_2xnn_half(Parameter *param, Atom *atom, Neighbor *neighbor
|
||||
}
|
||||
|
||||
for(int k = numneighs_masked; k < numneighs; k++) {
|
||||
int cj = neighs[k].cj;
|
||||
int cj = neighs[k];
|
||||
int cj_vec_base = CJ_VECTOR_BASE_INDEX(cj);
|
||||
MD_FLOAT *cj_x = &atom->cl_x[cj_vec_base];
|
||||
MD_FLOAT *cj_f = &atom->cl_f[cj_vec_base];
|
||||
@ -401,7 +401,7 @@ double computeForceLJ_2xnn_half(Parameter *param, Atom *atom, Neighbor *neighbor
|
||||
double computeForceLJ_2xnn_full(Parameter *param, Atom *atom, Neighbor *neighbor, Stats *stats) {
|
||||
DEBUG_MESSAGE("computeForceLJ_2xnn begin\n");
|
||||
int Nlocal = atom->Nlocal;
|
||||
NeighborCluster* neighs;
|
||||
int *neighs;
|
||||
MD_FLOAT cutforcesq = param->cutforce * param->cutforce;
|
||||
MD_FLOAT sigma6 = param->sigma6;
|
||||
MD_FLOAT epsilon = param->epsilon;
|
||||
@ -454,9 +454,8 @@ double computeForceLJ_2xnn_full(Parameter *param, Atom *atom, Neighbor *neighbor
|
||||
MD_SIMD_FLOAT fiz2 = simd_zero();
|
||||
|
||||
for(int k = 0; k < numneighs_masked; k++) {
|
||||
int cj = neighs[k].cj;
|
||||
int cj = neighs[k];
|
||||
int cj_vec_base = CJ_VECTOR_BASE_INDEX(cj);
|
||||
int imask = neighs[k].imask;
|
||||
MD_FLOAT *cj_x = &atom->cl_x[cj_vec_base];
|
||||
unsigned int mask0, mask1, mask2, mask3;
|
||||
|
||||
@ -507,7 +506,7 @@ double computeForceLJ_2xnn_full(Parameter *param, Atom *atom, Neighbor *neighbor
|
||||
}
|
||||
|
||||
for(int k = numneighs_masked; k < numneighs; k++) {
|
||||
int cj = neighs[k].cj;
|
||||
int cj = neighs[k];
|
||||
int cj_vec_base = CJ_VECTOR_BASE_INDEX(cj);
|
||||
MD_FLOAT *cj_x = &atom->cl_x[cj_vec_base];
|
||||
|
||||
@ -570,7 +569,7 @@ double computeForceLJ_2xnn(Parameter *param, Atom *atom, Neighbor *neighbor, Sta
|
||||
double computeForceLJ_4xn_half(Parameter *param, Atom *atom, Neighbor *neighbor, Stats *stats) {
|
||||
DEBUG_MESSAGE("computeForceLJ_4xn begin\n");
|
||||
int Nlocal = atom->Nlocal;
|
||||
NeighborCluster* neighs;
|
||||
int *neighs;
|
||||
MD_FLOAT cutforcesq = param->cutforce * param->cutforce;
|
||||
MD_FLOAT sigma6 = param->sigma6;
|
||||
MD_FLOAT epsilon = param->epsilon;
|
||||
@ -635,9 +634,8 @@ double computeForceLJ_4xn_half(Parameter *param, Atom *atom, Neighbor *neighbor,
|
||||
MD_SIMD_FLOAT fiz3 = simd_zero();
|
||||
|
||||
for(int k = 0; k < numneighs_masked; k++) {
|
||||
int cj = neighs[k].cj;
|
||||
int cj = neighs[k];
|
||||
int cj_vec_base = CJ_VECTOR_BASE_INDEX(cj);
|
||||
int imask = neighs[k].imask;
|
||||
MD_FLOAT *cj_x = &atom->cl_x[cj_vec_base];
|
||||
MD_FLOAT *cj_f = &atom->cl_f[cj_vec_base];
|
||||
MD_SIMD_FLOAT xj_tmp = simd_load(&cj_x[CL_X_OFFSET]);
|
||||
@ -741,9 +739,8 @@ double computeForceLJ_4xn_half(Parameter *param, Atom *atom, Neighbor *neighbor,
|
||||
}
|
||||
|
||||
for(int k = numneighs_masked; k < numneighs; k++) {
|
||||
int cj = neighs[k].cj;
|
||||
int cj = neighs[k];
|
||||
int cj_vec_base = CJ_VECTOR_BASE_INDEX(cj);
|
||||
int imask = neighs[k].imask;
|
||||
MD_FLOAT *cj_x = &atom->cl_x[cj_vec_base];
|
||||
MD_FLOAT *cj_f = &atom->cl_f[cj_vec_base];
|
||||
MD_SIMD_FLOAT xj_tmp = simd_load(&cj_x[CL_X_OFFSET]);
|
||||
@ -846,7 +843,7 @@ double computeForceLJ_4xn_half(Parameter *param, Atom *atom, Neighbor *neighbor,
|
||||
double computeForceLJ_4xn_full(Parameter *param, Atom *atom, Neighbor *neighbor, Stats *stats) {
|
||||
DEBUG_MESSAGE("computeForceLJ_4xn begin\n");
|
||||
int Nlocal = atom->Nlocal;
|
||||
NeighborCluster* neighs;
|
||||
int *neighs;
|
||||
MD_FLOAT cutforcesq = param->cutforce * param->cutforce;
|
||||
MD_FLOAT sigma6 = param->sigma6;
|
||||
MD_FLOAT epsilon = param->epsilon;
|
||||
@ -911,9 +908,8 @@ double computeForceLJ_4xn_full(Parameter *param, Atom *atom, Neighbor *neighbor,
|
||||
MD_SIMD_FLOAT fiz3 = simd_zero();
|
||||
|
||||
for(int k = 0; k < numneighs_masked; k++) {
|
||||
int cj = neighs[k].cj;
|
||||
int cj = neighs[k];
|
||||
int cj_vec_base = CJ_VECTOR_BASE_INDEX(cj);
|
||||
int imask = neighs[k].imask;
|
||||
MD_FLOAT *cj_x = &atom->cl_x[cj_vec_base];
|
||||
MD_SIMD_FLOAT xj_tmp = simd_load(&cj_x[CL_X_OFFSET]);
|
||||
MD_SIMD_FLOAT yj_tmp = simd_load(&cj_x[CL_Y_OFFSET]);
|
||||
@ -991,9 +987,8 @@ double computeForceLJ_4xn_full(Parameter *param, Atom *atom, Neighbor *neighbor,
|
||||
}
|
||||
|
||||
for(int k = numneighs_masked; k < numneighs; k++) {
|
||||
int cj = neighs[k].cj;
|
||||
int cj = neighs[k];
|
||||
int cj_vec_base = CJ_VECTOR_BASE_INDEX(cj);
|
||||
int imask = neighs[k].imask;
|
||||
MD_FLOAT *cj_x = &atom->cl_x[cj_vec_base];
|
||||
MD_SIMD_FLOAT xj_tmp = simd_load(&cj_x[CL_X_OFFSET]);
|
||||
MD_SIMD_FLOAT yj_tmp = simd_load(&cj_x[CL_Y_OFFSET]);
|
||||
|
@ -25,11 +25,6 @@
|
||||
#define NBNXN_INTERACTION_MASK_DIAG_J8_0 0xf0f8fcfeU
|
||||
#define NBNXN_INTERACTION_MASK_DIAG_J8_1 0x0080c0e0U
|
||||
|
||||
typedef struct {
|
||||
int cj;
|
||||
unsigned int imask;
|
||||
} NeighborCluster;
|
||||
|
||||
typedef struct {
|
||||
int every;
|
||||
int ncalls;
|
||||
@ -37,7 +32,8 @@ typedef struct {
|
||||
int* numneigh;
|
||||
int* numneigh_masked;
|
||||
int half_neigh;
|
||||
NeighborCluster* neighbors;
|
||||
int* neighbors;
|
||||
unsigned int* neighbors_imask;
|
||||
} Neighbor;
|
||||
|
||||
extern void initNeighbor(Neighbor*, Parameter*);
|
||||
|
@ -72,7 +72,8 @@ void createNeighbors(Atom *atom, Neighbor *neighbor, int pattern, int nneighs, i
|
||||
const int ncj = atom->Nclusters_local / jfac;
|
||||
const unsigned int imask = NBNXN_INTERACTION_MASK_ALL;
|
||||
neighbor->numneigh = (int*) malloc(atom->Nclusters_max * sizeof(int));
|
||||
neighbor->neighbors = (NeighborCluster*) malloc(atom->Nclusters_max * maxneighs * sizeof(int));
|
||||
neighbor->neighbors = (int*) malloc(atom->Nclusters_max * maxneighs * sizeof(int));
|
||||
neighbor->neighbors_imask = (unsigned int*) malloc(atom->Nclusters_max * maxneighs * sizeof(unsigned int));
|
||||
|
||||
if(pattern == P_RAND && ncj <= nneighs) {
|
||||
fprintf(stderr, "Error: P_RAND: Number of j-clusters should be higher than number of j-cluster neighbors per i-cluster!\n");
|
||||
@ -80,7 +81,8 @@ void createNeighbors(Atom *atom, Neighbor *neighbor, int pattern, int nneighs, i
|
||||
}
|
||||
|
||||
for(int ci = 0; ci < atom->Nclusters_local; ci++) {
|
||||
NeighborCluster *neighptr = &(neighbor->neighbors[ci * neighbor->maxneighs]);
|
||||
int *neighptr = &(neighbor->neighbors[ci * neighbor->maxneighs]);
|
||||
unsigned int *neighptr_imask = &(neighbor->neighbors_imask[ci * neighbor->maxneighs]);
|
||||
int j = (pattern == P_SEQ) ? CJ0_FROM_CI(ci) : 0;
|
||||
int m = (pattern == P_SEQ) ? ncj : nneighs;
|
||||
int k = 0;
|
||||
@ -90,26 +92,26 @@ void createNeighbors(Atom *atom, Neighbor *neighbor, int pattern, int nneighs, i
|
||||
int found = 0;
|
||||
do {
|
||||
int cj = rand() % ncj;
|
||||
neighptr[k].cj = cj;
|
||||
neighptr[k].imask = imask;
|
||||
neighptr[k] = cj;
|
||||
neighptr_imask[k] = imask;
|
||||
found = 0;
|
||||
for(int l = 0; l < k; l++) {
|
||||
if(neighptr[l].cj == cj) {
|
||||
if(neighptr[l] == cj) {
|
||||
found = 1;
|
||||
}
|
||||
}
|
||||
} while(found == 1);
|
||||
} else {
|
||||
neighptr[k].cj = j;
|
||||
neighptr[k].imask = imask;
|
||||
neighptr[k] = j;
|
||||
neighptr_imask[k] = imask;
|
||||
j = (j + 1) % m;
|
||||
}
|
||||
}
|
||||
|
||||
for(int r = 1; r < nreps; r++) {
|
||||
for(int k = 0; k < nneighs; k++) {
|
||||
neighptr[r * nneighs + k].cj = neighptr[k].cj;
|
||||
neighptr[r * nneighs + k].imask = neighptr[k].imask;
|
||||
neighptr[r * nneighs + k] = neighptr[k];
|
||||
neighptr_imask[r * nneighs + k] = neighptr_imask[k];
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -232,7 +232,8 @@ void buildNeighbor(Atom *atom, Neighbor *neighbor) {
|
||||
if(neighbor->neighbors) free(neighbor->neighbors);
|
||||
neighbor->numneigh = (int*) malloc(nmax * sizeof(int));
|
||||
neighbor->numneigh_masked = (int*) malloc(nmax * sizeof(int));
|
||||
neighbor->neighbors = (NeighborCluster*) malloc(nmax * neighbor->maxneighs * sizeof(NeighborCluster));
|
||||
neighbor->neighbors = (int*) malloc(nmax * neighbor->maxneighs * sizeof(int));
|
||||
neighbor->neighbors_imask = (unsigned int*) malloc(nmax * neighbor->maxneighs * sizeof(unsigned int));
|
||||
}
|
||||
|
||||
MD_FLOAT bbx = 0.5 * (binsizex + binsizex);
|
||||
@ -248,7 +249,8 @@ void buildNeighbor(Atom *atom, Neighbor *neighbor) {
|
||||
|
||||
for(int ci = 0; ci < atom->Nclusters_local; ci++) {
|
||||
int ci_cj1 = CJ1_FROM_CI(ci);
|
||||
NeighborCluster *neighptr = &(neighbor->neighbors[ci * neighbor->maxneighs]);
|
||||
int *neighptr = &(neighbor->neighbors[ci * neighbor->maxneighs]);
|
||||
unsigned int *neighptr_imask = &(neighbor->neighbors_imask[ci * neighbor->maxneighs]);
|
||||
int n = 0, nmasked = 0;
|
||||
int ibin = atom->icluster_bin[ci];
|
||||
MD_FLOAT ibb_xmin = atom->iclusters[ci].bbminx;
|
||||
@ -325,13 +327,13 @@ void buildNeighbor(Atom *atom, Neighbor *neighbor) {
|
||||
#endif
|
||||
|
||||
if(imask == NBNXN_INTERACTION_MASK_ALL) {
|
||||
neighptr[n].cj = cj;
|
||||
neighptr[n].imask = imask;
|
||||
neighptr[n] = cj;
|
||||
neighptr_imask[n] = imask;
|
||||
} else {
|
||||
neighptr[n].cj = neighptr[nmasked].cj;
|
||||
neighptr[n].imask = neighptr[nmasked].imask;
|
||||
neighptr[nmasked].cj = cj;
|
||||
neighptr[nmasked].imask = imask;
|
||||
neighptr[n] = neighptr[nmasked];
|
||||
neighptr_imask[n] = neighptr_imask[nmasked];
|
||||
neighptr[nmasked] = cj;
|
||||
neighptr_imask[nmasked] = imask;
|
||||
nmasked++;
|
||||
}
|
||||
|
||||
@ -357,8 +359,8 @@ void buildNeighbor(Atom *atom, Neighbor *neighbor) {
|
||||
// Fill neighbor list with dummy values to fit vector width
|
||||
if(CLUSTER_N < VECTOR_WIDTH) {
|
||||
while(n % (VECTOR_WIDTH / CLUSTER_N)) {
|
||||
neighptr[n].cj = atom->dummy_cj; // Last cluster is always a dummy cluster
|
||||
neighptr[n].imask = 0;
|
||||
neighptr[n] = atom->dummy_cj; // Last cluster is always a dummy cluster
|
||||
neighptr_imask[n] = 0;
|
||||
n++;
|
||||
}
|
||||
}
|
||||
@ -378,7 +380,8 @@ void buildNeighbor(Atom *atom, Neighbor *neighbor) {
|
||||
fprintf(stdout, "RESIZE %d\n", neighbor->maxneighs);
|
||||
neighbor->maxneighs = new_maxneighs * 1.2;
|
||||
free(neighbor->neighbors);
|
||||
neighbor->neighbors = (NeighborCluster*) malloc(atom->Nmax * neighbor->maxneighs * sizeof(int));
|
||||
neighbor->neighbors = (int *) malloc(atom->Nmax * neighbor->maxneighs * sizeof(int));
|
||||
neighbor->neighbors_imask = (unsigned int *) malloc(atom->Nmax * neighbor->maxneighs * sizeof(unsigned int));
|
||||
}
|
||||
}
|
||||
|
||||
@ -433,20 +436,21 @@ void pruneNeighbor(Parameter *param, Atom *atom, Neighbor *neighbor) {
|
||||
MD_FLOAT cutsq = cutneighsq;
|
||||
|
||||
for(int ci = 0; ci < atom->Nclusters_local; ci++) {
|
||||
NeighborCluster *neighs = &neighbor->neighbors[ci * neighbor->maxneighs];
|
||||
int *neighs = &neighbor->neighbors[ci * neighbor->maxneighs];
|
||||
unsigned int *neighs_imask = &neighbor->neighbors_imask[ci * neighbor->maxneighs];
|
||||
int numneighs = neighbor->numneigh[ci];
|
||||
int numneighs_masked = neighbor->numneigh_masked[ci];
|
||||
int k = 0;
|
||||
|
||||
// Remove dummy clusters if necessary
|
||||
if(CLUSTER_N < VECTOR_WIDTH) {
|
||||
while(neighs[numneighs - 1].cj == atom->dummy_cj) {
|
||||
while(neighs[numneighs - 1] == atom->dummy_cj) {
|
||||
numneighs--;
|
||||
}
|
||||
}
|
||||
|
||||
while(k < numneighs) {
|
||||
int cj = neighs[k].cj;
|
||||
int cj = neighs[k];
|
||||
if(atomDistanceInRange(atom, ci, cj, cutsq)) {
|
||||
k++;
|
||||
} else {
|
||||
@ -461,8 +465,8 @@ void pruneNeighbor(Parameter *param, Atom *atom, Neighbor *neighbor) {
|
||||
// Readd dummy clusters if necessary
|
||||
if(CLUSTER_N < VECTOR_WIDTH) {
|
||||
while(numneighs % (VECTOR_WIDTH / CLUSTER_N)) {
|
||||
neighs[numneighs].cj = atom->dummy_cj; // Last cluster is always a dummy cluster
|
||||
neighs[numneighs].imask = 0;
|
||||
neighs[numneighs] = atom->dummy_cj; // Last cluster is always a dummy cluster
|
||||
neighs_imask[numneighs] = 0;
|
||||
numneighs++;
|
||||
}
|
||||
}
|
||||
|
@ -13,7 +13,8 @@ void traceAddresses(Parameter *param, Atom *atom, Neighbor *neighbor, int timest
|
||||
MEM_TRACER_INIT;
|
||||
INDEX_TRACER_INIT;
|
||||
int Nlocal = atom->Nlocal;
|
||||
NeighborCluster* neighs;
|
||||
int *neighs;
|
||||
unsigned int *neighs_imask;
|
||||
//MD_FLOAT* fx = atom->fx; MD_FLOAT* fy = atom->fy; MD_FLOAT* fz = atom->fz;
|
||||
|
||||
INDEX_TRACE_NATOMS(Nlocal, atom->Nghost, neighbor->maxneighs);
|
||||
@ -34,7 +35,7 @@ void traceAddresses(Parameter *param, Atom *atom, Neighbor *neighbor, int timest
|
||||
DIST_TRACE(neighs, numneighs);
|
||||
|
||||
for(int k = 0; k < numneighs; k++) {
|
||||
int j = neighs[k].cj;
|
||||
int j = neighs[k];
|
||||
MEM_TRACE(j, 'R');
|
||||
MEM_TRACE(atom_x(j), 'R');
|
||||
MEM_TRACE(atom_y(j), 'R');
|
||||
|
Loading…
Reference in New Issue
Block a user