diff --git a/gromacs/force_lj.c b/gromacs/force_lj.c index 00d35ce..ca280ea 100644 --- a/gromacs/force_lj.c +++ b/gromacs/force_lj.c @@ -45,7 +45,7 @@ static inline void gmx_load_simd_4xn_interactions( double computeForceLJ_ref(Parameter *param, Atom *atom, Neighbor *neighbor, Stats *stats) { DEBUG_MESSAGE("computeForceLJ begin\n"); int Nlocal = atom->Nlocal; - NeighborCluster* neighs; + int *neighs; MD_FLOAT cutforcesq = param->cutforce * param->cutforce; MD_FLOAT sigma6 = param->sigma6; MD_FLOAT epsilon = param->epsilon; @@ -77,7 +77,7 @@ double computeForceLJ_ref(Parameter *param, Atom *atom, Neighbor *neighbor, Stat int numneighs = neighbor->numneigh[ci]; for(int k = 0; k < numneighs; k++) { - int cj = neighs[k].cj; + int cj = neighs[k]; int cj_vec_base = CJ_VECTOR_BASE_INDEX(cj); int any = 0; MD_FLOAT *cj_x = &atom->cl_x[cj_vec_base]; @@ -158,7 +158,7 @@ double computeForceLJ_ref(Parameter *param, Atom *atom, Neighbor *neighbor, Stat double computeForceLJ_2xnn_half(Parameter *param, Atom *atom, Neighbor *neighbor, Stats *stats) { DEBUG_MESSAGE("computeForceLJ_2xnn begin\n"); int Nlocal = atom->Nlocal; - NeighborCluster* neighs; + int *neighs; MD_FLOAT cutforcesq = param->cutforce * param->cutforce; MD_FLOAT sigma6 = param->sigma6; MD_FLOAT epsilon = param->epsilon; @@ -240,9 +240,9 @@ double computeForceLJ_2xnn_half(Parameter *param, Atom *atom, Neighbor *neighbor MD_SIMD_FLOAT fiz2 = simd_zero(); for(int k = 0; k < numneighs_masked; k++) { - int cj = neighs[k].cj; + int cj = neighs[k]; int cj_vec_base = CJ_VECTOR_BASE_INDEX(cj); - //int imask = neighs[k].imask; + //int imask = neighs_imask[k]; MD_FLOAT *cj_x = &atom->cl_x[cj_vec_base]; MD_FLOAT *cj_f = &atom->cl_f[cj_vec_base]; //MD_SIMD_MASK interact0; @@ -331,7 +331,7 @@ double computeForceLJ_2xnn_half(Parameter *param, Atom *atom, Neighbor *neighbor } for(int k = numneighs_masked; k < numneighs; k++) { - int cj = neighs[k].cj; + int cj = neighs[k]; int cj_vec_base = CJ_VECTOR_BASE_INDEX(cj); MD_FLOAT *cj_x = &atom->cl_x[cj_vec_base]; MD_FLOAT *cj_f = &atom->cl_f[cj_vec_base]; @@ -401,7 +401,7 @@ double computeForceLJ_2xnn_half(Parameter *param, Atom *atom, Neighbor *neighbor double computeForceLJ_2xnn_full(Parameter *param, Atom *atom, Neighbor *neighbor, Stats *stats) { DEBUG_MESSAGE("computeForceLJ_2xnn begin\n"); int Nlocal = atom->Nlocal; - NeighborCluster* neighs; + int *neighs; MD_FLOAT cutforcesq = param->cutforce * param->cutforce; MD_FLOAT sigma6 = param->sigma6; MD_FLOAT epsilon = param->epsilon; @@ -454,9 +454,8 @@ double computeForceLJ_2xnn_full(Parameter *param, Atom *atom, Neighbor *neighbor MD_SIMD_FLOAT fiz2 = simd_zero(); for(int k = 0; k < numneighs_masked; k++) { - int cj = neighs[k].cj; + int cj = neighs[k]; int cj_vec_base = CJ_VECTOR_BASE_INDEX(cj); - int imask = neighs[k].imask; MD_FLOAT *cj_x = &atom->cl_x[cj_vec_base]; unsigned int mask0, mask1, mask2, mask3; @@ -507,7 +506,7 @@ double computeForceLJ_2xnn_full(Parameter *param, Atom *atom, Neighbor *neighbor } for(int k = numneighs_masked; k < numneighs; k++) { - int cj = neighs[k].cj; + int cj = neighs[k]; int cj_vec_base = CJ_VECTOR_BASE_INDEX(cj); MD_FLOAT *cj_x = &atom->cl_x[cj_vec_base]; @@ -570,7 +569,7 @@ double computeForceLJ_2xnn(Parameter *param, Atom *atom, Neighbor *neighbor, Sta double computeForceLJ_4xn_half(Parameter *param, Atom *atom, Neighbor *neighbor, Stats *stats) { DEBUG_MESSAGE("computeForceLJ_4xn begin\n"); int Nlocal = atom->Nlocal; - NeighborCluster* neighs; + int *neighs; MD_FLOAT cutforcesq = param->cutforce * param->cutforce; MD_FLOAT sigma6 = param->sigma6; MD_FLOAT epsilon = param->epsilon; @@ -635,9 +634,8 @@ double computeForceLJ_4xn_half(Parameter *param, Atom *atom, Neighbor *neighbor, MD_SIMD_FLOAT fiz3 = simd_zero(); for(int k = 0; k < numneighs_masked; k++) { - int cj = neighs[k].cj; + int cj = neighs[k]; int cj_vec_base = CJ_VECTOR_BASE_INDEX(cj); - int imask = neighs[k].imask; MD_FLOAT *cj_x = &atom->cl_x[cj_vec_base]; MD_FLOAT *cj_f = &atom->cl_f[cj_vec_base]; MD_SIMD_FLOAT xj_tmp = simd_load(&cj_x[CL_X_OFFSET]); @@ -741,9 +739,8 @@ double computeForceLJ_4xn_half(Parameter *param, Atom *atom, Neighbor *neighbor, } for(int k = numneighs_masked; k < numneighs; k++) { - int cj = neighs[k].cj; + int cj = neighs[k]; int cj_vec_base = CJ_VECTOR_BASE_INDEX(cj); - int imask = neighs[k].imask; MD_FLOAT *cj_x = &atom->cl_x[cj_vec_base]; MD_FLOAT *cj_f = &atom->cl_f[cj_vec_base]; MD_SIMD_FLOAT xj_tmp = simd_load(&cj_x[CL_X_OFFSET]); @@ -846,7 +843,7 @@ double computeForceLJ_4xn_half(Parameter *param, Atom *atom, Neighbor *neighbor, double computeForceLJ_4xn_full(Parameter *param, Atom *atom, Neighbor *neighbor, Stats *stats) { DEBUG_MESSAGE("computeForceLJ_4xn begin\n"); int Nlocal = atom->Nlocal; - NeighborCluster* neighs; + int *neighs; MD_FLOAT cutforcesq = param->cutforce * param->cutforce; MD_FLOAT sigma6 = param->sigma6; MD_FLOAT epsilon = param->epsilon; @@ -911,9 +908,8 @@ double computeForceLJ_4xn_full(Parameter *param, Atom *atom, Neighbor *neighbor, MD_SIMD_FLOAT fiz3 = simd_zero(); for(int k = 0; k < numneighs_masked; k++) { - int cj = neighs[k].cj; + int cj = neighs[k]; int cj_vec_base = CJ_VECTOR_BASE_INDEX(cj); - int imask = neighs[k].imask; MD_FLOAT *cj_x = &atom->cl_x[cj_vec_base]; MD_SIMD_FLOAT xj_tmp = simd_load(&cj_x[CL_X_OFFSET]); MD_SIMD_FLOAT yj_tmp = simd_load(&cj_x[CL_Y_OFFSET]); @@ -991,9 +987,8 @@ double computeForceLJ_4xn_full(Parameter *param, Atom *atom, Neighbor *neighbor, } for(int k = numneighs_masked; k < numneighs; k++) { - int cj = neighs[k].cj; + int cj = neighs[k]; int cj_vec_base = CJ_VECTOR_BASE_INDEX(cj); - int imask = neighs[k].imask; MD_FLOAT *cj_x = &atom->cl_x[cj_vec_base]; MD_SIMD_FLOAT xj_tmp = simd_load(&cj_x[CL_X_OFFSET]); MD_SIMD_FLOAT yj_tmp = simd_load(&cj_x[CL_Y_OFFSET]); diff --git a/gromacs/includes/neighbor.h b/gromacs/includes/neighbor.h index 6d3de64..668edc8 100644 --- a/gromacs/includes/neighbor.h +++ b/gromacs/includes/neighbor.h @@ -25,11 +25,6 @@ #define NBNXN_INTERACTION_MASK_DIAG_J8_0 0xf0f8fcfeU #define NBNXN_INTERACTION_MASK_DIAG_J8_1 0x0080c0e0U -typedef struct { - int cj; - unsigned int imask; -} NeighborCluster; - typedef struct { int every; int ncalls; @@ -37,7 +32,8 @@ typedef struct { int* numneigh; int* numneigh_masked; int half_neigh; - NeighborCluster* neighbors; + int* neighbors; + unsigned int* neighbors_imask; } Neighbor; extern void initNeighbor(Neighbor*, Parameter*); diff --git a/gromacs/main-stub.c b/gromacs/main-stub.c index a4dbfd3..3cd8431 100644 --- a/gromacs/main-stub.c +++ b/gromacs/main-stub.c @@ -72,7 +72,8 @@ void createNeighbors(Atom *atom, Neighbor *neighbor, int pattern, int nneighs, i const int ncj = atom->Nclusters_local / jfac; const unsigned int imask = NBNXN_INTERACTION_MASK_ALL; neighbor->numneigh = (int*) malloc(atom->Nclusters_max * sizeof(int)); - neighbor->neighbors = (NeighborCluster*) malloc(atom->Nclusters_max * maxneighs * sizeof(int)); + neighbor->neighbors = (int*) malloc(atom->Nclusters_max * maxneighs * sizeof(int)); + neighbor->neighbors_imask = (unsigned int*) malloc(atom->Nclusters_max * maxneighs * sizeof(unsigned int)); if(pattern == P_RAND && ncj <= nneighs) { fprintf(stderr, "Error: P_RAND: Number of j-clusters should be higher than number of j-cluster neighbors per i-cluster!\n"); @@ -80,7 +81,8 @@ void createNeighbors(Atom *atom, Neighbor *neighbor, int pattern, int nneighs, i } for(int ci = 0; ci < atom->Nclusters_local; ci++) { - NeighborCluster *neighptr = &(neighbor->neighbors[ci * neighbor->maxneighs]); + int *neighptr = &(neighbor->neighbors[ci * neighbor->maxneighs]); + unsigned int *neighptr_imask = &(neighbor->neighbors_imask[ci * neighbor->maxneighs]); int j = (pattern == P_SEQ) ? CJ0_FROM_CI(ci) : 0; int m = (pattern == P_SEQ) ? ncj : nneighs; int k = 0; @@ -90,26 +92,26 @@ void createNeighbors(Atom *atom, Neighbor *neighbor, int pattern, int nneighs, i int found = 0; do { int cj = rand() % ncj; - neighptr[k].cj = cj; - neighptr[k].imask = imask; + neighptr[k] = cj; + neighptr_imask[k] = imask; found = 0; for(int l = 0; l < k; l++) { - if(neighptr[l].cj == cj) { + if(neighptr[l] == cj) { found = 1; } } } while(found == 1); } else { - neighptr[k].cj = j; - neighptr[k].imask = imask; + neighptr[k] = j; + neighptr_imask[k] = imask; j = (j + 1) % m; } } for(int r = 1; r < nreps; r++) { for(int k = 0; k < nneighs; k++) { - neighptr[r * nneighs + k].cj = neighptr[k].cj; - neighptr[r * nneighs + k].imask = neighptr[k].imask; + neighptr[r * nneighs + k] = neighptr[k]; + neighptr_imask[r * nneighs + k] = neighptr_imask[k]; } } diff --git a/gromacs/neighbor.c b/gromacs/neighbor.c index 6f627bd..dfba8f5 100644 --- a/gromacs/neighbor.c +++ b/gromacs/neighbor.c @@ -232,7 +232,8 @@ void buildNeighbor(Atom *atom, Neighbor *neighbor) { if(neighbor->neighbors) free(neighbor->neighbors); neighbor->numneigh = (int*) malloc(nmax * sizeof(int)); neighbor->numneigh_masked = (int*) malloc(nmax * sizeof(int)); - neighbor->neighbors = (NeighborCluster*) malloc(nmax * neighbor->maxneighs * sizeof(NeighborCluster)); + neighbor->neighbors = (int*) malloc(nmax * neighbor->maxneighs * sizeof(int)); + neighbor->neighbors_imask = (unsigned int*) malloc(nmax * neighbor->maxneighs * sizeof(unsigned int)); } MD_FLOAT bbx = 0.5 * (binsizex + binsizex); @@ -248,7 +249,8 @@ void buildNeighbor(Atom *atom, Neighbor *neighbor) { for(int ci = 0; ci < atom->Nclusters_local; ci++) { int ci_cj1 = CJ1_FROM_CI(ci); - NeighborCluster *neighptr = &(neighbor->neighbors[ci * neighbor->maxneighs]); + int *neighptr = &(neighbor->neighbors[ci * neighbor->maxneighs]); + unsigned int *neighptr_imask = &(neighbor->neighbors_imask[ci * neighbor->maxneighs]); int n = 0, nmasked = 0; int ibin = atom->icluster_bin[ci]; MD_FLOAT ibb_xmin = atom->iclusters[ci].bbminx; @@ -325,13 +327,13 @@ void buildNeighbor(Atom *atom, Neighbor *neighbor) { #endif if(imask == NBNXN_INTERACTION_MASK_ALL) { - neighptr[n].cj = cj; - neighptr[n].imask = imask; + neighptr[n] = cj; + neighptr_imask[n] = imask; } else { - neighptr[n].cj = neighptr[nmasked].cj; - neighptr[n].imask = neighptr[nmasked].imask; - neighptr[nmasked].cj = cj; - neighptr[nmasked].imask = imask; + neighptr[n] = neighptr[nmasked]; + neighptr_imask[n] = neighptr_imask[nmasked]; + neighptr[nmasked] = cj; + neighptr_imask[nmasked] = imask; nmasked++; } @@ -357,8 +359,8 @@ void buildNeighbor(Atom *atom, Neighbor *neighbor) { // Fill neighbor list with dummy values to fit vector width if(CLUSTER_N < VECTOR_WIDTH) { while(n % (VECTOR_WIDTH / CLUSTER_N)) { - neighptr[n].cj = atom->dummy_cj; // Last cluster is always a dummy cluster - neighptr[n].imask = 0; + neighptr[n] = atom->dummy_cj; // Last cluster is always a dummy cluster + neighptr_imask[n] = 0; n++; } } @@ -378,7 +380,8 @@ void buildNeighbor(Atom *atom, Neighbor *neighbor) { fprintf(stdout, "RESIZE %d\n", neighbor->maxneighs); neighbor->maxneighs = new_maxneighs * 1.2; free(neighbor->neighbors); - neighbor->neighbors = (NeighborCluster*) malloc(atom->Nmax * neighbor->maxneighs * sizeof(int)); + neighbor->neighbors = (int *) malloc(atom->Nmax * neighbor->maxneighs * sizeof(int)); + neighbor->neighbors_imask = (unsigned int *) malloc(atom->Nmax * neighbor->maxneighs * sizeof(unsigned int)); } } @@ -433,20 +436,21 @@ void pruneNeighbor(Parameter *param, Atom *atom, Neighbor *neighbor) { MD_FLOAT cutsq = cutneighsq; for(int ci = 0; ci < atom->Nclusters_local; ci++) { - NeighborCluster *neighs = &neighbor->neighbors[ci * neighbor->maxneighs]; + int *neighs = &neighbor->neighbors[ci * neighbor->maxneighs]; + unsigned int *neighs_imask = &neighbor->neighbors_imask[ci * neighbor->maxneighs]; int numneighs = neighbor->numneigh[ci]; int numneighs_masked = neighbor->numneigh_masked[ci]; int k = 0; // Remove dummy clusters if necessary if(CLUSTER_N < VECTOR_WIDTH) { - while(neighs[numneighs - 1].cj == atom->dummy_cj) { + while(neighs[numneighs - 1] == atom->dummy_cj) { numneighs--; } } while(k < numneighs) { - int cj = neighs[k].cj; + int cj = neighs[k]; if(atomDistanceInRange(atom, ci, cj, cutsq)) { k++; } else { @@ -461,8 +465,8 @@ void pruneNeighbor(Parameter *param, Atom *atom, Neighbor *neighbor) { // Readd dummy clusters if necessary if(CLUSTER_N < VECTOR_WIDTH) { while(numneighs % (VECTOR_WIDTH / CLUSTER_N)) { - neighs[numneighs].cj = atom->dummy_cj; // Last cluster is always a dummy cluster - neighs[numneighs].imask = 0; + neighs[numneighs] = atom->dummy_cj; // Last cluster is always a dummy cluster + neighs_imask[numneighs] = 0; numneighs++; } } diff --git a/gromacs/tracing.c b/gromacs/tracing.c index c87b73d..efa284e 100644 --- a/gromacs/tracing.c +++ b/gromacs/tracing.c @@ -13,7 +13,8 @@ void traceAddresses(Parameter *param, Atom *atom, Neighbor *neighbor, int timest MEM_TRACER_INIT; INDEX_TRACER_INIT; int Nlocal = atom->Nlocal; - NeighborCluster* neighs; + int *neighs; + unsigned int *neighs_imask; //MD_FLOAT* fx = atom->fx; MD_FLOAT* fy = atom->fy; MD_FLOAT* fz = atom->fz; INDEX_TRACE_NATOMS(Nlocal, atom->Nghost, neighbor->maxneighs); @@ -34,7 +35,7 @@ void traceAddresses(Parameter *param, Atom *atom, Neighbor *neighbor, int timest DIST_TRACE(neighs, numneighs); for(int k = 0; k < numneighs; k++) { - int j = neighs[k].cj; + int j = neighs[k]; MEM_TRACE(j, 'R'); MEM_TRACE(atom_x(j), 'R'); MEM_TRACE(atom_y(j), 'R');