From d79c3c2a1d313a09b5de95ae6047231987076d33 Mon Sep 17 00:00:00 2001 From: Rafael Ravedutti Date: Thu, 10 Mar 2022 22:33:41 +0100 Subject: [PATCH] Add first working version with 4x8 config (ref kernel) Signed-off-by: Rafael Ravedutti --- gromacs/atom.c | 4 +- gromacs/force_lj.c | 10 +- gromacs/includes/atom.h | 1 + gromacs/includes/neighbor.h | 1 + gromacs/main.c | 3 + gromacs/neighbor.c | 184 +++++++++++++++++------------------- gromacs/pbc.c | 175 +++++++++++++++++----------------- 7 files changed, 191 insertions(+), 187 deletions(-) diff --git a/gromacs/atom.c b/gromacs/atom.c index b96a91e..88ee0f8 100644 --- a/gromacs/atom.c +++ b/gromacs/atom.c @@ -429,10 +429,10 @@ void growAtom(Atom *atom) { void growClusters(Atom *atom) { int nold = atom->Nclusters_max; - int jfac = MIN(1, CLUSTER_M / CLUSTER_N); // If M>N, we need to allocate more j-clusters + int jterm = MAX(1, CLUSTER_M / CLUSTER_N); // If M>N, we need to allocate more j-clusters atom->Nclusters_max += DELTA; atom->iclusters = (Cluster*) reallocate(atom->iclusters, ALIGNMENT, atom->Nclusters_max * sizeof(Cluster), nold * sizeof(Cluster)); - atom->jclusters = (Cluster*) reallocate(atom->jclusters, ALIGNMENT, atom->Nclusters_max * jfac * sizeof(Cluster), nold * jfac * sizeof(Cluster)); + atom->jclusters = (Cluster*) reallocate(atom->jclusters, ALIGNMENT, atom->Nclusters_max * jterm * sizeof(Cluster), nold * jterm * sizeof(Cluster)); atom->icluster_bin = (int*) reallocate(atom->icluster_bin, ALIGNMENT, atom->Nclusters_max * sizeof(int), nold * sizeof(int)); atom->cl_x = (MD_FLOAT*) reallocate(atom->cl_x, ALIGNMENT, atom->Nclusters_max * CLUSTER_M * 3 * sizeof(MD_FLOAT), nold * CLUSTER_M * 3 * sizeof(MD_FLOAT)); atom->cl_f = (MD_FLOAT*) reallocate(atom->cl_f, ALIGNMENT, atom->Nclusters_max * CLUSTER_M * 3 * sizeof(MD_FLOAT), nold * CLUSTER_M * 3 * sizeof(MD_FLOAT)); diff --git a/gromacs/force_lj.c b/gromacs/force_lj.c index 03d1926..99d56a9 100644 --- a/gromacs/force_lj.c +++ b/gromacs/force_lj.c @@ -55,6 +55,8 @@ double computeForceLJ_ref(Parameter *param, Atom *atom, Neighbor *neighbor, Stat #pragma omp parallel for for(int ci = 0; ci < atom->Nclusters_local; ci++) { + int ci_cj0 = CJ0_FROM_CI(ci); + int ci_cj1 = CJ1_FROM_CI(ci); int ci_vec_base = CI_VECTOR_BASE_INDEX(ci); MD_FLOAT *ci_x = &atom->cl_x[ci_vec_base]; MD_FLOAT *ci_f = &atom->cl_f[ci_vec_base]; @@ -76,7 +78,13 @@ double computeForceLJ_ref(Parameter *param, Atom *atom, Neighbor *neighbor, Stat MD_FLOAT fiz = 0; for(int cjj = 0; cjj < CLUSTER_N; cjj++) { - if(ci != cj || cii != cjj) { + #if CLUSTER_M == CLUSTER_N + if(ci_cj0 != cj || cii != cjj) { + #elif CLUSTER_M < CLUSTER_N + if(ci_cj0 != cj || cii + CLUSTER_M * (ci & 0x1) != cjj) { + #else + if((ci_cj0 != cj || cii != cjj) && (ci_cj1 != cj || cii != cjj + CLUSTER_N)) { + #endif MD_FLOAT delx = xtmp - cj_x[CL_X_OFFSET + cjj]; MD_FLOAT dely = ytmp - cj_x[CL_Y_OFFSET + cjj]; MD_FLOAT delz = ztmp - cj_x[CL_Z_OFFSET + cjj]; diff --git a/gromacs/includes/atom.h b/gromacs/includes/atom.h index 0eff9f5..6590419 100644 --- a/gromacs/includes/atom.h +++ b/gromacs/includes/atom.h @@ -117,6 +117,7 @@ typedef struct { int *cl_type; Cluster *iclusters, *jclusters; int *icluster_bin; + int dummy_cj; } Atom; extern void initAtom(Atom*); diff --git a/gromacs/includes/neighbor.h b/gromacs/includes/neighbor.h index d1fbb5f..0ba00c0 100644 --- a/gromacs/includes/neighbor.h +++ b/gromacs/includes/neighbor.h @@ -40,6 +40,7 @@ extern void buildNeighbor(Atom*, Neighbor*); extern void pruneNeighbor(Parameter*, Atom*, Neighbor*); extern void sortAtom(Atom*); extern void buildClusters(Atom*); +extern void defineJClusters(Atom*); extern void binClusters(Atom*); extern void updateSingleAtoms(Atom*); #endif diff --git a/gromacs/main.c b/gromacs/main.c index a4eda58..a36af9d 100644 --- a/gromacs/main.c +++ b/gromacs/main.c @@ -70,6 +70,7 @@ double setup(Parameter *param, Eam *eam, Atom *atom, Neighbor *neighbor, Stats * setupThermo(param, atom->Natoms); if(param->input_file == NULL) { adjustThermo(param, atom); } buildClusters(atom); + defineJClusters(atom); setupPbc(atom, param); binClusters(atom); buildNeighbor(atom, neighbor); @@ -84,6 +85,7 @@ double reneighbour(Parameter *param, Atom *atom, Neighbor *neighbor) { updateSingleAtoms(atom); updateAtomsPbc(atom, param); buildClusters(atom); + defineJClusters(atom); setupPbc(atom, param); binClusters(atom); buildNeighbor(atom, neighbor); @@ -313,6 +315,7 @@ int main(int argc, char** argv) { } timer[TOTAL] = getTimeStamp() - timer[TOTAL]; + updateSingleAtoms(&atom); computeThermo(-1, ¶m, &atom); if(param.xtc_file != NULL) { diff --git a/gromacs/neighbor.c b/gromacs/neighbor.c index 9634157..694abd7 100644 --- a/gromacs/neighbor.c +++ b/gromacs/neighbor.c @@ -201,11 +201,10 @@ int atomDistanceInRange(Atom *atom, int ci, int cj, MD_FLOAT rsq) { void buildNeighbor(Atom *atom, Neighbor *neighbor) { DEBUG_MESSAGE("buildNeighbor start\n"); - int nall = atom->Nclusters_local + atom->Nclusters_ghost; /* extend atom arrays if necessary */ - if(nall > nmax) { - nmax = nall; + if(atom->Nclusters_local > nmax) { + nmax = atom->Nclusters_local; if(neighbor->numneigh) free(neighbor->numneigh); if(neighbor->neighbors) free(neighbor->neighbors); neighbor->numneigh = (int*) malloc(nmax * sizeof(int)); @@ -307,7 +306,7 @@ void buildNeighbor(Atom *atom, Neighbor *neighbor) { // Fill neighbor list with dummy values to fit vector width if(CLUSTER_N < VECTOR_WIDTH) { while(n % (VECTOR_WIDTH / CLUSTER_N)) { - neighptr[n++] = CJ0_FROM_CI(nall - 1); // Last cluster is always a dummy cluster + neighptr[n++] = atom->dummy_cj; // Last cluster is always a dummy cluster } } @@ -332,38 +331,40 @@ void buildNeighbor(Atom *atom, Neighbor *neighbor) { /* DEBUG_MESSAGE("\ncutneighsq = %f, rbb_sq = %f\n", cutneighsq, rbb_sq); for(int ci = 0; ci < 6; ci++) { - MD_FLOAT *ciptr = cluster_pos_ptr(ci); + int ci_vec_base = CI_VECTOR_BASE_INDEX(ci); + MD_FLOAT *ci_x = &atom->cl_x[ci_vec_base]; int* neighptr = &(neighbor->neighbors[ci * neighbor->maxneighs]); DEBUG_MESSAGE("Cluster %d, bbx = {%f, %f}, bby = {%f, %f}, bbz = {%f, %f}\n", ci, - atom->clusters[ci].bbminx, - atom->clusters[ci].bbmaxx, - atom->clusters[ci].bbminy, - atom->clusters[ci].bbmaxy, - atom->clusters[ci].bbminz, - atom->clusters[ci].bbmaxz); + atom->iclusters[ci].bbminx, + atom->iclusters[ci].bbmaxx, + atom->iclusters[ci].bbminy, + atom->iclusters[ci].bbmaxy, + atom->iclusters[ci].bbminz, + atom->iclusters[ci].bbmaxz); for(int cii = 0; cii < CLUSTER_M; cii++) { - DEBUG_MESSAGE("%f, %f, %f\n", cluster_x(ciptr, cii), cluster_y(ciptr, cii), cluster_z(ciptr, cii)); + DEBUG_MESSAGE("%f, %f, %f\n", ci_x[CL_X_OFFSET + cii], ci_x[CL_Y_OFFSET + cii], ci_x[CL_Z_OFFSET + cii]); } DEBUG_MESSAGE("Neighbors:\n"); for(int k = 0; k < neighbor->numneigh[ci]; k++) { - const int cj = neighptr[k]; - MD_FLOAT *cjptr = cluster_pos_ptr(cj); + int cj = neighptr[k]; + int cj_vec_base = CJ_VECTOR_BASE_INDEX(cj); + MD_FLOAT *cj_x = &atom->cl_x[cj_vec_base]; DEBUG_MESSAGE(" Cluster %d, bbx = {%f, %f}, bby = {%f, %f}, bbz = {%f, %f}\n", cj, - atom->clusters[cj].bbminx, - atom->clusters[cj].bbmaxx, - atom->clusters[cj].bbminy, - atom->clusters[cj].bbmaxy, - atom->clusters[cj].bbminz, - atom->clusters[cj].bbmaxz); + atom->jclusters[cj].bbminx, + atom->jclusters[cj].bbmaxx, + atom->jclusters[cj].bbminy, + atom->jclusters[cj].bbmaxy, + atom->jclusters[cj].bbminz, + atom->jclusters[cj].bbmaxz); - for(int cjj = 0; cjj < CLUSTER_M; cjj++) { - DEBUG_MESSAGE(" %f, %f, %f\n", cluster_x(cjptr, cjj), cluster_y(cjptr, cjj), cluster_z(cjptr, cjj)); + for(int cjj = 0; cjj < CLUSTER_N; cjj++) { + DEBUG_MESSAGE(" %f, %f, %f\n", cj_x[CL_X_OFFSET + cjj], cj_x[CL_Y_OFFSET + cjj], cj_x[CL_Z_OFFSET + cjj]); } } } @@ -374,7 +375,6 @@ void buildNeighbor(Atom *atom, Neighbor *neighbor) { void pruneNeighbor(Parameter *param, Atom *atom, Neighbor *neighbor) { DEBUG_MESSAGE("pruneNeighbor start\n"); - int nall = atom->Nclusters_local + atom->Nclusters_ghost; //MD_FLOAT cutsq = param->cutforce * param->cutforce; MD_FLOAT cutsq = cutneighsq; @@ -385,7 +385,7 @@ void pruneNeighbor(Parameter *param, Atom *atom, Neighbor *neighbor) { // Remove dummy clusters if necessary if(CLUSTER_N < VECTOR_WIDTH) { - while(neighs[numneighs - 1] == CJ0_FROM_CI(nall - 1)) { + while(neighs[numneighs - 1] == atom->dummy_cj) { numneighs--; } } @@ -403,7 +403,7 @@ void pruneNeighbor(Parameter *param, Atom *atom, Neighbor *neighbor) { // Readd dummy clusters if necessary if(CLUSTER_N < VECTOR_WIDTH) { while(numneighs % (VECTOR_WIDTH / CLUSTER_N)) { - neighs[numneighs++] = CJ0_FROM_CI(nall - 1); // Last cluster is always a dummy cluster + neighs[numneighs++] = atom->dummy_cj; // Last cluster is always a dummy cluster } } @@ -617,7 +617,7 @@ void buildClusters(Atom *atom) { void defineJClusters(Atom *atom) { DEBUG_MESSAGE("defineJClusters start\n"); - for(int ci = 0; ci < atom->Nclusters_local + atom->Nclusters_ghost; ci++) { + for(int ci = 0; ci < atom->Nclusters_local; ci++) { int cj0 = CJ0_FROM_CI(ci); if(CLUSTER_M == CLUSTER_N) { @@ -687,13 +687,14 @@ void defineJClusters(Atom *atom) { } else { if(ci % 2 == 0) { - atom->jclusters[cj0].bbminx = MIN(atom->iclusters[ci].bbminx, atom->iclusters[ci + 1].bbminx); - atom->jclusters[cj0].bbmaxx = MAX(atom->iclusters[ci].bbmaxx, atom->iclusters[ci + 1].bbmaxx); - atom->jclusters[cj0].bbminy = MIN(atom->iclusters[ci].bbminy, atom->iclusters[ci + 1].bbminy); - atom->jclusters[cj0].bbmaxy = MAX(atom->iclusters[ci].bbmaxy, atom->iclusters[ci + 1].bbmaxy); - atom->jclusters[cj0].bbminz = MIN(atom->iclusters[ci].bbminz, atom->iclusters[ci + 1].bbminz); - atom->jclusters[cj0].bbmaxz = MAX(atom->iclusters[ci].bbmaxz, atom->iclusters[ci + 1].bbmaxz); - atom->jclusters[cj0].natoms = atom->iclusters[ci].natoms + atom->iclusters[ci + 1].natoms; + const int ci1 = ci + 1; + atom->jclusters[cj0].bbminx = MIN(atom->iclusters[ci].bbminx, atom->iclusters[ci1].bbminx); + atom->jclusters[cj0].bbmaxx = MAX(atom->iclusters[ci].bbmaxx, atom->iclusters[ci1].bbmaxx); + atom->jclusters[cj0].bbminy = MIN(atom->iclusters[ci].bbminy, atom->iclusters[ci1].bbminy); + atom->jclusters[cj0].bbmaxy = MAX(atom->iclusters[ci].bbmaxy, atom->iclusters[ci1].bbmaxy); + atom->jclusters[cj0].bbminz = MIN(atom->iclusters[ci].bbminz, atom->iclusters[ci1].bbminz); + atom->jclusters[cj0].bbmaxz = MAX(atom->iclusters[ci].bbmaxz, atom->iclusters[ci1].bbmaxz); + atom->jclusters[cj0].natoms = atom->iclusters[ci].natoms + atom->iclusters[ci1].natoms; } } } @@ -726,12 +727,8 @@ void binClusters(Atom *atom) { */ const int nlocal = atom->Nclusters_local; - const int ncji = MAX(1, CLUSTER_M / CLUSTER_N); - if(ncji > 2) { - fprintf(stderr, "binClusters(): ncji cannot be higher than 2!"); - } - - defineJClusters(atom); + const int jfac = MAX(1, CLUSTER_N / CLUSTER_M); + const int ncj = atom->Nclusters_local / jfac; int resize = 1; while(resize > 0) { @@ -764,74 +761,65 @@ void binClusters(Atom *atom) { } for(int cg = 0; cg < atom->Nclusters_ghost && !resize; cg++) { - // Assure we add this j-cluster only once in the bin - if(!(CLUSTER_M < CLUSTER_N && cg % 2)) { - const int ci = nlocal + cg; - int ix = -1, iy = -1; - MD_FLOAT xtmp, ytmp; + const int cj = ncj + cg; + int ix = -1, iy = -1; + MD_FLOAT xtmp, ytmp; - for(int cji = 0; cji < ncji && !resize; cji++) { - int cj = (cji == 0) ? CJ0_FROM_CI(ci) : CJ1_FROM_CI(ci); - const int n = atom->jclusters[cj].natoms; + if(atom->jclusters[cj].natoms > 0) { + int cj_vec_base = CJ_VECTOR_BASE_INDEX(cj); + MD_FLOAT *cj_x = &atom->cl_x[cj_vec_base]; + MD_FLOAT cj_minz = atom->jclusters[cj].bbminz; - if(n > 0) { - int cj_vec_base = CJ_VECTOR_BASE_INDEX(cj); - MD_FLOAT *cj_x = &atom->cl_x[cj_vec_base]; - MD_FLOAT cj_minz = atom->jclusters[cj].bbminz; + xtmp = cj_x[CL_X_OFFSET + 0]; + ytmp = cj_x[CL_Y_OFFSET + 0]; + coord2bin2D(xtmp, ytmp, &ix, &iy); + ix = MAX(MIN(ix, mbinx - 1), 0); + iy = MAX(MIN(iy, mbiny - 1), 0); + for(int cjj = 1; cjj < atom->jclusters[cj].natoms; cjj++) { + int nix, niy; + xtmp = cj_x[CL_X_OFFSET + cjj]; + ytmp = cj_x[CL_Y_OFFSET + cjj]; + coord2bin2D(xtmp, ytmp, &nix, &niy); + nix = MAX(MIN(nix, mbinx - 1), 0); + niy = MAX(MIN(niy, mbiny - 1), 0); - xtmp = cj_x[CL_X_OFFSET + 0]; - ytmp = cj_x[CL_Y_OFFSET + 0]; - coord2bin2D(xtmp, ytmp, &ix, &iy); - ix = MAX(MIN(ix, mbinx - 1), 0); - iy = MAX(MIN(iy, mbiny - 1), 0); - for(int cjj = 1; cjj < n; cjj++) { - int nix, niy; - xtmp = cj_x[CL_X_OFFSET + cjj]; - ytmp = cj_x[CL_Y_OFFSET + cjj]; - coord2bin2D(xtmp, ytmp, &nix, &niy); - nix = MAX(MIN(nix, mbinx - 1), 0); - niy = MAX(MIN(niy, mbiny - 1), 0); + // Always put the cluster on the bin of its innermost atom so + // the cluster should be closer to local clusters + if(atom->PBCx[cg] > 0 && ix > nix) { ix = nix; } + if(atom->PBCx[cg] < 0 && ix < nix) { ix = nix; } + if(atom->PBCy[cg] > 0 && iy > niy) { iy = niy; } + if(atom->PBCy[cg] < 0 && iy < niy) { iy = niy; } + } - // Always put the cluster on the bin of its innermost atom so - // the cluster should be closer to local clusters - if(atom->PBCx[cg] > 0 && ix > nix) { ix = nix; } - if(atom->PBCx[cg] < 0 && ix < nix) { ix = nix; } - if(atom->PBCy[cg] > 0 && iy > niy) { iy = niy; } - if(atom->PBCy[cg] < 0 && iy < niy) { iy = niy; } - } + int bin = iy * mbinx + ix + 1; + int c = bin_nclusters[bin]; + if(c < clusters_per_bin) { + // Insert the current ghost cluster in the bin keeping clusters + // sorted by z coordinate + int inserted = 0; + for(int i = 0; i < c; i++) { + int last_cl = bin_clusters[bin * clusters_per_bin + i]; + if(atom->jclusters[last_cl].bbminz > cj_minz) { + bin_clusters[bin * clusters_per_bin + i] = cj; - int bin = iy * mbinx + ix + 1; - int c = bin_nclusters[bin]; - if(c < clusters_per_bin) { - // Insert the current ghost cluster in the bin keeping clusters - // sorted by z coordinate - int inserted = 0; - for(int i = 0; i < c; i++) { - int last_cl = bin_clusters[bin * clusters_per_bin + i]; - if(atom->jclusters[last_cl].bbminz > cj_minz) { - bin_clusters[bin * clusters_per_bin + i] = cj; - - for(int j = i + 1; j <= c; j++) { - int tmp = bin_clusters[bin * clusters_per_bin + j]; - bin_clusters[bin * clusters_per_bin + j] = last_cl; - last_cl = tmp; - } - - inserted = 1; - break; - } + for(int j = i + 1; j <= c; j++) { + int tmp = bin_clusters[bin * clusters_per_bin + j]; + bin_clusters[bin * clusters_per_bin + j] = last_cl; + last_cl = tmp; } - if(!inserted) { - bin_clusters[bin * clusters_per_bin + c] = cj; - } - - atom->icluster_bin[ci] = bin; - bin_nclusters[bin]++; - } else { - resize = 1; + inserted = 1; + break; } } + + if(!inserted) { + bin_clusters[bin * clusters_per_bin + c] = cj; + } + + bin_nclusters[bin]++; + } else { + resize = 1; } } } diff --git a/gromacs/pbc.c b/gromacs/pbc.c index 03d669b..301ee28 100644 --- a/gromacs/pbc.c +++ b/gromacs/pbc.c @@ -47,30 +47,30 @@ void initPbc(Atom* atom) { /* uses mapping created in setupPbc */ void updatePbc(Atom *atom, Parameter *param, int firstUpdate) { DEBUG_MESSAGE("updatePbc start\n"); - int *border_map = atom->border_map; - int nlocal = atom->Nclusters_local; + int jfac = MAX(1, CLUSTER_N / CLUSTER_M); + int ncj = atom->Nclusters_local / jfac; MD_FLOAT xprd = param->xprd; MD_FLOAT yprd = param->yprd; MD_FLOAT zprd = param->zprd; for(int cg = 0; cg < atom->Nclusters_ghost; cg++) { - const int ci = nlocal + cg; - int ci_vec_base = CI_VECTOR_BASE_INDEX(ci); - int bmap_vec_base = CI_VECTOR_BASE_INDEX(border_map[cg]); - MD_FLOAT *ci_x = &atom->cl_x[ci_vec_base]; + const int cj = ncj + cg; + int cj_vec_base = CJ_VECTOR_BASE_INDEX(cj); + int bmap_vec_base = CJ_VECTOR_BASE_INDEX(atom->border_map[cg]); + MD_FLOAT *cj_x = &atom->cl_x[cj_vec_base]; MD_FLOAT *bmap_x = &atom->cl_x[bmap_vec_base]; MD_FLOAT bbminx = INFINITY, bbmaxx = -INFINITY; MD_FLOAT bbminy = INFINITY, bbmaxy = -INFINITY; MD_FLOAT bbminz = INFINITY, bbmaxz = -INFINITY; - for(int cii = 0; cii < atom->iclusters[ci].natoms; cii++) { - MD_FLOAT xtmp = bmap_x[CL_X_OFFSET + cii] + atom->PBCx[cg] * xprd; - MD_FLOAT ytmp = bmap_x[CL_Y_OFFSET + cii] + atom->PBCy[cg] * yprd; - MD_FLOAT ztmp = bmap_x[CL_Z_OFFSET + cii] + atom->PBCz[cg] * zprd; + for(int cjj = 0; cjj < atom->jclusters[cj].natoms; cjj++) { + MD_FLOAT xtmp = bmap_x[CL_X_OFFSET + cjj] + atom->PBCx[cg] * xprd; + MD_FLOAT ytmp = bmap_x[CL_Y_OFFSET + cjj] + atom->PBCy[cg] * yprd; + MD_FLOAT ztmp = bmap_x[CL_Z_OFFSET + cjj] + atom->PBCz[cg] * zprd; - ci_x[CL_X_OFFSET + cii] = xtmp; - ci_x[CL_Y_OFFSET + cii] = ytmp; - ci_x[CL_Z_OFFSET + cii] = ztmp; + cj_x[CL_X_OFFSET + cjj] = xtmp; + cj_x[CL_Y_OFFSET + cjj] = ytmp; + cj_x[CL_Z_OFFSET + cjj] = ztmp; if(firstUpdate) { // TODO: To create the bounding boxes faster, we can use SIMD operations @@ -84,18 +84,18 @@ void updatePbc(Atom *atom, Parameter *param, int firstUpdate) { } if(firstUpdate) { - for(int cii = atom->iclusters[ci].natoms; cii < CLUSTER_M; cii++) { - ci_x[CL_X_OFFSET + cii] = INFINITY; - ci_x[CL_Y_OFFSET + cii] = INFINITY; - ci_x[CL_Z_OFFSET + cii] = INFINITY; + for(int cjj = atom->jclusters[cj].natoms; cjj < CLUSTER_N; cjj++) { + cj_x[CL_X_OFFSET + cjj] = INFINITY; + cj_x[CL_Y_OFFSET + cjj] = INFINITY; + cj_x[CL_Z_OFFSET + cjj] = INFINITY; } - atom->iclusters[ci].bbminx = bbminx; - atom->iclusters[ci].bbmaxx = bbmaxx; - atom->iclusters[ci].bbminy = bbminy; - atom->iclusters[ci].bbmaxy = bbmaxy; - atom->iclusters[ci].bbminz = bbminz; - atom->iclusters[ci].bbmaxz = bbmaxz; + atom->jclusters[cj].bbminx = bbminx; + atom->jclusters[cj].bbmaxx = bbmaxx; + atom->jclusters[cj].bbminy = bbminy; + atom->jclusters[cj].bbmaxy = bbmaxy; + atom->jclusters[cj].bbminz = bbminz; + atom->jclusters[cj].bbmaxz = bbmaxz; } } @@ -136,18 +136,18 @@ void updateAtomsPbc(Atom *atom, Parameter *param) { * that are then enforced in updatePbc */ #define ADDGHOST(dx,dy,dz); \ Nghost++; \ - const int cg = atom->Nclusters_local + Nghost; \ - const int natoms_ci = atom->iclusters[ci].natoms; \ - border_map[Nghost] = ci; \ + const int cg = ncj + Nghost; \ + const int cj_natoms = atom->jclusters[cj].natoms; \ + atom->border_map[Nghost] = cj; \ atom->PBCx[Nghost] = dx; \ atom->PBCy[Nghost] = dy; \ atom->PBCz[Nghost] = dz; \ - atom->iclusters[cg].natoms = natoms_ci; \ - Nghost_atoms += natoms_ci; \ - int ci_sca_base = CI_SCALAR_BASE_INDEX(ci); \ - int cg_sca_base = CI_SCALAR_BASE_INDEX(cg); \ - for(int cii = 0; cii < natoms_ci; cii++) { \ - atom->cl_type[cg_sca_base + cii] = atom->cl_type[ci_sca_base + cii]; \ + atom->jclusters[cg].natoms = cj_natoms; \ + Nghost_atoms += cj_natoms; \ + int cj_sca_base = CJ_SCALAR_BASE_INDEX(cj); \ + int cg_sca_base = CJ_SCALAR_BASE_INDEX(cg); \ + for(int cjj = 0; cjj < cj_natoms; cjj++) { \ + atom->cl_type[cg_sca_base + cjj] = atom->cl_type[cj_sca_base + cjj]; \ } /* internal subroutines */ @@ -163,77 +163,80 @@ void growPbc(Atom* atom) { void setupPbc(Atom *atom, Parameter *param) { DEBUG_MESSAGE("setupPbc start\n"); - int *border_map = atom->border_map; MD_FLOAT xprd = param->xprd; MD_FLOAT yprd = param->yprd; MD_FLOAT zprd = param->zprd; MD_FLOAT Cutneigh = param->cutneigh; + int jfac = MAX(1, CLUSTER_N / CLUSTER_M); + int ncj = atom->Nclusters_local / jfac; int Nghost = -1; int Nghost_atoms = 0; - for(int ci = 0; ci < atom->Nclusters_local; ci++) { - if (atom->Nclusters_local + Nghost + 7 >= atom->Nclusters_max) { - growClusters(atom); + for(int cj = 0; cj < ncj; cj++) { + if(atom->jclusters[cj].natoms > 0) { + if(atom->Nclusters_local + (Nghost + 7) * jfac >= atom->Nclusters_max) { + growClusters(atom); + } + + if((Nghost + 7) * jfac >= NmaxGhost) { + growPbc(atom); + } + + MD_FLOAT bbminx = atom->jclusters[cj].bbminx; + MD_FLOAT bbmaxx = atom->jclusters[cj].bbmaxx; + MD_FLOAT bbminy = atom->jclusters[cj].bbminy; + MD_FLOAT bbmaxy = atom->jclusters[cj].bbmaxy; + MD_FLOAT bbminz = atom->jclusters[cj].bbminz; + MD_FLOAT bbmaxz = atom->jclusters[cj].bbmaxz; + + /* Setup ghost atoms */ + /* 6 planes */ + if (bbminx < Cutneigh) { ADDGHOST(+1,0,0); } + if (bbmaxx >= (xprd-Cutneigh)) { ADDGHOST(-1,0,0); } + if (bbminy < Cutneigh) { ADDGHOST(0,+1,0); } + if (bbmaxy >= (yprd-Cutneigh)) { ADDGHOST(0,-1,0); } + if (bbminz < Cutneigh) { ADDGHOST(0,0,+1); } + if (bbmaxz >= (zprd-Cutneigh)) { ADDGHOST(0,0,-1); } + /* 8 corners */ + if (bbminx < Cutneigh && bbminy < Cutneigh && bbminz < Cutneigh) { ADDGHOST(+1,+1,+1); } + if (bbminx < Cutneigh && bbmaxy >= (yprd-Cutneigh) && bbminz < Cutneigh) { ADDGHOST(+1,-1,+1); } + if (bbminx < Cutneigh && bbminy < Cutneigh && bbmaxz >= (zprd-Cutneigh)) { ADDGHOST(+1,+1,-1); } + if (bbminx < Cutneigh && bbmaxy >= (yprd-Cutneigh) && bbmaxz >= (zprd-Cutneigh)) { ADDGHOST(+1,-1,-1); } + if (bbmaxx >= (xprd-Cutneigh) && bbminy < Cutneigh && bbminz < Cutneigh) { ADDGHOST(-1,+1,+1); } + if (bbmaxx >= (xprd-Cutneigh) && bbmaxy >= (yprd-Cutneigh) && bbminz < Cutneigh) { ADDGHOST(-1,-1,+1); } + if (bbmaxx >= (xprd-Cutneigh) && bbminy < Cutneigh && bbmaxz >= (zprd-Cutneigh)) { ADDGHOST(-1,+1,-1); } + if (bbmaxx >= (xprd-Cutneigh) && bbmaxy >= (yprd-Cutneigh) && bbmaxz >= (zprd-Cutneigh)) { ADDGHOST(-1,-1,-1); } + /* 12 edges */ + if (bbminx < Cutneigh && bbminz < Cutneigh) { ADDGHOST(+1,0,+1); } + if (bbminx < Cutneigh && bbmaxz >= (zprd-Cutneigh)) { ADDGHOST(+1,0,-1); } + if (bbmaxx >= (xprd-Cutneigh) && bbminz < Cutneigh) { ADDGHOST(-1,0,+1); } + if (bbmaxx >= (xprd-Cutneigh) && bbmaxz >= (zprd-Cutneigh)) { ADDGHOST(-1,0,-1); } + if (bbminy < Cutneigh && bbminz < Cutneigh) { ADDGHOST(0,+1,+1); } + if (bbminy < Cutneigh && bbmaxz >= (zprd-Cutneigh)) { ADDGHOST(0,+1,-1); } + if (bbmaxy >= (yprd-Cutneigh) && bbminz < Cutneigh) { ADDGHOST(0,-1,+1); } + if (bbmaxy >= (yprd-Cutneigh) && bbmaxz >= (zprd-Cutneigh)) { ADDGHOST(0,-1,-1); } + if (bbminy < Cutneigh && bbminx < Cutneigh) { ADDGHOST(+1,+1,0); } + if (bbminy < Cutneigh && bbmaxx >= (xprd-Cutneigh)) { ADDGHOST(-1,+1,0); } + if (bbmaxy >= (yprd-Cutneigh) && bbminx < Cutneigh) { ADDGHOST(+1,-1,0); } + if (bbmaxy >= (yprd-Cutneigh) && bbmaxx >= (xprd-Cutneigh)) { ADDGHOST(-1,-1,0); } } - - if (Nghost + 7 >= NmaxGhost) { - growPbc(atom); - border_map = atom->border_map; - } - - MD_FLOAT bbminx = atom->iclusters[ci].bbminx; - MD_FLOAT bbmaxx = atom->iclusters[ci].bbmaxx; - MD_FLOAT bbminy = atom->iclusters[ci].bbminy; - MD_FLOAT bbmaxy = atom->iclusters[ci].bbmaxy; - MD_FLOAT bbminz = atom->iclusters[ci].bbminz; - MD_FLOAT bbmaxz = atom->iclusters[ci].bbmaxz; - - /* Setup ghost atoms */ - /* 6 planes */ - if (bbminx < Cutneigh) { ADDGHOST(+1,0,0); } - if (bbmaxx >= (xprd-Cutneigh)) { ADDGHOST(-1,0,0); } - if (bbminy < Cutneigh) { ADDGHOST(0,+1,0); } - if (bbmaxy >= (yprd-Cutneigh)) { ADDGHOST(0,-1,0); } - if (bbminz < Cutneigh) { ADDGHOST(0,0,+1); } - if (bbmaxz >= (zprd-Cutneigh)) { ADDGHOST(0,0,-1); } - /* 8 corners */ - if (bbminx < Cutneigh && bbminy < Cutneigh && bbminz < Cutneigh) { ADDGHOST(+1,+1,+1); } - if (bbminx < Cutneigh && bbmaxy >= (yprd-Cutneigh) && bbminz < Cutneigh) { ADDGHOST(+1,-1,+1); } - if (bbminx < Cutneigh && bbminy < Cutneigh && bbmaxz >= (zprd-Cutneigh)) { ADDGHOST(+1,+1,-1); } - if (bbminx < Cutneigh && bbmaxy >= (yprd-Cutneigh) && bbmaxz >= (zprd-Cutneigh)) { ADDGHOST(+1,-1,-1); } - if (bbmaxx >= (xprd-Cutneigh) && bbminy < Cutneigh && bbminz < Cutneigh) { ADDGHOST(-1,+1,+1); } - if (bbmaxx >= (xprd-Cutneigh) && bbmaxy >= (yprd-Cutneigh) && bbminz < Cutneigh) { ADDGHOST(-1,-1,+1); } - if (bbmaxx >= (xprd-Cutneigh) && bbminy < Cutneigh && bbmaxz >= (zprd-Cutneigh)) { ADDGHOST(-1,+1,-1); } - if (bbmaxx >= (xprd-Cutneigh) && bbmaxy >= (yprd-Cutneigh) && bbmaxz >= (zprd-Cutneigh)) { ADDGHOST(-1,-1,-1); } - /* 12 edges */ - if (bbminx < Cutneigh && bbminz < Cutneigh) { ADDGHOST(+1,0,+1); } - if (bbminx < Cutneigh && bbmaxz >= (zprd-Cutneigh)) { ADDGHOST(+1,0,-1); } - if (bbmaxx >= (xprd-Cutneigh) && bbminz < Cutneigh) { ADDGHOST(-1,0,+1); } - if (bbmaxx >= (xprd-Cutneigh) && bbmaxz >= (zprd-Cutneigh)) { ADDGHOST(-1,0,-1); } - if (bbminy < Cutneigh && bbminz < Cutneigh) { ADDGHOST(0,+1,+1); } - if (bbminy < Cutneigh && bbmaxz >= (zprd-Cutneigh)) { ADDGHOST(0,+1,-1); } - if (bbmaxy >= (yprd-Cutneigh) && bbminz < Cutneigh) { ADDGHOST(0,-1,+1); } - if (bbmaxy >= (yprd-Cutneigh) && bbmaxz >= (zprd-Cutneigh)) { ADDGHOST(0,-1,-1); } - if (bbminy < Cutneigh && bbminx < Cutneigh) { ADDGHOST(+1,+1,0); } - if (bbminy < Cutneigh && bbmaxx >= (xprd-Cutneigh)) { ADDGHOST(-1,+1,0); } - if (bbmaxy >= (yprd-Cutneigh) && bbminx < Cutneigh) { ADDGHOST(+1,-1,0); } - if (bbmaxy >= (yprd-Cutneigh) && bbmaxx >= (xprd-Cutneigh)) { ADDGHOST(-1,-1,0); } } - if(atom->Nclusters_local + Nghost + 2 >= atom->Nclusters_max) { + if(ncj + (Nghost + 1) * jfac >= atom->Nclusters_max) { growClusters(atom); } // Add dummy cluster at the end - int ci_vec_base = CI_VECTOR_BASE_INDEX(atom->Nclusters_local + Nghost + 1); - MD_FLOAT *ci_x = &atom->cl_x[ci_vec_base]; - for(int cii = 0; cii < MAX(CLUSTER_M, CLUSTER_N); cii++) { - ci_x[CL_X_OFFSET + cii] = INFINITY; - ci_x[CL_Y_OFFSET + cii] = INFINITY; - ci_x[CL_Z_OFFSET + cii] = INFINITY; + int cj_vec_base = CJ_VECTOR_BASE_INDEX(ncj + Nghost + 1); + MD_FLOAT *cj_x = &atom->cl_x[cj_vec_base]; + for(int cjj = 0; cjj < CLUSTER_N; cjj++) { + cj_x[CL_X_OFFSET + cjj] = INFINITY; + cj_x[CL_Y_OFFSET + cjj] = INFINITY; + cj_x[CL_Z_OFFSET + cjj] = INFINITY; } // increase by one to make it the ghost atom count + atom->dummy_cj = ncj + Nghost + 1; atom->Nghost = Nghost_atoms; atom->Nclusters_ghost = Nghost + 1; atom->Nclusters = atom->Nclusters_local + Nghost + 1;