From 924914e4f012f229d52e5c99c30ad1392624bd14 Mon Sep 17 00:00:00 2001 From: Rafael Ravedutti Date: Thu, 25 May 2023 01:10:37 +0200 Subject: [PATCH] First changes in the supercluster code Signed-off-by: Rafael Ravedutti --- gromacs/main.c | 4 --- gromacs/neighbor.c | 64 ++++++++++++---------------------------------- 2 files changed, 16 insertions(+), 52 deletions(-) diff --git a/gromacs/main.c b/gromacs/main.c index 9851c9c..1b628cb 100644 --- a/gromacs/main.c +++ b/gromacs/main.c @@ -288,7 +288,6 @@ int main(int argc, char** argv) { #endif //defined(CUDA_TARGET) && defined(USE_SUPER_CLUSTERS) } - copyDataFromCUDADevice(&atom); updatePbc(&atom, ¶m, 0); copyDataToCUDADevice(&atom); @@ -334,9 +333,6 @@ int main(int argc, char** argv) { finalIntegrate(¶m, &atom); - - - if(!((n + 1) % param.nstat) && (n+1) < param.ntimes) { computeThermo(n + 1, ¶m, &atom); } diff --git a/gromacs/neighbor.c b/gromacs/neighbor.c index e1a2698..6c5f405 100644 --- a/gromacs/neighbor.c +++ b/gromacs/neighbor.c @@ -192,7 +192,6 @@ int atomDistanceInRange(Atom *atom, int ci, int cj, MD_FLOAT rsq) { int atomDistanceInRangeGPU(Atom *atom, int sci, int cj, MD_FLOAT rsq) { for (int ci = 0; ci < atom->siclusters[sci].nclusters; ci++) { const int icluster_idx = atom->icluster_idx[SCLUSTER_SIZE * sci + ci]; - int ci_vec_base = CI_VECTOR_BASE_INDEX(icluster_idx); int cj_vec_base = CJ_VECTOR_BASE_INDEX(cj); MD_FLOAT *ci_x = &atom->cl_x[ci_vec_base]; @@ -520,15 +519,6 @@ void buildNeighborGPU(Atom *atom, Neighbor *neighbor) { new_maxneighs = n; } } - - - - - for (int scii = 0; scii < atom->siclusters[sci].nclusters; scii++) { - //for(int ci = 0; ci < atom->Nclusters_local; ci++) { - //const int ci = atom->siclusters[sci].iclusters[scii]; - - } } if(resize) { @@ -907,6 +897,7 @@ void buildClusters(Atom *atom) { void buildClustersGPU(Atom *atom) { DEBUG_MESSAGE("buildClustersGPU start\n"); atom->Nclusters_local = 0; + atom->Nsclusters_local = 0; /* bin local atoms */ binAtoms(atom); @@ -917,59 +908,33 @@ void buildClustersGPU(Atom *atom) { int ac = 0; int nclusters = ((c + CLUSTER_M - 1) / CLUSTER_M); if(CLUSTER_N > CLUSTER_M && nclusters % 2) { nclusters++; } + const int supercluster_size = SCLUSTER_SIZE_X * SCLUSTER_SIZE_Y * SCLUSTER_SIZE_Z; + int nsclusters = ((nclusters + supercluster_size - 1) / supercluster_size); - int n_super_clusters_xy = nclusters / (SCLUSTER_SIZE_X * SCLUSTER_SIZE_Y); - if (nclusters % (SCLUSTER_SIZE_X * SCLUSTER_SIZE_Y)) n_super_clusters_xy++; - int n_super_clusters = n_super_clusters_xy / SCLUSTER_SIZE_Z; - if (n_super_clusters_xy % SCLUSTER_SIZE_Z) n_super_clusters++; - - int cl_count = 0; - for (int scl = 0; scl < n_super_clusters; scl++) { + for(int scl = 0; scl < nsclusters; scl++) { const int sci = atom->Nsclusters_local; if(sci >= atom->Nsclusters_max) { growSuperClusters(atom); } - if (cl_count >= nclusters) break; - int scl_offset = scl * SCLUSTER_SIZE * CLUSTER_M; - MD_FLOAT sc_bbminx = INFINITY, sc_bbmaxx = -INFINITY; MD_FLOAT sc_bbminy = INFINITY, sc_bbmaxy = -INFINITY; MD_FLOAT sc_bbminz = INFINITY, sc_bbmaxz = -INFINITY; + atom->siclusters[sci].nclusters = 0; - for (int scl_z = 0; scl_z < SCLUSTER_SIZE_Z; scl_z++) { - - if (cl_count >= nclusters) break; - + for(int scl_z = 0; scl_z < SCLUSTER_SIZE_Z; scl_z++) { const int atom_scl_z_offset = scl_offset + scl_z * SCLUSTER_SIZE_Y * SCLUSTER_SIZE_X * CLUSTER_M; - - - const int atom_scl_z_end_idx = MIN(atom_scl_z_offset + - SCLUSTER_SIZE_Y * SCLUSTER_SIZE_X * CLUSTER_M - 1, c - 1); - + const int atom_scl_z_end_idx = MIN(atom_scl_z_offset + SCLUSTER_SIZE_Y * SCLUSTER_SIZE_X * CLUSTER_M - 1, c - 1); sortAtomsByCoord(atom, YY, bin, atom_scl_z_offset, atom_scl_z_end_idx); - for (int scl_y = 0; scl_y < SCLUSTER_SIZE_Y; scl_y++) { - - if (cl_count >= nclusters) break; - - const int atom_scl_y_offset = scl_offset + - scl_z * SCLUSTER_SIZE_Y * SCLUSTER_SIZE_X * CLUSTER_M + - scl_y * SCLUSTER_SIZE_Y * CLUSTER_M; - - const int atom_scl_y_end_idx = MIN(atom_scl_y_offset + - SCLUSTER_SIZE_X * CLUSTER_M - 1, c - 1); - + for(int scl_y = 0; scl_y < SCLUSTER_SIZE_Y; scl_y++) { + const int atom_scl_y_offset = scl_offset + scl_z * SCLUSTER_SIZE_Y * SCLUSTER_SIZE_X * CLUSTER_M + scl_y * SCLUSTER_SIZE_Y * CLUSTER_M; + const int atom_scl_y_end_idx = MIN(atom_scl_y_offset + SCLUSTER_SIZE_X * CLUSTER_M - 1, c - 1); sortAtomsByCoord(atom, XX, bin, atom_scl_y_offset, atom_scl_y_end_idx); - for (int scl_x = 0; scl_x < SCLUSTER_SIZE_X; scl_x++) { - if (cl_count >= nclusters) break; - cl_count++; - - const int cluster_sup_idx = scl_z * SCLUSTER_SIZE_Z * SCLUSTER_SIZE_Y + - scl_y * SCLUSTER_SIZE_X + scl_x; - + for(int scl_x = 0; scl_x < SCLUSTER_SIZE_X; scl_x++) { + const int cluster_sup_idx = scl_z * SCLUSTER_SIZE_Z * SCLUSTER_SIZE_Y + scl_y * SCLUSTER_SIZE_X + scl_x; const int ci = atom->Nclusters_local; if(ci >= atom->Nclusters_max) { growClusters(atom); @@ -1008,7 +973,6 @@ void buildClustersGPU(Atom *atom) { sci_x[SCL_CL_X_OFFSET(atom->siclusters[sci].nclusters) + cii] = xtmp; sci_x[SCL_CL_Y_OFFSET(atom->siclusters[sci].nclusters) + cii] = ytmp; sci_x[SCL_CL_Z_OFFSET(atom->siclusters[sci].nclusters) + cii] = ztmp; - sci_v[SCL_CL_X_OFFSET(atom->siclusters[sci].nclusters) + cii] = atom->vx[i]; sci_v[SCL_CL_Y_OFFSET(atom->siclusters[sci].nclusters) + cii] = atom->vy[i]; sci_v[SCL_CL_Z_OFFSET(atom->siclusters[sci].nclusters) + cii] = atom->vz[i]; @@ -1027,6 +991,10 @@ void buildClustersGPU(Atom *atom) { ci_x[CL_X_OFFSET + cii] = INFINITY; ci_x[CL_Y_OFFSET + cii] = INFINITY; ci_x[CL_Z_OFFSET + cii] = INFINITY; + + sci_x[SCL_CL_X_OFFSET(atom->siclusters[sci].nclusters) + cii] = INFINITY; + sci_x[SCL_CL_Y_OFFSET(atom->siclusters[sci].nclusters) + cii] = INFINITY; + sci_x[SCL_CL_Z_OFFSET(atom->siclusters[sci].nclusters) + cii] = INFINITY; } ac++;