Neighbor list preparation

This commit is contained in:
Andropov Arsenii 2023-05-23 16:25:00 +02:00
parent 182c065fe2
commit 055a009dbd
5 changed files with 228 additions and 93 deletions

View File

@ -121,7 +121,7 @@ void copyDataToCUDADevice(Atom *atom) {
memcpyToGPU(cuda_PBCz, atom->PBCz, atom->Nclusters_ghost * sizeof(int)); memcpyToGPU(cuda_PBCz, atom->PBCz, atom->Nclusters_ghost * sizeof(int));
#ifdef USE_SUPER_CLUSTERS #ifdef USE_SUPER_CLUSTERS
alignDataToSuperclusters(atom); //alignDataToSuperclusters(atom);
if (cuda_max_scl < atom->Nsclusters_max) { if (cuda_max_scl < atom->Nsclusters_max) {
cuda_assert("cudaDeviceFree", cudaFree(cuda_scl_x)); cuda_assert("cudaDeviceFree", cudaFree(cuda_scl_x));
@ -158,7 +158,7 @@ void copyDataFromCUDADevice(Atom *atom) {
memcpyFromGPU(atom->scl_v, cuda_scl_v, atom->Nsclusters_max * SCLUSTER_M * 3 * sizeof(MD_FLOAT)); memcpyFromGPU(atom->scl_v, cuda_scl_v, atom->Nsclusters_max * SCLUSTER_M * 3 * sizeof(MD_FLOAT));
memcpyFromGPU(atom->scl_f, cuda_scl_f, atom->Nsclusters_max * SCLUSTER_M * 3 * sizeof(MD_FLOAT)); memcpyFromGPU(atom->scl_f, cuda_scl_f, atom->Nsclusters_max * SCLUSTER_M * 3 * sizeof(MD_FLOAT));
alignDataFromSuperclusters(atom); //alignDataFromSuperclusters(atom);
#endif //USE_SUPER_CLUSTERS #endif //USE_SUPER_CLUSTERS
DEBUG_MESSAGE("copyDataFromCUDADevice stop\r\n"); DEBUG_MESSAGE("copyDataFromCUDADevice stop\r\n");

View File

@ -74,6 +74,7 @@
# define CI_BASE_INDEX(a,b) ((a) * CLUSTER_N * (b)) # define CI_BASE_INDEX(a,b) ((a) * CLUSTER_N * (b))
# define CJ_BASE_INDEX(a,b) ((a) * CLUSTER_N * (b)) # define CJ_BASE_INDEX(a,b) ((a) * CLUSTER_N * (b))
#ifdef USE_SUPER_CLUSTERS #ifdef USE_SUPER_CLUSTERS
# define CJ1_FROM_SCI(a) (a)
# define SCI_BASE_INDEX(a,b) ((a) * CLUSTER_N * SCLUSTER_SIZE * (b)) # define SCI_BASE_INDEX(a,b) ((a) * CLUSTER_N * SCLUSTER_SIZE * (b))
# define SCJ_BASE_INDEX(a,b) ((a) * CLUSTER_N * SCLUSTER_SIZE * (b)) # define SCJ_BASE_INDEX(a,b) ((a) * CLUSTER_N * SCLUSTER_SIZE * (b))
#endif //USE_SUPER_CLUSTERS #endif //USE_SUPER_CLUSTERS

View File

@ -78,8 +78,8 @@ double setup(Parameter *param, Eam *eam, Atom *atom, Neighbor *neighbor, Stats *
#endif //defined(CUDA_TARGET) && defined(USE_SUPER_CLUSTERS) #endif //defined(CUDA_TARGET) && defined(USE_SUPER_CLUSTERS)
defineJClusters(atom); defineJClusters(atom);
#if defined(CUDA_TARGET) && defined(USE_SUPER_CLUSTERS) #if defined(CUDA_TARGET) && defined(USE_SUPER_CLUSTERS)
//setupPbcGPU(atom, param); setupPbcGPU(atom, param);
setupPbc(atom, param); //setupPbc(atom, param);
#else #else
setupPbc(atom, param); setupPbc(atom, param);
#endif //defined(CUDA_TARGET) && defined(USE_SUPER_CLUSTERS) #endif //defined(CUDA_TARGET) && defined(USE_SUPER_CLUSTERS)

View File

@ -189,6 +189,30 @@ int atomDistanceInRange(Atom *atom, int ci, int cj, MD_FLOAT rsq) {
return 0; return 0;
} }
int atomDistanceInRangeGPU(Atom *atom, int sci, int cj, MD_FLOAT rsq) {
for (int ci = 0; ci < atom->siclusters[sci].nclusters; ci++) {
const int icluster_idx = atom->icluster_idx[SCLUSTER_SIZE * sci + ci];
int ci_vec_base = CI_VECTOR_BASE_INDEX(icluster_idx);
int cj_vec_base = CJ_VECTOR_BASE_INDEX(cj);
MD_FLOAT *ci_x = &atom->cl_x[ci_vec_base];
MD_FLOAT *cj_x = &atom->cl_x[cj_vec_base];
for(int cii = 0; cii < atom->iclusters[icluster_idx].natoms; cii++) {
for(int cjj = 0; cjj < atom->jclusters[cj].natoms; cjj++) {
MD_FLOAT delx = ci_x[CL_X_OFFSET + cii] - cj_x[CL_X_OFFSET + cjj];
MD_FLOAT dely = ci_x[CL_Y_OFFSET + cii] - cj_x[CL_Y_OFFSET + cjj];
MD_FLOAT delz = ci_x[CL_Z_OFFSET + cii] - cj_x[CL_Z_OFFSET + cjj];
if(delx * delx + dely * dely + delz * delz < rsq) {
return 1;
}
}
}
}
return 0;
}
void buildNeighbor(Atom *atom, Neighbor *neighbor) { void buildNeighbor(Atom *atom, Neighbor *neighbor) {
DEBUG_MESSAGE("buildNeighbor start\n"); DEBUG_MESSAGE("buildNeighbor start\n");
@ -375,9 +399,8 @@ void buildNeighborGPU(Atom *atom, Neighbor *neighbor) {
DEBUG_MESSAGE("buildNeighborGPU start\n"); DEBUG_MESSAGE("buildNeighborGPU start\n");
/* extend atom arrays if necessary */ /* extend atom arrays if necessary */
if(atom->Nclusters_local > nmax) { if(atom->Nsclusters_local > nmax) {
nmax = atom->Nclusters_local; nmax = atom->Nsclusters_local;
//nmax = atom->Nsclusters_local;
if(neighbor->numneigh) free(neighbor->numneigh); if(neighbor->numneigh) free(neighbor->numneigh);
if(neighbor->neighbors) free(neighbor->neighbors); if(neighbor->neighbors) free(neighbor->neighbors);
neighbor->numneigh = (int*) malloc(nmax * sizeof(int)); neighbor->numneigh = (int*) malloc(nmax * sizeof(int));
@ -396,21 +419,16 @@ void buildNeighborGPU(Atom *atom, Neighbor *neighbor) {
resize = 0; resize = 0;
for (int sci = 0; sci < atom->Nsclusters_local; sci++) { for (int sci = 0; sci < atom->Nsclusters_local; sci++) {
for (int scii = 0; scii < atom->siclusters[sci].nclusters; scii++) { int ci_cj1 = CJ1_FROM_SCI(sci);
//for(int ci = 0; ci < atom->Nclusters_local; ci++) { int *neighptr = &(neighbor->neighbors[sci * neighbor->maxneighs]);
//const int ci = atom->siclusters[sci].iclusters[scii];
const int ci = atom->icluster_idx[SCLUSTER_SIZE * sci + scii];
int ci_cj1 = CJ1_FROM_CI(ci);
int *neighptr = &(neighbor->neighbors[ci * neighbor->maxneighs]);
//int *neighptr = &(neighbor->neighbors[sci * neighbor->maxneighs]);
int n = 0; int n = 0;
int ibin = atom->icluster_bin[ci]; int ibin = atom->sicluster_bin[sci];
MD_FLOAT ibb_xmin = atom->iclusters[ci].bbminx; MD_FLOAT ibb_xmin = atom->siclusters[sci].bbminx;
MD_FLOAT ibb_xmax = atom->iclusters[ci].bbmaxx; MD_FLOAT ibb_xmax = atom->siclusters[sci].bbmaxx;
MD_FLOAT ibb_ymin = atom->iclusters[ci].bbminy; MD_FLOAT ibb_ymin = atom->siclusters[sci].bbminy;
MD_FLOAT ibb_ymax = atom->iclusters[ci].bbmaxy; MD_FLOAT ibb_ymax = atom->siclusters[sci].bbmaxy;
MD_FLOAT ibb_zmin = atom->iclusters[ci].bbminz; MD_FLOAT ibb_zmin = atom->siclusters[sci].bbminz;
MD_FLOAT ibb_zmax = atom->iclusters[ci].bbmaxz; MD_FLOAT ibb_zmax = atom->siclusters[sci].bbmaxz;
for(int k = 0; k < nstencil; k++) { for(int k = 0; k < nstencil; k++) {
int jbin = ibin + stencil[k]; int jbin = ibin + stencil[k];
@ -467,7 +485,7 @@ void buildNeighborGPU(Atom *atom, Neighbor *neighbor) {
d_bb_sq += dm0 * dm0; d_bb_sq += dm0 * dm0;
if(d_bb_sq < cutneighsq) { if(d_bb_sq < cutneighsq) {
if(d_bb_sq < rbb_sq || atomDistanceInRange(atom, ci, cj, cutneighsq)) { if(d_bb_sq < rbb_sq || atomDistanceInRangeGPU(atom, sci, cj, cutneighsq)) {
neighptr[n++] = cj; neighptr[n++] = cj;
} }
} }
@ -494,8 +512,7 @@ void buildNeighborGPU(Atom *atom, Neighbor *neighbor) {
} }
} }
//neighbor->numneigh[sci] = n; neighbor->numneigh[sci] = n;
neighbor->numneigh[ci] = n;
if(n >= neighbor->maxneighs) { if(n >= neighbor->maxneighs) {
resize = 1; resize = 1;
@ -503,6 +520,14 @@ void buildNeighborGPU(Atom *atom, Neighbor *neighbor) {
new_maxneighs = n; new_maxneighs = n;
} }
} }
for (int scii = 0; scii < atom->siclusters[sci].nclusters; scii++) {
//for(int ci = 0; ci < atom->Nclusters_local; ci++) {
//const int ci = atom->siclusters[sci].iclusters[scii];
} }
} }
@ -954,6 +979,12 @@ void buildClustersGPU(Atom *atom) {
int ci_vec_base = CI_VECTOR_BASE_INDEX(ci); int ci_vec_base = CI_VECTOR_BASE_INDEX(ci);
MD_FLOAT *ci_x = &atom->cl_x[ci_vec_base]; MD_FLOAT *ci_x = &atom->cl_x[ci_vec_base];
MD_FLOAT *ci_v = &atom->cl_v[ci_vec_base]; MD_FLOAT *ci_v = &atom->cl_v[ci_vec_base];
int sci_sca_base = SCI_SCALAR_BASE_INDEX(sci);
int sci_vec_base = SCI_VECTOR_BASE_INDEX(sci);
MD_FLOAT *sci_x = &atom->scl_x[sci_vec_base];
MD_FLOAT *sci_v = &atom->scl_v[sci_vec_base];
int *ci_type = &atom->cl_type[ci_sca_base]; int *ci_type = &atom->cl_type[ci_sca_base];
MD_FLOAT bbminx = INFINITY, bbmaxx = -INFINITY; MD_FLOAT bbminx = INFINITY, bbmaxx = -INFINITY;
MD_FLOAT bbminy = INFINITY, bbmaxy = -INFINITY; MD_FLOAT bbminy = INFINITY, bbmaxy = -INFINITY;
@ -974,6 +1005,14 @@ void buildClustersGPU(Atom *atom) {
ci_v[CL_Y_OFFSET + cii] = atom->vy[i]; ci_v[CL_Y_OFFSET + cii] = atom->vy[i];
ci_v[CL_Z_OFFSET + cii] = atom->vz[i]; ci_v[CL_Z_OFFSET + cii] = atom->vz[i];
sci_x[SCL_CL_X_OFFSET(atom->siclusters[sci].nclusters) + cii] = xtmp;
sci_x[SCL_CL_Y_OFFSET(atom->siclusters[sci].nclusters) + cii] = ytmp;
sci_x[SCL_CL_Z_OFFSET(atom->siclusters[sci].nclusters) + cii] = ztmp;
sci_v[SCL_CL_X_OFFSET(atom->siclusters[sci].nclusters) + cii] = atom->vx[i];
sci_v[SCL_CL_Y_OFFSET(atom->siclusters[sci].nclusters) + cii] = atom->vy[i];
sci_v[SCL_CL_Z_OFFSET(atom->siclusters[sci].nclusters) + cii] = atom->vz[i];
// TODO: To create the bounding boxes faster, we can use SIMD operations // TODO: To create the bounding boxes faster, we can use SIMD operations
if(bbminx > xtmp) { bbminx = xtmp; } if(bbminx > xtmp) { bbminx = xtmp; }
if(bbmaxx < xtmp) { bbmaxx = xtmp; } if(bbmaxx < xtmp) { bbmaxx = xtmp; }

View File

@ -86,6 +86,98 @@ void cpuUpdatePbc(Atom *atom, Parameter *param, int firstUpdate) {
DEBUG_MESSAGE("updatePbc end\n"); DEBUG_MESSAGE("updatePbc end\n");
} }
/* update coordinates of ghost atoms */
/* uses mapping created in setupPbc */
void gpuUpdatePbc(Atom *atom, Parameter *param, int firstUpdate) {
DEBUG_MESSAGE("gpuUpdatePbc start\n");
int jfac = MAX(1, CLUSTER_N / CLUSTER_M);
int ncj = atom->Nclusters_local / jfac;
MD_FLOAT xprd = param->xprd;
MD_FLOAT yprd = param->yprd;
MD_FLOAT zprd = param->zprd;
for(int cg = 0; cg < atom->Nclusters_ghost; cg++) {
const int cj = ncj + cg;
int cj_vec_base = CJ_VECTOR_BASE_INDEX(cj);
int scj_vec_base = SCJ_VECTOR_BASE_INDEX(cj);
int bmap_vec_base = CJ_VECTOR_BASE_INDEX(atom->border_map[cg]);
int sbmap_vec_base = SCJ_VECTOR_BASE_INDEX(atom->border_map[cg]);
MD_FLOAT *cj_x = &atom->cl_x[cj_vec_base];
MD_FLOAT *bmap_x = &atom->cl_x[bmap_vec_base];
MD_FLOAT *scj_x = &atom->scl_x[scj_vec_base];
MD_FLOAT *sbmap_x = &atom->scl_x[sbmap_vec_base];
MD_FLOAT bbminx = INFINITY, bbmaxx = -INFINITY;
MD_FLOAT bbminy = INFINITY, bbmaxy = -INFINITY;
MD_FLOAT bbminz = INFINITY, bbmaxz = -INFINITY;
MD_FLOAT sbbminx = INFINITY, sbbmaxx = -INFINITY;
MD_FLOAT sbbminy = INFINITY, sbbmaxy = -INFINITY;
MD_FLOAT sbbminz = INFINITY, sbbmaxz = -INFINITY;
for(int cjj = 0; cjj < atom->jclusters[cj].natoms; cjj++) {
MD_FLOAT xtmp = bmap_x[CL_X_OFFSET + cjj] + atom->PBCx[cg] * xprd;
MD_FLOAT ytmp = bmap_x[CL_Y_OFFSET + cjj] + atom->PBCy[cg] * yprd;
MD_FLOAT ztmp = bmap_x[CL_Z_OFFSET + cjj] + atom->PBCz[cg] * zprd;
MD_FLOAT sxtmp = sbmap_x[CL_X_OFFSET + cjj] + atom->PBCx[cg] * xprd;
MD_FLOAT sytmp = sbmap_x[CL_Y_OFFSET + cjj] + atom->PBCy[cg] * yprd;
MD_FLOAT sztmp = sbmap_x[CL_Z_OFFSET + cjj] + atom->PBCz[cg] * zprd;
cj_x[CL_X_OFFSET + cjj] = xtmp;
cj_x[CL_Y_OFFSET + cjj] = ytmp;
cj_x[CL_Z_OFFSET + cjj] = ztmp;
scj_x[SCL_X_OFFSET + cjj] = sxtmp;
scj_x[SCL_Y_OFFSET + cjj] = sytmp;
scj_x[SCL_Z_OFFSET + cjj] = sztmp;
if(firstUpdate) {
// TODO: To create the bounding boxes faster, we can use SIMD operations
if(bbminx > xtmp) { bbminx = xtmp; }
if(bbmaxx < xtmp) { bbmaxx = xtmp; }
if(bbminy > ytmp) { bbminy = ytmp; }
if(bbmaxy < ytmp) { bbmaxy = ytmp; }
if(bbminz > ztmp) { bbminz = ztmp; }
if(bbmaxz < ztmp) { bbmaxz = ztmp; }
if(sbbminx > sxtmp) { sbbminx = sxtmp; }
if(sbbmaxx < sxtmp) { sbbmaxx = sxtmp; }
if(sbbminy > sytmp) { sbbminy = sytmp; }
if(sbbmaxy < sytmp) { sbbmaxy = sytmp; }
if(sbbminz > sztmp) { sbbminz = sztmp; }
if(sbbmaxz < sztmp) { sbbmaxz = sztmp; }
}
}
if(firstUpdate) {
for(int cjj = atom->jclusters[cj].natoms; cjj < CLUSTER_N; cjj++) {
cj_x[CL_X_OFFSET + cjj] = INFINITY;
cj_x[CL_Y_OFFSET + cjj] = INFINITY;
cj_x[CL_Z_OFFSET + cjj] = INFINITY;
scj_x[SCL_X_OFFSET + cjj] = INFINITY;
scj_x[SCL_Y_OFFSET + cjj] = INFINITY;
scj_x[SCL_Z_OFFSET + cjj] = INFINITY;
}
atom->jclusters[cj].bbminx = bbminx;
atom->jclusters[cj].bbmaxx = bbmaxx;
atom->jclusters[cj].bbminy = bbminy;
atom->jclusters[cj].bbmaxy = bbmaxy;
atom->jclusters[cj].bbminz = bbminz;
atom->jclusters[cj].bbmaxz = bbmaxz;
}
}
DEBUG_MESSAGE("gpuUpdatePbc end\n");
}
/* relocate atoms that have left domain according /* relocate atoms that have left domain according
* to periodic boundary conditions */ * to periodic boundary conditions */
void updateAtomsPbc(Atom *atom, Parameter *param) { void updateAtomsPbc(Atom *atom, Parameter *param) {
@ -236,7 +328,8 @@ void setupPbcGPU(Atom *atom, Parameter *param) {
MD_FLOAT yprd = param->yprd; MD_FLOAT yprd = param->yprd;
MD_FLOAT zprd = param->zprd; MD_FLOAT zprd = param->zprd;
MD_FLOAT Cutneigh = param->cutneigh; MD_FLOAT Cutneigh = param->cutneigh;
int jfac = SCLUSTER_SIZE / CLUSTER_M; //int jfac = MAX(1, CLUSTER_N / CLUSTER_M);
int jfac = SCLUSTER_M / CLUSTER_M;
int ncj = atom->Nsclusters_local * jfac; int ncj = atom->Nsclusters_local * jfac;
int Nghost = -1; int Nghost = -1;
int Nghost_atoms = 0; int Nghost_atoms = 0;
@ -245,6 +338,7 @@ void setupPbcGPU(Atom *atom, Parameter *param) {
if(atom->jclusters[cj].natoms > 0) { if(atom->jclusters[cj].natoms > 0) {
if(atom->Nsclusters_local + (Nghost + (jfac - 1) + 7) / jfac >= atom->Nclusters_max) { if(atom->Nsclusters_local + (Nghost + (jfac - 1) + 7) / jfac >= atom->Nclusters_max) {
growClusters(atom); growClusters(atom);
//growSuperClusters(atom);
} }
if((Nghost + 7) * CLUSTER_M >= NmaxGhost) { if((Nghost + 7) * CLUSTER_M >= NmaxGhost) {
@ -293,6 +387,7 @@ void setupPbcGPU(Atom *atom, Parameter *param) {
if(ncj + (Nghost + (jfac - 1) + 1) / jfac >= atom->Nclusters_max) { if(ncj + (Nghost + (jfac - 1) + 1) / jfac >= atom->Nclusters_max) {
growClusters(atom); growClusters(atom);
//growSuperClusters(atom);
} }
// Add dummy cluster at the end // Add dummy cluster at the end
@ -311,6 +406,6 @@ void setupPbcGPU(Atom *atom, Parameter *param) {
atom->Nclusters = atom->Nclusters_local + Nghost + 1; atom->Nclusters = atom->Nclusters_local + Nghost + 1;
// Update created ghost clusters positions // Update created ghost clusters positions
cudaUpdatePbc(atom, param, 1); gpuUpdatePbc(atom, param, 1);
DEBUG_MESSAGE("setupPbcGPU end\n"); DEBUG_MESSAGE("setupPbcGPU end\n");
} }