Neighbor list preparation
@@ -154,11 +154,11 @@ void copyDataFromCUDADevice(Atom *atom) {
     memcpyFromGPU(atom->cl_f, cuda_cl_f, atom->Nclusters_max * CLUSTER_M * 3 * sizeof(MD_FLOAT));
 
 #ifdef USE_SUPER_CLUSTERS
-    alignDataFromSuperclusters(atom);
-
     memcpyFromGPU(atom->scl_x, cuda_scl_x, atom->Nsclusters_max * SCLUSTER_M * 3 * sizeof(MD_FLOAT));
     memcpyFromGPU(atom->scl_v, cuda_scl_v, atom->Nsclusters_max * SCLUSTER_M * 3 * sizeof(MD_FLOAT));
     memcpyFromGPU(atom->scl_f, cuda_scl_f, atom->Nsclusters_max * SCLUSTER_M * 3 * sizeof(MD_FLOAT));
+
+    alignDataFromSuperclusters(atom);
 #endif //USE_SUPER_CLUSTERS
 
     DEBUG_MESSAGE("copyDataFromCUDADevice stop\r\n");
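Note on this hunk: alignDataFromSuperclusters(atom) presumably unpacks the host-side supercluster buffers back into the regular cluster arrays, so it only has fresh data to work with once the scl_* copies above have completed; moving the call below the copies matches that ordering. The memcpyFromGPU helper itself is not part of this diff; a minimal, hypothetical sketch with the (dst, src, bytes) call shape used above could look like the following (the project's real helper may differ):

    /* Hypothetical device-to-host copy helper; only the call shape is taken
     * from the diff above, the body is an assumption. */
    #include <cuda_runtime.h>
    #include <stdio.h>
    #include <stdlib.h>

    static inline void memcpyFromGPU_sketch(void *host_dst, const void *dev_src, size_t bytes) {
        cudaError_t err = cudaMemcpy(host_dst, dev_src, bytes, cudaMemcpyDeviceToHost);
        if(err != cudaSuccess) {
            fprintf(stderr, "memcpyFromGPU_sketch: %s\n", cudaGetErrorString(err));
            exit(EXIT_FAILURE);
        }
    }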
@@ -241,6 +241,39 @@ __global__ void cudaUpdatePbc_warp(MD_FLOAT *cuda_cl_x, int *cuda_border_map,
     }
 }
 
+__global__ void cudaUpdatePbcSup_warp(MD_FLOAT *cuda_cl_x, int *cuda_border_map,
+                                      int *cuda_jclusters_natoms,
+                                      int *cuda_PBCx,
+                                      int *cuda_PBCy,
+                                      int *cuda_PBCz,
+                                      int Nsclusters_local,
+                                      int Nclusters_ghost,
+                                      MD_FLOAT param_xprd,
+                                      MD_FLOAT param_yprd,
+                                      MD_FLOAT param_zprd) {
+    unsigned int cg = blockDim.x * blockIdx.x + threadIdx.x;
+    if (cg >= Nclusters_ghost) return;
+
+    //int jfac = MAX(1, CLUSTER_N / CLUSTER_M);
+    int jfac = SCLUSTER_SIZE / CLUSTER_M;
+    int ncj = Nsclusters_local / jfac;
+    MD_FLOAT xprd = param_xprd;
+    MD_FLOAT yprd = param_yprd;
+    MD_FLOAT zprd = param_zprd;
+
+    const int cj = ncj + cg;
+    int cj_vec_base = CJ_VECTOR_BASE_INDEX(cj);
+    int bmap_vec_base = CJ_VECTOR_BASE_INDEX(cuda_border_map[cg]);
+    MD_FLOAT *cj_x = &cuda_cl_x[cj_vec_base];
+    MD_FLOAT *bmap_x = &cuda_cl_x[bmap_vec_base];
+
+    for(int cjj = 0; cjj < cuda_jclusters_natoms[cg]; cjj++) {
+        cj_x[CL_X_OFFSET + cjj] = bmap_x[CL_X_OFFSET + cjj] + cuda_PBCx[cg] * xprd;
+        cj_x[CL_Y_OFFSET + cjj] = bmap_x[CL_Y_OFFSET + cjj] + cuda_PBCy[cg] * yprd;
+        cj_x[CL_Z_OFFSET + cjj] = bmap_x[CL_Z_OFFSET + cjj] + cuda_PBCz[cg] * zprd;
+    }
+}
+
 __global__ void computeForceLJ_cuda_warp(MD_FLOAT *cuda_cl_x, MD_FLOAT *cuda_cl_f,
                                          int Nclusters_local, int Nclusters_max,
                                          int *cuda_numneigh, int *cuda_neighs, int half_neigh, int maxneighs,
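For reference, the per-ghost-cluster shift in the new kernel can be reproduced as a serial host loop, which is handy for validating the GPU path on small inputs. This is only a sketch mirroring the kernel above: it reuses the project's macros (SCLUSTER_SIZE, CLUSTER_M, CJ_VECTOR_BASE_INDEX, CL_X/Y/Z_OFFSET) and assumes they are also visible to host code.

    /* Hedged host-side sketch of cudaUpdatePbcSup_warp; one iteration of the
     * outer loop corresponds to one CUDA thread (one ghost cluster cg). */
    static void updatePbcSupHost_sketch(MD_FLOAT *cl_x, const int *border_map,
                                        const int *jclusters_natoms,
                                        const int *PBCx, const int *PBCy, const int *PBCz,
                                        int Nsclusters_local, int Nclusters_ghost,
                                        MD_FLOAT xprd, MD_FLOAT yprd, MD_FLOAT zprd) {
        int jfac = SCLUSTER_SIZE / CLUSTER_M;
        int ncj = Nsclusters_local / jfac;   // ghost clusters are written starting at index ncj
        for(int cg = 0; cg < Nclusters_ghost; cg++) {
            MD_FLOAT *cj_x   = &cl_x[CJ_VECTOR_BASE_INDEX(ncj + cg)];
            MD_FLOAT *bmap_x = &cl_x[CJ_VECTOR_BASE_INDEX(border_map[cg])];
            for(int cjj = 0; cjj < jclusters_natoms[cg]; cjj++) {
                cj_x[CL_X_OFFSET + cjj] = bmap_x[CL_X_OFFSET + cjj] + PBCx[cg] * xprd;
                cj_x[CL_Y_OFFSET + cjj] = bmap_x[CL_Y_OFFSET + cjj] + PBCy[cg] * yprd;
                cj_x[CL_Z_OFFSET + cjj] = bmap_x[CL_Z_OFFSET + cjj] + PBCz[cg] * zprd;
            }
        }
    }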
@@ -348,11 +381,19 @@ extern "C"
 void cudaUpdatePbc(Atom *atom, Parameter *param) {
     const int threads_num = 512;
     dim3 block_size = dim3(threads_num, 1, 1);;
-    dim3 grid_size = dim3(atom->Nclusters_ghost/(threads_num)+1, 1, 1);;
-    cudaUpdatePbc_warp<<<grid_size, block_size>>>(cuda_cl_x, cuda_border_map,
+    dim3 grid_size = dim3(atom->Nclusters_ghost/(threads_num)+1, 1, 1);
+
+#ifdef USE_SUPER_CLUSTERS
+    cudaUpdatePbcSup_warp<<<grid_size, block_size>>>(cuda_scl_x, cuda_border_map,
+                                                     cuda_jclusters_natoms, cuda_PBCx, cuda_PBCy, cuda_PBCz,
+                                                     atom->Nclusters_local, atom->Nclusters_ghost,
+                                                     param->xprd, param->yprd, param->zprd);
+#else
+    cudaUpdatePbc_warp<<<grid_size, block_size>>>(cuda_cl_x, cuda_border_map,
                                                   cuda_jclusters_natoms, cuda_PBCx, cuda_PBCy, cuda_PBCz,
                                                   atom->Nclusters_local, atom->Nclusters_ghost,
                                                   param->xprd, param->yprd, param->zprd);
+#endif //USE_SUPER_CLUSTERS
     cuda_assert("cudaUpdatePbc", cudaPeekAtLastError());
     cuda_assert("cudaUpdatePbc", cudaDeviceSynchronize());
 }
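A note on the launch configuration: grid_size = Nclusters_ghost/threads_num + 1 always launches at least one block and launches one extra, empty block whenever Nclusters_ghost is an exact multiple of threads_num; the cg >= Nclusters_ghost guard in the kernels makes the extra block harmless. The usual ceil-division form, shown here only as an optional sketch, sizes the grid exactly:

    // Optional sketch, not part of the commit:
    const int threads_num = 512;
    dim3 block_size(threads_num, 1, 1);
    dim3 grid_size((atom->Nclusters_ghost + threads_num - 1) / threads_num, 1, 1);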
@@ -193,9 +193,11 @@ __global__ void computeForceLJSup_cuda_warp(MD_FLOAT *cuda_cl_x, MD_FLOAT *cuda_
     int numneighs = cuda_numneigh[cuda_iclusters[SCLUSTER_SIZE * sci_pos + ci_pos]];
 
     for(int k = 0; k < numneighs; k++) {
-        int cj = (&cuda_neighs[cuda_iclusters[SCLUSTER_SIZE * sci_pos + ci_pos] * maxneighs])[k];
+        int glob_j = (&cuda_neighs[cuda_iclusters[SCLUSTER_SIZE * sci_pos + ci_pos] * maxneighs])[k];
+        int scj = glob_j / SCLUSTER_SIZE;
         // TODO Make cj accessible from super cluster data alignment (not reachable right now)
-        int cj_vec_base = SCJ_VECTOR_BASE_INDEX(cj);
+        int cj = SCJ_VECTOR_BASE_INDEX(scj) + CLUSTER_M * (glob_j % SCLUSTER_SIZE);
+        int cj_vec_base = cj;
         MD_FLOAT *cj_x = &cuda_cl_x[cj_vec_base];
         MD_FLOAT *cj_f = &cuda_cl_f[cj_vec_base];
 
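The hunk above stops treating the stored neighbor entry as a directly usable cluster index and instead decomposes the global id glob_j into its owning supercluster (glob_j / SCLUSTER_SIZE) and its slot within it (glob_j % SCLUSTER_SIZE), with consecutive slots CLUSTER_M entries apart in the resulting vector base index. The same index math as a small helper, purely as a sketch (the actual layout is whatever SCJ_VECTOR_BASE_INDEX defines):

    /* Hypothetical helper mirroring the decomposition above; it assumes
     * SCJ_VECTOR_BASE_INDEX returns the base of a supercluster's coordinate
     * block and that consecutive slots are CLUSTER_M entries apart. */
    static __host__ __device__ inline int superclusterVecBase_sketch(int glob_j) {
        int scj  = glob_j / SCLUSTER_SIZE;   // owning supercluster
        int slot = glob_j % SCLUSTER_SIZE;   // cluster slot inside that supercluster
        return SCJ_VECTOR_BASE_INDEX(scj) + CLUSTER_M * slot;
    }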
@@ -206,14 +208,10 @@ __global__ void computeForceLJSup_cuda_warp(MD_FLOAT *cuda_cl_x, MD_FLOAT *cuda_
         MD_FLOAT fiy = 0;
         MD_FLOAT fiz = 0;
 
-        int cond;
-#if CLUSTER_M == CLUSTER_N
-        cond = half_neigh ? (ci_cj0 != cj || cii_pos < cjj_pos) :
-                            (ci_cj0 != cj || cii_pos != cjj_pos);
-#elif CLUSTER_M < CLUSTER_N
-        cond = half_neigh ? (ci_cj0 != cj || cii_pos + CLUSTER_M * (ci_pos & 0x1) < cjj_pos) :
-                            (ci_cj0 != cj || cii_pos + CLUSTER_M * (ci_pos & 0x1) != cjj_pos);
-#endif
+
+        //int cond = ci_cj0 != cj || cii_pos != cjj_pos || scj != sci_pos;
+        int cond = (glob_j != cuda_iclusters[SCLUSTER_SIZE * sci_pos + ci_pos] && cii_pos != cjj_pos);
+
         if(cond) {
             MD_FLOAT delx = xtmp - cj_x[SCL_CL_X_OFFSET(ci_pos) + cjj_pos];
             MD_FLOAT dely = ytmp - cj_x[SCL_CL_Y_OFFSET(ci_pos) + cjj_pos];
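The rewritten condition drops the half_neigh and CLUSTER_M-vs-CLUSTER_N special cases and compares the global neighbor id against the i-cluster id directly. Note that it joins the two tests with &&, so a pair is also skipped whenever cii_pos == cjj_pos even across different clusters, whereas the commented-out line above it (like the removed code) uses the || form that only skips the true self pair. Purely as a sketch, the two predicates side by side (names mirror the kernel; non-zero means the pair is evaluated):

    /* Sketch only: the two exclusion variants appearing in this hunk. */
    static __host__ __device__ inline int pairAccepted_or(int glob_j, int icl, int cii_pos, int cjj_pos) {
        return glob_j != icl || cii_pos != cjj_pos;   // skip only same cluster AND same lane
    }

    static __host__ __device__ inline int pairAccepted_and(int glob_j, int icl, int cii_pos, int cjj_pos) {
        return glob_j != icl && cii_pos != cjj_pos;   // committed form: also skips equal lanes across clusters
    }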