Neighbor list preparation
This commit is contained in:
parent
ee3f6de050
commit
182c065fe2
@ -13,7 +13,7 @@ DATA_LAYOUT ?= AOS
|
|||||||
# Assembly syntax to generate (ATT/INTEL)
|
# Assembly syntax to generate (ATT/INTEL)
|
||||||
ASM_SYNTAX ?= ATT
|
ASM_SYNTAX ?= ATT
|
||||||
# Debug
|
# Debug
|
||||||
DEBUG ?= false
|
DEBUG ?= true
|
||||||
|
|
||||||
# Explicitly store and load atom types (true or false)
|
# Explicitly store and load atom types (true or false)
|
||||||
EXPLICIT_TYPES ?= false
|
EXPLICIT_TYPES ?= false
|
||||||
|
@ -452,12 +452,5 @@ void growSuperClusters(Atom *atom) {
|
|||||||
atom->scl_x = (MD_FLOAT*) reallocate(atom->scl_x, ALIGNMENT, atom->Nsclusters_max * SCLUSTER_M * 3 * sizeof(MD_FLOAT), nold * SCLUSTER_M * 3 * sizeof(MD_FLOAT));
|
atom->scl_x = (MD_FLOAT*) reallocate(atom->scl_x, ALIGNMENT, atom->Nsclusters_max * SCLUSTER_M * 3 * sizeof(MD_FLOAT), nold * SCLUSTER_M * 3 * sizeof(MD_FLOAT));
|
||||||
atom->scl_f = (MD_FLOAT*) reallocate(atom->scl_f, ALIGNMENT, atom->Nsclusters_max * SCLUSTER_M * 3 * sizeof(MD_FLOAT), nold * SCLUSTER_M * 3 * sizeof(MD_FLOAT));
|
atom->scl_f = (MD_FLOAT*) reallocate(atom->scl_f, ALIGNMENT, atom->Nsclusters_max * SCLUSTER_M * 3 * sizeof(MD_FLOAT), nold * SCLUSTER_M * 3 * sizeof(MD_FLOAT));
|
||||||
atom->scl_v = (MD_FLOAT*) reallocate(atom->scl_v, ALIGNMENT, atom->Nsclusters_max * SCLUSTER_M * 3 * sizeof(MD_FLOAT), nold * SCLUSTER_M * 3 * sizeof(MD_FLOAT));
|
atom->scl_v = (MD_FLOAT*) reallocate(atom->scl_v, ALIGNMENT, atom->Nsclusters_max * SCLUSTER_M * 3 * sizeof(MD_FLOAT), nold * SCLUSTER_M * 3 * sizeof(MD_FLOAT));
|
||||||
|
|
||||||
|
|
||||||
/*
|
|
||||||
for (int sci = 0; sci < atom->Nsclusters_max; sci++) {
|
|
||||||
atom->siclusters[sci].iclusters = (int*) reallocate(atom->siclusters[sci].iclusters, ALIGNMENT, SCLUSTER_SIZE * sizeof(int), SCLUSTER_SIZE * sizeof(int));
|
|
||||||
}
|
|
||||||
*/
|
|
||||||
}
|
}
|
||||||
#endif //USE_SUPER_CLUSTERS
|
#endif //USE_SUPER_CLUSTERS
|
||||||
|
@ -154,11 +154,11 @@ void copyDataFromCUDADevice(Atom *atom) {
|
|||||||
memcpyFromGPU(atom->cl_f, cuda_cl_f, atom->Nclusters_max * CLUSTER_M * 3 * sizeof(MD_FLOAT));
|
memcpyFromGPU(atom->cl_f, cuda_cl_f, atom->Nclusters_max * CLUSTER_M * 3 * sizeof(MD_FLOAT));
|
||||||
|
|
||||||
#ifdef USE_SUPER_CLUSTERS
|
#ifdef USE_SUPER_CLUSTERS
|
||||||
alignDataFromSuperclusters(atom);
|
|
||||||
|
|
||||||
memcpyFromGPU(atom->scl_x, cuda_scl_x, atom->Nsclusters_max * SCLUSTER_M * 3 * sizeof(MD_FLOAT));
|
memcpyFromGPU(atom->scl_x, cuda_scl_x, atom->Nsclusters_max * SCLUSTER_M * 3 * sizeof(MD_FLOAT));
|
||||||
memcpyFromGPU(atom->scl_v, cuda_scl_v, atom->Nsclusters_max * SCLUSTER_M * 3 * sizeof(MD_FLOAT));
|
memcpyFromGPU(atom->scl_v, cuda_scl_v, atom->Nsclusters_max * SCLUSTER_M * 3 * sizeof(MD_FLOAT));
|
||||||
memcpyFromGPU(atom->scl_f, cuda_scl_f, atom->Nsclusters_max * SCLUSTER_M * 3 * sizeof(MD_FLOAT));
|
memcpyFromGPU(atom->scl_f, cuda_scl_f, atom->Nsclusters_max * SCLUSTER_M * 3 * sizeof(MD_FLOAT));
|
||||||
|
|
||||||
|
alignDataFromSuperclusters(atom);
|
||||||
#endif //USE_SUPER_CLUSTERS
|
#endif //USE_SUPER_CLUSTERS
|
||||||
|
|
||||||
DEBUG_MESSAGE("copyDataFromCUDADevice stop\r\n");
|
DEBUG_MESSAGE("copyDataFromCUDADevice stop\r\n");
|
||||||
@ -241,6 +241,39 @@ __global__ void cudaUpdatePbc_warp(MD_FLOAT *cuda_cl_x, int *cuda_border_map,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
__global__ void cudaUpdatePbcSup_warp(MD_FLOAT *cuda_cl_x, int *cuda_border_map,
|
||||||
|
int *cuda_jclusters_natoms,
|
||||||
|
int *cuda_PBCx,
|
||||||
|
int *cuda_PBCy,
|
||||||
|
int *cuda_PBCz,
|
||||||
|
int Nsclusters_local,
|
||||||
|
int Nclusters_ghost,
|
||||||
|
MD_FLOAT param_xprd,
|
||||||
|
MD_FLOAT param_yprd,
|
||||||
|
MD_FLOAT param_zprd) {
|
||||||
|
unsigned int cg = blockDim.x * blockIdx.x + threadIdx.x;
|
||||||
|
if (cg >= Nclusters_ghost) return;
|
||||||
|
|
||||||
|
//int jfac = MAX(1, CLUSTER_N / CLUSTER_M);
|
||||||
|
int jfac = SCLUSTER_SIZE / CLUSTER_M;
|
||||||
|
int ncj = Nsclusters_local / jfac;
|
||||||
|
MD_FLOAT xprd = param_xprd;
|
||||||
|
MD_FLOAT yprd = param_yprd;
|
||||||
|
MD_FLOAT zprd = param_zprd;
|
||||||
|
|
||||||
|
const int cj = ncj + cg;
|
||||||
|
int cj_vec_base = CJ_VECTOR_BASE_INDEX(cj);
|
||||||
|
int bmap_vec_base = CJ_VECTOR_BASE_INDEX(cuda_border_map[cg]);
|
||||||
|
MD_FLOAT *cj_x = &cuda_cl_x[cj_vec_base];
|
||||||
|
MD_FLOAT *bmap_x = &cuda_cl_x[bmap_vec_base];
|
||||||
|
|
||||||
|
for(int cjj = 0; cjj < cuda_jclusters_natoms[cg]; cjj++) {
|
||||||
|
cj_x[CL_X_OFFSET + cjj] = bmap_x[CL_X_OFFSET + cjj] + cuda_PBCx[cg] * xprd;
|
||||||
|
cj_x[CL_Y_OFFSET + cjj] = bmap_x[CL_Y_OFFSET + cjj] + cuda_PBCy[cg] * yprd;
|
||||||
|
cj_x[CL_Z_OFFSET + cjj] = bmap_x[CL_Z_OFFSET + cjj] + cuda_PBCz[cg] * zprd;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
__global__ void computeForceLJ_cuda_warp(MD_FLOAT *cuda_cl_x, MD_FLOAT *cuda_cl_f,
|
__global__ void computeForceLJ_cuda_warp(MD_FLOAT *cuda_cl_x, MD_FLOAT *cuda_cl_f,
|
||||||
int Nclusters_local, int Nclusters_max,
|
int Nclusters_local, int Nclusters_max,
|
||||||
int *cuda_numneigh, int *cuda_neighs, int half_neigh, int maxneighs,
|
int *cuda_numneigh, int *cuda_neighs, int half_neigh, int maxneighs,
|
||||||
@ -348,11 +381,19 @@ extern "C"
|
|||||||
void cudaUpdatePbc(Atom *atom, Parameter *param) {
|
void cudaUpdatePbc(Atom *atom, Parameter *param) {
|
||||||
const int threads_num = 512;
|
const int threads_num = 512;
|
||||||
dim3 block_size = dim3(threads_num, 1, 1);;
|
dim3 block_size = dim3(threads_num, 1, 1);;
|
||||||
dim3 grid_size = dim3(atom->Nclusters_ghost/(threads_num)+1, 1, 1);;
|
dim3 grid_size = dim3(atom->Nclusters_ghost/(threads_num)+1, 1, 1);
|
||||||
|
|
||||||
|
#ifdef USE_SUPER_CLUSTERS
|
||||||
|
cudaUpdatePbcSup_warp<<<grid_size, block_size>>>(cuda_scl_x, cuda_border_map,
|
||||||
|
cuda_jclusters_natoms, cuda_PBCx, cuda_PBCy, cuda_PBCz,
|
||||||
|
atom->Nclusters_local, atom->Nclusters_ghost,
|
||||||
|
param->xprd, param->yprd, param->zprd);
|
||||||
|
#else
|
||||||
cudaUpdatePbc_warp<<<grid_size, block_size>>>(cuda_cl_x, cuda_border_map,
|
cudaUpdatePbc_warp<<<grid_size, block_size>>>(cuda_cl_x, cuda_border_map,
|
||||||
cuda_jclusters_natoms, cuda_PBCx, cuda_PBCy, cuda_PBCz,
|
cuda_jclusters_natoms, cuda_PBCx, cuda_PBCy, cuda_PBCz,
|
||||||
atom->Nclusters_local, atom->Nclusters_ghost,
|
atom->Nclusters_local, atom->Nclusters_ghost,
|
||||||
param->xprd, param->yprd, param->zprd);
|
param->xprd, param->yprd, param->zprd);
|
||||||
|
#endif //USE_SUPER_CLUSTERS
|
||||||
cuda_assert("cudaUpdatePbc", cudaPeekAtLastError());
|
cuda_assert("cudaUpdatePbc", cudaPeekAtLastError());
|
||||||
cuda_assert("cudaUpdatePbc", cudaDeviceSynchronize());
|
cuda_assert("cudaUpdatePbc", cudaDeviceSynchronize());
|
||||||
}
|
}
|
||||||
|
@ -193,9 +193,11 @@ __global__ void computeForceLJSup_cuda_warp(MD_FLOAT *cuda_cl_x, MD_FLOAT *cuda_
|
|||||||
int numneighs = cuda_numneigh[cuda_iclusters[SCLUSTER_SIZE * sci_pos + ci_pos]];
|
int numneighs = cuda_numneigh[cuda_iclusters[SCLUSTER_SIZE * sci_pos + ci_pos]];
|
||||||
|
|
||||||
for(int k = 0; k < numneighs; k++) {
|
for(int k = 0; k < numneighs; k++) {
|
||||||
int cj = (&cuda_neighs[cuda_iclusters[SCLUSTER_SIZE * sci_pos + ci_pos] * maxneighs])[k];
|
int glob_j = (&cuda_neighs[cuda_iclusters[SCLUSTER_SIZE * sci_pos + ci_pos] * maxneighs])[k];
|
||||||
|
int scj = glob_j / SCLUSTER_SIZE;
|
||||||
// TODO Make cj accessible from super cluster data alignment (not reachable right now)
|
// TODO Make cj accessible from super cluster data alignment (not reachable right now)
|
||||||
int cj_vec_base = SCJ_VECTOR_BASE_INDEX(cj);
|
int cj = SCJ_VECTOR_BASE_INDEX(scj) + CLUSTER_M * (glob_j % SCLUSTER_SIZE);
|
||||||
|
int cj_vec_base = cj;
|
||||||
MD_FLOAT *cj_x = &cuda_cl_x[cj_vec_base];
|
MD_FLOAT *cj_x = &cuda_cl_x[cj_vec_base];
|
||||||
MD_FLOAT *cj_f = &cuda_cl_f[cj_vec_base];
|
MD_FLOAT *cj_f = &cuda_cl_f[cj_vec_base];
|
||||||
|
|
||||||
@ -206,14 +208,10 @@ __global__ void computeForceLJSup_cuda_warp(MD_FLOAT *cuda_cl_x, MD_FLOAT *cuda_
|
|||||||
MD_FLOAT fiy = 0;
|
MD_FLOAT fiy = 0;
|
||||||
MD_FLOAT fiz = 0;
|
MD_FLOAT fiz = 0;
|
||||||
|
|
||||||
int cond;
|
|
||||||
#if CLUSTER_M == CLUSTER_N
|
//int cond = ci_cj0 != cj || cii_pos != cjj_pos || scj != sci_pos;
|
||||||
cond = half_neigh ? (ci_cj0 != cj || cii_pos < cjj_pos) :
|
int cond = (glob_j != cuda_iclusters[SCLUSTER_SIZE * sci_pos + ci_pos] && cii_pos != cjj_pos);
|
||||||
(ci_cj0 != cj || cii_pos != cjj_pos);
|
|
||||||
#elif CLUSTER_M < CLUSTER_N
|
|
||||||
cond = half_neigh ? (ci_cj0 != cj || cii_pos + CLUSTER_M * (ci_pos & 0x1) < cjj_pos) :
|
|
||||||
(ci_cj0 != cj || cii_pos + CLUSTER_M * (ci_pos & 0x1) != cjj_pos);
|
|
||||||
#endif
|
|
||||||
if(cond) {
|
if(cond) {
|
||||||
MD_FLOAT delx = xtmp - cj_x[SCL_CL_X_OFFSET(ci_pos) + cjj_pos];
|
MD_FLOAT delx = xtmp - cj_x[SCL_CL_X_OFFSET(ci_pos) + cjj_pos];
|
||||||
MD_FLOAT dely = ytmp - cj_x[SCL_CL_Y_OFFSET(ci_pos) + cjj_pos];
|
MD_FLOAT dely = ytmp - cj_x[SCL_CL_Y_OFFSET(ci_pos) + cjj_pos];
|
||||||
|
@ -149,7 +149,6 @@ typedef struct {
|
|||||||
} Cluster;
|
} Cluster;
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
//int *iclusters;
|
|
||||||
int nclusters;
|
int nclusters;
|
||||||
MD_FLOAT bbminx, bbmaxx;
|
MD_FLOAT bbminx, bbmaxx;
|
||||||
MD_FLOAT bbminy, bbmaxy;
|
MD_FLOAT bbminy, bbmaxy;
|
||||||
|
@ -16,5 +16,8 @@ extern void setupPbc(Atom*, Parameter*);
|
|||||||
|
|
||||||
#ifdef CUDA_TARGET
|
#ifdef CUDA_TARGET
|
||||||
extern void cudaUpdatePbc(Atom*, Parameter*, int);
|
extern void cudaUpdatePbc(Atom*, Parameter*, int);
|
||||||
|
#if defined(USE_SUPER_CLUSTERS)
|
||||||
|
extern void setupPbcGPU(Atom*, Parameter*);
|
||||||
|
#endif //defined(USE_SUPER_CLUSTERS)
|
||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
@ -6,12 +6,14 @@
|
|||||||
#define MD_BENCH_UTILS_H
|
#define MD_BENCH_UTILS_H
|
||||||
|
|
||||||
#include <atom.h>
|
#include <atom.h>
|
||||||
|
#include <neighbor.h>
|
||||||
|
|
||||||
#ifdef USE_SUPER_CLUSTERS
|
#ifdef USE_SUPER_CLUSTERS
|
||||||
void verifyClusters(Atom *atom);
|
void verifyClusters(Atom *atom);
|
||||||
void verifyLayout(Atom *atom);
|
void verifyLayout(Atom *atom);
|
||||||
void checkAlignment(Atom *atom);
|
void checkAlignment(Atom *atom);
|
||||||
void showSuperclusters(Atom *atom);
|
void showSuperclusters(Atom *atom);
|
||||||
|
void printNeighs(Atom *atom, Neighbor *neighbor);
|
||||||
#endif //USE_SUPER_CLUSTERS
|
#endif //USE_SUPER_CLUSTERS
|
||||||
|
|
||||||
#endif //MD_BENCH_UTILS_H
|
#endif //MD_BENCH_UTILS_H
|
||||||
|
@ -77,7 +77,12 @@ double setup(Parameter *param, Eam *eam, Atom *atom, Neighbor *neighbor, Stats *
|
|||||||
buildClusters(atom);
|
buildClusters(atom);
|
||||||
#endif //defined(CUDA_TARGET) && defined(USE_SUPER_CLUSTERS)
|
#endif //defined(CUDA_TARGET) && defined(USE_SUPER_CLUSTERS)
|
||||||
defineJClusters(atom);
|
defineJClusters(atom);
|
||||||
|
#if defined(CUDA_TARGET) && defined(USE_SUPER_CLUSTERS)
|
||||||
|
//setupPbcGPU(atom, param);
|
||||||
setupPbc(atom, param);
|
setupPbc(atom, param);
|
||||||
|
#else
|
||||||
|
setupPbc(atom, param);
|
||||||
|
#endif //defined(CUDA_TARGET) && defined(USE_SUPER_CLUSTERS)
|
||||||
binClusters(atom);
|
binClusters(atom);
|
||||||
#if defined(CUDA_TARGET) && defined(USE_SUPER_CLUSTERS)
|
#if defined(CUDA_TARGET) && defined(USE_SUPER_CLUSTERS)
|
||||||
buildNeighborGPU(atom, neighbor);
|
buildNeighborGPU(atom, neighbor);
|
||||||
@ -101,7 +106,12 @@ double reneighbour(Parameter *param, Atom *atom, Neighbor *neighbor) {
|
|||||||
buildClusters(atom);
|
buildClusters(atom);
|
||||||
#endif //defined(CUDA_TARGET) && defined(USE_SUPER_CLUSTERS)
|
#endif //defined(CUDA_TARGET) && defined(USE_SUPER_CLUSTERS)
|
||||||
defineJClusters(atom);
|
defineJClusters(atom);
|
||||||
|
#if defined(CUDA_TARGET) && defined(USE_SUPER_CLUSTERS)
|
||||||
|
//setupPbcGPU(atom, param);
|
||||||
setupPbc(atom, param);
|
setupPbc(atom, param);
|
||||||
|
#else
|
||||||
|
setupPbc(atom, param);
|
||||||
|
#endif //defined(CUDA_TARGET) && defined(USE_SUPER_CLUSTERS)
|
||||||
binClusters(atom);
|
binClusters(atom);
|
||||||
#if defined(CUDA_TARGET) && defined(USE_SUPER_CLUSTERS)
|
#if defined(CUDA_TARGET) && defined(USE_SUPER_CLUSTERS)
|
||||||
buildNeighborGPU(atom, neighbor);
|
buildNeighborGPU(atom, neighbor);
|
||||||
@ -234,6 +244,8 @@ int main(int argc, char** argv) {
|
|||||||
printParameter(¶m);
|
printParameter(¶m);
|
||||||
printf(HLINE);
|
printf(HLINE);
|
||||||
|
|
||||||
|
//verifyNeigh(&atom, &neighbor);
|
||||||
|
|
||||||
printf("step\ttemp\t\tpressure\n");
|
printf("step\ttemp\t\tpressure\n");
|
||||||
computeThermo(0, ¶m, &atom);
|
computeThermo(0, ¶m, &atom);
|
||||||
#if defined(MEM_TRACER) || defined(INDEX_TRACER)
|
#if defined(MEM_TRACER) || defined(INDEX_TRACER)
|
||||||
@ -276,7 +288,10 @@ int main(int argc, char** argv) {
|
|||||||
#endif //defined(CUDA_TARGET) && defined(USE_SUPER_CLUSTERS)
|
#endif //defined(CUDA_TARGET) && defined(USE_SUPER_CLUSTERS)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
copyDataFromCUDADevice(&atom);
|
||||||
updatePbc(&atom, ¶m, 0);
|
updatePbc(&atom, ¶m, 0);
|
||||||
|
copyDataToCUDADevice(&atom);
|
||||||
} else {
|
} else {
|
||||||
#ifdef CUDA_TARGET
|
#ifdef CUDA_TARGET
|
||||||
copyDataFromCUDADevice(&atom);
|
copyDataFromCUDADevice(&atom);
|
||||||
@ -294,14 +309,34 @@ int main(int argc, char** argv) {
|
|||||||
traceAddresses(¶m, &atom, &neighbor, n + 1);
|
traceAddresses(¶m, &atom, &neighbor, n + 1);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
printf("%d\t%d\r\n", atom.Nsclusters_local, atom.Nclusters_local);
|
||||||
|
copyDataToCUDADevice(&atom);
|
||||||
|
verifyLayout(&atom);
|
||||||
|
|
||||||
|
//printClusterIndices(&atom);
|
||||||
|
|
||||||
|
*/
|
||||||
|
|
||||||
if(param.force_field == FF_EAM) {
|
if(param.force_field == FF_EAM) {
|
||||||
timer[FORCE] += computeForceEam(&eam, ¶m, &atom, &neighbor, &stats);
|
timer[FORCE] += computeForceEam(&eam, ¶m, &atom, &neighbor, &stats);
|
||||||
} else {
|
} else {
|
||||||
timer[FORCE] += computeForceLJ(¶m, &atom, &neighbor, &stats);
|
timer[FORCE] += computeForceLJ(¶m, &atom, &neighbor, &stats);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
copyDataFromCUDADevice(&atom);
|
||||||
|
verifyLayout(&atom);
|
||||||
|
|
||||||
|
getchar();
|
||||||
|
*/
|
||||||
|
|
||||||
finalIntegrate(¶m, &atom);
|
finalIntegrate(¶m, &atom);
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
if(!((n + 1) % param.nstat) && (n+1) < param.ntimes) {
|
if(!((n + 1) % param.nstat) && (n+1) < param.ntimes) {
|
||||||
computeThermo(n + 1, ¶m, &atom);
|
computeThermo(n + 1, ¶m, &atom);
|
||||||
}
|
}
|
||||||
|
@ -395,11 +395,11 @@ void buildNeighborGPU(Atom *atom, Neighbor *neighbor) {
|
|||||||
int new_maxneighs = neighbor->maxneighs;
|
int new_maxneighs = neighbor->maxneighs;
|
||||||
resize = 0;
|
resize = 0;
|
||||||
|
|
||||||
//for (int sci = 0; sci < atom->Nsclusters_local; sci++) {
|
for (int sci = 0; sci < atom->Nsclusters_local; sci++) {
|
||||||
//for (int scii = 0; scii < atom->siclusters[sci].nclusters; scii++) {
|
for (int scii = 0; scii < atom->siclusters[sci].nclusters; scii++) {
|
||||||
for(int ci = 0; ci < atom->Nclusters_local; ci++) {
|
//for(int ci = 0; ci < atom->Nclusters_local; ci++) {
|
||||||
//const int ci = atom->siclusters[sci].iclusters[scii];
|
//const int ci = atom->siclusters[sci].iclusters[scii];
|
||||||
//const int ci = atom->icluster_idx[SCLUSTER_SIZE * sci + scii];
|
const int ci = atom->icluster_idx[SCLUSTER_SIZE * sci + scii];
|
||||||
int ci_cj1 = CJ1_FROM_CI(ci);
|
int ci_cj1 = CJ1_FROM_CI(ci);
|
||||||
int *neighptr = &(neighbor->neighbors[ci * neighbor->maxneighs]);
|
int *neighptr = &(neighbor->neighbors[ci * neighbor->maxneighs]);
|
||||||
//int *neighptr = &(neighbor->neighbors[sci * neighbor->maxneighs]);
|
//int *neighptr = &(neighbor->neighbors[sci * neighbor->maxneighs]);
|
||||||
@ -504,7 +504,7 @@ void buildNeighborGPU(Atom *atom, Neighbor *neighbor) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
//}
|
}
|
||||||
|
|
||||||
if(resize) {
|
if(resize) {
|
||||||
fprintf(stdout, "RESIZE %d\n", neighbor->maxneighs);
|
fprintf(stdout, "RESIZE %d\n", neighbor->maxneighs);
|
||||||
@ -898,10 +898,7 @@ void buildClustersGPU(Atom *atom) {
|
|||||||
int n_super_clusters = n_super_clusters_xy / SCLUSTER_SIZE_Z;
|
int n_super_clusters = n_super_clusters_xy / SCLUSTER_SIZE_Z;
|
||||||
if (n_super_clusters_xy % SCLUSTER_SIZE_Z) n_super_clusters++;
|
if (n_super_clusters_xy % SCLUSTER_SIZE_Z) n_super_clusters++;
|
||||||
|
|
||||||
//printf("%d\t%d\t%d\t%d\r\n", nclusters, n_super_clusters_xy, n_super_clusters, (SCLUSTER_SIZE_X * SCLUSTER_SIZE_Y * SCLUSTER_SIZE_Z)*n_super_clusters);
|
|
||||||
|
|
||||||
int cl_count = 0;
|
int cl_count = 0;
|
||||||
|
|
||||||
for (int scl = 0; scl < n_super_clusters; scl++) {
|
for (int scl = 0; scl < n_super_clusters; scl++) {
|
||||||
const int sci = atom->Nsclusters_local;
|
const int sci = atom->Nsclusters_local;
|
||||||
if(sci >= atom->Nsclusters_max) {
|
if(sci >= atom->Nsclusters_max) {
|
||||||
@ -928,8 +925,6 @@ void buildClustersGPU(Atom *atom) {
|
|||||||
|
|
||||||
sortAtomsByCoord(atom, YY, bin, atom_scl_z_offset, atom_scl_z_end_idx);
|
sortAtomsByCoord(atom, YY, bin, atom_scl_z_offset, atom_scl_z_end_idx);
|
||||||
|
|
||||||
//printf("%d\t%d\t%d\t%d\t%d\t%d\r\n", nclusters, scl, scl_z, atom_scl_z_offset, atom_scl_z_end_idx, c);
|
|
||||||
|
|
||||||
for (int scl_y = 0; scl_y < SCLUSTER_SIZE_Y; scl_y++) {
|
for (int scl_y = 0; scl_y < SCLUSTER_SIZE_Y; scl_y++) {
|
||||||
|
|
||||||
if (cl_count >= nclusters) break;
|
if (cl_count >= nclusters) break;
|
||||||
@ -941,11 +936,8 @@ void buildClustersGPU(Atom *atom) {
|
|||||||
const int atom_scl_y_end_idx = MIN(atom_scl_y_offset +
|
const int atom_scl_y_end_idx = MIN(atom_scl_y_offset +
|
||||||
SCLUSTER_SIZE_X * CLUSTER_M - 1, c - 1);
|
SCLUSTER_SIZE_X * CLUSTER_M - 1, c - 1);
|
||||||
|
|
||||||
//printf("%d\t%d\t%d\t%d\t%d\t%d\t%d\r\n", nclusters, scl, scl_z, scl_y, atom_scl_y_offset, atom_scl_y_end_idx, c);
|
|
||||||
|
|
||||||
sortAtomsByCoord(atom, XX, bin, atom_scl_y_offset, atom_scl_y_end_idx);
|
sortAtomsByCoord(atom, XX, bin, atom_scl_y_offset, atom_scl_y_end_idx);
|
||||||
|
|
||||||
|
|
||||||
for (int scl_x = 0; scl_x < SCLUSTER_SIZE_X; scl_x++) {
|
for (int scl_x = 0; scl_x < SCLUSTER_SIZE_X; scl_x++) {
|
||||||
if (cl_count >= nclusters) break;
|
if (cl_count >= nclusters) break;
|
||||||
cl_count++;
|
cl_count++;
|
||||||
@ -1035,9 +1027,6 @@ void buildClustersGPU(Atom *atom) {
|
|||||||
atom->siclusters[sci].bbmaxz = sc_bbmaxz;
|
atom->siclusters[sci].bbmaxz = sc_bbmaxz;
|
||||||
atom->Nsclusters_local++;
|
atom->Nsclusters_local++;
|
||||||
}
|
}
|
||||||
|
|
||||||
//printf("%d\t%d\r\n", (SCLUSTER_SIZE_X * SCLUSTER_SIZE_Y * SCLUSTER_SIZE_Z)*n_super_clusters, count);
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
DEBUG_MESSAGE("buildClustersGPU end\n");
|
DEBUG_MESSAGE("buildClustersGPU end\n");
|
||||||
|
@ -229,3 +229,88 @@ void setupPbc(Atom *atom, Parameter *param) {
|
|||||||
cpuUpdatePbc(atom, param, 1);
|
cpuUpdatePbc(atom, param, 1);
|
||||||
DEBUG_MESSAGE("setupPbc end\n");
|
DEBUG_MESSAGE("setupPbc end\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void setupPbcGPU(Atom *atom, Parameter *param) {
|
||||||
|
DEBUG_MESSAGE("setupPbcGPU start\n");
|
||||||
|
MD_FLOAT xprd = param->xprd;
|
||||||
|
MD_FLOAT yprd = param->yprd;
|
||||||
|
MD_FLOAT zprd = param->zprd;
|
||||||
|
MD_FLOAT Cutneigh = param->cutneigh;
|
||||||
|
int jfac = SCLUSTER_SIZE / CLUSTER_M;
|
||||||
|
int ncj = atom->Nsclusters_local * jfac;
|
||||||
|
int Nghost = -1;
|
||||||
|
int Nghost_atoms = 0;
|
||||||
|
|
||||||
|
for(int cj = 0; cj < ncj; cj++) {
|
||||||
|
if(atom->jclusters[cj].natoms > 0) {
|
||||||
|
if(atom->Nsclusters_local + (Nghost + (jfac - 1) + 7) / jfac >= atom->Nclusters_max) {
|
||||||
|
growClusters(atom);
|
||||||
|
}
|
||||||
|
|
||||||
|
if((Nghost + 7) * CLUSTER_M >= NmaxGhost) {
|
||||||
|
growPbc(atom);
|
||||||
|
}
|
||||||
|
|
||||||
|
MD_FLOAT bbminx = atom->jclusters[cj].bbminx;
|
||||||
|
MD_FLOAT bbmaxx = atom->jclusters[cj].bbmaxx;
|
||||||
|
MD_FLOAT bbminy = atom->jclusters[cj].bbminy;
|
||||||
|
MD_FLOAT bbmaxy = atom->jclusters[cj].bbmaxy;
|
||||||
|
MD_FLOAT bbminz = atom->jclusters[cj].bbminz;
|
||||||
|
MD_FLOAT bbmaxz = atom->jclusters[cj].bbmaxz;
|
||||||
|
|
||||||
|
/* Setup ghost atoms */
|
||||||
|
/* 6 planes */
|
||||||
|
if (bbminx < Cutneigh) { ADDGHOST(+1,0,0); }
|
||||||
|
if (bbmaxx >= (xprd-Cutneigh)) { ADDGHOST(-1,0,0); }
|
||||||
|
if (bbminy < Cutneigh) { ADDGHOST(0,+1,0); }
|
||||||
|
if (bbmaxy >= (yprd-Cutneigh)) { ADDGHOST(0,-1,0); }
|
||||||
|
if (bbminz < Cutneigh) { ADDGHOST(0,0,+1); }
|
||||||
|
if (bbmaxz >= (zprd-Cutneigh)) { ADDGHOST(0,0,-1); }
|
||||||
|
/* 8 corners */
|
||||||
|
if (bbminx < Cutneigh && bbminy < Cutneigh && bbminz < Cutneigh) { ADDGHOST(+1,+1,+1); }
|
||||||
|
if (bbminx < Cutneigh && bbmaxy >= (yprd-Cutneigh) && bbminz < Cutneigh) { ADDGHOST(+1,-1,+1); }
|
||||||
|
if (bbminx < Cutneigh && bbminy < Cutneigh && bbmaxz >= (zprd-Cutneigh)) { ADDGHOST(+1,+1,-1); }
|
||||||
|
if (bbminx < Cutneigh && bbmaxy >= (yprd-Cutneigh) && bbmaxz >= (zprd-Cutneigh)) { ADDGHOST(+1,-1,-1); }
|
||||||
|
if (bbmaxx >= (xprd-Cutneigh) && bbminy < Cutneigh && bbminz < Cutneigh) { ADDGHOST(-1,+1,+1); }
|
||||||
|
if (bbmaxx >= (xprd-Cutneigh) && bbmaxy >= (yprd-Cutneigh) && bbminz < Cutneigh) { ADDGHOST(-1,-1,+1); }
|
||||||
|
if (bbmaxx >= (xprd-Cutneigh) && bbminy < Cutneigh && bbmaxz >= (zprd-Cutneigh)) { ADDGHOST(-1,+1,-1); }
|
||||||
|
if (bbmaxx >= (xprd-Cutneigh) && bbmaxy >= (yprd-Cutneigh) && bbmaxz >= (zprd-Cutneigh)) { ADDGHOST(-1,-1,-1); }
|
||||||
|
/* 12 edges */
|
||||||
|
if (bbminx < Cutneigh && bbminz < Cutneigh) { ADDGHOST(+1,0,+1); }
|
||||||
|
if (bbminx < Cutneigh && bbmaxz >= (zprd-Cutneigh)) { ADDGHOST(+1,0,-1); }
|
||||||
|
if (bbmaxx >= (xprd-Cutneigh) && bbminz < Cutneigh) { ADDGHOST(-1,0,+1); }
|
||||||
|
if (bbmaxx >= (xprd-Cutneigh) && bbmaxz >= (zprd-Cutneigh)) { ADDGHOST(-1,0,-1); }
|
||||||
|
if (bbminy < Cutneigh && bbminz < Cutneigh) { ADDGHOST(0,+1,+1); }
|
||||||
|
if (bbminy < Cutneigh && bbmaxz >= (zprd-Cutneigh)) { ADDGHOST(0,+1,-1); }
|
||||||
|
if (bbmaxy >= (yprd-Cutneigh) && bbminz < Cutneigh) { ADDGHOST(0,-1,+1); }
|
||||||
|
if (bbmaxy >= (yprd-Cutneigh) && bbmaxz >= (zprd-Cutneigh)) { ADDGHOST(0,-1,-1); }
|
||||||
|
if (bbminy < Cutneigh && bbminx < Cutneigh) { ADDGHOST(+1,+1,0); }
|
||||||
|
if (bbminy < Cutneigh && bbmaxx >= (xprd-Cutneigh)) { ADDGHOST(-1,+1,0); }
|
||||||
|
if (bbmaxy >= (yprd-Cutneigh) && bbminx < Cutneigh) { ADDGHOST(+1,-1,0); }
|
||||||
|
if (bbmaxy >= (yprd-Cutneigh) && bbmaxx >= (xprd-Cutneigh)) { ADDGHOST(-1,-1,0); }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if(ncj + (Nghost + (jfac - 1) + 1) / jfac >= atom->Nclusters_max) {
|
||||||
|
growClusters(atom);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Add dummy cluster at the end
|
||||||
|
int cj_vec_base = CJ_VECTOR_BASE_INDEX(ncj + Nghost + 1);
|
||||||
|
MD_FLOAT *cj_x = &atom->cl_x[cj_vec_base];
|
||||||
|
for(int cjj = 0; cjj < CLUSTER_N; cjj++) {
|
||||||
|
cj_x[CL_X_OFFSET + cjj] = INFINITY;
|
||||||
|
cj_x[CL_Y_OFFSET + cjj] = INFINITY;
|
||||||
|
cj_x[CL_Z_OFFSET + cjj] = INFINITY;
|
||||||
|
}
|
||||||
|
|
||||||
|
// increase by one to make it the ghost atom count
|
||||||
|
atom->dummy_cj = ncj + Nghost + 1;
|
||||||
|
atom->Nghost = Nghost_atoms;
|
||||||
|
atom->Nclusters_ghost = Nghost + 1;
|
||||||
|
atom->Nclusters = atom->Nclusters_local + Nghost + 1;
|
||||||
|
|
||||||
|
// Update created ghost clusters positions
|
||||||
|
cudaUpdatePbc(atom, param, 1);
|
||||||
|
DEBUG_MESSAGE("setupPbcGPU end\n");
|
||||||
|
}
|
||||||
|
@ -4,7 +4,7 @@
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
#include <utils.h>
|
#include <utils.h>
|
||||||
|
|
||||||
extern void alignDataToSuperclusters(Atom *atom);
|
extern void alignDataToSuperclusters(Atom *atom);
|
||||||
@ -274,4 +274,59 @@ void showSuperclusters(Atom *atom) {
|
|||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void printNeighs(Atom *atom, Neighbor *neighbor) {
|
||||||
|
for (int i = 0; i < atom->Nclusters_local; ++i) {
|
||||||
|
int neigh_num = neighbor->numneigh[i];
|
||||||
|
for (int j = 0; j < neigh_num; j++) {
|
||||||
|
printf("%d ", neighbor->neighbors[ i * neighbor->maxneighs + j]);
|
||||||
|
}
|
||||||
|
printf("\r\n");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void printClusterIndices(Atom *atom) {
|
||||||
|
for (int i = 0; i < atom->Nsclusters_local; ++i) {
|
||||||
|
int clusters_num = atom->siclusters[i].nclusters;
|
||||||
|
for (int j = 0; j < clusters_num; j++) {
|
||||||
|
printf("%d ", atom->icluster_idx[j + SCLUSTER_SIZE * i]);
|
||||||
|
}
|
||||||
|
printf("\r\n");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void verifyNeigh(Atom *atom, Neighbor *neighbor) {
|
||||||
|
|
||||||
|
buildNeighbor(atom, neighbor);
|
||||||
|
int *numneigh = (int*) malloc(atom->Nclusters_local * sizeof(int));
|
||||||
|
int *neighbors = (int*) malloc(atom->Nclusters_local * neighbor->maxneighs * sizeof(int*));
|
||||||
|
|
||||||
|
for (int i = 0; i < atom->Nclusters_local; ++i) {
|
||||||
|
int neigh_num = neighbor->numneigh[i];
|
||||||
|
numneigh[i] = neighbor->numneigh[i];
|
||||||
|
neighbor->numneigh[i] = 0;
|
||||||
|
for (int j = 0; j < neigh_num; j++) {
|
||||||
|
neighbors[i * neighbor->maxneighs + j] = neighbor->neighbors[i * neighbor->maxneighs + j];
|
||||||
|
neighbor->neighbors[i * neighbor->maxneighs + j] = 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
buildNeighborGPU(atom, neighbor);
|
||||||
|
|
||||||
|
unsigned int num_diff = 0;
|
||||||
|
unsigned int neigh_diff = 0;
|
||||||
|
|
||||||
|
for (int i = 0; i < atom->Nclusters_local; ++i) {
|
||||||
|
int neigh_num = neighbor->numneigh[i];
|
||||||
|
if (numneigh[i] != neigh_num) num_diff++;
|
||||||
|
for (int j = 0; j < neigh_num; j++) {
|
||||||
|
if (neighbors[i * neighbor->maxneighs + j] !=
|
||||||
|
neighbor->neighbors[ i * neighbor->maxneighs + j]) neigh_diff++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
printf("%d\t%d\r\n", num_diff, neigh_diff);
|
||||||
|
}
|
||||||
|
|
||||||
#endif //USE_SUPER_CLUSTERS
|
#endif //USE_SUPER_CLUSTERS
|
||||||
|
Loading…
Reference in New Issue
Block a user