diff --git a/config.mk b/config.mk index 7e5f72a..3767ee7 100644 --- a/config.mk +++ b/config.mk @@ -13,7 +13,7 @@ DATA_LAYOUT ?= AOS # Assembly syntax to generate (ATT/INTEL) ASM_SYNTAX ?= ATT # Debug -DEBUG ?= false +DEBUG ?= true # Explicitly store and load atom types (true or false) EXPLICIT_TYPES ?= false diff --git a/gromacs/atom.c b/gromacs/atom.c index 047775e..78a1924 100644 --- a/gromacs/atom.c +++ b/gromacs/atom.c @@ -452,12 +452,5 @@ void growSuperClusters(Atom *atom) { atom->scl_x = (MD_FLOAT*) reallocate(atom->scl_x, ALIGNMENT, atom->Nsclusters_max * SCLUSTER_M * 3 * sizeof(MD_FLOAT), nold * SCLUSTER_M * 3 * sizeof(MD_FLOAT)); atom->scl_f = (MD_FLOAT*) reallocate(atom->scl_f, ALIGNMENT, atom->Nsclusters_max * SCLUSTER_M * 3 * sizeof(MD_FLOAT), nold * SCLUSTER_M * 3 * sizeof(MD_FLOAT)); atom->scl_v = (MD_FLOAT*) reallocate(atom->scl_v, ALIGNMENT, atom->Nsclusters_max * SCLUSTER_M * 3 * sizeof(MD_FLOAT), nold * SCLUSTER_M * 3 * sizeof(MD_FLOAT)); - - - /* - for (int sci = 0; sci < atom->Nsclusters_max; sci++) { - atom->siclusters[sci].iclusters = (int*) reallocate(atom->siclusters[sci].iclusters, ALIGNMENT, SCLUSTER_SIZE * sizeof(int), SCLUSTER_SIZE * sizeof(int)); - } - */ } #endif //USE_SUPER_CLUSTERS diff --git a/gromacs/cuda/force_lj.cu b/gromacs/cuda/force_lj.cu index 2b7d449..aa02fdd 100644 --- a/gromacs/cuda/force_lj.cu +++ b/gromacs/cuda/force_lj.cu @@ -154,11 +154,11 @@ void copyDataFromCUDADevice(Atom *atom) { memcpyFromGPU(atom->cl_f, cuda_cl_f, atom->Nclusters_max * CLUSTER_M * 3 * sizeof(MD_FLOAT)); #ifdef USE_SUPER_CLUSTERS - alignDataFromSuperclusters(atom); - memcpyFromGPU(atom->scl_x, cuda_scl_x, atom->Nsclusters_max * SCLUSTER_M * 3 * sizeof(MD_FLOAT)); memcpyFromGPU(atom->scl_v, cuda_scl_v, atom->Nsclusters_max * SCLUSTER_M * 3 * sizeof(MD_FLOAT)); memcpyFromGPU(atom->scl_f, cuda_scl_f, atom->Nsclusters_max * SCLUSTER_M * 3 * sizeof(MD_FLOAT)); + + alignDataFromSuperclusters(atom); #endif //USE_SUPER_CLUSTERS DEBUG_MESSAGE("copyDataFromCUDADevice stop\r\n"); @@ -241,6 +241,39 @@ __global__ void cudaUpdatePbc_warp(MD_FLOAT *cuda_cl_x, int *cuda_border_map, } } +__global__ void cudaUpdatePbcSup_warp(MD_FLOAT *cuda_cl_x, int *cuda_border_map, + int *cuda_jclusters_natoms, + int *cuda_PBCx, + int *cuda_PBCy, + int *cuda_PBCz, + int Nsclusters_local, + int Nclusters_ghost, + MD_FLOAT param_xprd, + MD_FLOAT param_yprd, + MD_FLOAT param_zprd) { + unsigned int cg = blockDim.x * blockIdx.x + threadIdx.x; + if (cg >= Nclusters_ghost) return; + + //int jfac = MAX(1, CLUSTER_N / CLUSTER_M); + int jfac = SCLUSTER_SIZE / CLUSTER_M; + int ncj = Nsclusters_local / jfac; + MD_FLOAT xprd = param_xprd; + MD_FLOAT yprd = param_yprd; + MD_FLOAT zprd = param_zprd; + + const int cj = ncj + cg; + int cj_vec_base = CJ_VECTOR_BASE_INDEX(cj); + int bmap_vec_base = CJ_VECTOR_BASE_INDEX(cuda_border_map[cg]); + MD_FLOAT *cj_x = &cuda_cl_x[cj_vec_base]; + MD_FLOAT *bmap_x = &cuda_cl_x[bmap_vec_base]; + + for(int cjj = 0; cjj < cuda_jclusters_natoms[cg]; cjj++) { + cj_x[CL_X_OFFSET + cjj] = bmap_x[CL_X_OFFSET + cjj] + cuda_PBCx[cg] * xprd; + cj_x[CL_Y_OFFSET + cjj] = bmap_x[CL_Y_OFFSET + cjj] + cuda_PBCy[cg] * yprd; + cj_x[CL_Z_OFFSET + cjj] = bmap_x[CL_Z_OFFSET + cjj] + cuda_PBCz[cg] * zprd; + } +} + __global__ void computeForceLJ_cuda_warp(MD_FLOAT *cuda_cl_x, MD_FLOAT *cuda_cl_f, int Nclusters_local, int Nclusters_max, int *cuda_numneigh, int *cuda_neighs, int half_neigh, int maxneighs, @@ -348,11 +381,19 @@ extern "C" void cudaUpdatePbc(Atom *atom, Parameter *param) { const int threads_num = 512; dim3 block_size = dim3(threads_num, 1, 1);; - dim3 grid_size = dim3(atom->Nclusters_ghost/(threads_num)+1, 1, 1);; - cudaUpdatePbc_warp<<>>(cuda_cl_x, cuda_border_map, + dim3 grid_size = dim3(atom->Nclusters_ghost/(threads_num)+1, 1, 1); + +#ifdef USE_SUPER_CLUSTERS + cudaUpdatePbcSup_warp<<>>(cuda_scl_x, cuda_border_map, cuda_jclusters_natoms, cuda_PBCx, cuda_PBCy, cuda_PBCz, atom->Nclusters_local, atom->Nclusters_ghost, param->xprd, param->yprd, param->zprd); +#else + cudaUpdatePbc_warp<<>>(cuda_cl_x, cuda_border_map, + cuda_jclusters_natoms, cuda_PBCx, cuda_PBCy, cuda_PBCz, + atom->Nclusters_local, atom->Nclusters_ghost, + param->xprd, param->yprd, param->zprd); +#endif //USE_SUPER_CLUSTERS cuda_assert("cudaUpdatePbc", cudaPeekAtLastError()); cuda_assert("cudaUpdatePbc", cudaDeviceSynchronize()); } diff --git a/gromacs/cuda/force_lj_sup.cu b/gromacs/cuda/force_lj_sup.cu index 23cf486..7d5d9c6 100644 --- a/gromacs/cuda/force_lj_sup.cu +++ b/gromacs/cuda/force_lj_sup.cu @@ -193,9 +193,11 @@ __global__ void computeForceLJSup_cuda_warp(MD_FLOAT *cuda_cl_x, MD_FLOAT *cuda_ int numneighs = cuda_numneigh[cuda_iclusters[SCLUSTER_SIZE * sci_pos + ci_pos]]; for(int k = 0; k < numneighs; k++) { - int cj = (&cuda_neighs[cuda_iclusters[SCLUSTER_SIZE * sci_pos + ci_pos] * maxneighs])[k]; + int glob_j = (&cuda_neighs[cuda_iclusters[SCLUSTER_SIZE * sci_pos + ci_pos] * maxneighs])[k]; + int scj = glob_j / SCLUSTER_SIZE; // TODO Make cj accessible from super cluster data alignment (not reachable right now) - int cj_vec_base = SCJ_VECTOR_BASE_INDEX(cj); + int cj = SCJ_VECTOR_BASE_INDEX(scj) + CLUSTER_M * (glob_j % SCLUSTER_SIZE); + int cj_vec_base = cj; MD_FLOAT *cj_x = &cuda_cl_x[cj_vec_base]; MD_FLOAT *cj_f = &cuda_cl_f[cj_vec_base]; @@ -206,14 +208,10 @@ __global__ void computeForceLJSup_cuda_warp(MD_FLOAT *cuda_cl_x, MD_FLOAT *cuda_ MD_FLOAT fiy = 0; MD_FLOAT fiz = 0; - int cond; -#if CLUSTER_M == CLUSTER_N - cond = half_neigh ? (ci_cj0 != cj || cii_pos < cjj_pos) : - (ci_cj0 != cj || cii_pos != cjj_pos); -#elif CLUSTER_M < CLUSTER_N - cond = half_neigh ? (ci_cj0 != cj || cii_pos + CLUSTER_M * (ci_pos & 0x1) < cjj_pos) : - (ci_cj0 != cj || cii_pos + CLUSTER_M * (ci_pos & 0x1) != cjj_pos); -#endif + + //int cond = ci_cj0 != cj || cii_pos != cjj_pos || scj != sci_pos; + int cond = (glob_j != cuda_iclusters[SCLUSTER_SIZE * sci_pos + ci_pos] && cii_pos != cjj_pos); + if(cond) { MD_FLOAT delx = xtmp - cj_x[SCL_CL_X_OFFSET(ci_pos) + cjj_pos]; MD_FLOAT dely = ytmp - cj_x[SCL_CL_Y_OFFSET(ci_pos) + cjj_pos]; diff --git a/gromacs/includes/atom.h b/gromacs/includes/atom.h index 13f9d73..281548e 100644 --- a/gromacs/includes/atom.h +++ b/gromacs/includes/atom.h @@ -149,7 +149,6 @@ typedef struct { } Cluster; typedef struct { - //int *iclusters; int nclusters; MD_FLOAT bbminx, bbmaxx; MD_FLOAT bbminy, bbmaxy; diff --git a/gromacs/includes/pbc.h b/gromacs/includes/pbc.h index 33ceb53..50607c2 100644 --- a/gromacs/includes/pbc.h +++ b/gromacs/includes/pbc.h @@ -16,5 +16,8 @@ extern void setupPbc(Atom*, Parameter*); #ifdef CUDA_TARGET extern void cudaUpdatePbc(Atom*, Parameter*, int); +#if defined(USE_SUPER_CLUSTERS) +extern void setupPbcGPU(Atom*, Parameter*); +#endif //defined(USE_SUPER_CLUSTERS) #endif #endif diff --git a/gromacs/includes/utils.h b/gromacs/includes/utils.h index d2c3130..326c022 100644 --- a/gromacs/includes/utils.h +++ b/gromacs/includes/utils.h @@ -6,12 +6,14 @@ #define MD_BENCH_UTILS_H #include +#include #ifdef USE_SUPER_CLUSTERS void verifyClusters(Atom *atom); void verifyLayout(Atom *atom); void checkAlignment(Atom *atom); void showSuperclusters(Atom *atom); +void printNeighs(Atom *atom, Neighbor *neighbor); #endif //USE_SUPER_CLUSTERS #endif //MD_BENCH_UTILS_H diff --git a/gromacs/main.c b/gromacs/main.c index 5cd66b1..8a24baa 100644 --- a/gromacs/main.c +++ b/gromacs/main.c @@ -77,7 +77,12 @@ double setup(Parameter *param, Eam *eam, Atom *atom, Neighbor *neighbor, Stats * buildClusters(atom); #endif //defined(CUDA_TARGET) && defined(USE_SUPER_CLUSTERS) defineJClusters(atom); + #if defined(CUDA_TARGET) && defined(USE_SUPER_CLUSTERS) + //setupPbcGPU(atom, param); setupPbc(atom, param); + #else + setupPbc(atom, param); + #endif //defined(CUDA_TARGET) && defined(USE_SUPER_CLUSTERS) binClusters(atom); #if defined(CUDA_TARGET) && defined(USE_SUPER_CLUSTERS) buildNeighborGPU(atom, neighbor); @@ -101,7 +106,12 @@ double reneighbour(Parameter *param, Atom *atom, Neighbor *neighbor) { buildClusters(atom); #endif //defined(CUDA_TARGET) && defined(USE_SUPER_CLUSTERS) defineJClusters(atom); + #if defined(CUDA_TARGET) && defined(USE_SUPER_CLUSTERS) + //setupPbcGPU(atom, param); setupPbc(atom, param); + #else + setupPbc(atom, param); + #endif //defined(CUDA_TARGET) && defined(USE_SUPER_CLUSTERS) binClusters(atom); #if defined(CUDA_TARGET) && defined(USE_SUPER_CLUSTERS) buildNeighborGPU(atom, neighbor); @@ -234,6 +244,8 @@ int main(int argc, char** argv) { printParameter(¶m); printf(HLINE); + //verifyNeigh(&atom, &neighbor); + printf("step\ttemp\t\tpressure\n"); computeThermo(0, ¶m, &atom); #if defined(MEM_TRACER) || defined(INDEX_TRACER) @@ -276,7 +288,10 @@ int main(int argc, char** argv) { #endif //defined(CUDA_TARGET) && defined(USE_SUPER_CLUSTERS) } + + copyDataFromCUDADevice(&atom); updatePbc(&atom, ¶m, 0); + copyDataToCUDADevice(&atom); } else { #ifdef CUDA_TARGET copyDataFromCUDADevice(&atom); @@ -294,14 +309,34 @@ int main(int argc, char** argv) { traceAddresses(¶m, &atom, &neighbor, n + 1); #endif + + /* + printf("%d\t%d\r\n", atom.Nsclusters_local, atom.Nclusters_local); + copyDataToCUDADevice(&atom); + verifyLayout(&atom); + + //printClusterIndices(&atom); + + */ + if(param.force_field == FF_EAM) { timer[FORCE] += computeForceEam(&eam, ¶m, &atom, &neighbor, &stats); } else { timer[FORCE] += computeForceLJ(¶m, &atom, &neighbor, &stats); } + /* + copyDataFromCUDADevice(&atom); + verifyLayout(&atom); + + getchar(); + */ + finalIntegrate(¶m, &atom); + + + if(!((n + 1) % param.nstat) && (n+1) < param.ntimes) { computeThermo(n + 1, ¶m, &atom); } diff --git a/gromacs/neighbor.c b/gromacs/neighbor.c index 775c5dd..3124b8f 100644 --- a/gromacs/neighbor.c +++ b/gromacs/neighbor.c @@ -395,11 +395,11 @@ void buildNeighborGPU(Atom *atom, Neighbor *neighbor) { int new_maxneighs = neighbor->maxneighs; resize = 0; - //for (int sci = 0; sci < atom->Nsclusters_local; sci++) { - //for (int scii = 0; scii < atom->siclusters[sci].nclusters; scii++) { - for(int ci = 0; ci < atom->Nclusters_local; ci++) { + for (int sci = 0; sci < atom->Nsclusters_local; sci++) { + for (int scii = 0; scii < atom->siclusters[sci].nclusters; scii++) { + //for(int ci = 0; ci < atom->Nclusters_local; ci++) { //const int ci = atom->siclusters[sci].iclusters[scii]; - //const int ci = atom->icluster_idx[SCLUSTER_SIZE * sci + scii]; + const int ci = atom->icluster_idx[SCLUSTER_SIZE * sci + scii]; int ci_cj1 = CJ1_FROM_CI(ci); int *neighptr = &(neighbor->neighbors[ci * neighbor->maxneighs]); //int *neighptr = &(neighbor->neighbors[sci * neighbor->maxneighs]); @@ -504,7 +504,7 @@ void buildNeighborGPU(Atom *atom, Neighbor *neighbor) { } } } - //} + } if(resize) { fprintf(stdout, "RESIZE %d\n", neighbor->maxneighs); @@ -898,10 +898,7 @@ void buildClustersGPU(Atom *atom) { int n_super_clusters = n_super_clusters_xy / SCLUSTER_SIZE_Z; if (n_super_clusters_xy % SCLUSTER_SIZE_Z) n_super_clusters++; - //printf("%d\t%d\t%d\t%d\r\n", nclusters, n_super_clusters_xy, n_super_clusters, (SCLUSTER_SIZE_X * SCLUSTER_SIZE_Y * SCLUSTER_SIZE_Z)*n_super_clusters); - int cl_count = 0; - for (int scl = 0; scl < n_super_clusters; scl++) { const int sci = atom->Nsclusters_local; if(sci >= atom->Nsclusters_max) { @@ -928,8 +925,6 @@ void buildClustersGPU(Atom *atom) { sortAtomsByCoord(atom, YY, bin, atom_scl_z_offset, atom_scl_z_end_idx); - //printf("%d\t%d\t%d\t%d\t%d\t%d\r\n", nclusters, scl, scl_z, atom_scl_z_offset, atom_scl_z_end_idx, c); - for (int scl_y = 0; scl_y < SCLUSTER_SIZE_Y; scl_y++) { if (cl_count >= nclusters) break; @@ -941,11 +936,8 @@ void buildClustersGPU(Atom *atom) { const int atom_scl_y_end_idx = MIN(atom_scl_y_offset + SCLUSTER_SIZE_X * CLUSTER_M - 1, c - 1); - //printf("%d\t%d\t%d\t%d\t%d\t%d\t%d\r\n", nclusters, scl, scl_z, scl_y, atom_scl_y_offset, atom_scl_y_end_idx, c); - sortAtomsByCoord(atom, XX, bin, atom_scl_y_offset, atom_scl_y_end_idx); - for (int scl_x = 0; scl_x < SCLUSTER_SIZE_X; scl_x++) { if (cl_count >= nclusters) break; cl_count++; @@ -1035,9 +1027,6 @@ void buildClustersGPU(Atom *atom) { atom->siclusters[sci].bbmaxz = sc_bbmaxz; atom->Nsclusters_local++; } - - //printf("%d\t%d\r\n", (SCLUSTER_SIZE_X * SCLUSTER_SIZE_Y * SCLUSTER_SIZE_Z)*n_super_clusters, count); - } DEBUG_MESSAGE("buildClustersGPU end\n"); diff --git a/gromacs/pbc.c b/gromacs/pbc.c index 412e088..ec4581e 100644 --- a/gromacs/pbc.c +++ b/gromacs/pbc.c @@ -229,3 +229,88 @@ void setupPbc(Atom *atom, Parameter *param) { cpuUpdatePbc(atom, param, 1); DEBUG_MESSAGE("setupPbc end\n"); } + +void setupPbcGPU(Atom *atom, Parameter *param) { + DEBUG_MESSAGE("setupPbcGPU start\n"); + MD_FLOAT xprd = param->xprd; + MD_FLOAT yprd = param->yprd; + MD_FLOAT zprd = param->zprd; + MD_FLOAT Cutneigh = param->cutneigh; + int jfac = SCLUSTER_SIZE / CLUSTER_M; + int ncj = atom->Nsclusters_local * jfac; + int Nghost = -1; + int Nghost_atoms = 0; + + for(int cj = 0; cj < ncj; cj++) { + if(atom->jclusters[cj].natoms > 0) { + if(atom->Nsclusters_local + (Nghost + (jfac - 1) + 7) / jfac >= atom->Nclusters_max) { + growClusters(atom); + } + + if((Nghost + 7) * CLUSTER_M >= NmaxGhost) { + growPbc(atom); + } + + MD_FLOAT bbminx = atom->jclusters[cj].bbminx; + MD_FLOAT bbmaxx = atom->jclusters[cj].bbmaxx; + MD_FLOAT bbminy = atom->jclusters[cj].bbminy; + MD_FLOAT bbmaxy = atom->jclusters[cj].bbmaxy; + MD_FLOAT bbminz = atom->jclusters[cj].bbminz; + MD_FLOAT bbmaxz = atom->jclusters[cj].bbmaxz; + + /* Setup ghost atoms */ + /* 6 planes */ + if (bbminx < Cutneigh) { ADDGHOST(+1,0,0); } + if (bbmaxx >= (xprd-Cutneigh)) { ADDGHOST(-1,0,0); } + if (bbminy < Cutneigh) { ADDGHOST(0,+1,0); } + if (bbmaxy >= (yprd-Cutneigh)) { ADDGHOST(0,-1,0); } + if (bbminz < Cutneigh) { ADDGHOST(0,0,+1); } + if (bbmaxz >= (zprd-Cutneigh)) { ADDGHOST(0,0,-1); } + /* 8 corners */ + if (bbminx < Cutneigh && bbminy < Cutneigh && bbminz < Cutneigh) { ADDGHOST(+1,+1,+1); } + if (bbminx < Cutneigh && bbmaxy >= (yprd-Cutneigh) && bbminz < Cutneigh) { ADDGHOST(+1,-1,+1); } + if (bbminx < Cutneigh && bbminy < Cutneigh && bbmaxz >= (zprd-Cutneigh)) { ADDGHOST(+1,+1,-1); } + if (bbminx < Cutneigh && bbmaxy >= (yprd-Cutneigh) && bbmaxz >= (zprd-Cutneigh)) { ADDGHOST(+1,-1,-1); } + if (bbmaxx >= (xprd-Cutneigh) && bbminy < Cutneigh && bbminz < Cutneigh) { ADDGHOST(-1,+1,+1); } + if (bbmaxx >= (xprd-Cutneigh) && bbmaxy >= (yprd-Cutneigh) && bbminz < Cutneigh) { ADDGHOST(-1,-1,+1); } + if (bbmaxx >= (xprd-Cutneigh) && bbminy < Cutneigh && bbmaxz >= (zprd-Cutneigh)) { ADDGHOST(-1,+1,-1); } + if (bbmaxx >= (xprd-Cutneigh) && bbmaxy >= (yprd-Cutneigh) && bbmaxz >= (zprd-Cutneigh)) { ADDGHOST(-1,-1,-1); } + /* 12 edges */ + if (bbminx < Cutneigh && bbminz < Cutneigh) { ADDGHOST(+1,0,+1); } + if (bbminx < Cutneigh && bbmaxz >= (zprd-Cutneigh)) { ADDGHOST(+1,0,-1); } + if (bbmaxx >= (xprd-Cutneigh) && bbminz < Cutneigh) { ADDGHOST(-1,0,+1); } + if (bbmaxx >= (xprd-Cutneigh) && bbmaxz >= (zprd-Cutneigh)) { ADDGHOST(-1,0,-1); } + if (bbminy < Cutneigh && bbminz < Cutneigh) { ADDGHOST(0,+1,+1); } + if (bbminy < Cutneigh && bbmaxz >= (zprd-Cutneigh)) { ADDGHOST(0,+1,-1); } + if (bbmaxy >= (yprd-Cutneigh) && bbminz < Cutneigh) { ADDGHOST(0,-1,+1); } + if (bbmaxy >= (yprd-Cutneigh) && bbmaxz >= (zprd-Cutneigh)) { ADDGHOST(0,-1,-1); } + if (bbminy < Cutneigh && bbminx < Cutneigh) { ADDGHOST(+1,+1,0); } + if (bbminy < Cutneigh && bbmaxx >= (xprd-Cutneigh)) { ADDGHOST(-1,+1,0); } + if (bbmaxy >= (yprd-Cutneigh) && bbminx < Cutneigh) { ADDGHOST(+1,-1,0); } + if (bbmaxy >= (yprd-Cutneigh) && bbmaxx >= (xprd-Cutneigh)) { ADDGHOST(-1,-1,0); } + } + } + + if(ncj + (Nghost + (jfac - 1) + 1) / jfac >= atom->Nclusters_max) { + growClusters(atom); + } + + // Add dummy cluster at the end + int cj_vec_base = CJ_VECTOR_BASE_INDEX(ncj + Nghost + 1); + MD_FLOAT *cj_x = &atom->cl_x[cj_vec_base]; + for(int cjj = 0; cjj < CLUSTER_N; cjj++) { + cj_x[CL_X_OFFSET + cjj] = INFINITY; + cj_x[CL_Y_OFFSET + cjj] = INFINITY; + cj_x[CL_Z_OFFSET + cjj] = INFINITY; + } + + // increase by one to make it the ghost atom count + atom->dummy_cj = ncj + Nghost + 1; + atom->Nghost = Nghost_atoms; + atom->Nclusters_ghost = Nghost + 1; + atom->Nclusters = atom->Nclusters_local + Nghost + 1; + + // Update created ghost clusters positions + cudaUpdatePbc(atom, param, 1); + DEBUG_MESSAGE("setupPbcGPU end\n"); +} diff --git a/gromacs/utils.c b/gromacs/utils.c index 1d9db02..8cfd710 100644 --- a/gromacs/utils.c +++ b/gromacs/utils.c @@ -4,7 +4,7 @@ */ #include - +#include #include extern void alignDataToSuperclusters(Atom *atom); @@ -274,4 +274,59 @@ void showSuperclusters(Atom *atom) { } } + +void printNeighs(Atom *atom, Neighbor *neighbor) { + for (int i = 0; i < atom->Nclusters_local; ++i) { + int neigh_num = neighbor->numneigh[i]; + for (int j = 0; j < neigh_num; j++) { + printf("%d ", neighbor->neighbors[ i * neighbor->maxneighs + j]); + } + printf("\r\n"); + } +} + +void printClusterIndices(Atom *atom) { + for (int i = 0; i < atom->Nsclusters_local; ++i) { + int clusters_num = atom->siclusters[i].nclusters; + for (int j = 0; j < clusters_num; j++) { + printf("%d ", atom->icluster_idx[j + SCLUSTER_SIZE * i]); + } + printf("\r\n"); + } +} + +void verifyNeigh(Atom *atom, Neighbor *neighbor) { + + buildNeighbor(atom, neighbor); + int *numneigh = (int*) malloc(atom->Nclusters_local * sizeof(int)); + int *neighbors = (int*) malloc(atom->Nclusters_local * neighbor->maxneighs * sizeof(int*)); + + for (int i = 0; i < atom->Nclusters_local; ++i) { + int neigh_num = neighbor->numneigh[i]; + numneigh[i] = neighbor->numneigh[i]; + neighbor->numneigh[i] = 0; + for (int j = 0; j < neigh_num; j++) { + neighbors[i * neighbor->maxneighs + j] = neighbor->neighbors[i * neighbor->maxneighs + j]; + neighbor->neighbors[i * neighbor->maxneighs + j] = 0; + } + } + + + buildNeighborGPU(atom, neighbor); + + unsigned int num_diff = 0; + unsigned int neigh_diff = 0; + + for (int i = 0; i < atom->Nclusters_local; ++i) { + int neigh_num = neighbor->numneigh[i]; + if (numneigh[i] != neigh_num) num_diff++; + for (int j = 0; j < neigh_num; j++) { + if (neighbors[i * neighbor->maxneighs + j] != + neighbor->neighbors[ i * neighbor->maxneighs + j]) neigh_diff++; + } + } + + printf("%d\t%d\r\n", num_diff, neigh_diff); +} + #endif //USE_SUPER_CLUSTERS