Neighbor list preparation
This commit is contained in:
		@@ -121,7 +121,7 @@ void copyDataToCUDADevice(Atom *atom) {
 | 
				
			|||||||
    memcpyToGPU(cuda_PBCz, atom->PBCz, atom->Nclusters_ghost * sizeof(int));
 | 
					    memcpyToGPU(cuda_PBCz, atom->PBCz, atom->Nclusters_ghost * sizeof(int));
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#ifdef USE_SUPER_CLUSTERS
 | 
					#ifdef USE_SUPER_CLUSTERS
 | 
				
			||||||
    alignDataToSuperclusters(atom);
 | 
					    //alignDataToSuperclusters(atom);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    if (cuda_max_scl < atom->Nsclusters_max) {
 | 
					    if (cuda_max_scl < atom->Nsclusters_max) {
 | 
				
			||||||
        cuda_assert("cudaDeviceFree", cudaFree(cuda_scl_x));
 | 
					        cuda_assert("cudaDeviceFree", cudaFree(cuda_scl_x));
 | 
				
			||||||
@@ -158,7 +158,7 @@ void copyDataFromCUDADevice(Atom *atom) {
 | 
				
			|||||||
    memcpyFromGPU(atom->scl_v, cuda_scl_v, atom->Nsclusters_max * SCLUSTER_M * 3 * sizeof(MD_FLOAT));
 | 
					    memcpyFromGPU(atom->scl_v, cuda_scl_v, atom->Nsclusters_max * SCLUSTER_M * 3 * sizeof(MD_FLOAT));
 | 
				
			||||||
    memcpyFromGPU(atom->scl_f, cuda_scl_f, atom->Nsclusters_max * SCLUSTER_M * 3 * sizeof(MD_FLOAT));
 | 
					    memcpyFromGPU(atom->scl_f, cuda_scl_f, atom->Nsclusters_max * SCLUSTER_M * 3 * sizeof(MD_FLOAT));
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    alignDataFromSuperclusters(atom);
 | 
					    //alignDataFromSuperclusters(atom);
 | 
				
			||||||
#endif //USE_SUPER_CLUSTERS
 | 
					#endif //USE_SUPER_CLUSTERS
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    DEBUG_MESSAGE("copyDataFromCUDADevice stop\r\n");
 | 
					    DEBUG_MESSAGE("copyDataFromCUDADevice stop\r\n");
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -74,6 +74,7 @@
 | 
				
			|||||||
#   define CI_BASE_INDEX(a,b)       ((a) * CLUSTER_N * (b))
 | 
					#   define CI_BASE_INDEX(a,b)       ((a) * CLUSTER_N * (b))
 | 
				
			||||||
#   define CJ_BASE_INDEX(a,b)       ((a) * CLUSTER_N * (b))
 | 
					#   define CJ_BASE_INDEX(a,b)       ((a) * CLUSTER_N * (b))
 | 
				
			||||||
#ifdef USE_SUPER_CLUSTERS
 | 
					#ifdef USE_SUPER_CLUSTERS
 | 
				
			||||||
 | 
					#   define CJ1_FROM_SCI(a)          (a)
 | 
				
			||||||
#   define SCI_BASE_INDEX(a,b)      ((a) * CLUSTER_N * SCLUSTER_SIZE * (b))
 | 
					#   define SCI_BASE_INDEX(a,b)      ((a) * CLUSTER_N * SCLUSTER_SIZE * (b))
 | 
				
			||||||
#   define SCJ_BASE_INDEX(a,b)      ((a) * CLUSTER_N * SCLUSTER_SIZE * (b))
 | 
					#   define SCJ_BASE_INDEX(a,b)      ((a) * CLUSTER_N * SCLUSTER_SIZE * (b))
 | 
				
			||||||
#endif //USE_SUPER_CLUSTERS
 | 
					#endif //USE_SUPER_CLUSTERS
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -78,8 +78,8 @@ double setup(Parameter *param, Eam *eam, Atom *atom, Neighbor *neighbor, Stats *
 | 
				
			|||||||
    #endif //defined(CUDA_TARGET) && defined(USE_SUPER_CLUSTERS)
 | 
					    #endif //defined(CUDA_TARGET) && defined(USE_SUPER_CLUSTERS)
 | 
				
			||||||
    defineJClusters(atom);
 | 
					    defineJClusters(atom);
 | 
				
			||||||
    #if defined(CUDA_TARGET) && defined(USE_SUPER_CLUSTERS)
 | 
					    #if defined(CUDA_TARGET) && defined(USE_SUPER_CLUSTERS)
 | 
				
			||||||
    //setupPbcGPU(atom, param);
 | 
					    setupPbcGPU(atom, param);
 | 
				
			||||||
    setupPbc(atom, param);
 | 
					    //setupPbc(atom, param);
 | 
				
			||||||
    #else
 | 
					    #else
 | 
				
			||||||
    setupPbc(atom, param);
 | 
					    setupPbc(atom, param);
 | 
				
			||||||
    #endif //defined(CUDA_TARGET) && defined(USE_SUPER_CLUSTERS)
 | 
					    #endif //defined(CUDA_TARGET) && defined(USE_SUPER_CLUSTERS)
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -189,6 +189,30 @@ int atomDistanceInRange(Atom *atom, int ci, int cj, MD_FLOAT rsq) {
 | 
				
			|||||||
    return 0;
 | 
					    return 0;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					int atomDistanceInRangeGPU(Atom *atom, int sci, int cj, MD_FLOAT rsq) {
 | 
				
			||||||
 | 
					    for (int ci = 0; ci < atom->siclusters[sci].nclusters; ci++) {
 | 
				
			||||||
 | 
					        const int icluster_idx = atom->icluster_idx[SCLUSTER_SIZE * sci + ci];
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        int ci_vec_base = CI_VECTOR_BASE_INDEX(icluster_idx);
 | 
				
			||||||
 | 
					        int cj_vec_base = CJ_VECTOR_BASE_INDEX(cj);
 | 
				
			||||||
 | 
					        MD_FLOAT *ci_x = &atom->cl_x[ci_vec_base];
 | 
				
			||||||
 | 
					        MD_FLOAT *cj_x = &atom->cl_x[cj_vec_base];
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        for(int cii = 0; cii < atom->iclusters[icluster_idx].natoms; cii++) {
 | 
				
			||||||
 | 
					            for(int cjj = 0; cjj < atom->jclusters[cj].natoms; cjj++) {
 | 
				
			||||||
 | 
					                MD_FLOAT delx = ci_x[CL_X_OFFSET + cii] - cj_x[CL_X_OFFSET + cjj];
 | 
				
			||||||
 | 
					                MD_FLOAT dely = ci_x[CL_Y_OFFSET + cii] - cj_x[CL_Y_OFFSET + cjj];
 | 
				
			||||||
 | 
					                MD_FLOAT delz = ci_x[CL_Z_OFFSET + cii] - cj_x[CL_Z_OFFSET + cjj];
 | 
				
			||||||
 | 
					                if(delx * delx + dely * dely + delz * delz < rsq) {
 | 
				
			||||||
 | 
					                    return 1;
 | 
				
			||||||
 | 
					                }
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    return 0;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
void buildNeighbor(Atom *atom, Neighbor *neighbor) {
 | 
					void buildNeighbor(Atom *atom, Neighbor *neighbor) {
 | 
				
			||||||
    DEBUG_MESSAGE("buildNeighbor start\n");
 | 
					    DEBUG_MESSAGE("buildNeighbor start\n");
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -375,9 +399,8 @@ void buildNeighborGPU(Atom *atom, Neighbor *neighbor) {
 | 
				
			|||||||
    DEBUG_MESSAGE("buildNeighborGPU start\n");
 | 
					    DEBUG_MESSAGE("buildNeighborGPU start\n");
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    /* extend atom arrays if necessary */
 | 
					    /* extend atom arrays if necessary */
 | 
				
			||||||
    if(atom->Nclusters_local > nmax) {
 | 
					    if(atom->Nsclusters_local > nmax) {
 | 
				
			||||||
        nmax = atom->Nclusters_local;
 | 
					        nmax = atom->Nsclusters_local;
 | 
				
			||||||
        //nmax = atom->Nsclusters_local;
 | 
					 | 
				
			||||||
        if(neighbor->numneigh) free(neighbor->numneigh);
 | 
					        if(neighbor->numneigh) free(neighbor->numneigh);
 | 
				
			||||||
        if(neighbor->neighbors) free(neighbor->neighbors);
 | 
					        if(neighbor->neighbors) free(neighbor->neighbors);
 | 
				
			||||||
        neighbor->numneigh = (int*) malloc(nmax * sizeof(int));
 | 
					        neighbor->numneigh = (int*) malloc(nmax * sizeof(int));
 | 
				
			||||||
@@ -396,114 +419,116 @@ void buildNeighborGPU(Atom *atom, Neighbor *neighbor) {
 | 
				
			|||||||
        resize = 0;
 | 
					        resize = 0;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        for (int sci = 0; sci < atom->Nsclusters_local; sci++) {
 | 
					        for (int sci = 0; sci < atom->Nsclusters_local; sci++) {
 | 
				
			||||||
            for (int scii = 0; scii < atom->siclusters[sci].nclusters; scii++) {
 | 
					            int ci_cj1 = CJ1_FROM_SCI(sci);
 | 
				
			||||||
            //for(int ci = 0; ci < atom->Nclusters_local; ci++) {
 | 
					            int *neighptr = &(neighbor->neighbors[sci * neighbor->maxneighs]);
 | 
				
			||||||
                //const int ci = atom->siclusters[sci].iclusters[scii];
 | 
					            int n = 0;
 | 
				
			||||||
                const int ci = atom->icluster_idx[SCLUSTER_SIZE * sci + scii];
 | 
					            int ibin = atom->sicluster_bin[sci];
 | 
				
			||||||
                int ci_cj1 = CJ1_FROM_CI(ci);
 | 
					            MD_FLOAT ibb_xmin = atom->siclusters[sci].bbminx;
 | 
				
			||||||
                int *neighptr = &(neighbor->neighbors[ci * neighbor->maxneighs]);
 | 
					            MD_FLOAT ibb_xmax = atom->siclusters[sci].bbmaxx;
 | 
				
			||||||
                //int *neighptr = &(neighbor->neighbors[sci * neighbor->maxneighs]);
 | 
					            MD_FLOAT ibb_ymin = atom->siclusters[sci].bbminy;
 | 
				
			||||||
                int n = 0;
 | 
					            MD_FLOAT ibb_ymax = atom->siclusters[sci].bbmaxy;
 | 
				
			||||||
                int ibin = atom->icluster_bin[ci];
 | 
					            MD_FLOAT ibb_zmin = atom->siclusters[sci].bbminz;
 | 
				
			||||||
                MD_FLOAT ibb_xmin = atom->iclusters[ci].bbminx;
 | 
					            MD_FLOAT ibb_zmax = atom->siclusters[sci].bbmaxz;
 | 
				
			||||||
                MD_FLOAT ibb_xmax = atom->iclusters[ci].bbmaxx;
 | 
					 | 
				
			||||||
                MD_FLOAT ibb_ymin = atom->iclusters[ci].bbminy;
 | 
					 | 
				
			||||||
                MD_FLOAT ibb_ymax = atom->iclusters[ci].bbmaxy;
 | 
					 | 
				
			||||||
                MD_FLOAT ibb_zmin = atom->iclusters[ci].bbminz;
 | 
					 | 
				
			||||||
                MD_FLOAT ibb_zmax = atom->iclusters[ci].bbmaxz;
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
                for(int k = 0; k < nstencil; k++) {
 | 
					            for(int k = 0; k < nstencil; k++) {
 | 
				
			||||||
                    int jbin = ibin + stencil[k];
 | 
					                int jbin = ibin + stencil[k];
 | 
				
			||||||
                    int *loc_bin = &bin_clusters[jbin * clusters_per_bin];
 | 
					                int *loc_bin = &bin_clusters[jbin * clusters_per_bin];
 | 
				
			||||||
                    int cj, m = -1;
 | 
					                int cj, m = -1;
 | 
				
			||||||
                    MD_FLOAT jbb_xmin, jbb_xmax, jbb_ymin, jbb_ymax, jbb_zmin, jbb_zmax;
 | 
					                MD_FLOAT jbb_xmin, jbb_xmax, jbb_ymin, jbb_ymax, jbb_zmin, jbb_zmax;
 | 
				
			||||||
                    const int c = bin_nclusters[jbin];
 | 
					                const int c = bin_nclusters[jbin];
 | 
				
			||||||
 | 
					
 | 
				
			||||||
                    if(c > 0) {
 | 
					                if(c > 0) {
 | 
				
			||||||
                        MD_FLOAT dl, dh, dm, dm0, d_bb_sq;
 | 
					                    MD_FLOAT dl, dh, dm, dm0, d_bb_sq;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
                        do {
 | 
					                    do {
 | 
				
			||||||
                            m++;
 | 
					                        m++;
 | 
				
			||||||
                            cj = loc_bin[m];
 | 
					                        cj = loc_bin[m];
 | 
				
			||||||
                            if(neighbor->half_neigh && ci_cj1 > cj) {
 | 
					                        if(neighbor->half_neigh && ci_cj1 > cj) {
 | 
				
			||||||
                                continue;
 | 
					                            continue;
 | 
				
			||||||
                            }
 | 
					                        }
 | 
				
			||||||
                            jbb_zmin = atom->jclusters[cj].bbminz;
 | 
					                        jbb_zmin = atom->jclusters[cj].bbminz;
 | 
				
			||||||
                            jbb_zmax = atom->jclusters[cj].bbmaxz;
 | 
					                        jbb_zmax = atom->jclusters[cj].bbmaxz;
 | 
				
			||||||
 | 
					                        dl = ibb_zmin - jbb_zmax;
 | 
				
			||||||
 | 
					                        dh = jbb_zmin - ibb_zmax;
 | 
				
			||||||
 | 
					                        dm = MAX(dl, dh);
 | 
				
			||||||
 | 
					                        dm0 = MAX(dm, 0.0);
 | 
				
			||||||
 | 
					                        d_bb_sq = dm0 * dm0;
 | 
				
			||||||
 | 
					                    } while(m + 1 < c && d_bb_sq > cutneighsq);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					                    jbb_xmin = atom->jclusters[cj].bbminx;
 | 
				
			||||||
 | 
					                    jbb_xmax = atom->jclusters[cj].bbmaxx;
 | 
				
			||||||
 | 
					                    jbb_ymin = atom->jclusters[cj].bbminy;
 | 
				
			||||||
 | 
					                    jbb_ymax = atom->jclusters[cj].bbmaxy;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					                    while(m < c) {
 | 
				
			||||||
 | 
					                        if(!neighbor->half_neigh || ci_cj1 <= cj) {
 | 
				
			||||||
                            dl = ibb_zmin - jbb_zmax;
 | 
					                            dl = ibb_zmin - jbb_zmax;
 | 
				
			||||||
                            dh = jbb_zmin - ibb_zmax;
 | 
					                            dh = jbb_zmin - ibb_zmax;
 | 
				
			||||||
                            dm = MAX(dl, dh);
 | 
					                            dm = MAX(dl, dh);
 | 
				
			||||||
                            dm0 = MAX(dm, 0.0);
 | 
					                            dm0 = MAX(dm, 0.0);
 | 
				
			||||||
                            d_bb_sq = dm0 * dm0;
 | 
					                            d_bb_sq = dm0 * dm0;
 | 
				
			||||||
                        } while(m + 1 < c && d_bb_sq > cutneighsq);
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
                        jbb_xmin = atom->jclusters[cj].bbminx;
 | 
					                            /*if(d_bb_sq > cutneighsq) {
 | 
				
			||||||
                        jbb_xmax = atom->jclusters[cj].bbmaxx;
 | 
					                                break;
 | 
				
			||||||
                        jbb_ymin = atom->jclusters[cj].bbminy;
 | 
					                            }*/
 | 
				
			||||||
                        jbb_ymax = atom->jclusters[cj].bbmaxy;
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
                        while(m < c) {
 | 
					                            dl = ibb_ymin - jbb_ymax;
 | 
				
			||||||
                            if(!neighbor->half_neigh || ci_cj1 <= cj) {
 | 
					                            dh = jbb_ymin - ibb_ymax;
 | 
				
			||||||
                                dl = ibb_zmin - jbb_zmax;
 | 
					                            dm = MAX(dl, dh);
 | 
				
			||||||
                                dh = jbb_zmin - ibb_zmax;
 | 
					                            dm0 = MAX(dm, 0.0);
 | 
				
			||||||
                                dm = MAX(dl, dh);
 | 
					                            d_bb_sq += dm0 * dm0;
 | 
				
			||||||
                                dm0 = MAX(dm, 0.0);
 | 
					 | 
				
			||||||
                                d_bb_sq = dm0 * dm0;
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
                                /*if(d_bb_sq > cutneighsq) {
 | 
					                            dl = ibb_xmin - jbb_xmax;
 | 
				
			||||||
                                    break;
 | 
					                            dh = jbb_xmin - ibb_xmax;
 | 
				
			||||||
                                }*/
 | 
					                            dm = MAX(dl, dh);
 | 
				
			||||||
 | 
					                            dm0 = MAX(dm, 0.0);
 | 
				
			||||||
 | 
					                            d_bb_sq += dm0 * dm0;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
                                dl = ibb_ymin - jbb_ymax;
 | 
					                            if(d_bb_sq < cutneighsq) {
 | 
				
			||||||
                                dh = jbb_ymin - ibb_ymax;
 | 
					                                if(d_bb_sq < rbb_sq || atomDistanceInRangeGPU(atom, sci, cj, cutneighsq)) {
 | 
				
			||||||
                                dm = MAX(dl, dh);
 | 
					                                    neighptr[n++] = cj;
 | 
				
			||||||
                                dm0 = MAX(dm, 0.0);
 | 
					 | 
				
			||||||
                                d_bb_sq += dm0 * dm0;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
                                dl = ibb_xmin - jbb_xmax;
 | 
					 | 
				
			||||||
                                dh = jbb_xmin - ibb_xmax;
 | 
					 | 
				
			||||||
                                dm = MAX(dl, dh);
 | 
					 | 
				
			||||||
                                dm0 = MAX(dm, 0.0);
 | 
					 | 
				
			||||||
                                d_bb_sq += dm0 * dm0;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
                                if(d_bb_sq < cutneighsq) {
 | 
					 | 
				
			||||||
                                    if(d_bb_sq < rbb_sq || atomDistanceInRange(atom, ci, cj, cutneighsq)) {
 | 
					 | 
				
			||||||
                                        neighptr[n++] = cj;
 | 
					 | 
				
			||||||
                                    }
 | 
					 | 
				
			||||||
                                }
 | 
					                                }
 | 
				
			||||||
                            }
 | 
					                            }
 | 
				
			||||||
 | 
					                        }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
                            m++;
 | 
					                        m++;
 | 
				
			||||||
                            if(m < c) {
 | 
					                        if(m < c) {
 | 
				
			||||||
                                cj = loc_bin[m];
 | 
					                            cj = loc_bin[m];
 | 
				
			||||||
                                jbb_xmin = atom->jclusters[cj].bbminx;
 | 
					                            jbb_xmin = atom->jclusters[cj].bbminx;
 | 
				
			||||||
                                jbb_xmax = atom->jclusters[cj].bbmaxx;
 | 
					                            jbb_xmax = atom->jclusters[cj].bbmaxx;
 | 
				
			||||||
                                jbb_ymin = atom->jclusters[cj].bbminy;
 | 
					                            jbb_ymin = atom->jclusters[cj].bbminy;
 | 
				
			||||||
                                jbb_ymax = atom->jclusters[cj].bbmaxy;
 | 
					                            jbb_ymax = atom->jclusters[cj].bbmaxy;
 | 
				
			||||||
                                jbb_zmin = atom->jclusters[cj].bbminz;
 | 
					                            jbb_zmin = atom->jclusters[cj].bbminz;
 | 
				
			||||||
                                jbb_zmax = atom->jclusters[cj].bbmaxz;
 | 
					                            jbb_zmax = atom->jclusters[cj].bbmaxz;
 | 
				
			||||||
                            }
 | 
					 | 
				
			||||||
                        }
 | 
					                        }
 | 
				
			||||||
                    }
 | 
					                    }
 | 
				
			||||||
                }
 | 
					                }
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
                // Fill neighbor list with dummy values to fit vector width
 | 
					            // Fill neighbor list with dummy values to fit vector width
 | 
				
			||||||
                if(CLUSTER_N < VECTOR_WIDTH) {
 | 
					            if(CLUSTER_N < VECTOR_WIDTH) {
 | 
				
			||||||
                    while(n % (VECTOR_WIDTH / CLUSTER_N)) {
 | 
					                while(n % (VECTOR_WIDTH / CLUSTER_N)) {
 | 
				
			||||||
                        neighptr[n++] = atom->dummy_cj; // Last cluster is always a dummy cluster
 | 
					                    neighptr[n++] = atom->dummy_cj; // Last cluster is always a dummy cluster
 | 
				
			||||||
                    }
 | 
					 | 
				
			||||||
                }
 | 
					                }
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
                //neighbor->numneigh[sci] = n;
 | 
					            neighbor->numneigh[sci] = n;
 | 
				
			||||||
                neighbor->numneigh[ci] = n;
 | 
					            if(n >= neighbor->maxneighs) {
 | 
				
			||||||
                if(n >= neighbor->maxneighs) {
 | 
					                resize = 1;
 | 
				
			||||||
                    resize = 1;
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
                    if(n >= new_maxneighs) {
 | 
					                if(n >= new_maxneighs) {
 | 
				
			||||||
                        new_maxneighs = n;
 | 
					                    new_maxneighs = n;
 | 
				
			||||||
                    }
 | 
					 | 
				
			||||||
                }
 | 
					                }
 | 
				
			||||||
            }
 | 
					            }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					            for (int scii = 0; scii < atom->siclusters[sci].nclusters; scii++) {
 | 
				
			||||||
 | 
					            //for(int ci = 0; ci < atom->Nclusters_local; ci++) {
 | 
				
			||||||
 | 
					                //const int ci = atom->siclusters[sci].iclusters[scii];
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        if(resize) {
 | 
					        if(resize) {
 | 
				
			||||||
@@ -954,6 +979,12 @@ void buildClustersGPU(Atom *atom) {
 | 
				
			|||||||
                        int ci_vec_base = CI_VECTOR_BASE_INDEX(ci);
 | 
					                        int ci_vec_base = CI_VECTOR_BASE_INDEX(ci);
 | 
				
			||||||
                        MD_FLOAT *ci_x = &atom->cl_x[ci_vec_base];
 | 
					                        MD_FLOAT *ci_x = &atom->cl_x[ci_vec_base];
 | 
				
			||||||
                        MD_FLOAT *ci_v = &atom->cl_v[ci_vec_base];
 | 
					                        MD_FLOAT *ci_v = &atom->cl_v[ci_vec_base];
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					                        int sci_sca_base = SCI_SCALAR_BASE_INDEX(sci);
 | 
				
			||||||
 | 
					                        int sci_vec_base = SCI_VECTOR_BASE_INDEX(sci);
 | 
				
			||||||
 | 
					                        MD_FLOAT *sci_x = &atom->scl_x[sci_vec_base];
 | 
				
			||||||
 | 
					                        MD_FLOAT *sci_v = &atom->scl_v[sci_vec_base];
 | 
				
			||||||
 | 
					
 | 
				
			||||||
                        int *ci_type = &atom->cl_type[ci_sca_base];
 | 
					                        int *ci_type = &atom->cl_type[ci_sca_base];
 | 
				
			||||||
                        MD_FLOAT bbminx = INFINITY, bbmaxx = -INFINITY;
 | 
					                        MD_FLOAT bbminx = INFINITY, bbmaxx = -INFINITY;
 | 
				
			||||||
                        MD_FLOAT bbminy = INFINITY, bbmaxy = -INFINITY;
 | 
					                        MD_FLOAT bbminy = INFINITY, bbmaxy = -INFINITY;
 | 
				
			||||||
@@ -974,6 +1005,14 @@ void buildClustersGPU(Atom *atom) {
 | 
				
			|||||||
                                ci_v[CL_Y_OFFSET + cii] = atom->vy[i];
 | 
					                                ci_v[CL_Y_OFFSET + cii] = atom->vy[i];
 | 
				
			||||||
                                ci_v[CL_Z_OFFSET + cii] = atom->vz[i];
 | 
					                                ci_v[CL_Z_OFFSET + cii] = atom->vz[i];
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					                                sci_x[SCL_CL_X_OFFSET(atom->siclusters[sci].nclusters) + cii] = xtmp;
 | 
				
			||||||
 | 
					                                sci_x[SCL_CL_Y_OFFSET(atom->siclusters[sci].nclusters) + cii] = ytmp;
 | 
				
			||||||
 | 
					                                sci_x[SCL_CL_Z_OFFSET(atom->siclusters[sci].nclusters) + cii] = ztmp;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					                                sci_v[SCL_CL_X_OFFSET(atom->siclusters[sci].nclusters) + cii] = atom->vx[i];
 | 
				
			||||||
 | 
					                                sci_v[SCL_CL_Y_OFFSET(atom->siclusters[sci].nclusters) + cii] = atom->vy[i];
 | 
				
			||||||
 | 
					                                sci_v[SCL_CL_Z_OFFSET(atom->siclusters[sci].nclusters) + cii] = atom->vz[i];
 | 
				
			||||||
 | 
					
 | 
				
			||||||
                                // TODO: To create the bounding boxes faster, we can use SIMD operations
 | 
					                                // TODO: To create the bounding boxes faster, we can use SIMD operations
 | 
				
			||||||
                                if(bbminx > xtmp) { bbminx = xtmp; }
 | 
					                                if(bbminx > xtmp) { bbminx = xtmp; }
 | 
				
			||||||
                                if(bbmaxx < xtmp) { bbmaxx = xtmp; }
 | 
					                                if(bbmaxx < xtmp) { bbmaxx = xtmp; }
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -86,6 +86,98 @@ void cpuUpdatePbc(Atom *atom, Parameter *param, int firstUpdate) {
 | 
				
			|||||||
    DEBUG_MESSAGE("updatePbc end\n");
 | 
					    DEBUG_MESSAGE("updatePbc end\n");
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/* update coordinates of ghost atoms */
 | 
				
			||||||
 | 
					/* uses mapping created in setupPbc */
 | 
				
			||||||
 | 
					void gpuUpdatePbc(Atom *atom, Parameter *param, int firstUpdate) {
 | 
				
			||||||
 | 
					    DEBUG_MESSAGE("gpuUpdatePbc start\n");
 | 
				
			||||||
 | 
					    int jfac = MAX(1, CLUSTER_N / CLUSTER_M);
 | 
				
			||||||
 | 
					    int ncj = atom->Nclusters_local / jfac;
 | 
				
			||||||
 | 
					    MD_FLOAT xprd = param->xprd;
 | 
				
			||||||
 | 
					    MD_FLOAT yprd = param->yprd;
 | 
				
			||||||
 | 
					    MD_FLOAT zprd = param->zprd;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    for(int cg = 0; cg < atom->Nclusters_ghost; cg++) {
 | 
				
			||||||
 | 
					        const int cj = ncj + cg;
 | 
				
			||||||
 | 
					        int cj_vec_base = CJ_VECTOR_BASE_INDEX(cj);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        int scj_vec_base = SCJ_VECTOR_BASE_INDEX(cj);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        int bmap_vec_base = CJ_VECTOR_BASE_INDEX(atom->border_map[cg]);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        int sbmap_vec_base = SCJ_VECTOR_BASE_INDEX(atom->border_map[cg]);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        MD_FLOAT *cj_x = &atom->cl_x[cj_vec_base];
 | 
				
			||||||
 | 
					        MD_FLOAT *bmap_x = &atom->cl_x[bmap_vec_base];
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        MD_FLOAT *scj_x = &atom->scl_x[scj_vec_base];
 | 
				
			||||||
 | 
					        MD_FLOAT *sbmap_x = &atom->scl_x[sbmap_vec_base];
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        MD_FLOAT bbminx = INFINITY, bbmaxx = -INFINITY;
 | 
				
			||||||
 | 
					        MD_FLOAT bbminy = INFINITY, bbmaxy = -INFINITY;
 | 
				
			||||||
 | 
					        MD_FLOAT bbminz = INFINITY, bbmaxz = -INFINITY;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        MD_FLOAT sbbminx = INFINITY, sbbmaxx = -INFINITY;
 | 
				
			||||||
 | 
					        MD_FLOAT sbbminy = INFINITY, sbbmaxy = -INFINITY;
 | 
				
			||||||
 | 
					        MD_FLOAT sbbminz = INFINITY, sbbmaxz = -INFINITY;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        for(int cjj = 0; cjj < atom->jclusters[cj].natoms; cjj++) {
 | 
				
			||||||
 | 
					            MD_FLOAT xtmp = bmap_x[CL_X_OFFSET + cjj] + atom->PBCx[cg] * xprd;
 | 
				
			||||||
 | 
					            MD_FLOAT ytmp = bmap_x[CL_Y_OFFSET + cjj] + atom->PBCy[cg] * yprd;
 | 
				
			||||||
 | 
					            MD_FLOAT ztmp = bmap_x[CL_Z_OFFSET + cjj] + atom->PBCz[cg] * zprd;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					            MD_FLOAT sxtmp = sbmap_x[CL_X_OFFSET + cjj] + atom->PBCx[cg] * xprd;
 | 
				
			||||||
 | 
					            MD_FLOAT sytmp = sbmap_x[CL_Y_OFFSET + cjj] + atom->PBCy[cg] * yprd;
 | 
				
			||||||
 | 
					            MD_FLOAT sztmp = sbmap_x[CL_Z_OFFSET + cjj] + atom->PBCz[cg] * zprd;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					            cj_x[CL_X_OFFSET + cjj] = xtmp;
 | 
				
			||||||
 | 
					            cj_x[CL_Y_OFFSET + cjj] = ytmp;
 | 
				
			||||||
 | 
					            cj_x[CL_Z_OFFSET + cjj] = ztmp;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					            scj_x[SCL_X_OFFSET + cjj] = sxtmp;
 | 
				
			||||||
 | 
					            scj_x[SCL_Y_OFFSET + cjj] = sytmp;
 | 
				
			||||||
 | 
					            scj_x[SCL_Z_OFFSET + cjj] = sztmp;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					            if(firstUpdate) {
 | 
				
			||||||
 | 
					                // TODO: To create the bounding boxes faster, we can use SIMD operations
 | 
				
			||||||
 | 
					                if(bbminx > xtmp) { bbminx = xtmp; }
 | 
				
			||||||
 | 
					                if(bbmaxx < xtmp) { bbmaxx = xtmp; }
 | 
				
			||||||
 | 
					                if(bbminy > ytmp) { bbminy = ytmp; }
 | 
				
			||||||
 | 
					                if(bbmaxy < ytmp) { bbmaxy = ytmp; }
 | 
				
			||||||
 | 
					                if(bbminz > ztmp) { bbminz = ztmp; }
 | 
				
			||||||
 | 
					                if(bbmaxz < ztmp) { bbmaxz = ztmp; }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					                if(sbbminx > sxtmp) { sbbminx = sxtmp; }
 | 
				
			||||||
 | 
					                if(sbbmaxx < sxtmp) { sbbmaxx = sxtmp; }
 | 
				
			||||||
 | 
					                if(sbbminy > sytmp) { sbbminy = sytmp; }
 | 
				
			||||||
 | 
					                if(sbbmaxy < sytmp) { sbbmaxy = sytmp; }
 | 
				
			||||||
 | 
					                if(sbbminz > sztmp) { sbbminz = sztmp; }
 | 
				
			||||||
 | 
					                if(sbbmaxz < sztmp) { sbbmaxz = sztmp; }
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        if(firstUpdate) {
 | 
				
			||||||
 | 
					            for(int cjj = atom->jclusters[cj].natoms; cjj < CLUSTER_N; cjj++) {
 | 
				
			||||||
 | 
					                cj_x[CL_X_OFFSET + cjj] = INFINITY;
 | 
				
			||||||
 | 
					                cj_x[CL_Y_OFFSET + cjj] = INFINITY;
 | 
				
			||||||
 | 
					                cj_x[CL_Z_OFFSET + cjj] = INFINITY;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					                scj_x[SCL_X_OFFSET + cjj] = INFINITY;
 | 
				
			||||||
 | 
					                scj_x[SCL_Y_OFFSET + cjj] = INFINITY;
 | 
				
			||||||
 | 
					                scj_x[SCL_Z_OFFSET + cjj] = INFINITY;
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					            atom->jclusters[cj].bbminx = bbminx;
 | 
				
			||||||
 | 
					            atom->jclusters[cj].bbmaxx = bbmaxx;
 | 
				
			||||||
 | 
					            atom->jclusters[cj].bbminy = bbminy;
 | 
				
			||||||
 | 
					            atom->jclusters[cj].bbmaxy = bbmaxy;
 | 
				
			||||||
 | 
					            atom->jclusters[cj].bbminz = bbminz;
 | 
				
			||||||
 | 
					            atom->jclusters[cj].bbmaxz = bbmaxz;
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    DEBUG_MESSAGE("gpuUpdatePbc end\n");
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
/* relocate atoms that have left domain according
 | 
					/* relocate atoms that have left domain according
 | 
				
			||||||
 * to periodic boundary conditions */
 | 
					 * to periodic boundary conditions */
 | 
				
			||||||
void updateAtomsPbc(Atom *atom, Parameter *param) {
 | 
					void updateAtomsPbc(Atom *atom, Parameter *param) {
 | 
				
			||||||
@@ -236,7 +328,8 @@ void setupPbcGPU(Atom *atom, Parameter *param) {
 | 
				
			|||||||
    MD_FLOAT yprd = param->yprd;
 | 
					    MD_FLOAT yprd = param->yprd;
 | 
				
			||||||
    MD_FLOAT zprd = param->zprd;
 | 
					    MD_FLOAT zprd = param->zprd;
 | 
				
			||||||
    MD_FLOAT Cutneigh = param->cutneigh;
 | 
					    MD_FLOAT Cutneigh = param->cutneigh;
 | 
				
			||||||
    int jfac = SCLUSTER_SIZE / CLUSTER_M;
 | 
					    //int jfac = MAX(1, CLUSTER_N / CLUSTER_M);
 | 
				
			||||||
 | 
					    int jfac = SCLUSTER_M / CLUSTER_M;
 | 
				
			||||||
    int ncj = atom->Nsclusters_local * jfac;
 | 
					    int ncj = atom->Nsclusters_local * jfac;
 | 
				
			||||||
    int Nghost = -1;
 | 
					    int Nghost = -1;
 | 
				
			||||||
    int Nghost_atoms = 0;
 | 
					    int Nghost_atoms = 0;
 | 
				
			||||||
@@ -245,6 +338,7 @@ void setupPbcGPU(Atom *atom, Parameter *param) {
 | 
				
			|||||||
        if(atom->jclusters[cj].natoms > 0) {
 | 
					        if(atom->jclusters[cj].natoms > 0) {
 | 
				
			||||||
            if(atom->Nsclusters_local + (Nghost + (jfac - 1) + 7) / jfac >= atom->Nclusters_max) {
 | 
					            if(atom->Nsclusters_local + (Nghost + (jfac - 1) + 7) / jfac >= atom->Nclusters_max) {
 | 
				
			||||||
                growClusters(atom);
 | 
					                growClusters(atom);
 | 
				
			||||||
 | 
					                //growSuperClusters(atom);
 | 
				
			||||||
            }
 | 
					            }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
            if((Nghost + 7) * CLUSTER_M >= NmaxGhost) {
 | 
					            if((Nghost + 7) * CLUSTER_M >= NmaxGhost) {
 | 
				
			||||||
@@ -293,6 +387,7 @@ void setupPbcGPU(Atom *atom, Parameter *param) {
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
    if(ncj + (Nghost + (jfac - 1) + 1) / jfac >= atom->Nclusters_max) {
 | 
					    if(ncj + (Nghost + (jfac - 1) + 1) / jfac >= atom->Nclusters_max) {
 | 
				
			||||||
        growClusters(atom);
 | 
					        growClusters(atom);
 | 
				
			||||||
 | 
					        //growSuperClusters(atom);
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    // Add dummy cluster at the end
 | 
					    // Add dummy cluster at the end
 | 
				
			||||||
@@ -311,6 +406,6 @@ void setupPbcGPU(Atom *atom, Parameter *param) {
 | 
				
			|||||||
    atom->Nclusters = atom->Nclusters_local + Nghost + 1;
 | 
					    atom->Nclusters = atom->Nclusters_local + Nghost + 1;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    // Update created ghost clusters positions
 | 
					    // Update created ghost clusters positions
 | 
				
			||||||
    cudaUpdatePbc(atom, param, 1);
 | 
					    gpuUpdatePbc(atom, param, 1);
 | 
				
			||||||
    DEBUG_MESSAGE("setupPbcGPU end\n");
 | 
					    DEBUG_MESSAGE("setupPbcGPU end\n");
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user