Compare commits
	
		
			35 Commits
		
	
	
		
			mucosim_cu
			...
			cuda_port
		
	
	| Author | SHA1 | Date | |
|---|---|---|---|
| 
						 | 
					bc7b523979 | ||
| 
						 | 
					eeba125a52 | ||
| 
						 | 
					b32254b03f | ||
| 
						 | 
					4dac820784 | ||
| 
						 | 
					fe56c50efd | ||
| 
						 | 
					7a61cbbabf | ||
| 
						 | 
					176de0525b | ||
| 
						 | 
					7bad7e84b6 | ||
| 
						 | 
					fb304f240b | ||
| 
						 | 
					5a6d1851ed | ||
| 
						 | 
					f61f59ba3f | ||
| 
						 | 
					d1c2249b55 | ||
| 
						 | 
					c9db6e45fa | ||
| 
						 | 
					0967e8f671 | ||
| 
						 | 
					fa409c016c | ||
| 
						 | 
					b65199308d | ||
| 
						 | 
					71798f5ec5 | ||
| 
						 | 
					4f0403d3ea | ||
| 
						 | 
					fa86e44f90 | ||
| 
						 | 
					7e8fd96fa4 | ||
| 
						 | 
					463de5b1ed | ||
| 
						 | 
					4a32a62a98 | ||
| 
						 | 
					16e8b76012 | ||
| 
						 | 
					60ed524dd8 | ||
| 
						 | 
					45f83c7607 | ||
| 
						 | 
					c49278cb21 | ||
| 
						 | 
					757d4329f3 | ||
| 
						 | 
					67f9c769ef | ||
| 
						 | 
					b5b4d23c0c | ||
| 
						 | 
					fea1e41daa | ||
| 
						 | 
					f1998b7acc | ||
| 
						 | 
					2fe3cd80a0 | ||
| 
						 | 
					f4313f64e5 | ||
| 
						 | 
					7f068a6959 | ||
| 
						 | 
					62cfc22856 | 
@@ -7,9 +7,10 @@ ANSI_CFLAGS += -pedantic
 | 
				
			|||||||
ANSI_CFLAGS += -Wextra
 | 
					ANSI_CFLAGS += -Wextra
 | 
				
			||||||
 | 
					
 | 
				
			||||||
# CFLAGS   = -O0 -g  -std=c99 -fargument-noalias
 | 
					# CFLAGS   = -O0 -g  -std=c99 -fargument-noalias
 | 
				
			||||||
CFLAGS   = -O3 -g -arch=sm_61 # -fopenmp
 | 
					#CFLAGS   = -O3 -g -arch=sm_61 # -fopenmp
 | 
				
			||||||
 | 
					CFLAGS   = -O3 -g # -fopenmp
 | 
				
			||||||
ASFLAGS  =  -masm=intel
 | 
					ASFLAGS  =  -masm=intel
 | 
				
			||||||
LFLAGS   =
 | 
					LFLAGS   =
 | 
				
			||||||
DEFINES  = -D_GNU_SOURCE -DLIKWID_PERFMON
 | 
					DEFINES  = -D_GNU_SOURCE #-DLIKWID_PERFMON
 | 
				
			||||||
INCLUDES = $(LIKWID_INC)
 | 
					INCLUDES = $(LIKWID_INC)
 | 
				
			||||||
LIBS     = -lm $(LIKWID_LIB) -llikwid -lcuda -lcudart
 | 
					LIBS     = -lm $(LIKWID_LIB) -lcuda -lcudart #-llikwid
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -124,92 +124,46 @@ __global__ void kernel_final_integrate(MD_FLOAT dtforce, int Nlocal, Atom a) {
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
extern "C" {
 | 
					extern "C" {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static Atom c_atom;
 | 
					 | 
				
			||||||
int *c_neighs;
 | 
					 | 
				
			||||||
int *c_neigh_numneigh;
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
int get_num_threads() {
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
    const char *num_threads_env = getenv("NUM_THREADS");
 | 
					void cuda_final_integrate(bool doReneighbour, Parameter *param, Atom *atom, Atom *c_atom, const int num_threads_per_block) {
 | 
				
			||||||
    int num_threads = 0;
 | 
					 | 
				
			||||||
    if(num_threads_env == nullptr)
 | 
					 | 
				
			||||||
        num_threads = 32;
 | 
					 | 
				
			||||||
    else {
 | 
					 | 
				
			||||||
        num_threads = atoi(num_threads_env);
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    return num_threads;
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
void cuda_final_integrate(bool doReneighbour, Parameter *param, Atom *atom) {
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
    const int Nlocal = atom->Nlocal;
 | 
					    const int Nlocal = atom->Nlocal;
 | 
				
			||||||
    const int num_threads = get_num_threads();
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    const int num_threads_per_block = num_threads; // this should be multiple of 32 as operations are performed at the level of warps
 | 
					 | 
				
			||||||
    const int num_blocks = ceil((float)Nlocal / (float)num_threads_per_block);
 | 
					    const int num_blocks = ceil((float)Nlocal / (float)num_threads_per_block);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    kernel_final_integrate <<< num_blocks, num_threads_per_block >>> (param->dtforce, Nlocal, c_atom);
 | 
					    kernel_final_integrate <<< num_blocks, num_threads_per_block >>> (param->dtforce, Nlocal, *c_atom);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    checkCUDAError( "PeekAtLastError FinalIntegrate", cudaPeekAtLastError() );
 | 
					    checkCUDAError( "PeekAtLastError FinalIntegrate", cudaPeekAtLastError() );
 | 
				
			||||||
    checkCUDAError( "DeviceSync FinalIntegrate", cudaDeviceSynchronize() );
 | 
					    checkCUDAError( "DeviceSync FinalIntegrate", cudaDeviceSynchronize() );
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    if(doReneighbour) {
 | 
					    if(doReneighbour) {
 | 
				
			||||||
        checkCUDAError( "FinalIntegrate: velocity memcpy", cudaMemcpy(atom->vx, c_atom.vx, sizeof(MD_FLOAT) * atom->Nlocal * 3, cudaMemcpyDeviceToHost) );
 | 
					        checkCUDAError( "FinalIntegrate: velocity memcpy", cudaMemcpy(atom->vx, c_atom->vx, sizeof(MD_FLOAT) * atom->Nlocal * 3, cudaMemcpyDeviceToHost) );
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
void cuda_initial_integrate(bool doReneighbour, Parameter *param, Atom *atom) {
 | 
					void cuda_initial_integrate(bool doReneighbour, Parameter *param, Atom *atom, Atom *c_atom, const int num_threads_per_block) {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    const int Nlocal = atom->Nlocal;
 | 
					    const int Nlocal = atom->Nlocal;
 | 
				
			||||||
    const int num_threads = get_num_threads();
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    const int num_threads_per_block = num_threads; // this should be multiple of 32 as operations are performed at the level of warps
 | 
					 | 
				
			||||||
    const int num_blocks = ceil((float)Nlocal / (float)num_threads_per_block);
 | 
					    const int num_blocks = ceil((float)Nlocal / (float)num_threads_per_block);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    kernel_initial_integrate <<< num_blocks, num_threads_per_block >>> (param->dtforce, param->dt, Nlocal, c_atom);
 | 
					    kernel_initial_integrate <<< num_blocks, num_threads_per_block >>> (param->dtforce, param->dt, Nlocal, *c_atom);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    checkCUDAError( "PeekAtLastError InitialIntegrate", cudaPeekAtLastError() );
 | 
					    checkCUDAError( "PeekAtLastError InitialIntegrate", cudaPeekAtLastError() );
 | 
				
			||||||
    checkCUDAError( "DeviceSync InitialIntegrate", cudaDeviceSynchronize() );
 | 
					    checkCUDAError( "DeviceSync InitialIntegrate", cudaDeviceSynchronize() );
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    if(doReneighbour) {
 | 
					    if(doReneighbour) {
 | 
				
			||||||
        checkCUDAError( "InitialIntegrate: velocity memcpy", cudaMemcpy(atom->vx, c_atom.vx, sizeof(MD_FLOAT) * atom->Nlocal * 3, cudaMemcpyDeviceToHost) );
 | 
					        checkCUDAError( "InitialIntegrate: velocity memcpy", cudaMemcpy(atom->vx, c_atom->vx, sizeof(MD_FLOAT) * atom->Nlocal * 3, cudaMemcpyDeviceToHost) );
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
    checkCUDAError( "InitialIntegrate: position memcpy", cudaMemcpy(atom->x, c_atom.x, sizeof(MD_FLOAT) * atom->Nlocal * 3, cudaMemcpyDeviceToHost) );
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
void initCudaAtom(Atom *atom, Neighbor *neighbor) {
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    const int Nlocal = atom->Nlocal;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    checkCUDAError( "c_atom.x malloc", cudaMalloc((void**)&(c_atom.x), sizeof(MD_FLOAT) * atom->Nmax * 3) );
 | 
					 | 
				
			||||||
    checkCUDAError( "c_atom.x memcpy", cudaMemcpy(c_atom.x, atom->x, sizeof(MD_FLOAT) * atom->Nmax * 3, cudaMemcpyHostToDevice) );
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    checkCUDAError( "c_atom.fx malloc", cudaMalloc((void**)&(c_atom.fx), sizeof(MD_FLOAT) * Nlocal * 3) );
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    checkCUDAError( "c_atom.vx malloc", cudaMalloc((void**)&(c_atom.vx), sizeof(MD_FLOAT) * Nlocal * 3) );
 | 
					 | 
				
			||||||
    checkCUDAError( "c_atom.vx memcpy", cudaMemcpy(c_atom.vx, atom->vx, sizeof(MD_FLOAT) * Nlocal * 3, cudaMemcpyHostToDevice) );
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    checkCUDAError( "c_atom.type malloc", cudaMalloc((void**)&(c_atom.type), sizeof(int) * atom->Nmax) );
 | 
					 | 
				
			||||||
    checkCUDAError( "c_atom.epsilon malloc", cudaMalloc((void**)&(c_atom.epsilon), sizeof(MD_FLOAT) * atom->ntypes * atom->ntypes) );
 | 
					 | 
				
			||||||
    checkCUDAError( "c_atom.sigma6 malloc", cudaMalloc((void**)&(c_atom.sigma6), sizeof(MD_FLOAT) * atom->ntypes * atom->ntypes) );
 | 
					 | 
				
			||||||
    checkCUDAError( "c_atom.cutforcesq malloc", cudaMalloc((void**)&(c_atom.cutforcesq), sizeof(MD_FLOAT) * atom->ntypes * atom->ntypes) );
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    checkCUDAError( "c_neighs malloc", cudaMalloc((void**)&c_neighs, sizeof(int) * Nlocal * neighbor->maxneighs) );
 | 
					 | 
				
			||||||
    checkCUDAError( "c_neigh_numneigh malloc", cudaMalloc((void**)&c_neigh_numneigh, sizeof(int) * Nlocal) );
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    checkCUDAError( "c_atom.type memcpy", cudaMemcpy(c_atom.type, atom->type, sizeof(int) * atom->Nmax, cudaMemcpyHostToDevice) );
 | 
					 | 
				
			||||||
    checkCUDAError( "c_atom.sigma6 memcpy", cudaMemcpy(c_atom.sigma6, atom->sigma6, sizeof(MD_FLOAT) * atom->ntypes * atom->ntypes, cudaMemcpyHostToDevice) );
 | 
					 | 
				
			||||||
    checkCUDAError( "c_atom.epsilon memcpy", cudaMemcpy(c_atom.epsilon, atom->epsilon, sizeof(MD_FLOAT) * atom->ntypes * atom->ntypes, cudaMemcpyHostToDevice) );
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    checkCUDAError( "c_atom.cutforcesq memcpy", cudaMemcpy(c_atom.cutforcesq, atom->cutforcesq, sizeof(MD_FLOAT) * atom->ntypes * atom->ntypes, cudaMemcpyHostToDevice) );
 | 
					 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
double computeForce(
 | 
					double computeForce(
 | 
				
			||||||
        bool reneighbourHappenend,
 | 
					        bool reneighbourHappenend,
 | 
				
			||||||
        Parameter *param,
 | 
					        Parameter *param,
 | 
				
			||||||
        Atom *atom,
 | 
					        Atom *atom,
 | 
				
			||||||
        Neighbor *neighbor
 | 
					        Neighbor *neighbor,
 | 
				
			||||||
 | 
					        Atom *c_atom,
 | 
				
			||||||
 | 
					        Neighbor *c_neighbor,
 | 
				
			||||||
 | 
					        int num_threads_per_block
 | 
				
			||||||
        )
 | 
					        )
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
    int Nlocal = atom->Nlocal;
 | 
					    int Nlocal = atom->Nlocal;
 | 
				
			||||||
@@ -219,14 +173,6 @@ double computeForce(
 | 
				
			|||||||
    MD_FLOAT epsilon = param->epsilon;
 | 
					    MD_FLOAT epsilon = param->epsilon;
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    const int num_threads = get_num_threads();
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    c_atom.Natoms = atom->Natoms;
 | 
					 | 
				
			||||||
    c_atom.Nlocal = atom->Nlocal;
 | 
					 | 
				
			||||||
    c_atom.Nghost = atom->Nghost;
 | 
					 | 
				
			||||||
    c_atom.Nmax = atom->Nmax;
 | 
					 | 
				
			||||||
    c_atom.ntypes = atom->ntypes;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    /*
 | 
					    /*
 | 
				
			||||||
    int nDevices;
 | 
					    int nDevices;
 | 
				
			||||||
    cudaGetDeviceCount(&nDevices);
 | 
					    cudaGetDeviceCount(&nDevices);
 | 
				
			||||||
@@ -242,24 +188,16 @@ double computeForce(
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
    // HINT: Run with cuda-memcheck ./MDBench-NVCC in case of error
 | 
					    // HINT: Run with cuda-memcheck ./MDBench-NVCC in case of error
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    // checkCUDAError( "c_atom.fx memset", cudaMemset(c_atom.fx, 0, sizeof(MD_FLOAT) * Nlocal * 3) );
 | 
					    // checkCUDAError( "c_atom->fx memset", cudaMemset(c_atom->fx, 0, sizeof(MD_FLOAT) * Nlocal * 3) );
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    cudaProfilerStart();
 | 
					    cudaProfilerStart();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    checkCUDAError( "c_atom.x memcpy", cudaMemcpy(c_atom.x, atom->x, sizeof(MD_FLOAT) * atom->Nmax * 3, cudaMemcpyHostToDevice) );
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    if(reneighbourHappenend) {
 | 
					 | 
				
			||||||
        checkCUDAError( "c_neigh_numneigh memcpy", cudaMemcpy(c_neigh_numneigh, neighbor->numneigh, sizeof(int) * Nlocal, cudaMemcpyHostToDevice) );
 | 
					 | 
				
			||||||
        checkCUDAError( "c_neighs memcpy", cudaMemcpy(c_neighs, neighbor->neighbors, sizeof(int) * Nlocal * neighbor->maxneighs, cudaMemcpyHostToDevice) );
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    const int num_threads_per_block = num_threads; // this should be multiple of 32 as operations are performed at the level of warps
 | 
					 | 
				
			||||||
    const int num_blocks = ceil((float)Nlocal / (float)num_threads_per_block);
 | 
					    const int num_blocks = ceil((float)Nlocal / (float)num_threads_per_block);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    double S = getTimeStamp();
 | 
					    double S = getTimeStamp();
 | 
				
			||||||
    LIKWID_MARKER_START("force");
 | 
					    LIKWID_MARKER_START("force");
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    calc_force <<< num_blocks, num_threads_per_block >>> (c_atom, cutforcesq, sigma6, epsilon, Nlocal, neighbor->maxneighs, c_neighs, c_neigh_numneigh);
 | 
					    calc_force <<< num_blocks, num_threads_per_block >>> (*c_atom, cutforcesq, sigma6, epsilon, Nlocal, neighbor->maxneighs, c_neighbor->neighbors, c_neighbor->numneigh);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    checkCUDAError( "PeekAtLastError ComputeForce", cudaPeekAtLastError() );
 | 
					    checkCUDAError( "PeekAtLastError ComputeForce", cudaPeekAtLastError() );
 | 
				
			||||||
    checkCUDAError( "DeviceSync ComputeForce", cudaDeviceSynchronize() );
 | 
					    checkCUDAError( "DeviceSync ComputeForce", cudaDeviceSynchronize() );
 | 
				
			||||||
@@ -271,4 +209,4 @@ double computeForce(
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
    return E-S;
 | 
					    return E-S;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
@@ -33,9 +33,26 @@ typedef struct {
 | 
				
			|||||||
    int* numneigh;
 | 
					    int* numneigh;
 | 
				
			||||||
} Neighbor;
 | 
					} Neighbor;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					typedef struct {
 | 
				
			||||||
 | 
					    MD_FLOAT xprd; MD_FLOAT yprd; MD_FLOAT zprd;
 | 
				
			||||||
 | 
					    MD_FLOAT bininvx; MD_FLOAT bininvy; MD_FLOAT bininvz;
 | 
				
			||||||
 | 
					    int mbinxlo; int mbinylo; int mbinzlo;
 | 
				
			||||||
 | 
					    int nbinx; int nbiny; int nbinz;
 | 
				
			||||||
 | 
					    int mbinx; int mbiny; int mbinz;
 | 
				
			||||||
 | 
					} Neighbor_params;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					typedef struct {
 | 
				
			||||||
 | 
					    int* bincount;
 | 
				
			||||||
 | 
					    int* bins;
 | 
				
			||||||
 | 
					    int mbins;
 | 
				
			||||||
 | 
					    int atoms_per_bin;
 | 
				
			||||||
 | 
					} Binning;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
extern void initNeighbor(Neighbor*, Parameter*);
 | 
					extern void initNeighbor(Neighbor*, Parameter*);
 | 
				
			||||||
extern void setupNeighbor();
 | 
					extern void setupNeighbor();
 | 
				
			||||||
extern void binatoms(Atom*);
 | 
					extern void binatoms(Atom*);
 | 
				
			||||||
extern void buildNeighbor(Atom*, Neighbor*);
 | 
					extern void buildNeighbor(Atom*, Neighbor*);
 | 
				
			||||||
extern void sortAtom(Atom*);
 | 
					extern void sortAtom(Atom*);
 | 
				
			||||||
 | 
					extern void binatoms_cuda(Atom*, Binning*, int*, Neighbor_params*, const int);
 | 
				
			||||||
 | 
					extern void buildNeighbor_cuda(Atom*, Neighbor*, Atom*, Neighbor*, const int, double*);
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
@@ -25,8 +25,10 @@
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
#ifndef __PBC_H_
 | 
					#ifndef __PBC_H_
 | 
				
			||||||
#define __PBC_H_
 | 
					#define __PBC_H_
 | 
				
			||||||
extern void initPbc();
 | 
					extern void initPbc(Atom*);
 | 
				
			||||||
extern void updatePbc(Atom*, Parameter*);
 | 
					extern void updatePbc(Atom*, Parameter*);
 | 
				
			||||||
 | 
					extern void updatePbc_cuda(Atom*, Parameter*, Atom*, bool, const int);
 | 
				
			||||||
extern void updateAtomsPbc(Atom*, Parameter*);
 | 
					extern void updateAtomsPbc(Atom*, Parameter*);
 | 
				
			||||||
 | 
					extern void updateAtomsPbc_cuda(Atom*, Parameter*, Atom*, const int);
 | 
				
			||||||
extern void setupPbc(Atom*, Parameter*);
 | 
					extern void setupPbc(Atom*, Parameter*);
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
@@ -5,6 +5,11 @@ typedef enum {
 | 
				
			|||||||
    TOTAL = 0,
 | 
					    TOTAL = 0,
 | 
				
			||||||
    NEIGH,
 | 
					    NEIGH,
 | 
				
			||||||
    FORCE,
 | 
					    FORCE,
 | 
				
			||||||
 | 
					    NEIGH_UPDATE_ATOMS_PBC,
 | 
				
			||||||
 | 
					    NEIGH_SETUP_PBC,
 | 
				
			||||||
 | 
					    NEIGH_UPDATE_PBC,
 | 
				
			||||||
 | 
					    NEIGH_BINATOMS,
 | 
				
			||||||
 | 
					    NEIGH_BUILD_LISTS,
 | 
				
			||||||
    NUMTIMER
 | 
					    NUMTIMER
 | 
				
			||||||
} timertype;
 | 
					} timertype;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -45,12 +45,14 @@
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
#define HLINE "----------------------------------------------------------------------------\n"
 | 
					#define HLINE "----------------------------------------------------------------------------\n"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
extern void initCudaAtom(Atom *atom, Neighbor *neighbor);
 | 
					extern void cuda_final_integrate(bool doReneighbour, Parameter *param,
 | 
				
			||||||
 | 
					                                 Atom *atom, Atom *c_atom,
 | 
				
			||||||
 | 
					                                 const int num_threads_per_block);
 | 
				
			||||||
 | 
					extern void cuda_initial_integrate(bool doReneighbour, Parameter *param,
 | 
				
			||||||
 | 
					                                   Atom *atom, Atom *c_atom,
 | 
				
			||||||
 | 
					                                   const int num_threads_per_block);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
extern void cuda_final_integrate(bool doReneighbour, Parameter *param, Atom *atom);
 | 
					extern double computeForce(bool, Parameter*, Atom*, Neighbor*, Atom*, Neighbor*, const int);
 | 
				
			||||||
extern void cuda_initial_integrate(bool doReneighbour, Parameter *param, Atom *atom);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
extern double computeForce(bool, Parameter*, Atom*, Neighbor*);
 | 
					 | 
				
			||||||
extern double computeForceTracing(Parameter*, Atom*, Neighbor*, Stats*, int, int);
 | 
					extern double computeForceTracing(Parameter*, Atom*, Neighbor*, Stats*, int, int);
 | 
				
			||||||
extern double computeForceEam(Eam* eam, Parameter*, Atom *atom, Neighbor *neighbor, Stats *stats, int first_exec, int timestep);
 | 
					extern double computeForceEam(Eam* eam, Parameter*, Atom *atom, Neighbor *neighbor, Stats *stats, int first_exec, int timestep);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -78,12 +80,52 @@ void init(Parameter *param)
 | 
				
			|||||||
    param->proc_freq = 2.4;
 | 
					    param->proc_freq = 2.4;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					void initCudaAtom(Atom *atom, Neighbor *neighbor, Atom *c_atom, Neighbor *c_neighbor) {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    c_atom->Natoms = atom->Natoms;
 | 
				
			||||||
 | 
					    c_atom->Nlocal = atom->Nlocal;
 | 
				
			||||||
 | 
					    c_atom->Nghost = atom->Nghost;
 | 
				
			||||||
 | 
					    c_atom->Nmax = atom->Nmax;
 | 
				
			||||||
 | 
					    c_atom->ntypes = atom->ntypes;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    c_atom->border_map = NULL;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    const int Nlocal = atom->Nlocal;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    checkCUDAError( "c_atom->x malloc", cudaMalloc((void**)&(c_atom->x), sizeof(MD_FLOAT) * atom->Nmax * 3) );
 | 
				
			||||||
 | 
					    checkCUDAError( "c_atom->x memcpy", cudaMemcpy(c_atom->x, atom->x, sizeof(MD_FLOAT) * atom->Nmax * 3, cudaMemcpyHostToDevice) );
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    checkCUDAError( "c_atom->fx malloc", cudaMalloc((void**)&(c_atom->fx), sizeof(MD_FLOAT) * Nlocal * 3) );
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    checkCUDAError( "c_atom->vx malloc", cudaMalloc((void**)&(c_atom->vx), sizeof(MD_FLOAT) * Nlocal * 3) );
 | 
				
			||||||
 | 
					    checkCUDAError( "c_atom->vx memcpy", cudaMemcpy(c_atom->vx, atom->vx, sizeof(MD_FLOAT) * Nlocal * 3, cudaMemcpyHostToDevice) );
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    checkCUDAError( "c_atom->type malloc", cudaMalloc((void**)&(c_atom->type), sizeof(int) * atom->Nmax) );
 | 
				
			||||||
 | 
					    checkCUDAError( "c_atom->epsilon malloc", cudaMalloc((void**)&(c_atom->epsilon), sizeof(MD_FLOAT) * atom->ntypes * atom->ntypes) );
 | 
				
			||||||
 | 
					    checkCUDAError( "c_atom->sigma6 malloc", cudaMalloc((void**)&(c_atom->sigma6), sizeof(MD_FLOAT) * atom->ntypes * atom->ntypes) );
 | 
				
			||||||
 | 
					    checkCUDAError( "c_atom->cutforcesq malloc", cudaMalloc((void**)&(c_atom->cutforcesq), sizeof(MD_FLOAT) * atom->ntypes * atom->ntypes) );
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    checkCUDAError( "c_neighbor->neighbors malloc", cudaMalloc((void**)&c_neighbor->neighbors, sizeof(int) * Nlocal * neighbor->maxneighs) );
 | 
				
			||||||
 | 
					    checkCUDAError( "c_neighbor->numneigh malloc", cudaMalloc((void**)&c_neighbor->numneigh, sizeof(int) * Nlocal) );
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    checkCUDAError( "c_atom->type memcpy", cudaMemcpy(c_atom->type, atom->type, sizeof(int) * atom->Nmax, cudaMemcpyHostToDevice) );
 | 
				
			||||||
 | 
					    checkCUDAError( "c_atom->sigma6 memcpy", cudaMemcpy(c_atom->sigma6, atom->sigma6, sizeof(MD_FLOAT) * atom->ntypes * atom->ntypes, cudaMemcpyHostToDevice) );
 | 
				
			||||||
 | 
					    checkCUDAError( "c_atom->epsilon memcpy", cudaMemcpy(c_atom->epsilon, atom->epsilon, sizeof(MD_FLOAT) * atom->ntypes * atom->ntypes, cudaMemcpyHostToDevice) );
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    checkCUDAError( "c_atom->cutforcesq memcpy", cudaMemcpy(c_atom->cutforcesq, atom->cutforcesq, sizeof(MD_FLOAT) * atom->ntypes * atom->ntypes, cudaMemcpyHostToDevice) );
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
double setup(
 | 
					double setup(
 | 
				
			||||||
        Parameter *param,
 | 
					        Parameter *param,
 | 
				
			||||||
        Eam *eam,
 | 
					        Eam *eam,
 | 
				
			||||||
        Atom *atom,
 | 
					        Atom *atom,
 | 
				
			||||||
        Neighbor *neighbor,
 | 
					        Neighbor *neighbor,
 | 
				
			||||||
        Stats *stats)
 | 
					        Atom *c_atom,
 | 
				
			||||||
 | 
					        Neighbor *c_neighbor,
 | 
				
			||||||
 | 
					        Stats *stats,
 | 
				
			||||||
 | 
					        const int num_threads_per_block,
 | 
				
			||||||
 | 
						double* timers)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
    if(param->force_field == FF_EAM) { initEam(eam, param); }
 | 
					    if(param->force_field == FF_EAM) { initEam(eam, param); }
 | 
				
			||||||
    double S, E;
 | 
					    double S, E;
 | 
				
			||||||
@@ -102,11 +144,11 @@ double setup(
 | 
				
			|||||||
    setupThermo(param, atom->Natoms);
 | 
					    setupThermo(param, atom->Natoms);
 | 
				
			||||||
    adjustThermo(param, atom);
 | 
					    adjustThermo(param, atom);
 | 
				
			||||||
    setupPbc(atom, param);
 | 
					    setupPbc(atom, param);
 | 
				
			||||||
    updatePbc(atom, param);
 | 
					    initCudaAtom(atom, neighbor, c_atom, c_neighbor);
 | 
				
			||||||
    buildNeighbor(atom, neighbor);
 | 
					    updatePbc_cuda(atom, param, c_atom, true, num_threads_per_block);
 | 
				
			||||||
 | 
					    buildNeighbor_cuda(atom, neighbor, c_atom, c_neighbor, num_threads_per_block, timers);
 | 
				
			||||||
    E = getTimeStamp();
 | 
					    E = getTimeStamp();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    initCudaAtom(atom, neighbor);
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
    return E-S;
 | 
					    return E-S;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
@@ -114,19 +156,35 @@ double setup(
 | 
				
			|||||||
double reneighbour(
 | 
					double reneighbour(
 | 
				
			||||||
        Parameter *param,
 | 
					        Parameter *param,
 | 
				
			||||||
        Atom *atom,
 | 
					        Atom *atom,
 | 
				
			||||||
        Neighbor *neighbor)
 | 
					        Neighbor *neighbor,
 | 
				
			||||||
 | 
					        Atom *c_atom,
 | 
				
			||||||
 | 
					        Neighbor *c_neighbor,
 | 
				
			||||||
 | 
					        const int num_threads_per_block,
 | 
				
			||||||
 | 
						double* timers)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
    double S, E;
 | 
					    double S, E, beforeEvent, afterEvent;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    S = getTimeStamp();
 | 
					    S = getTimeStamp();
 | 
				
			||||||
 | 
					    beforeEvent = S;
 | 
				
			||||||
    LIKWID_MARKER_START("reneighbour");
 | 
					    LIKWID_MARKER_START("reneighbour");
 | 
				
			||||||
    updateAtomsPbc(atom, param);
 | 
					    updateAtomsPbc_cuda(atom, param, c_atom, num_threads_per_block);
 | 
				
			||||||
 | 
					    afterEvent = getTimeStamp();
 | 
				
			||||||
 | 
					    timers[NEIGH_UPDATE_ATOMS_PBC] += afterEvent - beforeEvent;
 | 
				
			||||||
 | 
					    beforeEvent = afterEvent;
 | 
				
			||||||
    setupPbc(atom, param);
 | 
					    setupPbc(atom, param);
 | 
				
			||||||
    updatePbc(atom, param);
 | 
					    afterEvent = getTimeStamp();
 | 
				
			||||||
 | 
					    timers[NEIGH_SETUP_PBC] += afterEvent - beforeEvent;
 | 
				
			||||||
 | 
					    beforeEvent = afterEvent;
 | 
				
			||||||
 | 
					    updatePbc_cuda(atom, param, c_atom, true, num_threads_per_block);
 | 
				
			||||||
 | 
					    afterEvent = getTimeStamp();
 | 
				
			||||||
 | 
					    timers[NEIGH_UPDATE_PBC] += afterEvent - beforeEvent;
 | 
				
			||||||
 | 
					    beforeEvent = afterEvent;
 | 
				
			||||||
    //sortAtom(atom);
 | 
					    //sortAtom(atom);
 | 
				
			||||||
    buildNeighbor(atom, neighbor);
 | 
					    buildNeighbor_cuda(atom, neighbor, c_atom, c_neighbor, num_threads_per_block, timers);
 | 
				
			||||||
    LIKWID_MARKER_STOP("reneighbour");
 | 
					    LIKWID_MARKER_STOP("reneighbour");
 | 
				
			||||||
    E = getTimeStamp();
 | 
					    E = getTimeStamp();
 | 
				
			||||||
 | 
					    afterEvent = E;
 | 
				
			||||||
 | 
					    timers[NEIGH_BUILD_LISTS] += afterEvent - beforeEvent;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    return E-S;
 | 
					    return E-S;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
@@ -178,6 +236,19 @@ const char* ff2str(int ff)
 | 
				
			|||||||
    return "invalid";
 | 
					    return "invalid";
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					int get_num_threads() {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    const char *num_threads_env = getenv("NUM_THREADS");
 | 
				
			||||||
 | 
					    int num_threads = 0;
 | 
				
			||||||
 | 
					    if(num_threads_env == 0)
 | 
				
			||||||
 | 
					        num_threads = 32;
 | 
				
			||||||
 | 
					    else {
 | 
				
			||||||
 | 
					        num_threads = atoi(num_threads_env);
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    return num_threads;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
int main(int argc, char** argv)
 | 
					int main(int argc, char** argv)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
    double timer[NUMTIMER];
 | 
					    double timer[NUMTIMER];
 | 
				
			||||||
@@ -186,6 +257,8 @@ int main(int argc, char** argv)
 | 
				
			|||||||
    Neighbor neighbor;
 | 
					    Neighbor neighbor;
 | 
				
			||||||
    Stats stats;
 | 
					    Stats stats;
 | 
				
			||||||
    Parameter param;
 | 
					    Parameter param;
 | 
				
			||||||
 | 
					    Atom c_atom;
 | 
				
			||||||
 | 
					    Neighbor c_neighbor;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    LIKWID_MARKER_INIT;
 | 
					    LIKWID_MARKER_INIT;
 | 
				
			||||||
#pragma omp parallel
 | 
					#pragma omp parallel
 | 
				
			||||||
@@ -256,7 +329,10 @@ int main(int argc, char** argv)
 | 
				
			|||||||
        }
 | 
					        }
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    setup(¶m, &eam, &atom, &neighbor, &stats);
 | 
					    // this should be multiple of 32 as operations are performed at the level of warps
 | 
				
			||||||
 | 
					    const int num_threads_per_block = get_num_threads();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    setup(¶m, &eam, &atom, &neighbor, &c_atom, &c_neighbor, &stats, num_threads_per_block, (double*) &timer);
 | 
				
			||||||
    computeThermo(0, ¶m, &atom);
 | 
					    computeThermo(0, ¶m, &atom);
 | 
				
			||||||
    if(param.force_field == FF_EAM) {
 | 
					    if(param.force_field == FF_EAM) {
 | 
				
			||||||
        computeForceEam(&eam, ¶m, &atom, &neighbor, &stats, 1, 0);
 | 
					        computeForceEam(&eam, ¶m, &atom, &neighbor, &stats, 1, 0);
 | 
				
			||||||
@@ -264,13 +340,18 @@ int main(int argc, char** argv)
 | 
				
			|||||||
#if defined(MEM_TRACER) || defined(INDEX_TRACER) || defined(COMPUTE_STATS)
 | 
					#if defined(MEM_TRACER) || defined(INDEX_TRACER) || defined(COMPUTE_STATS)
 | 
				
			||||||
        computeForceTracing(¶m, &atom, &neighbor, &stats, 1, 0);
 | 
					        computeForceTracing(¶m, &atom, &neighbor, &stats, 1, 0);
 | 
				
			||||||
#else
 | 
					#else
 | 
				
			||||||
        computeForce(true, ¶m, &atom, &neighbor);
 | 
					        computeForce(true, ¶m, &atom, &neighbor, &c_atom, &c_neighbor, num_threads_per_block);
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    timer[FORCE] = 0.0;
 | 
					    timer[FORCE] = 0.0;
 | 
				
			||||||
    timer[NEIGH] = 0.0;
 | 
					    timer[NEIGH] = 0.0;
 | 
				
			||||||
    timer[TOTAL] = getTimeStamp();
 | 
					    timer[TOTAL] = getTimeStamp();
 | 
				
			||||||
 | 
					    timer[NEIGH_UPDATE_ATOMS_PBC] = 0.0;
 | 
				
			||||||
 | 
					    timer[NEIGH_SETUP_PBC] = 0.0;
 | 
				
			||||||
 | 
					    timer[NEIGH_UPDATE_PBC] = 0.0;
 | 
				
			||||||
 | 
					    timer[NEIGH_BINATOMS] = 0.0;
 | 
				
			||||||
 | 
					    timer[NEIGH_BUILD_LISTS] = 0.0;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    if(param.vtk_file != NULL) {
 | 
					    if(param.vtk_file != NULL) {
 | 
				
			||||||
        write_atoms_to_vtk_file(param.vtk_file, &atom, 0);
 | 
					        write_atoms_to_vtk_file(param.vtk_file, &atom, 0);
 | 
				
			||||||
@@ -280,12 +361,15 @@ int main(int argc, char** argv)
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
        const bool doReneighbour = (n + 1) % param.every == 0;
 | 
					        const bool doReneighbour = (n + 1) % param.every == 0;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        cuda_initial_integrate(doReneighbour, ¶m, &atom);
 | 
					        cuda_initial_integrate(doReneighbour, ¶m, &atom, &c_atom, num_threads_per_block);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        if(doReneighbour) {
 | 
					        if(doReneighbour) {
 | 
				
			||||||
            timer[NEIGH] += reneighbour(¶m, &atom, &neighbor);
 | 
					            timer[NEIGH] += reneighbour(¶m, &atom, &neighbor, &c_atom, &c_neighbor, num_threads_per_block, (double*) &timer);
 | 
				
			||||||
        } else {
 | 
					        } else {
 | 
				
			||||||
            updatePbc(&atom, ¶m);
 | 
						    double before = getTimeStamp();
 | 
				
			||||||
 | 
					            updatePbc_cuda(&atom, ¶m, &c_atom, false, num_threads_per_block);
 | 
				
			||||||
 | 
						    double after = getTimeStamp();
 | 
				
			||||||
 | 
						    timer[NEIGH_UPDATE_PBC] += after - before;
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        if(param.force_field == FF_EAM) {
 | 
					        if(param.force_field == FF_EAM) {
 | 
				
			||||||
@@ -294,13 +378,14 @@ int main(int argc, char** argv)
 | 
				
			|||||||
#if defined(MEM_TRACER) || defined(INDEX_TRACER) || defined(COMPUTE_STATS)
 | 
					#if defined(MEM_TRACER) || defined(INDEX_TRACER) || defined(COMPUTE_STATS)
 | 
				
			||||||
            timer[FORCE] += computeForceTracing(¶m, &atom, &neighbor, &stats, 0, n + 1);
 | 
					            timer[FORCE] += computeForceTracing(¶m, &atom, &neighbor, &stats, 0, n + 1);
 | 
				
			||||||
#else
 | 
					#else
 | 
				
			||||||
            timer[FORCE] += computeForce(doReneighbour, ¶m, &atom, &neighbor);
 | 
					            timer[FORCE] += computeForce(doReneighbour, ¶m, &atom, &neighbor, &c_atom, &c_neighbor, num_threads_per_block);
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        cuda_final_integrate(doReneighbour, ¶m, &atom);
 | 
					        cuda_final_integrate(doReneighbour, ¶m, &atom, &c_atom, num_threads_per_block);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        if(!((n + 1) % param.nstat) && (n+1) < param.ntimes) {
 | 
					        if(!((n + 1) % param.nstat) && (n+1) < param.ntimes) {
 | 
				
			||||||
 | 
						    checkCUDAError("computeThermo atom->x memcpy back", cudaMemcpy(atom.x, c_atom.x, atom.Nmax * sizeof(MD_FLOAT) * 3, cudaMemcpyDeviceToHost) );
 | 
				
			||||||
            computeThermo(n + 1, ¶m, &atom);
 | 
					            computeThermo(n + 1, ¶m, &atom);
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -309,6 +394,7 @@ int main(int argc, char** argv)
 | 
				
			|||||||
        }
 | 
					        }
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    timer[NEIGH_BUILD_LISTS] -= timer[NEIGH_BINATOMS];
 | 
				
			||||||
    timer[TOTAL] = getTimeStamp() - timer[TOTAL];
 | 
					    timer[TOTAL] = getTimeStamp() - timer[TOTAL];
 | 
				
			||||||
    computeThermo(-1, ¶m, &atom);
 | 
					    computeThermo(-1, ¶m, &atom);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -322,11 +408,15 @@ int main(int argc, char** argv)
 | 
				
			|||||||
#endif
 | 
					#endif
 | 
				
			||||||
    printf(HLINE);
 | 
					    printf(HLINE);
 | 
				
			||||||
    printf("System: %d atoms %d ghost atoms, Steps: %d\n", atom.Natoms, atom.Nghost, param.ntimes);
 | 
					    printf("System: %d atoms %d ghost atoms, Steps: %d\n", atom.Natoms, atom.Nghost, param.ntimes);
 | 
				
			||||||
    printf("TOTAL %.2fs FORCE %.2fs NEIGH %.2fs REST %.2fs\n",
 | 
					    printf("TOTAL %.2fs FORCE %.2fs NEIGH %.2fs REST %.2fs   NEIGH_TIMERS: UPD_AT: %.2fs SETUP_PBC %.2fs UPDATE_PBC %.2fs BINATOMS %.2fs BUILD_NEIGHBOR %.2fs\n",
 | 
				
			||||||
            timer[TOTAL], timer[FORCE], timer[NEIGH], timer[TOTAL]-timer[FORCE]-timer[NEIGH]);
 | 
					            timer[TOTAL], timer[FORCE], timer[NEIGH], timer[TOTAL]-timer[FORCE]-timer[NEIGH], timer[NEIGH_UPDATE_ATOMS_PBC], timer[NEIGH_SETUP_PBC], timer[NEIGH_UPDATE_PBC], timer[NEIGH_BINATOMS], timer[NEIGH_BUILD_LISTS]);
 | 
				
			||||||
    printf(HLINE);
 | 
					    printf(HLINE);
 | 
				
			||||||
    printf("Performance: %.2f million atom updates per second\n",
 | 
					    printf("Performance: %.2f million atom updates per second\n",
 | 
				
			||||||
            1e-6 * (double) atom.Natoms * param.ntimes / timer[TOTAL]);
 | 
					            1e-6 * (double) atom.Natoms * param.ntimes / timer[TOTAL]);
 | 
				
			||||||
 | 
					    double atomUpdatesTotal = (double) atom.Natoms * param.ntimes;
 | 
				
			||||||
 | 
					    printf("Force_perf in millions per sec: %.2f\n", 1e-6 * atomUpdatesTotal / timer[FORCE]);
 | 
				
			||||||
 | 
					    double atomNeighUpdatesTotal = (double) atom.Natoms * param.ntimes / param.every;
 | 
				
			||||||
 | 
					    printf("Neighbor_perf in millions per sec: updateAtomsPbc: %.2f setupPbc: %.2f updatePbc: %.2f binAtoms: %.2f buildNeighbor_wo_binning: %.2f\n", 1e-6 * atomNeighUpdatesTotal / timer[NEIGH_UPDATE_ATOMS_PBC], 1e-6 * atomNeighUpdatesTotal / timer[NEIGH_SETUP_PBC], 1e-6 * atomUpdatesTotal / timer[NEIGH_UPDATE_PBC], 1e-6 * atomNeighUpdatesTotal / timer[NEIGH_BINATOMS], 1e-6 * atomNeighUpdatesTotal / timer[NEIGH_BUILD_LISTS]);
 | 
				
			||||||
#ifdef COMPUTE_STATS
 | 
					#ifdef COMPUTE_STATS
 | 
				
			||||||
    displayStatistics(&atom, ¶m, &stats, timer);
 | 
					    displayStatistics(&atom, ¶m, &stats, timer);
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
							
								
								
									
										719
									
								
								lammps/neighbor.cu
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										719
									
								
								lammps/neighbor.cu
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,719 @@
 | 
				
			|||||||
 | 
					/*
 | 
				
			||||||
 | 
					 * =======================================================================================
 | 
				
			||||||
 | 
					 *
 | 
				
			||||||
 | 
					 *   Author:   Jan Eitzinger (je), jan.eitzinger@fau.de
 | 
				
			||||||
 | 
					 *   Copyright (c) 2021 RRZE, University Erlangen-Nuremberg
 | 
				
			||||||
 | 
					 *
 | 
				
			||||||
 | 
					 *   This file is part of MD-Bench.
 | 
				
			||||||
 | 
					 *
 | 
				
			||||||
 | 
					 *   MD-Bench is free software: you can redistribute it and/or modify it
 | 
				
			||||||
 | 
					 *   under the terms of the GNU Lesser General Public License as published
 | 
				
			||||||
 | 
					 *   by the Free Software Foundation, either version 3 of the License, or
 | 
				
			||||||
 | 
					 *   (at your option) any later version.
 | 
				
			||||||
 | 
					 *
 | 
				
			||||||
 | 
					 *   MD-Bench is distributed in the hope that it will be useful, but WITHOUT ANY
 | 
				
			||||||
 | 
					 *   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
 | 
				
			||||||
 | 
					 *   PARTICULAR PURPOSE.  See the GNU Lesser General Public License for more
 | 
				
			||||||
 | 
					 *   details.
 | 
				
			||||||
 | 
					 *
 | 
				
			||||||
 | 
					 *   You should have received a copy of the GNU Lesser General Public License along
 | 
				
			||||||
 | 
					 *   with MD-Bench.  If not, see <https://www.gnu.org/licenses/>.
 | 
				
			||||||
 | 
					 * =======================================================================================
 | 
				
			||||||
 | 
					 */
 | 
				
			||||||
 | 
					#include <stdlib.h>
 | 
				
			||||||
 | 
					#include <stdio.h>
 | 
				
			||||||
 | 
					#include <math.h>
 | 
				
			||||||
 | 
					#include <cuda_profiler_api.h>
 | 
				
			||||||
 | 
					#include <cuda_runtime.h>
 | 
				
			||||||
 | 
					#include <device_launch_parameters.h>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					extern "C" {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#include <neighbor.h>
 | 
				
			||||||
 | 
					#include <parameter.h>
 | 
				
			||||||
 | 
					#include <allocate.h>
 | 
				
			||||||
 | 
					#include <atom.h>
 | 
				
			||||||
 | 
					#include <timing.h>
 | 
				
			||||||
 | 
					#include <timers.h>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#define SMALL 1.0e-6
 | 
				
			||||||
 | 
					#define FACTOR 0.999
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					__device__ int coord2bin_device(MD_FLOAT xin, MD_FLOAT yin, MD_FLOAT zin, 
 | 
				
			||||||
 | 
					                                Neighbor_params np)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					    int ix, iy, iz;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    if(xin >= np.xprd) {
 | 
				
			||||||
 | 
					        ix = (int)((xin - np.xprd) * np.bininvx) + np.nbinx - np.mbinxlo;
 | 
				
			||||||
 | 
					    } else if(xin >= 0.0) {
 | 
				
			||||||
 | 
					        ix = (int)(xin * np.bininvx) - np.mbinxlo;
 | 
				
			||||||
 | 
					    } else {
 | 
				
			||||||
 | 
					        ix = (int)(xin * np.bininvx) - np.mbinxlo - 1;
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    if(yin >= np.yprd) {
 | 
				
			||||||
 | 
					        iy = (int)((yin - np.yprd) * np.bininvy) + np.nbiny - np.mbinylo;
 | 
				
			||||||
 | 
					    } else if(yin >= 0.0) {
 | 
				
			||||||
 | 
					        iy = (int)(yin * np.bininvy) - np.mbinylo;
 | 
				
			||||||
 | 
					    } else {
 | 
				
			||||||
 | 
					        iy = (int)(yin * np.bininvy) - np.mbinylo - 1;
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    if(zin >= np.zprd) {
 | 
				
			||||||
 | 
					        iz = (int)((zin - np.zprd) * np.bininvz) + np.nbinz - np.mbinzlo;
 | 
				
			||||||
 | 
					    } else if(zin >= 0.0) {
 | 
				
			||||||
 | 
					        iz = (int)(zin * np.bininvz) - np.mbinzlo;
 | 
				
			||||||
 | 
					    } else {
 | 
				
			||||||
 | 
					        iz = (int)(zin * np.bininvz) - np.mbinzlo - 1;
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    return (iz * np.mbiny * np.mbinx + iy * np.mbinx + ix + 1);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/* sorts the contents of a bin to make it comparable to the CPU version */
 | 
				
			||||||
 | 
					/* uses bubble sort since atoms per bin should be relatively small and can be done in situ */
 | 
				
			||||||
 | 
					__global__ void sort_bin_contents_kernel(int* bincount, int* bins, int mbins, int atoms_per_bin){
 | 
				
			||||||
 | 
					    const int i = blockIdx.x * blockDim.x + threadIdx.x;
 | 
				
			||||||
 | 
					    if (i >= mbins){
 | 
				
			||||||
 | 
					        return;
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    int atoms_in_bin = bincount[i];
 | 
				
			||||||
 | 
					    int* bin_ptr = &bins[i * atoms_per_bin];
 | 
				
			||||||
 | 
					    int sorted;
 | 
				
			||||||
 | 
					    do {
 | 
				
			||||||
 | 
					        sorted = 1;
 | 
				
			||||||
 | 
					        int tmp;
 | 
				
			||||||
 | 
					        for(int index = 0; index < atoms_in_bin - 1; index++){
 | 
				
			||||||
 | 
					            if (bin_ptr[index] > bin_ptr[index + 1]){
 | 
				
			||||||
 | 
					                tmp = bin_ptr[index];
 | 
				
			||||||
 | 
					                bin_ptr[index] = bin_ptr[index + 1];
 | 
				
			||||||
 | 
					                bin_ptr[index + 1] = tmp;
 | 
				
			||||||
 | 
					                sorted = 0;
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					    } while (!sorted);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					__global__ void binatoms_kernel(Atom a, int* bincount, int* bins, int atoms_per_bin, Neighbor_params np, int *resize_needed){
 | 
				
			||||||
 | 
					    Atom* atom = &a;
 | 
				
			||||||
 | 
					    const int i = blockIdx.x * blockDim.x + threadIdx.x;
 | 
				
			||||||
 | 
					    int nall = atom->Nlocal + atom->Nghost;
 | 
				
			||||||
 | 
					    if(i >= nall){
 | 
				
			||||||
 | 
					        return;
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
 | 
					    MD_FLOAT x = atom_x(i);
 | 
				
			||||||
 | 
					    MD_FLOAT y = atom_y(i);
 | 
				
			||||||
 | 
					    MD_FLOAT z = atom_z(i);
 | 
				
			||||||
 | 
					    int ibin = coord2bin_device(x, y, z, np);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    int ac = atomicAdd(&bincount[ibin], 1);
 | 
				
			||||||
 | 
					            
 | 
				
			||||||
 | 
					    if(ac < atoms_per_bin){
 | 
				
			||||||
 | 
					        bins[ibin * atoms_per_bin + ac] = i;
 | 
				
			||||||
 | 
					    } else {
 | 
				
			||||||
 | 
					        atomicMax(resize_needed, ac);
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					__global__ void compute_neighborhood(Atom a, Neighbor neigh, Neighbor_params np, int nstencil, int* stencil,
 | 
				
			||||||
 | 
					                                     int* bins, int atoms_per_bin, int *bincount, int *new_maxneighs, MD_FLOAT cutneighsq){
 | 
				
			||||||
 | 
					    const int i = blockIdx.x * blockDim.x + threadIdx.x;
 | 
				
			||||||
 | 
					    const int Nlocal = a.Nlocal;
 | 
				
			||||||
 | 
					    if( i >= Nlocal ) {
 | 
				
			||||||
 | 
					        return;
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
 | 
					    Atom *atom = &a;
 | 
				
			||||||
 | 
					    Neighbor *neighbor = &neigh;
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
 | 
					    int* neighptr = &(neighbor->neighbors[i]);
 | 
				
			||||||
 | 
					    int n = 0;
 | 
				
			||||||
 | 
					    MD_FLOAT xtmp = atom_x(i);
 | 
				
			||||||
 | 
					    MD_FLOAT ytmp = atom_y(i);
 | 
				
			||||||
 | 
					    MD_FLOAT ztmp = atom_z(i);
 | 
				
			||||||
 | 
					    int ibin = coord2bin_device(xtmp, ytmp, ztmp, np);
 | 
				
			||||||
 | 
					#ifdef EXPLICIT_TYPES
 | 
				
			||||||
 | 
					    int type_i = atom->type[i];
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					    for(int k = 0; k < nstencil; k++) {
 | 
				
			||||||
 | 
					        int jbin = ibin + stencil[k];
 | 
				
			||||||
 | 
					        int* loc_bin = &bins[jbin * atoms_per_bin];
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        for(int m = 0; m < bincount[jbin]; m++) {
 | 
				
			||||||
 | 
					            int j = loc_bin[m];
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					            if ( j == i ){
 | 
				
			||||||
 | 
					                continue;
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					            MD_FLOAT delx = xtmp - atom_x(j);
 | 
				
			||||||
 | 
					            MD_FLOAT dely = ytmp - atom_y(j);
 | 
				
			||||||
 | 
					            MD_FLOAT delz = ztmp - atom_z(j);
 | 
				
			||||||
 | 
					            MD_FLOAT rsq = delx * delx + dely * dely + delz * delz;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#ifdef EXPLICIT_TYPES
 | 
				
			||||||
 | 
					            int type_j = atom->type[j];
 | 
				
			||||||
 | 
					                    const MD_FLOAT cutoff = atom->cutneighsq[type_i * atom->ntypes + type_j];
 | 
				
			||||||
 | 
					#else
 | 
				
			||||||
 | 
					            const MD_FLOAT cutoff = cutneighsq;
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					            if( rsq <= cutoff ) {
 | 
				
			||||||
 | 
					                int idx = atom->Nlocal * n;
 | 
				
			||||||
 | 
					                neighptr[idx] = j;
 | 
				
			||||||
 | 
					                n += 1;
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    neighbor->numneigh[i] = n;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    if(n > neighbor->maxneighs) {
 | 
				
			||||||
 | 
					        atomicMax(new_maxneighs, n);
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					extern "C" {
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
 | 
					static MD_FLOAT xprd, yprd, zprd;
 | 
				
			||||||
 | 
					static MD_FLOAT bininvx, bininvy, bininvz;
 | 
				
			||||||
 | 
					static int mbinxlo, mbinylo, mbinzlo;
 | 
				
			||||||
 | 
					static int nbinx, nbiny, nbinz;
 | 
				
			||||||
 | 
					static int mbinx, mbiny, mbinz; // n bins in x, y, z
 | 
				
			||||||
 | 
					static int *bincount;
 | 
				
			||||||
 | 
					static int *bins;
 | 
				
			||||||
 | 
					static int mbins; //total number of bins
 | 
				
			||||||
 | 
					static int atoms_per_bin;  // max atoms per bin
 | 
				
			||||||
 | 
					static MD_FLOAT cutneigh;
 | 
				
			||||||
 | 
					static MD_FLOAT cutneighsq;  // neighbor cutoff squared
 | 
				
			||||||
 | 
					static int nmax;
 | 
				
			||||||
 | 
					static int nstencil;      // # of bins in stencil
 | 
				
			||||||
 | 
					static int* stencil;      // stencil list of bin offsets
 | 
				
			||||||
 | 
					static MD_FLOAT binsizex, binsizey, binsizez;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static int* c_stencil = NULL;
 | 
				
			||||||
 | 
					static int* c_resize_needed = NULL;
 | 
				
			||||||
 | 
					static int* c_new_maxneighs = NULL;
 | 
				
			||||||
 | 
					static Binning c_binning{
 | 
				
			||||||
 | 
					        .bincount = NULL,
 | 
				
			||||||
 | 
					        .bins = NULL,
 | 
				
			||||||
 | 
					        .mbins = 0,
 | 
				
			||||||
 | 
					        .atoms_per_bin = 0
 | 
				
			||||||
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static int coord2bin(MD_FLOAT, MD_FLOAT , MD_FLOAT);
 | 
				
			||||||
 | 
					static MD_FLOAT bindist(int, int, int);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/* exported subroutines */
 | 
				
			||||||
 | 
					void initNeighbor(Neighbor *neighbor, Parameter *param)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					    MD_FLOAT neighscale = 5.0 / 6.0;
 | 
				
			||||||
 | 
					    xprd = param->nx * param->lattice;
 | 
				
			||||||
 | 
					    yprd = param->ny * param->lattice;
 | 
				
			||||||
 | 
					    zprd = param->nz * param->lattice;
 | 
				
			||||||
 | 
					    cutneigh = param->cutneigh;
 | 
				
			||||||
 | 
					    nbinx = neighscale * param->nx;
 | 
				
			||||||
 | 
					    nbiny = neighscale * param->ny;
 | 
				
			||||||
 | 
					    nbinz = neighscale * param->nz;
 | 
				
			||||||
 | 
					    nmax = 0;
 | 
				
			||||||
 | 
					    atoms_per_bin = 8;
 | 
				
			||||||
 | 
					    stencil = NULL;
 | 
				
			||||||
 | 
					    bins = NULL;
 | 
				
			||||||
 | 
					    bincount = NULL;
 | 
				
			||||||
 | 
					    neighbor->maxneighs = 100;
 | 
				
			||||||
 | 
					    neighbor->numneigh = NULL;
 | 
				
			||||||
 | 
					    neighbor->neighbors = NULL;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					void setupNeighbor()
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					    MD_FLOAT coord;
 | 
				
			||||||
 | 
					    int mbinxhi, mbinyhi, mbinzhi;
 | 
				
			||||||
 | 
					    int nextx, nexty, nextz;
 | 
				
			||||||
 | 
					    MD_FLOAT xlo = 0.0; MD_FLOAT xhi = xprd;
 | 
				
			||||||
 | 
					    MD_FLOAT ylo = 0.0; MD_FLOAT yhi = yprd;
 | 
				
			||||||
 | 
					    MD_FLOAT zlo = 0.0; MD_FLOAT zhi = zprd;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    cutneighsq = cutneigh * cutneigh;
 | 
				
			||||||
 | 
					    binsizex = xprd / nbinx;
 | 
				
			||||||
 | 
					    binsizey = yprd / nbiny;
 | 
				
			||||||
 | 
					    binsizez = zprd / nbinz;
 | 
				
			||||||
 | 
					    bininvx = 1.0 / binsizex;
 | 
				
			||||||
 | 
					    bininvy = 1.0 / binsizey;
 | 
				
			||||||
 | 
					    bininvz = 1.0 / binsizez;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    coord = xlo - cutneigh - SMALL * xprd;
 | 
				
			||||||
 | 
					    mbinxlo = (int) (coord * bininvx);
 | 
				
			||||||
 | 
					    if (coord < 0.0) {
 | 
				
			||||||
 | 
					        mbinxlo = mbinxlo - 1;
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    coord = xhi + cutneigh + SMALL * xprd;
 | 
				
			||||||
 | 
					    mbinxhi = (int) (coord * bininvx);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    coord = ylo - cutneigh - SMALL * yprd;
 | 
				
			||||||
 | 
					    mbinylo = (int) (coord * bininvy);
 | 
				
			||||||
 | 
					    if (coord < 0.0) {
 | 
				
			||||||
 | 
					        mbinylo = mbinylo - 1;
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    coord = yhi + cutneigh + SMALL * yprd;
 | 
				
			||||||
 | 
					    mbinyhi = (int) (coord * bininvy);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    coord = zlo - cutneigh - SMALL * zprd;
 | 
				
			||||||
 | 
					    mbinzlo = (int) (coord * bininvz);
 | 
				
			||||||
 | 
					    if (coord < 0.0) {
 | 
				
			||||||
 | 
					        mbinzlo = mbinzlo - 1;
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    coord = zhi + cutneigh + SMALL * zprd;
 | 
				
			||||||
 | 
					    mbinzhi = (int) (coord * bininvz);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    mbinxlo = mbinxlo - 1;
 | 
				
			||||||
 | 
					    mbinxhi = mbinxhi + 1;
 | 
				
			||||||
 | 
					    mbinx = mbinxhi - mbinxlo + 1;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    mbinylo = mbinylo - 1;
 | 
				
			||||||
 | 
					    mbinyhi = mbinyhi + 1;
 | 
				
			||||||
 | 
					    mbiny = mbinyhi - mbinylo + 1;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    mbinzlo = mbinzlo - 1;
 | 
				
			||||||
 | 
					    mbinzhi = mbinzhi + 1;
 | 
				
			||||||
 | 
					    mbinz = mbinzhi - mbinzlo + 1;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    nextx = (int) (cutneigh * bininvx);
 | 
				
			||||||
 | 
					    if(nextx * binsizex < FACTOR * cutneigh) nextx++;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    nexty = (int) (cutneigh * bininvy);
 | 
				
			||||||
 | 
					    if(nexty * binsizey < FACTOR * cutneigh) nexty++;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    nextz = (int) (cutneigh * bininvz);
 | 
				
			||||||
 | 
					    if(nextz * binsizez < FACTOR * cutneigh) nextz++;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    if (stencil) {
 | 
				
			||||||
 | 
					        free(stencil);
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    stencil = (int*) malloc(
 | 
				
			||||||
 | 
					            (2 * nextz + 1) * (2 * nexty + 1) * (2 * nextx + 1) * sizeof(int));
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    nstencil = 0;
 | 
				
			||||||
 | 
					    int kstart = -nextz;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    for(int k = kstart; k <= nextz; k++) {
 | 
				
			||||||
 | 
					        for(int j = -nexty; j <= nexty; j++) {
 | 
				
			||||||
 | 
					            for(int i = -nextx; i <= nextx; i++) {
 | 
				
			||||||
 | 
					                if(bindist(i, j, k) < cutneighsq) {
 | 
				
			||||||
 | 
					                    stencil[nstencil++] =
 | 
				
			||||||
 | 
					                        k * mbiny * mbinx + j * mbinx + i;
 | 
				
			||||||
 | 
					                }
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    mbins = mbinx * mbiny * mbinz;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    if (bincount) {
 | 
				
			||||||
 | 
					        free(bincount);
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    bincount = (int*) malloc(mbins * sizeof(int));
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    if (bins) {
 | 
				
			||||||
 | 
					        free(bins);
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    bins = (int*) malloc(mbins * atoms_per_bin * sizeof(int));
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					void buildNeighbor(Atom *atom, Neighbor *neighbor)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					    int nall = atom->Nlocal + atom->Nghost;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    /* extend atom arrays if necessary */
 | 
				
			||||||
 | 
					    if(nall > nmax) {
 | 
				
			||||||
 | 
					        nmax = nall;
 | 
				
			||||||
 | 
					        if(neighbor->numneigh) cudaFreeHost(neighbor->numneigh);
 | 
				
			||||||
 | 
					        if(neighbor->neighbors) cudaFreeHost(neighbor->neighbors);
 | 
				
			||||||
 | 
					        checkCUDAError( "buildNeighbor numneigh", cudaMallocHost((void**)&(neighbor->numneigh), nmax * sizeof(int)) );
 | 
				
			||||||
 | 
					        checkCUDAError( "buildNeighbor neighbors", cudaMallocHost((void**)&(neighbor->neighbors), nmax * neighbor->maxneighs * sizeof(int)) );
 | 
				
			||||||
 | 
					        // neighbor->numneigh = (int*) malloc(nmax * sizeof(int));
 | 
				
			||||||
 | 
					        // neighbor->neighbors = (int*) malloc(nmax * neighbor->maxneighs * sizeof(int*));
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    /* bin local & ghost atoms */
 | 
				
			||||||
 | 
					    binatoms(atom);
 | 
				
			||||||
 | 
					    int resize = 1;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    /* loop over each atom, storing neighbors */
 | 
				
			||||||
 | 
					    while(resize) {
 | 
				
			||||||
 | 
					        int new_maxneighs = neighbor->maxneighs;
 | 
				
			||||||
 | 
					        resize = 0;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        for(int i = 0; i < atom->Nlocal; i++) {
 | 
				
			||||||
 | 
					            int* neighptr = &(neighbor->neighbors[i]);
 | 
				
			||||||
 | 
					            int n = 0;
 | 
				
			||||||
 | 
					            MD_FLOAT xtmp = atom_x(i);
 | 
				
			||||||
 | 
					            MD_FLOAT ytmp = atom_y(i);
 | 
				
			||||||
 | 
					            MD_FLOAT ztmp = atom_z(i);
 | 
				
			||||||
 | 
					            int ibin = coord2bin(xtmp, ytmp, ztmp);
 | 
				
			||||||
 | 
					            #ifdef EXPLICIT_TYPES
 | 
				
			||||||
 | 
					            int type_i = atom->type[i];
 | 
				
			||||||
 | 
					            #endif
 | 
				
			||||||
 | 
					            for(int k = 0; k < nstencil; k++) {
 | 
				
			||||||
 | 
					                int jbin = ibin + stencil[k];
 | 
				
			||||||
 | 
					                int* loc_bin = &bins[jbin * atoms_per_bin];
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					                for(int m = 0; m < bincount[jbin]; m++) {
 | 
				
			||||||
 | 
					                    int j = loc_bin[m];
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					                    if ( j == i ){
 | 
				
			||||||
 | 
					                        continue;
 | 
				
			||||||
 | 
					                    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					                    MD_FLOAT delx = xtmp - atom_x(j);
 | 
				
			||||||
 | 
					                    MD_FLOAT dely = ytmp - atom_y(j);
 | 
				
			||||||
 | 
					                    MD_FLOAT delz = ztmp - atom_z(j);
 | 
				
			||||||
 | 
					                    MD_FLOAT rsq = delx * delx + dely * dely + delz * delz;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					                    #ifdef EXPLICIT_TYPES
 | 
				
			||||||
 | 
					                    int type_j = atom->type[j];
 | 
				
			||||||
 | 
					                    const MD_FLOAT cutoff = atom->cutneighsq[type_i * atom->ntypes + type_j];
 | 
				
			||||||
 | 
					                    #else
 | 
				
			||||||
 | 
					                    const MD_FLOAT cutoff = cutneighsq;
 | 
				
			||||||
 | 
					                    #endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					                    if( rsq <= cutoff ) {
 | 
				
			||||||
 | 
					                        int idx = atom->Nlocal * n;
 | 
				
			||||||
 | 
					                        neighptr[idx] = j;
 | 
				
			||||||
 | 
					                        n += 1;
 | 
				
			||||||
 | 
					                    }
 | 
				
			||||||
 | 
					                }
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					            neighbor->numneigh[i] = n;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					            if(n >= neighbor->maxneighs) {
 | 
				
			||||||
 | 
					                resize = 1;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					                if(n >= new_maxneighs) {
 | 
				
			||||||
 | 
					                    new_maxneighs = n;
 | 
				
			||||||
 | 
					                }
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        if(resize) {
 | 
				
			||||||
 | 
					            printf("RESIZE %d\n", neighbor->maxneighs);
 | 
				
			||||||
 | 
					            neighbor->maxneighs = new_maxneighs * 1.2;
 | 
				
			||||||
 | 
					            free(neighbor->neighbors);
 | 
				
			||||||
 | 
					            neighbor->neighbors = (int*) malloc(atom->Nmax * neighbor->maxneighs * sizeof(int));
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/* internal subroutines */
 | 
				
			||||||
 | 
					MD_FLOAT bindist(int i, int j, int k)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					    MD_FLOAT delx, dely, delz;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    if(i > 0) {
 | 
				
			||||||
 | 
					        delx = (i - 1) * binsizex;
 | 
				
			||||||
 | 
					    } else if(i == 0) {
 | 
				
			||||||
 | 
					        delx = 0.0;
 | 
				
			||||||
 | 
					    } else {
 | 
				
			||||||
 | 
					        delx = (i + 1) * binsizex;
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    if(j > 0) {
 | 
				
			||||||
 | 
					        dely = (j - 1) * binsizey;
 | 
				
			||||||
 | 
					    } else if(j == 0) {
 | 
				
			||||||
 | 
					        dely = 0.0;
 | 
				
			||||||
 | 
					    } else {
 | 
				
			||||||
 | 
					        dely = (j + 1) * binsizey;
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    if(k > 0) {
 | 
				
			||||||
 | 
					        delz = (k - 1) * binsizez;
 | 
				
			||||||
 | 
					    } else if(k == 0) {
 | 
				
			||||||
 | 
					        delz = 0.0;
 | 
				
			||||||
 | 
					    } else {
 | 
				
			||||||
 | 
					        delz = (k + 1) * binsizez;
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    return (delx * delx + dely * dely + delz * delz);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					int coord2bin(MD_FLOAT xin, MD_FLOAT yin, MD_FLOAT zin)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					    int ix, iy, iz;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    if(xin >= xprd) {
 | 
				
			||||||
 | 
					        ix = (int)((xin - xprd) * bininvx) + nbinx - mbinxlo;
 | 
				
			||||||
 | 
					    } else if(xin >= 0.0) {
 | 
				
			||||||
 | 
					        ix = (int)(xin * bininvx) - mbinxlo;
 | 
				
			||||||
 | 
					    } else {
 | 
				
			||||||
 | 
					        ix = (int)(xin * bininvx) - mbinxlo - 1;
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    if(yin >= yprd) {
 | 
				
			||||||
 | 
					        iy = (int)((yin - yprd) * bininvy) + nbiny - mbinylo;
 | 
				
			||||||
 | 
					    } else if(yin >= 0.0) {
 | 
				
			||||||
 | 
					        iy = (int)(yin * bininvy) - mbinylo;
 | 
				
			||||||
 | 
					    } else {
 | 
				
			||||||
 | 
					        iy = (int)(yin * bininvy) - mbinylo - 1;
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    if(zin >= zprd) {
 | 
				
			||||||
 | 
					        iz = (int)((zin - zprd) * bininvz) + nbinz - mbinzlo;
 | 
				
			||||||
 | 
					    } else if(zin >= 0.0) {
 | 
				
			||||||
 | 
					        iz = (int)(zin * bininvz) - mbinzlo;
 | 
				
			||||||
 | 
					    } else {
 | 
				
			||||||
 | 
					        iz = (int)(zin * bininvz) - mbinzlo - 1;
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    return (iz * mbiny * mbinx + iy * mbinx + ix + 1);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					void binatoms(Atom *atom)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					    int nall = atom->Nlocal + atom->Nghost;
 | 
				
			||||||
 | 
					    int resize = 1;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    while(resize > 0) {
 | 
				
			||||||
 | 
					        resize = 0;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        for(int i = 0; i < mbins; i++) {
 | 
				
			||||||
 | 
					            bincount[i] = 0;
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        for(int i = 0; i < nall; i++) {
 | 
				
			||||||
 | 
					            MD_FLOAT x = atom_x(i);
 | 
				
			||||||
 | 
					            MD_FLOAT y = atom_y(i);
 | 
				
			||||||
 | 
					            MD_FLOAT z = atom_z(i);
 | 
				
			||||||
 | 
					            int ibin = coord2bin(x, y, z);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					            if(bincount[ibin] < atoms_per_bin) {
 | 
				
			||||||
 | 
					                int ac = bincount[ibin]++;
 | 
				
			||||||
 | 
					                bins[ibin * atoms_per_bin + ac] = i;
 | 
				
			||||||
 | 
					            } else {
 | 
				
			||||||
 | 
					                resize = 1;
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        if(resize) {
 | 
				
			||||||
 | 
					            free(bins);
 | 
				
			||||||
 | 
					            atoms_per_bin *= 2;
 | 
				
			||||||
 | 
					            bins = (int*) malloc(mbins * atoms_per_bin * sizeof(int));
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					void sortAtom(Atom* atom) {
 | 
				
			||||||
 | 
					    binatoms(atom);
 | 
				
			||||||
 | 
					    int Nmax = atom->Nmax;
 | 
				
			||||||
 | 
					    int* binpos = bincount;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    for(int i=1; i<mbins; i++) {
 | 
				
			||||||
 | 
					        binpos[i] += binpos[i-1];
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#ifdef AOS
 | 
				
			||||||
 | 
					    MD_FLOAT* new_x = (MD_FLOAT*) malloc(Nmax * sizeof(MD_FLOAT) * 3);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    MD_FLOAT* new_vx = (MD_FLOAT*) malloc(Nmax * sizeof(MD_FLOAT) * 3);
 | 
				
			||||||
 | 
					#else
 | 
				
			||||||
 | 
					    MD_FLOAT* new_x = (MD_FLOAT*) malloc(Nmax * sizeof(MD_FLOAT));
 | 
				
			||||||
 | 
					    MD_FLOAT* new_y = (MD_FLOAT*) malloc(Nmax * sizeof(MD_FLOAT));
 | 
				
			||||||
 | 
					    MD_FLOAT* new_z = (MD_FLOAT*) malloc(Nmax * sizeof(MD_FLOAT));
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    MD_FLOAT* new_vx = (MD_FLOAT*) malloc(Nmax * sizeof(MD_FLOAT));
 | 
				
			||||||
 | 
					    MD_FLOAT* new_vy = (MD_FLOAT*) malloc(Nmax * sizeof(MD_FLOAT));
 | 
				
			||||||
 | 
					    MD_FLOAT* new_vz = (MD_FLOAT*) malloc(Nmax * sizeof(MD_FLOAT));
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    MD_FLOAT* old_x = atom->x; MD_FLOAT* old_y = atom->y; MD_FLOAT* old_z = atom->z;
 | 
				
			||||||
 | 
					    MD_FLOAT* old_vx = atom->vx; MD_FLOAT* old_vy = atom->vy; MD_FLOAT* old_vz = atom->vz;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    for(int mybin = 0; mybin<mbins; mybin++) {
 | 
				
			||||||
 | 
					        int start = mybin>0?binpos[mybin-1]:0;
 | 
				
			||||||
 | 
					        int count = binpos[mybin] - start;
 | 
				
			||||||
 | 
					        for(int k=0; k<count; k++) {
 | 
				
			||||||
 | 
					            int new_i = start + k;
 | 
				
			||||||
 | 
					            int old_i = bins[mybin * atoms_per_bin + k];
 | 
				
			||||||
 | 
					#ifdef AOS
 | 
				
			||||||
 | 
					            new_x[new_i * 3 + 0] = old_x[old_i * 3 + 0];
 | 
				
			||||||
 | 
					            new_x[new_i * 3 + 1] = old_x[old_i * 3 + 1];
 | 
				
			||||||
 | 
					            new_x[new_i * 3 + 2] = old_x[old_i * 3 + 2];
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					            new_vx[new_i * 3 + 0] = old_vx[old_i * 3 + 0];
 | 
				
			||||||
 | 
					            new_vx[new_i * 3 + 1] = old_vy[old_i * 3 + 1];
 | 
				
			||||||
 | 
					            new_vx[new_i * 3 + 2] = old_vz[old_i * 3 + 2];
 | 
				
			||||||
 | 
					#else
 | 
				
			||||||
 | 
					            new_x[new_i] = old_x[old_i];
 | 
				
			||||||
 | 
					            new_y[new_i] = old_y[old_i];
 | 
				
			||||||
 | 
					            new_z[new_i] = old_z[old_i];
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					            new_vx[new_i] = old_vx[old_i];
 | 
				
			||||||
 | 
					            new_vy[new_i] = old_vy[old_i];
 | 
				
			||||||
 | 
					            new_vz[new_i] = old_vz[old_i];
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    free(atom->x);
 | 
				
			||||||
 | 
					    atom->x = new_x;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    free(atom->vx);
 | 
				
			||||||
 | 
					    atom->vx = new_vx;
 | 
				
			||||||
 | 
					#ifndef AOS
 | 
				
			||||||
 | 
					    free(atom->y);
 | 
				
			||||||
 | 
					    free(atom->z);
 | 
				
			||||||
 | 
					    atom->y = new_y; atom->z = new_z;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    free(atom->vy); free(atom->vz);
 | 
				
			||||||
 | 
					    atom->vy = new_vy; atom->vz = new_vz;
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					void binatoms_cuda(Atom* c_atom, Binning* c_binning, int* c_resize_needed, Neighbor_params *np, const int threads_per_block)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					    int nall = c_atom->Nlocal + c_atom->Nghost;
 | 
				
			||||||
 | 
					    int resize = 1;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    const int num_blocks = ceil((float)nall / (float)threads_per_block);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    while(resize > 0) {
 | 
				
			||||||
 | 
					        resize = 0;
 | 
				
			||||||
 | 
					        checkCUDAError("binatoms_cuda c_binning->bincount memset", cudaMemset(c_binning->bincount, 0, c_binning->mbins * sizeof(int)));
 | 
				
			||||||
 | 
					        checkCUDAError("binatoms_cuda c_resize_needed memset", cudaMemset(c_resize_needed, 0, sizeof(int)) );
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        /*binatoms_kernel(Atom a, int* bincount, int* bins, int c_binning->atoms_per_bin, Neighbor_params np, int *resize_needed) */
 | 
				
			||||||
 | 
					        binatoms_kernel<<<num_blocks, threads_per_block>>>(*c_atom, c_binning->bincount, c_binning->bins, c_binning->atoms_per_bin, *np, c_resize_needed);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						    checkCUDAError( "PeekAtLastError binatoms kernel", cudaPeekAtLastError() );
 | 
				
			||||||
 | 
						    checkCUDAError( "DeviceSync binatoms kernel", cudaDeviceSynchronize() );
 | 
				
			||||||
 | 
					        
 | 
				
			||||||
 | 
						    checkCUDAError("binatoms_cuda c_resize_needed memcpy back", cudaMemcpy(&resize, c_resize_needed, sizeof(int), cudaMemcpyDeviceToHost) );
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        if(resize) {
 | 
				
			||||||
 | 
					            cudaFree(c_binning->bins);
 | 
				
			||||||
 | 
					            c_binning->atoms_per_bin *= 2;
 | 
				
			||||||
 | 
					            checkCUDAError("binatoms_cuda c_binning->bins resize malloc", cudaMalloc(&c_binning->bins, c_binning->mbins * c_binning->atoms_per_bin * sizeof(int)) );
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    atoms_per_bin = c_binning->atoms_per_bin;
 | 
				
			||||||
 | 
					    const int sortBlocks = ceil((float)mbins / (float)threads_per_block);
 | 
				
			||||||
 | 
					    /*void sort_bin_contents_kernel(int* bincount, int* bins, int mbins, int atoms_per_bin)*/
 | 
				
			||||||
 | 
					    sort_bin_contents_kernel<<<sortBlocks, threads_per_block>>>(c_binning->bincount, c_binning->bins, c_binning->mbins, c_binning->atoms_per_bin);
 | 
				
			||||||
 | 
					    checkCUDAError( "PeekAtLastError sort_bin_contents kernel", cudaPeekAtLastError() );
 | 
				
			||||||
 | 
					    checkCUDAError( "DeviceSync sort_bin_contents kernel", cudaDeviceSynchronize() );
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					void buildNeighbor_cuda(Atom *atom, Neighbor *neighbor, Atom *c_atom, Neighbor *c_neighbor, const int num_threads_per_block, double* timers)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					    int nall = atom->Nlocal + atom->Nghost;
 | 
				
			||||||
 | 
					    c_neighbor->maxneighs = neighbor->maxneighs;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    cudaProfilerStart();
 | 
				
			||||||
 | 
					    /* upload stencil */
 | 
				
			||||||
 | 
					    // TODO move all of this initialization into its own method
 | 
				
			||||||
 | 
					    if(c_stencil == NULL){
 | 
				
			||||||
 | 
					        checkCUDAError( "buildNeighbor c_n_stencil malloc", cudaMalloc((void**)&c_stencil, nstencil * sizeof(int)) );
 | 
				
			||||||
 | 
					        checkCUDAError( "buildNeighbor c_n_stencil memcpy", cudaMemcpy(c_stencil, stencil, nstencil * sizeof(int), cudaMemcpyHostToDevice ));
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    if(c_binning.mbins == 0){
 | 
				
			||||||
 | 
					        c_binning.mbins = mbins;
 | 
				
			||||||
 | 
					        c_binning.atoms_per_bin = atoms_per_bin;
 | 
				
			||||||
 | 
					        checkCUDAError( "buildNeighbor c_binning->bincount malloc", cudaMalloc((void**)&(c_binning.bincount), c_binning.mbins * sizeof(int)) );
 | 
				
			||||||
 | 
					        checkCUDAError( "buidlNeighbor c_binning->bins malloc", cudaMalloc((void**)&(c_binning.bins), c_binning.mbins * c_binning.atoms_per_bin * sizeof(int)) );
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    Neighbor_params np{
 | 
				
			||||||
 | 
					            .xprd = xprd,
 | 
				
			||||||
 | 
					            .yprd = yprd,
 | 
				
			||||||
 | 
					            .zprd = zprd,
 | 
				
			||||||
 | 
					            .bininvx = bininvx,
 | 
				
			||||||
 | 
					            .bininvy = bininvy,
 | 
				
			||||||
 | 
					            .bininvz = bininvz,
 | 
				
			||||||
 | 
					            .mbinxlo = mbinxlo,
 | 
				
			||||||
 | 
					            .mbinylo = mbinylo,
 | 
				
			||||||
 | 
					            .mbinzlo = mbinzlo,
 | 
				
			||||||
 | 
					            .nbinx = nbinx,
 | 
				
			||||||
 | 
					            .nbiny = nbiny,
 | 
				
			||||||
 | 
					            .nbinz = nbinz,
 | 
				
			||||||
 | 
					            .mbinx = mbinx,
 | 
				
			||||||
 | 
					            .mbiny = mbiny,
 | 
				
			||||||
 | 
					            .mbinz = mbinz
 | 
				
			||||||
 | 
					    };
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    if(c_resize_needed == NULL){
 | 
				
			||||||
 | 
					        checkCUDAError("buildNeighbor c_resize_needed malloc", cudaMalloc((void**)&c_resize_needed, sizeof(int)) );
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    /* bin local & ghost atoms */
 | 
				
			||||||
 | 
					    double beforeBinning = getTimeStamp();
 | 
				
			||||||
 | 
					    binatoms_cuda(c_atom, &c_binning, c_resize_needed, &np, num_threads_per_block);
 | 
				
			||||||
 | 
					    double afterBinning = getTimeStamp();
 | 
				
			||||||
 | 
					    timers[NEIGH_BINATOMS] += afterBinning - beforeBinning;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    if(c_new_maxneighs == NULL){
 | 
				
			||||||
 | 
					        checkCUDAError("c_new_maxneighs malloc", cudaMalloc((void**)&c_new_maxneighs, sizeof(int) ));
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    int resize = 1;
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
 | 
					    /* extend c_neighbor arrays if necessary */
 | 
				
			||||||
 | 
					    if(nall > nmax) {
 | 
				
			||||||
 | 
					        nmax = nall;
 | 
				
			||||||
 | 
					        if(c_neighbor->numneigh) cudaFree(c_neighbor->numneigh);
 | 
				
			||||||
 | 
					        if(c_neighbor->neighbors) cudaFree(c_neighbor->neighbors);
 | 
				
			||||||
 | 
					        checkCUDAError( "buildNeighbor c_numneigh malloc", cudaMalloc((void**)&(c_neighbor->numneigh), nmax * sizeof(int)) );
 | 
				
			||||||
 | 
					        checkCUDAError( "buildNeighbor c_neighbors malloc", cudaMalloc((void**)&(c_neighbor->neighbors), nmax * c_neighbor->maxneighs * sizeof(int)) );
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    /* loop over each atom, storing neighbors */
 | 
				
			||||||
 | 
					    while(resize) {
 | 
				
			||||||
 | 
					        resize = 0;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        checkCUDAError("c_new_maxneighs memset", cudaMemset(c_new_maxneighs, 0, sizeof(int) ));
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        // TODO call compute_neigborhood kernel here
 | 
				
			||||||
 | 
					        const int num_blocks = ceil((float)atom->Nlocal / (float)num_threads_per_block);
 | 
				
			||||||
 | 
					        /*compute_neighborhood(Atom a, Neighbor neigh, Neighbor_params np, int nstencil, int* stencil,
 | 
				
			||||||
 | 
					                                     int* bins, int atoms_per_bin, int *bincount, int *new_maxneighs)
 | 
				
			||||||
 | 
					         * */
 | 
				
			||||||
 | 
					        compute_neighborhood<<<num_blocks, num_threads_per_block>>>(*c_atom, *c_neighbor,
 | 
				
			||||||
 | 
					                                                                    np, nstencil, c_stencil,
 | 
				
			||||||
 | 
					                                                                    c_binning.bins, c_binning.atoms_per_bin, c_binning.bincount,
 | 
				
			||||||
 | 
					                                                                    c_new_maxneighs,
 | 
				
			||||||
 | 
													                                    cutneighsq);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						checkCUDAError( "PeekAtLastError ComputeNeighbor", cudaPeekAtLastError() );
 | 
				
			||||||
 | 
						checkCUDAError( "DeviceSync ComputeNeighbor", cudaDeviceSynchronize() );
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        // TODO copy the value of c_new_maxneighs back to host and check if it has been modified
 | 
				
			||||||
 | 
					        int new_maxneighs;
 | 
				
			||||||
 | 
					        checkCUDAError("c_new_maxneighs memcpy back", cudaMemcpy(&new_maxneighs, c_new_maxneighs, sizeof(int), cudaMemcpyDeviceToHost));
 | 
				
			||||||
 | 
					        if (new_maxneighs > c_neighbor->maxneighs){
 | 
				
			||||||
 | 
					            resize = 1;
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        if(resize) {
 | 
				
			||||||
 | 
					            printf("RESIZE %d\n", c_neighbor->maxneighs);
 | 
				
			||||||
 | 
					            c_neighbor->maxneighs = new_maxneighs * 1.2;
 | 
				
			||||||
 | 
					            printf("NEW SIZE %d\n", c_neighbor->maxneighs);
 | 
				
			||||||
 | 
					            cudaFree(c_neighbor->neighbors);
 | 
				
			||||||
 | 
					            checkCUDAError("c_neighbor->neighbors resize malloc",
 | 
				
			||||||
 | 
					                           cudaMalloc((void**)(&c_neighbor->neighbors),
 | 
				
			||||||
 | 
					                                      c_atom->Nmax * c_neighbor->maxneighs * sizeof(int)));
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    neighbor->maxneighs = c_neighbor->maxneighs;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    cudaProfilerStop();
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
							
								
								
									
										286
									
								
								lammps/pbc.cu
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										286
									
								
								lammps/pbc.cu
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,286 @@
 | 
				
			|||||||
 | 
					/*
 | 
				
			||||||
 | 
					 * =======================================================================================
 | 
				
			||||||
 | 
					 *
 | 
				
			||||||
 | 
					 *   Author:   Jan Eitzinger (je), jan.eitzinger@fau.de
 | 
				
			||||||
 | 
					 *   Copyright (c) 2020 RRZE, University Erlangen-Nuremberg
 | 
				
			||||||
 | 
					 *
 | 
				
			||||||
 | 
					 *   This file is part of MD-Bench.
 | 
				
			||||||
 | 
					 *
 | 
				
			||||||
 | 
					 *   MD-Bench is free software: you can redistribute it and/or modify it
 | 
				
			||||||
 | 
					 *   under the terms of the GNU Lesser General Public License as published
 | 
				
			||||||
 | 
					 *   by the Free Software Foundation, either version 3 of the License, or
 | 
				
			||||||
 | 
					 *   (at your option) any later version.
 | 
				
			||||||
 | 
					 *
 | 
				
			||||||
 | 
					 *   MD-Bench is distributed in the hope that it will be useful, but WITHOUT ANY
 | 
				
			||||||
 | 
					 *   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
 | 
				
			||||||
 | 
					 *   PARTICULAR PURPOSE.  See the GNU Lesser General Public License for more
 | 
				
			||||||
 | 
					 *   details.
 | 
				
			||||||
 | 
					 *
 | 
				
			||||||
 | 
					 *   You should have received a copy of the GNU Lesser General Public License along
 | 
				
			||||||
 | 
					 *   with MD-Bench.  If not, see <https://www.gnu.org/licenses/>.
 | 
				
			||||||
 | 
					 * =======================================================================================
 | 
				
			||||||
 | 
					 */
 | 
				
			||||||
 | 
					#include <stdlib.h>
 | 
				
			||||||
 | 
					#include <stdio.h>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					extern "C" {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#include <pbc.h>
 | 
				
			||||||
 | 
					#include <atom.h>
 | 
				
			||||||
 | 
					#include <allocate.h>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#define DELTA 20000
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					__global__ void computeAtomsPbcUpdate(Atom a, MD_FLOAT xprd, MD_FLOAT yprd, MD_FLOAT zprd){
 | 
				
			||||||
 | 
					    const int i = blockIdx.x * blockDim.x + threadIdx.x;
 | 
				
			||||||
 | 
					    Atom* atom = &a;
 | 
				
			||||||
 | 
					    if( i >= atom->Nlocal ){
 | 
				
			||||||
 | 
					        return;
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    if (atom_x(i) < 0.0) {
 | 
				
			||||||
 | 
					        atom_x(i) += xprd;
 | 
				
			||||||
 | 
					    } else if (atom_x(i) >= xprd) {
 | 
				
			||||||
 | 
					        atom_x(i) -= xprd;
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    if (atom_y(i) < 0.0) {
 | 
				
			||||||
 | 
					        atom_y(i) += yprd;
 | 
				
			||||||
 | 
					    } else if (atom_y(i) >= yprd) {
 | 
				
			||||||
 | 
					        atom_y(i) -= yprd;
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    if (atom_z(i) < 0.0) {
 | 
				
			||||||
 | 
					        atom_z(i) += zprd;
 | 
				
			||||||
 | 
					    } else if (atom_z(i) >= zprd) {
 | 
				
			||||||
 | 
					        atom_z(i) -= zprd;
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					__global__ void computePbcUpdate(Atom a, int* PBCx, int* PBCy, int* PBCz, MD_FLOAT xprd, MD_FLOAT yprd, MD_FLOAT zprd){
 | 
				
			||||||
 | 
					    const int i = blockIdx.x * blockDim.x + threadIdx.x;
 | 
				
			||||||
 | 
					    const int Nghost = a.Nghost;
 | 
				
			||||||
 | 
					    if( i >= Nghost ) {
 | 
				
			||||||
 | 
					        return;
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    Atom* atom = &a;
 | 
				
			||||||
 | 
					    int *border_map = atom->border_map;
 | 
				
			||||||
 | 
					    int nlocal = atom->Nlocal;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    atom_x(nlocal + i) = atom_x(border_map[i]) + PBCx[i] * xprd;
 | 
				
			||||||
 | 
					    atom_y(nlocal + i) = atom_y(border_map[i]) + PBCy[i] * yprd;
 | 
				
			||||||
 | 
					    atom_z(nlocal + i) = atom_z(border_map[i]) + PBCz[i] * zprd;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					extern "C"{
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static int NmaxGhost;
 | 
				
			||||||
 | 
					static int *PBCx, *PBCy, *PBCz;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static int c_NmaxGhost = 0;
 | 
				
			||||||
 | 
					static int *c_PBCx = NULL, *c_PBCy = NULL, *c_PBCz = NULL;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static void growPbc(Atom *);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/* exported subroutines */
 | 
				
			||||||
 | 
					void initPbc(Atom *atom) {
 | 
				
			||||||
 | 
					    NmaxGhost = 0;
 | 
				
			||||||
 | 
					    atom->border_map = NULL;
 | 
				
			||||||
 | 
					    PBCx = NULL;
 | 
				
			||||||
 | 
					    PBCy = NULL;
 | 
				
			||||||
 | 
					    PBCz = NULL;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/* update coordinates of ghost atoms */
 | 
				
			||||||
 | 
					/* uses mapping created in setupPbc */
 | 
				
			||||||
 | 
					void updatePbc(Atom *atom, Parameter *param) {
 | 
				
			||||||
 | 
					    int *border_map = atom->border_map;
 | 
				
			||||||
 | 
					    int nlocal = atom->Nlocal;
 | 
				
			||||||
 | 
					    MD_FLOAT xprd = param->xprd;
 | 
				
			||||||
 | 
					    MD_FLOAT yprd = param->yprd;
 | 
				
			||||||
 | 
					    MD_FLOAT zprd = param->zprd;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    for (int i = 0; i < atom->Nghost; i++) {
 | 
				
			||||||
 | 
					        atom_x(nlocal + i) = atom_x(border_map[i]) + PBCx[i] * xprd;
 | 
				
			||||||
 | 
					        atom_y(nlocal + i) = atom_y(border_map[i]) + PBCy[i] * yprd;
 | 
				
			||||||
 | 
					        atom_z(nlocal + i) = atom_z(border_map[i]) + PBCz[i] * zprd;
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/* update coordinates of ghost atoms */
 | 
				
			||||||
 | 
					/* uses mapping created in setupPbc */
 | 
				
			||||||
 | 
					void updatePbc_cuda(Atom *atom, Parameter *param, Atom *c_atom, bool doReneighbor, const int num_threads_per_block) {
 | 
				
			||||||
 | 
					    if (doReneighbor){
 | 
				
			||||||
 | 
					        c_atom->Natoms = atom->Natoms;
 | 
				
			||||||
 | 
					        c_atom->Nlocal = atom->Nlocal;
 | 
				
			||||||
 | 
					        c_atom->Nghost = atom->Nghost;
 | 
				
			||||||
 | 
					        c_atom->ntypes = atom->ntypes;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        if (atom->Nmax > c_atom->Nmax){ // the number of ghost atoms has increased -> more space is needed
 | 
				
			||||||
 | 
					            c_atom->Nmax = atom->Nmax;
 | 
				
			||||||
 | 
					            if(c_atom->x != NULL){ cudaFree(c_atom->x); }
 | 
				
			||||||
 | 
					            if(c_atom->type != NULL){ cudaFree(c_atom->type); }
 | 
				
			||||||
 | 
					            checkCUDAError( "updatePbc c_atom->x malloc", cudaMalloc((void**)&(c_atom->x), sizeof(MD_FLOAT) * atom->Nmax * 3) );
 | 
				
			||||||
 | 
					            checkCUDAError( "updatePbc c_atom->type malloc", cudaMalloc((void**)&(c_atom->type), sizeof(int) * atom->Nmax) );
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					        // TODO if the sort is reactivated the atom->vx needs to be copied to GPU as well
 | 
				
			||||||
 | 
					        checkCUDAError( "updatePbc c_atom->x memcpy", cudaMemcpy(c_atom->x, atom->x, sizeof(MD_FLOAT) * atom->Nmax * 3, cudaMemcpyHostToDevice) );
 | 
				
			||||||
 | 
					        checkCUDAError( "updatePbc c_atom->type memcpy", cudaMemcpy(c_atom->type, atom->type, sizeof(int) * atom->Nmax, cudaMemcpyHostToDevice) );
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        if(c_NmaxGhost < NmaxGhost){
 | 
				
			||||||
 | 
					            c_NmaxGhost = NmaxGhost;
 | 
				
			||||||
 | 
					            if(c_PBCx != NULL){ cudaFree(c_PBCx); }
 | 
				
			||||||
 | 
					            if(c_PBCy != NULL){ cudaFree(c_PBCy); }
 | 
				
			||||||
 | 
					            if(c_PBCz != NULL){ cudaFree(c_PBCz); }
 | 
				
			||||||
 | 
					            if(c_atom->border_map != NULL){ cudaFree(c_atom->border_map); }
 | 
				
			||||||
 | 
					            checkCUDAError( "updatePbc c_PBCx malloc", cudaMalloc((void**)&c_PBCx, NmaxGhost * sizeof(int)) );
 | 
				
			||||||
 | 
					            checkCUDAError( "updatePbc c_PBCy malloc", cudaMalloc((void**)&c_PBCy, NmaxGhost * sizeof(int)) );
 | 
				
			||||||
 | 
					            checkCUDAError( "updatePbc c_PBCz malloc", cudaMalloc((void**)&c_PBCz, NmaxGhost * sizeof(int)) );
 | 
				
			||||||
 | 
					            checkCUDAError( "updatePbc c_atom->border_map malloc", cudaMalloc((void**)&(c_atom->border_map), NmaxGhost * sizeof(int)) );
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					        checkCUDAError( "updatePbc c_PBCx memcpy", cudaMemcpy(c_PBCx, PBCx, NmaxGhost * sizeof(int), cudaMemcpyHostToDevice) );
 | 
				
			||||||
 | 
					        checkCUDAError( "updatePbc c_PBCy memcpy", cudaMemcpy(c_PBCy, PBCy, NmaxGhost * sizeof(int), cudaMemcpyHostToDevice) );
 | 
				
			||||||
 | 
					        checkCUDAError( "updatePbc c_PBCz memcpy", cudaMemcpy(c_PBCz, PBCz, NmaxGhost * sizeof(int), cudaMemcpyHostToDevice) );
 | 
				
			||||||
 | 
					        checkCUDAError( "updatePbc c_atom->border_map memcpy", cudaMemcpy(c_atom->border_map, atom->border_map, NmaxGhost * sizeof(int), cudaMemcpyHostToDevice) );
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    MD_FLOAT xprd = param->xprd;
 | 
				
			||||||
 | 
					    MD_FLOAT yprd = param->yprd;
 | 
				
			||||||
 | 
					    MD_FLOAT zprd = param->zprd;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    const int num_blocks = ceil((float)atom->Nghost / (float)num_threads_per_block);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    /*__global__ void computePbcUpdate(Atom a, int* PBCx, int* PBCy, int* PBCz,
 | 
				
			||||||
 | 
					     *                                                          MD_FLOAT xprd, MD_FLOAT yprd, MD_FLOAT zprd)
 | 
				
			||||||
 | 
					     * */
 | 
				
			||||||
 | 
					    computePbcUpdate<<<num_blocks, num_threads_per_block>>>(*c_atom, c_PBCx, c_PBCy, c_PBCz, xprd, yprd, zprd);
 | 
				
			||||||
 | 
					    checkCUDAError( "PeekAtLastError UpdatePbc", cudaPeekAtLastError() );
 | 
				
			||||||
 | 
					    checkCUDAError( "DeviceSync UpdatePbc", cudaDeviceSynchronize() );
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/* relocate atoms that have left domain according
 | 
				
			||||||
 | 
					 * to periodic boundary conditions */
 | 
				
			||||||
 | 
					void updateAtomsPbc(Atom *atom, Parameter *param) {
 | 
				
			||||||
 | 
					    MD_FLOAT xprd = param->xprd;
 | 
				
			||||||
 | 
					    MD_FLOAT yprd = param->yprd;
 | 
				
			||||||
 | 
					    MD_FLOAT zprd = param->zprd;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    for (int i = 0; i < atom->Nlocal; i++) {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        if (atom_x(i) < 0.0) {
 | 
				
			||||||
 | 
					            atom_x(i) += xprd;
 | 
				
			||||||
 | 
					        } else if (atom_x(i) >= xprd) {
 | 
				
			||||||
 | 
					            atom_x(i) -= xprd;
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        if (atom_y(i) < 0.0) {
 | 
				
			||||||
 | 
					            atom_y(i) += yprd;
 | 
				
			||||||
 | 
					        } else if (atom_y(i) >= yprd) {
 | 
				
			||||||
 | 
					            atom_y(i) -= yprd;
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        if (atom_z(i) < 0.0) {
 | 
				
			||||||
 | 
					            atom_z(i) += zprd;
 | 
				
			||||||
 | 
					        } else if (atom_z(i) >= zprd) {
 | 
				
			||||||
 | 
					            atom_z(i) -= zprd;
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					void updateAtomsPbc_cuda(Atom* atom, Parameter* param, Atom* c_atom, const int num_threads_per_block){
 | 
				
			||||||
 | 
					    MD_FLOAT xprd = param->xprd;
 | 
				
			||||||
 | 
					    MD_FLOAT yprd = param->yprd;
 | 
				
			||||||
 | 
					    MD_FLOAT zprd = param->zprd;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    const int num_blocks = ceil((float)atom->Nlocal / (float)num_threads_per_block);
 | 
				
			||||||
 | 
					    /*void computeAtomsPbcUpdate(Atom a, MD_FLOAT xprd, MD_FLOAT yprd, MD_FLOAT zprd)*/
 | 
				
			||||||
 | 
					    computeAtomsPbcUpdate<<<num_blocks, num_threads_per_block>>>(*c_atom, xprd, yprd, zprd);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    checkCUDAError( "PeekAtLastError UpdateAtomsPbc", cudaPeekAtLastError() );
 | 
				
			||||||
 | 
					    checkCUDAError( "DeviceSync UpdateAtomsPbc", cudaDeviceSynchronize() );
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    checkCUDAError( "updateAtomsPbc position memcpy back", cudaMemcpy(atom->x, c_atom->x, sizeof(MD_FLOAT) * atom->Nlocal * 3, cudaMemcpyDeviceToHost) );
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/* setup periodic boundary conditions by
 | 
				
			||||||
 | 
					 * defining ghost atoms around domain
 | 
				
			||||||
 | 
					 * only creates mapping and coordinate corrections
 | 
				
			||||||
 | 
					 * that are then enforced in updatePbc */
 | 
				
			||||||
 | 
					#define ADDGHOST(dx, dy, dz)                              \
 | 
				
			||||||
 | 
					    Nghost++;                                           \
 | 
				
			||||||
 | 
					    border_map[Nghost] = i;                             \
 | 
				
			||||||
 | 
					    PBCx[Nghost] = dx;                                  \
 | 
				
			||||||
 | 
					    PBCy[Nghost] = dy;                                  \
 | 
				
			||||||
 | 
					    PBCz[Nghost] = dz;                                  \
 | 
				
			||||||
 | 
					    atom->type[atom->Nlocal + Nghost] = atom->type[i]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					void setupPbc(Atom *atom, Parameter *param) {
 | 
				
			||||||
 | 
					    int *border_map = atom->border_map;
 | 
				
			||||||
 | 
					    MD_FLOAT xprd = param->xprd;
 | 
				
			||||||
 | 
					    MD_FLOAT yprd = param->yprd;
 | 
				
			||||||
 | 
					    MD_FLOAT zprd = param->zprd;
 | 
				
			||||||
 | 
					    MD_FLOAT Cutneigh = param->cutneigh;
 | 
				
			||||||
 | 
					    int Nghost = -1;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    for (int i = 0; i < atom->Nlocal; i++) {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        if (atom->Nlocal + Nghost + 7 >= atom->Nmax) {
 | 
				
			||||||
 | 
					            growAtom(atom);
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					        if (Nghost + 7 >= NmaxGhost) {
 | 
				
			||||||
 | 
					            growPbc(atom);
 | 
				
			||||||
 | 
					            border_map = atom->border_map;
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        MD_FLOAT x = atom_x(i);
 | 
				
			||||||
 | 
					        MD_FLOAT y = atom_y(i);
 | 
				
			||||||
 | 
					        MD_FLOAT z = atom_z(i);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        /* Setup ghost atoms */
 | 
				
			||||||
 | 
					        /* 6 planes */
 | 
				
			||||||
 | 
					        if (x < Cutneigh) { ADDGHOST(+1, 0, 0); }
 | 
				
			||||||
 | 
					        if (x >= (xprd - Cutneigh)) { ADDGHOST(-1, 0, 0); }
 | 
				
			||||||
 | 
					        if (y < Cutneigh) { ADDGHOST(0, +1, 0); }
 | 
				
			||||||
 | 
					        if (y >= (yprd - Cutneigh)) { ADDGHOST(0, -1, 0); }
 | 
				
			||||||
 | 
					        if (z < Cutneigh) { ADDGHOST(0, 0, +1); }
 | 
				
			||||||
 | 
					        if (z >= (zprd - Cutneigh)) { ADDGHOST(0, 0, -1); }
 | 
				
			||||||
 | 
					        /* 8 corners */
 | 
				
			||||||
 | 
					        if (x < Cutneigh && y < Cutneigh && z < Cutneigh) { ADDGHOST(+1, +1, +1); }
 | 
				
			||||||
 | 
					        if (x < Cutneigh && y >= (yprd - Cutneigh) && z < Cutneigh) { ADDGHOST(+1, -1, +1); }
 | 
				
			||||||
 | 
					        if (x < Cutneigh && y >= Cutneigh && z >= (zprd - Cutneigh)) { ADDGHOST(+1, +1, -1); }
 | 
				
			||||||
 | 
					        if (x < Cutneigh && y >= (yprd - Cutneigh) && z >= (zprd - Cutneigh)) { ADDGHOST(+1, -1, -1); }
 | 
				
			||||||
 | 
					        if (x >= (xprd - Cutneigh) && y < Cutneigh && z < Cutneigh) { ADDGHOST(-1, +1, +1); }
 | 
				
			||||||
 | 
					        if (x >= (xprd - Cutneigh) && y >= (yprd - Cutneigh) && z < Cutneigh) { ADDGHOST(-1, -1, +1); }
 | 
				
			||||||
 | 
					        if (x >= (xprd - Cutneigh) && y < Cutneigh && z >= (zprd - Cutneigh)) { ADDGHOST(-1, +1, -1); }
 | 
				
			||||||
 | 
					        if (x >= (xprd - Cutneigh) && y >= (yprd - Cutneigh) && z >= (zprd - Cutneigh)) { ADDGHOST(-1, -1, -1); }
 | 
				
			||||||
 | 
					        /* 12 edges */
 | 
				
			||||||
 | 
					        if (x < Cutneigh && z < Cutneigh) { ADDGHOST(+1, 0, +1); }
 | 
				
			||||||
 | 
					        if (x < Cutneigh && z >= (zprd - Cutneigh)) { ADDGHOST(+1, 0, -1); }
 | 
				
			||||||
 | 
					        if (x >= (xprd - Cutneigh) && z < Cutneigh) { ADDGHOST(-1, 0, +1); }
 | 
				
			||||||
 | 
					        if (x >= (xprd - Cutneigh) && z >= (zprd - Cutneigh)) { ADDGHOST(-1, 0, -1); }
 | 
				
			||||||
 | 
					        if (y < Cutneigh && z < Cutneigh) { ADDGHOST(0, +1, +1); }
 | 
				
			||||||
 | 
					        if (y < Cutneigh && z >= (zprd - Cutneigh)) { ADDGHOST(0, +1, -1); }
 | 
				
			||||||
 | 
					        if (y >= (yprd - Cutneigh) && z < Cutneigh) { ADDGHOST(0, -1, +1); }
 | 
				
			||||||
 | 
					        if (y >= (yprd - Cutneigh) && z >= (zprd - Cutneigh)) { ADDGHOST(0, -1, -1); }
 | 
				
			||||||
 | 
					        if (y < Cutneigh && x < Cutneigh) { ADDGHOST(+1, +1, 0); }
 | 
				
			||||||
 | 
					        if (y < Cutneigh && x >= (xprd - Cutneigh)) { ADDGHOST(-1, +1, 0); }
 | 
				
			||||||
 | 
					        if (y >= (yprd - Cutneigh) && x < Cutneigh) { ADDGHOST(+1, -1, 0); }
 | 
				
			||||||
 | 
					        if (y >= (yprd - Cutneigh) && x >= (xprd - Cutneigh)) { ADDGHOST(-1, -1, 0); }
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    // increase by one to make it the ghost atom count
 | 
				
			||||||
 | 
					    atom->Nghost = Nghost + 1;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/* internal subroutines */
 | 
				
			||||||
 | 
					void growPbc(Atom *atom) {
 | 
				
			||||||
 | 
					    int nold = NmaxGhost;
 | 
				
			||||||
 | 
					    NmaxGhost += DELTA;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    atom->border_map = (int *) reallocate(atom->border_map, ALIGNMENT, NmaxGhost * sizeof(int), nold * sizeof(int));
 | 
				
			||||||
 | 
					    PBCx = (int *) reallocate(PBCx, ALIGNMENT, NmaxGhost * sizeof(int), nold * sizeof(int));
 | 
				
			||||||
 | 
					    PBCy = (int *) reallocate(PBCy, ALIGNMENT, NmaxGhost * sizeof(int), nold * sizeof(int));
 | 
				
			||||||
 | 
					    PBCz = (int *) reallocate(PBCz, ALIGNMENT, NmaxGhost * sizeof(int), nold * sizeof(int));
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
							
								
								
									
										419
									
								
								src/neighbor.c
									
									
									
									
									
								
							
							
						
						
									
										419
									
								
								src/neighbor.c
									
									
									
									
									
								
							@@ -1,419 +0,0 @@
 | 
				
			|||||||
/*
 | 
					 | 
				
			||||||
 * =======================================================================================
 | 
					 | 
				
			||||||
 *
 | 
					 | 
				
			||||||
 *   Author:   Jan Eitzinger (je), jan.eitzinger@fau.de
 | 
					 | 
				
			||||||
 *   Copyright (c) 2021 RRZE, University Erlangen-Nuremberg
 | 
					 | 
				
			||||||
 *
 | 
					 | 
				
			||||||
 *   This file is part of MD-Bench.
 | 
					 | 
				
			||||||
 *
 | 
					 | 
				
			||||||
 *   MD-Bench is free software: you can redistribute it and/or modify it
 | 
					 | 
				
			||||||
 *   under the terms of the GNU Lesser General Public License as published
 | 
					 | 
				
			||||||
 *   by the Free Software Foundation, either version 3 of the License, or
 | 
					 | 
				
			||||||
 *   (at your option) any later version.
 | 
					 | 
				
			||||||
 *
 | 
					 | 
				
			||||||
 *   MD-Bench is distributed in the hope that it will be useful, but WITHOUT ANY
 | 
					 | 
				
			||||||
 *   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
 | 
					 | 
				
			||||||
 *   PARTICULAR PURPOSE.  See the GNU Lesser General Public License for more
 | 
					 | 
				
			||||||
 *   details.
 | 
					 | 
				
			||||||
 *
 | 
					 | 
				
			||||||
 *   You should have received a copy of the GNU Lesser General Public License along
 | 
					 | 
				
			||||||
 *   with MD-Bench.  If not, see <https://www.gnu.org/licenses/>.
 | 
					 | 
				
			||||||
 * =======================================================================================
 | 
					 | 
				
			||||||
 */
 | 
					 | 
				
			||||||
#include <stdlib.h>
 | 
					 | 
				
			||||||
#include <stdio.h>
 | 
					 | 
				
			||||||
#include <math.h>
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
#include <neighbor.h>
 | 
					 | 
				
			||||||
#include <parameter.h>
 | 
					 | 
				
			||||||
#include <allocate.h>
 | 
					 | 
				
			||||||
#include <atom.h>
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
#define SMALL 1.0e-6
 | 
					 | 
				
			||||||
#define FACTOR 0.999
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
static MD_FLOAT xprd, yprd, zprd;
 | 
					 | 
				
			||||||
static MD_FLOAT bininvx, bininvy, bininvz;
 | 
					 | 
				
			||||||
static int mbinxlo, mbinylo, mbinzlo;
 | 
					 | 
				
			||||||
static int nbinx, nbiny, nbinz;
 | 
					 | 
				
			||||||
static int mbinx, mbiny, mbinz; // n bins in x, y, z
 | 
					 | 
				
			||||||
static int *bincount;
 | 
					 | 
				
			||||||
static int *bins;
 | 
					 | 
				
			||||||
static int mbins; //total number of bins
 | 
					 | 
				
			||||||
static int atoms_per_bin;  // max atoms per bin
 | 
					 | 
				
			||||||
static MD_FLOAT cutneigh;
 | 
					 | 
				
			||||||
static MD_FLOAT cutneighsq;  // neighbor cutoff squared
 | 
					 | 
				
			||||||
static int nmax;
 | 
					 | 
				
			||||||
static int nstencil;      // # of bins in stencil
 | 
					 | 
				
			||||||
static int* stencil;      // stencil list of bin offsets
 | 
					 | 
				
			||||||
static MD_FLOAT binsizex, binsizey, binsizez;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
static int coord2bin(MD_FLOAT, MD_FLOAT , MD_FLOAT);
 | 
					 | 
				
			||||||
static MD_FLOAT bindist(int, int, int);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
/* exported subroutines */
 | 
					 | 
				
			||||||
void initNeighbor(Neighbor *neighbor, Parameter *param)
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
    MD_FLOAT neighscale = 5.0 / 6.0;
 | 
					 | 
				
			||||||
    xprd = param->nx * param->lattice;
 | 
					 | 
				
			||||||
    yprd = param->ny * param->lattice;
 | 
					 | 
				
			||||||
    zprd = param->nz * param->lattice;
 | 
					 | 
				
			||||||
    cutneigh = param->cutneigh;
 | 
					 | 
				
			||||||
    nbinx = neighscale * param->nx;
 | 
					 | 
				
			||||||
    nbiny = neighscale * param->ny;
 | 
					 | 
				
			||||||
    nbinz = neighscale * param->nz;
 | 
					 | 
				
			||||||
    nmax = 0;
 | 
					 | 
				
			||||||
    atoms_per_bin = 8;
 | 
					 | 
				
			||||||
    stencil = NULL;
 | 
					 | 
				
			||||||
    bins = NULL;
 | 
					 | 
				
			||||||
    bincount = NULL;
 | 
					 | 
				
			||||||
    neighbor->maxneighs = 100;
 | 
					 | 
				
			||||||
    neighbor->numneigh = NULL;
 | 
					 | 
				
			||||||
    neighbor->neighbors = NULL;
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
void setupNeighbor()
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
    MD_FLOAT coord;
 | 
					 | 
				
			||||||
    int mbinxhi, mbinyhi, mbinzhi;
 | 
					 | 
				
			||||||
    int nextx, nexty, nextz;
 | 
					 | 
				
			||||||
    MD_FLOAT xlo = 0.0; MD_FLOAT xhi = xprd;
 | 
					 | 
				
			||||||
    MD_FLOAT ylo = 0.0; MD_FLOAT yhi = yprd;
 | 
					 | 
				
			||||||
    MD_FLOAT zlo = 0.0; MD_FLOAT zhi = zprd;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    cutneighsq = cutneigh * cutneigh;
 | 
					 | 
				
			||||||
    binsizex = xprd / nbinx;
 | 
					 | 
				
			||||||
    binsizey = yprd / nbiny;
 | 
					 | 
				
			||||||
    binsizez = zprd / nbinz;
 | 
					 | 
				
			||||||
    bininvx = 1.0 / binsizex;
 | 
					 | 
				
			||||||
    bininvy = 1.0 / binsizey;
 | 
					 | 
				
			||||||
    bininvz = 1.0 / binsizez;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    coord = xlo - cutneigh - SMALL * xprd;
 | 
					 | 
				
			||||||
    mbinxlo = (int) (coord * bininvx);
 | 
					 | 
				
			||||||
    if (coord < 0.0) {
 | 
					 | 
				
			||||||
        mbinxlo = mbinxlo - 1;
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
    coord = xhi + cutneigh + SMALL * xprd;
 | 
					 | 
				
			||||||
    mbinxhi = (int) (coord * bininvx);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    coord = ylo - cutneigh - SMALL * yprd;
 | 
					 | 
				
			||||||
    mbinylo = (int) (coord * bininvy);
 | 
					 | 
				
			||||||
    if (coord < 0.0) {
 | 
					 | 
				
			||||||
        mbinylo = mbinylo - 1;
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
    coord = yhi + cutneigh + SMALL * yprd;
 | 
					 | 
				
			||||||
    mbinyhi = (int) (coord * bininvy);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    coord = zlo - cutneigh - SMALL * zprd;
 | 
					 | 
				
			||||||
    mbinzlo = (int) (coord * bininvz);
 | 
					 | 
				
			||||||
    if (coord < 0.0) {
 | 
					 | 
				
			||||||
        mbinzlo = mbinzlo - 1;
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
    coord = zhi + cutneigh + SMALL * zprd;
 | 
					 | 
				
			||||||
    mbinzhi = (int) (coord * bininvz);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    mbinxlo = mbinxlo - 1;
 | 
					 | 
				
			||||||
    mbinxhi = mbinxhi + 1;
 | 
					 | 
				
			||||||
    mbinx = mbinxhi - mbinxlo + 1;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    mbinylo = mbinylo - 1;
 | 
					 | 
				
			||||||
    mbinyhi = mbinyhi + 1;
 | 
					 | 
				
			||||||
    mbiny = mbinyhi - mbinylo + 1;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    mbinzlo = mbinzlo - 1;
 | 
					 | 
				
			||||||
    mbinzhi = mbinzhi + 1;
 | 
					 | 
				
			||||||
    mbinz = mbinzhi - mbinzlo + 1;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    nextx = (int) (cutneigh * bininvx);
 | 
					 | 
				
			||||||
    if(nextx * binsizex < FACTOR * cutneigh) nextx++;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    nexty = (int) (cutneigh * bininvy);
 | 
					 | 
				
			||||||
    if(nexty * binsizey < FACTOR * cutneigh) nexty++;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    nextz = (int) (cutneigh * bininvz);
 | 
					 | 
				
			||||||
    if(nextz * binsizez < FACTOR * cutneigh) nextz++;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    if (stencil) {
 | 
					 | 
				
			||||||
        free(stencil);
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    stencil = (int*) malloc(
 | 
					 | 
				
			||||||
            (2 * nextz + 1) * (2 * nexty + 1) * (2 * nextx + 1) * sizeof(int));
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    nstencil = 0;
 | 
					 | 
				
			||||||
    int kstart = -nextz;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    for(int k = kstart; k <= nextz; k++) {
 | 
					 | 
				
			||||||
        for(int j = -nexty; j <= nexty; j++) {
 | 
					 | 
				
			||||||
            for(int i = -nextx; i <= nextx; i++) {
 | 
					 | 
				
			||||||
                if(bindist(i, j, k) < cutneighsq) {
 | 
					 | 
				
			||||||
                    stencil[nstencil++] =
 | 
					 | 
				
			||||||
                        k * mbiny * mbinx + j * mbinx + i;
 | 
					 | 
				
			||||||
                }
 | 
					 | 
				
			||||||
            }
 | 
					 | 
				
			||||||
        }
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    mbins = mbinx * mbiny * mbinz;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    if (bincount) {
 | 
					 | 
				
			||||||
        free(bincount);
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
    bincount = (int*) malloc(mbins * sizeof(int));
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    if (bins) {
 | 
					 | 
				
			||||||
        free(bins);
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
    bins = (int*) malloc(mbins * atoms_per_bin * sizeof(int));
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
void buildNeighbor(Atom *atom, Neighbor *neighbor)
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
    int nall = atom->Nlocal + atom->Nghost;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    /* extend atom arrays if necessary */
 | 
					 | 
				
			||||||
    if(nall > nmax) {
 | 
					 | 
				
			||||||
        nmax = nall;
 | 
					 | 
				
			||||||
        if(neighbor->numneigh) cudaFreeHost(neighbor->numneigh);
 | 
					 | 
				
			||||||
        if(neighbor->neighbors) cudaFreeHost(neighbor->neighbors);
 | 
					 | 
				
			||||||
        checkCUDAError( "buildNeighbor numneigh", cudaMallocHost((void**)&(neighbor->numneigh), nmax * sizeof(int)) );
 | 
					 | 
				
			||||||
        checkCUDAError( "buildNeighbor neighbors", cudaMallocHost((void**)&(neighbor->neighbors), nmax * neighbor->maxneighs * sizeof(int)) );
 | 
					 | 
				
			||||||
        // neighbor->numneigh = (int*) malloc(nmax * sizeof(int));
 | 
					 | 
				
			||||||
        // neighbor->neighbors = (int*) malloc(nmax * neighbor->maxneighs * sizeof(int*));
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    /* bin local & ghost atoms */
 | 
					 | 
				
			||||||
    binatoms(atom);
 | 
					 | 
				
			||||||
    int resize = 1;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    /* loop over each atom, storing neighbors */
 | 
					 | 
				
			||||||
    while(resize) {
 | 
					 | 
				
			||||||
        int new_maxneighs = neighbor->maxneighs;
 | 
					 | 
				
			||||||
        resize = 0;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        for(int i = 0; i < atom->Nlocal; i++) {
 | 
					 | 
				
			||||||
            int* neighptr = &(neighbor->neighbors[i]);
 | 
					 | 
				
			||||||
            int n = 0;
 | 
					 | 
				
			||||||
            MD_FLOAT xtmp = atom_x(i);
 | 
					 | 
				
			||||||
            MD_FLOAT ytmp = atom_y(i);
 | 
					 | 
				
			||||||
            MD_FLOAT ztmp = atom_z(i);
 | 
					 | 
				
			||||||
            int ibin = coord2bin(xtmp, ytmp, ztmp);
 | 
					 | 
				
			||||||
            #ifdef EXPLICIT_TYPES
 | 
					 | 
				
			||||||
            int type_i = atom->type[i];
 | 
					 | 
				
			||||||
            #endif
 | 
					 | 
				
			||||||
            for(int k = 0; k < nstencil; k++) {
 | 
					 | 
				
			||||||
                int jbin = ibin + stencil[k];
 | 
					 | 
				
			||||||
                int* loc_bin = &bins[jbin * atoms_per_bin];
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
                for(int m = 0; m < bincount[jbin]; m++) {
 | 
					 | 
				
			||||||
                    int j = loc_bin[m];
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
                    if ( j == i ){
 | 
					 | 
				
			||||||
                        continue;
 | 
					 | 
				
			||||||
                    }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
                    MD_FLOAT delx = xtmp - atom_x(j);
 | 
					 | 
				
			||||||
                    MD_FLOAT dely = ytmp - atom_y(j);
 | 
					 | 
				
			||||||
                    MD_FLOAT delz = ztmp - atom_z(j);
 | 
					 | 
				
			||||||
                    MD_FLOAT rsq = delx * delx + dely * dely + delz * delz;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
                    #ifdef EXPLICIT_TYPES
 | 
					 | 
				
			||||||
                    int type_j = atom->type[j];
 | 
					 | 
				
			||||||
                    const MD_FLOAT cutoff = atom->cutneighsq[type_i * atom->ntypes + type_j];
 | 
					 | 
				
			||||||
                    #else
 | 
					 | 
				
			||||||
                    const MD_FLOAT cutoff = cutneighsq;
 | 
					 | 
				
			||||||
                    #endif
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
                    if( rsq <= cutoff ) {
 | 
					 | 
				
			||||||
                        int idx = atom->Nlocal * n;
 | 
					 | 
				
			||||||
                        neighptr[idx] = j;
 | 
					 | 
				
			||||||
                        n += 1;
 | 
					 | 
				
			||||||
                    }
 | 
					 | 
				
			||||||
                }
 | 
					 | 
				
			||||||
            }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
            neighbor->numneigh[i] = n;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
            if(n >= neighbor->maxneighs) {
 | 
					 | 
				
			||||||
                resize = 1;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
                if(n >= new_maxneighs) {
 | 
					 | 
				
			||||||
                    new_maxneighs = n;
 | 
					 | 
				
			||||||
                }
 | 
					 | 
				
			||||||
            }
 | 
					 | 
				
			||||||
        }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        if(resize) {
 | 
					 | 
				
			||||||
            printf("RESIZE %d\n", neighbor->maxneighs);
 | 
					 | 
				
			||||||
            neighbor->maxneighs = new_maxneighs * 1.2;
 | 
					 | 
				
			||||||
            free(neighbor->neighbors);
 | 
					 | 
				
			||||||
            neighbor->neighbors = (int*) malloc(atom->Nmax * neighbor->maxneighs * sizeof(int));
 | 
					 | 
				
			||||||
        }
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
/* internal subroutines */
 | 
					 | 
				
			||||||
MD_FLOAT bindist(int i, int j, int k)
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
    MD_FLOAT delx, dely, delz;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    if(i > 0) {
 | 
					 | 
				
			||||||
        delx = (i - 1) * binsizex;
 | 
					 | 
				
			||||||
    } else if(i == 0) {
 | 
					 | 
				
			||||||
        delx = 0.0;
 | 
					 | 
				
			||||||
    } else {
 | 
					 | 
				
			||||||
        delx = (i + 1) * binsizex;
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    if(j > 0) {
 | 
					 | 
				
			||||||
        dely = (j - 1) * binsizey;
 | 
					 | 
				
			||||||
    } else if(j == 0) {
 | 
					 | 
				
			||||||
        dely = 0.0;
 | 
					 | 
				
			||||||
    } else {
 | 
					 | 
				
			||||||
        dely = (j + 1) * binsizey;
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    if(k > 0) {
 | 
					 | 
				
			||||||
        delz = (k - 1) * binsizez;
 | 
					 | 
				
			||||||
    } else if(k == 0) {
 | 
					 | 
				
			||||||
        delz = 0.0;
 | 
					 | 
				
			||||||
    } else {
 | 
					 | 
				
			||||||
        delz = (k + 1) * binsizez;
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    return (delx * delx + dely * dely + delz * delz);
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
int coord2bin(MD_FLOAT xin, MD_FLOAT yin, MD_FLOAT zin)
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
    int ix, iy, iz;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    if(xin >= xprd) {
 | 
					 | 
				
			||||||
        ix = (int)((xin - xprd) * bininvx) + nbinx - mbinxlo;
 | 
					 | 
				
			||||||
    } else if(xin >= 0.0) {
 | 
					 | 
				
			||||||
        ix = (int)(xin * bininvx) - mbinxlo;
 | 
					 | 
				
			||||||
    } else {
 | 
					 | 
				
			||||||
        ix = (int)(xin * bininvx) - mbinxlo - 1;
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    if(yin >= yprd) {
 | 
					 | 
				
			||||||
        iy = (int)((yin - yprd) * bininvy) + nbiny - mbinylo;
 | 
					 | 
				
			||||||
    } else if(yin >= 0.0) {
 | 
					 | 
				
			||||||
        iy = (int)(yin * bininvy) - mbinylo;
 | 
					 | 
				
			||||||
    } else {
 | 
					 | 
				
			||||||
        iy = (int)(yin * bininvy) - mbinylo - 1;
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    if(zin >= zprd) {
 | 
					 | 
				
			||||||
        iz = (int)((zin - zprd) * bininvz) + nbinz - mbinzlo;
 | 
					 | 
				
			||||||
    } else if(zin >= 0.0) {
 | 
					 | 
				
			||||||
        iz = (int)(zin * bininvz) - mbinzlo;
 | 
					 | 
				
			||||||
    } else {
 | 
					 | 
				
			||||||
        iz = (int)(zin * bininvz) - mbinzlo - 1;
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    return (iz * mbiny * mbinx + iy * mbinx + ix + 1);
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
void binatoms(Atom *atom)
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
    int nall = atom->Nlocal + atom->Nghost;
 | 
					 | 
				
			||||||
    int resize = 1;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    while(resize > 0) {
 | 
					 | 
				
			||||||
        resize = 0;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        for(int i = 0; i < mbins; i++) {
 | 
					 | 
				
			||||||
            bincount[i] = 0;
 | 
					 | 
				
			||||||
        }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        for(int i = 0; i < nall; i++) {
 | 
					 | 
				
			||||||
            MD_FLOAT x = atom_x(i);
 | 
					 | 
				
			||||||
            MD_FLOAT y = atom_y(i);
 | 
					 | 
				
			||||||
            MD_FLOAT z = atom_z(i);
 | 
					 | 
				
			||||||
            int ibin = coord2bin(x, y, z);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
            if(bincount[ibin] < atoms_per_bin) {
 | 
					 | 
				
			||||||
                int ac = bincount[ibin]++;
 | 
					 | 
				
			||||||
                bins[ibin * atoms_per_bin + ac] = i;
 | 
					 | 
				
			||||||
            } else {
 | 
					 | 
				
			||||||
                resize = 1;
 | 
					 | 
				
			||||||
            }
 | 
					 | 
				
			||||||
        }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        if(resize) {
 | 
					 | 
				
			||||||
            free(bins);
 | 
					 | 
				
			||||||
            atoms_per_bin *= 2;
 | 
					 | 
				
			||||||
            bins = (int*) malloc(mbins * atoms_per_bin * sizeof(int));
 | 
					 | 
				
			||||||
        }
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
void sortAtom(Atom* atom) {
 | 
					 | 
				
			||||||
    binatoms(atom);
 | 
					 | 
				
			||||||
    int Nmax = atom->Nmax;
 | 
					 | 
				
			||||||
    int* binpos = bincount;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    for(int i=1; i<mbins; i++) {
 | 
					 | 
				
			||||||
        binpos[i] += binpos[i-1];
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
#ifdef AOS
 | 
					 | 
				
			||||||
    double* new_x = (double*) malloc(Nmax * sizeof(MD_FLOAT) * 3);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    double* new_vx = (double*) malloc(Nmax * sizeof(MD_FLOAT) * 3);
 | 
					 | 
				
			||||||
#else
 | 
					 | 
				
			||||||
    double* new_x = (double*) malloc(Nmax * sizeof(MD_FLOAT));
 | 
					 | 
				
			||||||
    double* new_y = (double*) malloc(Nmax * sizeof(MD_FLOAT));
 | 
					 | 
				
			||||||
    double* new_z = (double*) malloc(Nmax * sizeof(MD_FLOAT));
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    double* new_vx = (double*) malloc(Nmax * sizeof(MD_FLOAT));
 | 
					 | 
				
			||||||
    double* new_vy = (double*) malloc(Nmax * sizeof(MD_FLOAT));
 | 
					 | 
				
			||||||
    double* new_vz = (double*) malloc(Nmax * sizeof(MD_FLOAT));
 | 
					 | 
				
			||||||
#endif
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    double* old_x = atom->x; double* old_y = atom->y; double* old_z = atom->z;
 | 
					 | 
				
			||||||
    double* old_vx = atom->vx; double* old_vy = atom->vy; double* old_vz = atom->vz;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    for(int mybin = 0; mybin<mbins; mybin++) {
 | 
					 | 
				
			||||||
        int start = mybin>0?binpos[mybin-1]:0;
 | 
					 | 
				
			||||||
        int count = binpos[mybin] - start;
 | 
					 | 
				
			||||||
        for(int k=0; k<count; k++) {
 | 
					 | 
				
			||||||
            int new_i = start + k;
 | 
					 | 
				
			||||||
            int old_i = bins[mybin * atoms_per_bin + k];
 | 
					 | 
				
			||||||
#ifdef AOS
 | 
					 | 
				
			||||||
            new_x[new_i * 3 + 0] = old_x[old_i * 3 + 0];
 | 
					 | 
				
			||||||
            new_x[new_i * 3 + 1] = old_x[old_i * 3 + 1];
 | 
					 | 
				
			||||||
            new_x[new_i * 3 + 2] = old_x[old_i * 3 + 2];
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
            new_vx[new_i * 3 + 0] = old_vx[old_i * 3 + 0];
 | 
					 | 
				
			||||||
            new_vx[new_i * 3 + 1] = old_vy[old_i * 3 + 1];
 | 
					 | 
				
			||||||
            new_vx[new_i * 3 + 2] = old_vz[old_i * 3 + 2];
 | 
					 | 
				
			||||||
#else
 | 
					 | 
				
			||||||
            new_x[new_i] = old_x[old_i];
 | 
					 | 
				
			||||||
            new_y[new_i] = old_y[old_i];
 | 
					 | 
				
			||||||
            new_z[new_i] = old_z[old_i];
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
            new_vx[new_i] = old_vx[old_i];
 | 
					 | 
				
			||||||
            new_vy[new_i] = old_vy[old_i];
 | 
					 | 
				
			||||||
            new_vz[new_i] = old_vz[old_i];
 | 
					 | 
				
			||||||
#endif
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        }
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    free(atom->x);
 | 
					 | 
				
			||||||
    atom->x = new_x;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    free(atom->vx);
 | 
					 | 
				
			||||||
    atom->vx = new_vx;
 | 
					 | 
				
			||||||
#ifndef AOS
 | 
					 | 
				
			||||||
    free(atom->y);
 | 
					 | 
				
			||||||
    free(atom->z);
 | 
					 | 
				
			||||||
    atom->y = new_y; atom->z = new_z;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    free(atom->vy); free(atom->vz);
 | 
					 | 
				
			||||||
    atom->vy = new_vy; atom->vz = new_vz;
 | 
					 | 
				
			||||||
#endif
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
							
								
								
									
										172
									
								
								src/pbc.c
									
									
									
									
									
								
							
							
						
						
									
										172
									
								
								src/pbc.c
									
									
									
									
									
								
							@@ -1,172 +0,0 @@
 | 
				
			|||||||
/*
 | 
					 | 
				
			||||||
 * =======================================================================================
 | 
					 | 
				
			||||||
 *
 | 
					 | 
				
			||||||
 *   Author:   Jan Eitzinger (je), jan.eitzinger@fau.de
 | 
					 | 
				
			||||||
 *   Copyright (c) 2020 RRZE, University Erlangen-Nuremberg
 | 
					 | 
				
			||||||
 *
 | 
					 | 
				
			||||||
 *   This file is part of MD-Bench.
 | 
					 | 
				
			||||||
 *
 | 
					 | 
				
			||||||
 *   MD-Bench is free software: you can redistribute it and/or modify it
 | 
					 | 
				
			||||||
 *   under the terms of the GNU Lesser General Public License as published
 | 
					 | 
				
			||||||
 *   by the Free Software Foundation, either version 3 of the License, or
 | 
					 | 
				
			||||||
 *   (at your option) any later version.
 | 
					 | 
				
			||||||
 *
 | 
					 | 
				
			||||||
 *   MD-Bench is distributed in the hope that it will be useful, but WITHOUT ANY
 | 
					 | 
				
			||||||
 *   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
 | 
					 | 
				
			||||||
 *   PARTICULAR PURPOSE.  See the GNU Lesser General Public License for more
 | 
					 | 
				
			||||||
 *   details.
 | 
					 | 
				
			||||||
 *
 | 
					 | 
				
			||||||
 *   You should have received a copy of the GNU Lesser General Public License along
 | 
					 | 
				
			||||||
 *   with MD-Bench.  If not, see <https://www.gnu.org/licenses/>.
 | 
					 | 
				
			||||||
 * =======================================================================================
 | 
					 | 
				
			||||||
 */
 | 
					 | 
				
			||||||
#include <stdlib.h>
 | 
					 | 
				
			||||||
#include <stdio.h>
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
#include <pbc.h>
 | 
					 | 
				
			||||||
#include <atom.h>
 | 
					 | 
				
			||||||
#include <allocate.h>
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
#define DELTA 20000
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
static int NmaxGhost;
 | 
					 | 
				
			||||||
static int *PBCx, *PBCy, *PBCz;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
static void growPbc(Atom*);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
/* exported subroutines */
 | 
					 | 
				
			||||||
void initPbc(Atom* atom)
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
    NmaxGhost = 0;
 | 
					 | 
				
			||||||
    atom->border_map = NULL;
 | 
					 | 
				
			||||||
    PBCx = NULL; PBCy = NULL; PBCz = NULL;
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
/* update coordinates of ghost atoms */
 | 
					 | 
				
			||||||
/* uses mapping created in setupPbc */
 | 
					 | 
				
			||||||
void updatePbc(Atom *atom, Parameter *param)
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
    int *border_map = atom->border_map;
 | 
					 | 
				
			||||||
    int nlocal = atom->Nlocal;
 | 
					 | 
				
			||||||
    MD_FLOAT xprd = param->xprd;
 | 
					 | 
				
			||||||
    MD_FLOAT yprd = param->yprd;
 | 
					 | 
				
			||||||
    MD_FLOAT zprd = param->zprd;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    for(int i = 0; i < atom->Nghost; i++) {
 | 
					 | 
				
			||||||
        atom_x(nlocal + i) = atom_x(border_map[i]) + PBCx[i] * xprd;
 | 
					 | 
				
			||||||
        atom_y(nlocal + i) = atom_y(border_map[i]) + PBCy[i] * yprd;
 | 
					 | 
				
			||||||
        atom_z(nlocal + i) = atom_z(border_map[i]) + PBCz[i] * zprd;
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
/* relocate atoms that have left domain according
 | 
					 | 
				
			||||||
 * to periodic boundary conditions */
 | 
					 | 
				
			||||||
void updateAtomsPbc(Atom *atom, Parameter *param)
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
    MD_FLOAT xprd = param->xprd;
 | 
					 | 
				
			||||||
    MD_FLOAT yprd = param->yprd;
 | 
					 | 
				
			||||||
    MD_FLOAT zprd = param->zprd;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    for(int i = 0; i < atom->Nlocal; i++) {
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        if(atom_x(i) < 0.0) {
 | 
					 | 
				
			||||||
            atom_x(i) += xprd;
 | 
					 | 
				
			||||||
        } else if(atom_x(i) >= xprd) {
 | 
					 | 
				
			||||||
            atom_x(i) -= xprd;
 | 
					 | 
				
			||||||
        }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        if(atom_y(i) < 0.0) {
 | 
					 | 
				
			||||||
            atom_y(i) += yprd;
 | 
					 | 
				
			||||||
        } else if(atom_y(i) >= yprd) {
 | 
					 | 
				
			||||||
            atom_y(i) -= yprd;
 | 
					 | 
				
			||||||
        }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        if(atom_z(i) < 0.0) {
 | 
					 | 
				
			||||||
            atom_z(i) += zprd;
 | 
					 | 
				
			||||||
        } else if(atom_z(i) >= zprd) {
 | 
					 | 
				
			||||||
            atom_z(i) -= zprd;
 | 
					 | 
				
			||||||
        }
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
/* setup periodic boundary conditions by
 | 
					 | 
				
			||||||
 * defining ghost atoms around domain
 | 
					 | 
				
			||||||
 * only creates mapping and coordinate corrections
 | 
					 | 
				
			||||||
 * that are then enforced in updatePbc */
 | 
					 | 
				
			||||||
#define ADDGHOST(dx,dy,dz)                              \
 | 
					 | 
				
			||||||
    Nghost++;                                           \
 | 
					 | 
				
			||||||
    border_map[Nghost] = i;                             \
 | 
					 | 
				
			||||||
    PBCx[Nghost] = dx;                                  \
 | 
					 | 
				
			||||||
    PBCy[Nghost] = dy;                                  \
 | 
					 | 
				
			||||||
    PBCz[Nghost] = dz;                                  \
 | 
					 | 
				
			||||||
    atom->type[atom->Nlocal + Nghost] = atom->type[i]
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
void setupPbc(Atom *atom, Parameter *param)
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
    int *border_map = atom->border_map;
 | 
					 | 
				
			||||||
    MD_FLOAT xprd = param->xprd;
 | 
					 | 
				
			||||||
    MD_FLOAT yprd = param->yprd;
 | 
					 | 
				
			||||||
    MD_FLOAT zprd = param->zprd;
 | 
					 | 
				
			||||||
    MD_FLOAT Cutneigh = param->cutneigh;
 | 
					 | 
				
			||||||
    int Nghost = -1;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    for(int i = 0; i < atom->Nlocal; i++) {
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        if (atom->Nlocal + Nghost + 7 >= atom->Nmax) {
 | 
					 | 
				
			||||||
            growAtom(atom);
 | 
					 | 
				
			||||||
        }
 | 
					 | 
				
			||||||
        if (Nghost + 7 >= NmaxGhost) {
 | 
					 | 
				
			||||||
            growPbc(atom);
 | 
					 | 
				
			||||||
            border_map = atom->border_map;
 | 
					 | 
				
			||||||
        }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        MD_FLOAT x = atom_x(i);
 | 
					 | 
				
			||||||
        MD_FLOAT y = atom_y(i);
 | 
					 | 
				
			||||||
        MD_FLOAT z = atom_z(i);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        /* Setup ghost atoms */
 | 
					 | 
				
			||||||
        /* 6 planes */
 | 
					 | 
				
			||||||
        if (x < Cutneigh)         { ADDGHOST(+1,0,0); }
 | 
					 | 
				
			||||||
        if (x >= (xprd-Cutneigh)) { ADDGHOST(-1,0,0); }
 | 
					 | 
				
			||||||
        if (y < Cutneigh)         { ADDGHOST(0,+1,0); }
 | 
					 | 
				
			||||||
        if (y >= (yprd-Cutneigh)) { ADDGHOST(0,-1,0); }
 | 
					 | 
				
			||||||
        if (z < Cutneigh)         { ADDGHOST(0,0,+1); }
 | 
					 | 
				
			||||||
        if (z >= (zprd-Cutneigh)) { ADDGHOST(0,0,-1); }
 | 
					 | 
				
			||||||
        /* 8 corners */
 | 
					 | 
				
			||||||
        if (x < Cutneigh         && y < Cutneigh         && z < Cutneigh)         { ADDGHOST(+1,+1,+1); }
 | 
					 | 
				
			||||||
        if (x < Cutneigh         && y >= (yprd-Cutneigh) && z < Cutneigh)         { ADDGHOST(+1,-1,+1); }
 | 
					 | 
				
			||||||
        if (x < Cutneigh         && y >= Cutneigh        && z >= (zprd-Cutneigh)) { ADDGHOST(+1,+1,-1); }
 | 
					 | 
				
			||||||
        if (x < Cutneigh         && y >= (yprd-Cutneigh) && z >= (zprd-Cutneigh)) { ADDGHOST(+1,-1,-1); }
 | 
					 | 
				
			||||||
        if (x >= (xprd-Cutneigh) && y < Cutneigh         && z < Cutneigh)         { ADDGHOST(-1,+1,+1); }
 | 
					 | 
				
			||||||
        if (x >= (xprd-Cutneigh) && y >= (yprd-Cutneigh) && z < Cutneigh)         { ADDGHOST(-1,-1,+1); }
 | 
					 | 
				
			||||||
        if (x >= (xprd-Cutneigh) && y < Cutneigh         && z >= (zprd-Cutneigh)) { ADDGHOST(-1,+1,-1); }
 | 
					 | 
				
			||||||
        if (x >= (xprd-Cutneigh) && y >= (yprd-Cutneigh) && z >= (zprd-Cutneigh)) { ADDGHOST(-1,-1,-1); }
 | 
					 | 
				
			||||||
        /* 12 edges */
 | 
					 | 
				
			||||||
        if (x < Cutneigh         && z < Cutneigh)         { ADDGHOST(+1,0,+1); }
 | 
					 | 
				
			||||||
        if (x < Cutneigh         && z >= (zprd-Cutneigh)) { ADDGHOST(+1,0,-1); }
 | 
					 | 
				
			||||||
        if (x >= (xprd-Cutneigh) && z < Cutneigh)         { ADDGHOST(-1,0,+1); }
 | 
					 | 
				
			||||||
        if (x >= (xprd-Cutneigh) && z >= (zprd-Cutneigh)) { ADDGHOST(-1,0,-1); }
 | 
					 | 
				
			||||||
        if (y < Cutneigh         && z < Cutneigh)         { ADDGHOST(0,+1,+1); }
 | 
					 | 
				
			||||||
        if (y < Cutneigh         && z >= (zprd-Cutneigh)) { ADDGHOST(0,+1,-1); }
 | 
					 | 
				
			||||||
        if (y >= (yprd-Cutneigh) && z < Cutneigh)         { ADDGHOST(0,-1,+1); }
 | 
					 | 
				
			||||||
        if (y >= (yprd-Cutneigh) && z >= (zprd-Cutneigh)) { ADDGHOST(0,-1,-1); }
 | 
					 | 
				
			||||||
        if (y < Cutneigh         && x < Cutneigh)         { ADDGHOST(+1,+1,0); }
 | 
					 | 
				
			||||||
        if (y < Cutneigh         && x >= (xprd-Cutneigh)) { ADDGHOST(-1,+1,0); }
 | 
					 | 
				
			||||||
        if (y >= (yprd-Cutneigh) && x < Cutneigh)         { ADDGHOST(+1,-1,0); }
 | 
					 | 
				
			||||||
        if (y >= (yprd-Cutneigh) && x >= (xprd-Cutneigh)) { ADDGHOST(-1,-1,0); }
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
    // increase by one to make it the ghost atom count
 | 
					 | 
				
			||||||
    atom->Nghost = Nghost + 1;
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
/* internal subroutines */
 | 
					 | 
				
			||||||
void growPbc(Atom* atom)
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
    int nold = NmaxGhost;
 | 
					 | 
				
			||||||
    NmaxGhost += DELTA;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    atom->border_map = (int*) reallocate(atom->border_map, ALIGNMENT, NmaxGhost * sizeof(int), nold * sizeof(int));
 | 
					 | 
				
			||||||
    PBCx = (int*) reallocate(PBCx, ALIGNMENT, NmaxGhost * sizeof(int), nold * sizeof(int));
 | 
					 | 
				
			||||||
    PBCy = (int*) reallocate(PBCy, ALIGNMENT, NmaxGhost * sizeof(int), nold * sizeof(int));
 | 
					 | 
				
			||||||
    PBCz = (int*) reallocate(PBCz, ALIGNMENT, NmaxGhost * sizeof(int), nold * sizeof(int));
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
		Reference in New Issue
	
	Block a user