diff --git a/src/force.cu b/src/force.cu index 6f27d56..5862e9b 100644 --- a/src/force.cu +++ b/src/force.cu @@ -201,7 +201,7 @@ double computeForce( if(!reneighbourHappenend) { - checkCUDAError( "c_atom.x memcpy", cudaMemcpy(c_atom.x, atom->x, sizeof(MD_FLOAT) * atom->Nmax * 3, cudaMemcpyHostToDevice) ); + checkCUDAError( "computeForce c_atom->x memcpy", cudaMemcpy(c_atom->x, atom->x, sizeof(MD_FLOAT) * atom->Nmax * 3, cudaMemcpyHostToDevice) ); } const int num_blocks = ceil((float)Nlocal / (float)num_threads_per_block); @@ -221,4 +221,4 @@ double computeForce( return E-S; } -} \ No newline at end of file +} diff --git a/src/main.c b/src/main.c index 30fb374..170d581 100644 --- a/src/main.c +++ b/src/main.c @@ -136,10 +136,10 @@ double setup( adjustThermo(param, atom); setupPbc(atom, param); updatePbc(atom, param); + initCudaAtom(atom, neighbor, c_atom, c_neighbor); buildNeighbor_cuda(atom, neighbor, c_atom, c_neighbor, num_threads_per_block); E = getTimeStamp(); - initCudaAtom(atom, neighbor, c_atom, c_neighbor); return E-S; } @@ -160,7 +160,7 @@ double reneighbour( setupPbc(atom, param); updatePbc(atom, param); //sortAtom(atom); - buildNeighbor(atom, neighbor, c_atom, c_neighbor, num_threads_per_block); + buildNeighbor_cuda(atom, neighbor, c_atom, c_neighbor, num_threads_per_block); LIKWID_MARKER_STOP("reneighbour"); E = getTimeStamp(); @@ -218,7 +218,7 @@ int get_num_threads() { const char *num_threads_env = getenv("NUM_THREADS"); int num_threads = 0; - if(num_threads_env == nullptr) + if(num_threads_env == 0) num_threads = 32; else { num_threads = atoi(num_threads_env); diff --git a/src/neighbor.cu b/src/neighbor.cu index feb02b5..05f5404 100644 --- a/src/neighbor.cu +++ b/src/neighbor.cu @@ -23,6 +23,9 @@ #include #include #include +#include +#include +#include extern "C" { @@ -68,7 +71,7 @@ __device__ int coord2bin_device(MD_FLOAT xin, MD_FLOAT yin, MD_FLOAT zin, } __global__ void compute_neighborhood(Atom a, Neighbor neigh, Neighbor_params np, int nstencil, int* stencil, - int* bins, int atoms_per_bin, int *bincount, int *new_maxneighs){ + int* bins, int atoms_per_bin, int *bincount, int *new_maxneighs, MD_FLOAT cutneighsq){ const int i = blockIdx.x * blockDim.x + threadIdx.x; const int Nlocal = a.Nlocal; if( i >= Nlocal ) { @@ -83,7 +86,7 @@ __global__ void compute_neighborhood(Atom a, Neighbor neigh, Neighbor_params np, MD_FLOAT xtmp = atom_x(i); MD_FLOAT ytmp = atom_y(i); MD_FLOAT ztmp = atom_z(i); - int ibin = coord2bin_device(xtmp, ytmp, ztmp, Neighbor_params np); + int ibin = coord2bin_device(xtmp, ytmp, ztmp, np); #ifdef EXPLICIT_TYPES int type_i = atom->type[i]; #endif @@ -541,7 +544,7 @@ void buildNeighbor_cuda(Atom *atom, Neighbor *neighbor, Atom *c_atom, Neighbor * cudaProfilerStart(); - checkCUDAError( "c_atom->x memcpy", cudaMemcpy(c_atom->x, atom->x, sizeof(MD_FLOAT) * atom->Nmax * 3, cudaMemcpyHostToDevice) ); + checkCUDAError( "buildNeighbor c_atom->x memcpy", cudaMemcpy(c_atom->x, atom->x, sizeof(MD_FLOAT) * atom->Nmax * 3, cudaMemcpyHostToDevice) ); /* upload stencil */ int* c_stencil; @@ -589,10 +592,11 @@ void buildNeighbor_cuda(Atom *atom, Neighbor *neighbor, Atom *c_atom, Neighbor * /*compute_neighborhood(Atom a, Neighbor neigh, Neighbor_params np, int nstencil, int* stencil, int* bins, int atoms_per_bin, int *bincount, int *new_maxneighs) * */ - compute_neighborhood<<>>(*c_Atom, *c_neighbor, + compute_neighborhood<<>>(*c_atom, *c_neighbor, np, nstencil, c_stencil, c_bins, atoms_per_bin, c_bincount, - c_new_maxneighs); + c_new_maxneighs, + cutneighsq); // TODO copy the value of c_new_maxneighs back to host and check if it has been modified int new_maxneighs; @@ -616,8 +620,8 @@ void buildNeighbor_cuda(Atom *atom, Neighbor *neighbor, Atom *c_atom, Neighbor * cudaProfilerStop(); cudaFree(c_new_maxneighs); - cudaFree(c_n_stencil); + cudaFree(c_stencil); cudaFree(c_bincount); cudaFree(c_bins); } -} \ No newline at end of file +}