diff --git a/src/force.cu b/src/force.cu index 0f06325..39556ce 100644 --- a/src/force.cu +++ b/src/force.cu @@ -153,7 +153,6 @@ void cuda_initial_integrate(bool doReneighbour, Parameter *param, Atom *atom, At if(doReneighbour) { checkCUDAError( "InitialIntegrate: velocity memcpy", cudaMemcpy(atom->vx, c_atom->vx, sizeof(MD_FLOAT) * atom->Nlocal * 3, cudaMemcpyDeviceToHost) ); - checkCUDAError( "InitialIntegrate: position memcpy", cudaMemcpy(atom->x, c_atom->x, sizeof(MD_FLOAT) * atom->Nlocal * 3, cudaMemcpyDeviceToHost) ); } } diff --git a/src/includes/pbc.h b/src/includes/pbc.h index 10234be..9c04ca6 100644 --- a/src/includes/pbc.h +++ b/src/includes/pbc.h @@ -29,5 +29,6 @@ extern void initPbc(Atom*); extern void updatePbc(Atom*, Parameter*); extern void updatePbc_cuda(Atom*, Parameter*, Atom*, bool, const int); extern void updateAtomsPbc(Atom*, Parameter*); +extern void updateAtomsPbc_cuda(Atom*, Parameter*, Atom*, const int); extern void setupPbc(Atom*, Parameter*); #endif diff --git a/src/main.c b/src/main.c index 1cdc09f..54c9645 100644 --- a/src/main.c +++ b/src/main.c @@ -164,7 +164,7 @@ double reneighbour( S = getTimeStamp(); LIKWID_MARKER_START("reneighbour"); - updateAtomsPbc(atom, param); + updateAtomsPbc_cuda(atom, param, c_atom, num_threads_per_block); setupPbc(atom, param); updatePbc_cuda(atom, param, c_atom, true, num_threads_per_block); //sortAtom(atom); diff --git a/src/pbc.cu b/src/pbc.cu index 9589a4f..b94033f 100644 --- a/src/pbc.cu +++ b/src/pbc.cu @@ -33,6 +33,32 @@ extern "C" { } +__global__ void computeAtomsPbcUpdate(Atom a, MD_FLOAT xprd, MD_FLOAT yprd, MD_FLOAT zprd){ + const int i = blockIdx.x * blockDim.x + threadIdx.x; + Atom* atom = &a; + if( i >= atom->Nlocal ){ + return; + } + + if (atom_x(i) < 0.0) { + atom_x(i) += xprd; + } else if (atom_x(i) >= xprd) { + atom_x(i) -= xprd; + } + + if (atom_y(i) < 0.0) { + atom_y(i) += yprd; + } else if (atom_y(i) >= yprd) { + atom_y(i) -= yprd; + } + + if (atom_z(i) < 0.0) { + atom_z(i) += zprd; + } else if (atom_z(i) >= zprd) { + atom_z(i) -= zprd; + } +} + __global__ void computePbcUpdate(Atom a, int* PBCx, int* PBCy, int* PBCz, MD_FLOAT xprd, MD_FLOAT yprd, MD_FLOAT zprd){ const int i = blockIdx.x * blockDim.x + threadIdx.x; const int Nghost = a.Nghost; @@ -163,6 +189,21 @@ void updateAtomsPbc(Atom *atom, Parameter *param) { } } +void updateAtomsPbc_cuda(Atom* atom, Parameter* param, Atom* c_atom, const int num_threads_per_block){ + MD_FLOAT xprd = param->xprd; + MD_FLOAT yprd = param->yprd; + MD_FLOAT zprd = param->zprd; + + const int num_blocks = ceil((float)atom->Nlocal / (float)num_threads_per_block); + /*void computeAtomsPbcUpdate(Atom a, MD_FLOAT xprd, MD_FLOAT yprd, MD_FLOAT zprd)*/ + computeAtomsPbcUpdate<<>>(*c_atom, xprd, yprd, zprd); + + checkCUDAError( "PeekAtLastError UpdateAtomsPbc", cudaPeekAtLastError() ); + checkCUDAError( "DeviceSync UpdateAtomsPbc", cudaDeviceSynchronize() ); + + checkCUDAError( "updateAtomsPbc position memcpy back", cudaMemcpy(atom->x, c_atom->x, sizeof(MD_FLOAT) * atom->Nlocal * 3, cudaMemcpyDeviceToHost) ); +} + /* setup periodic boundary conditions by * defining ghost atoms around domain * only creates mapping and coordinate corrections