From f61f59ba3f643c62e596d3f170dd52b5495deb23 Mon Sep 17 00:00:00 2001
From: Martin Bauernfeind
Date: Mon, 11 Jul 2022 00:55:42 +0200
Subject: [PATCH] Fixed a compiler error and removed an unnecessary memcpy (from device to host) - performance seems to have crossed the 300M updates/second mark for the A100

---
 src/neighbor.cu | 2 +-
 src/pbc.cu | 4 ----
 2 files changed, 1 insertion(+), 5 deletions(-)

diff --git a/src/neighbor.cu b/src/neighbor.cu
index fa6e947..a3270dc 100644
--- a/src/neighbor.cu
+++ b/src/neighbor.cu
@@ -92,7 +92,7 @@ __global__ void sort_bin_contents_kernel(int* bincount, int* bins, int mbins, in
                 sorted = 0;
             }
         }
-    } while (!sorted)
+    } while (!sorted);
 }
 
 __global__ void binatoms_kernel(Atom a, int* bincount, int* bins, int atoms_per_bin, Neighbor_params np, int *resize_needed){
diff --git a/src/pbc.cu b/src/pbc.cu
index 2b04349..9589a4f 100644
--- a/src/pbc.cu
+++ b/src/pbc.cu
@@ -120,7 +120,6 @@ void updatePbc_cuda(Atom *atom, Parameter *param, Atom *c_atom, bool doReneighbo
         checkCUDAError( "updatePbc c_atom->border_map memcpy", cudaMemcpy(c_atom->border_map, atom->border_map, NmaxGhost * sizeof(int), cudaMemcpyHostToDevice) );
     }
 
-    int nlocal = atom->Nlocal;
     MD_FLOAT xprd = param->xprd;
     MD_FLOAT yprd = param->yprd;
     MD_FLOAT zprd = param->zprd;
@@ -133,9 +132,6 @@ void updatePbc_cuda(Atom *atom, Parameter *param, Atom *c_atom, bool doReneighbo
     computePbcUpdate<<>>(*c_atom, c_PBCx, c_PBCy, c_PBCz, xprd, yprd, zprd);
     checkCUDAError( "PeekAtLastError UpdatePbc", cudaPeekAtLastError() );
     checkCUDAError( "DeviceSync UpdatePbc", cudaDeviceSynchronize() );
-    if(doReneighbor){
-        checkCUDAError( "updatePbc atom->x memcpy back", cudaMemcpy(atom->x, c_atom->x, atom->Nmax * sizeof(MD_FLOAT) * 3, cudaMemcpyDeviceToHost) );
-    }
 }
 
 /* relocate atoms that have left domain according