Fixed a compiler error and removed an unnecessary memcpy (from device to host) - performance seems to have crossed the 300M updates/second mark for the A100

This commit is contained in:
Martin Bauernfeind 2022-07-11 00:55:42 +02:00
parent d1c2249b55
commit f61f59ba3f
2 changed files with 1 additions and 5 deletions

View File

@ -92,7 +92,7 @@ __global__ void sort_bin_contents_kernel(int* bincount, int* bins, int mbins, in
sorted = 0; sorted = 0;
} }
} }
} while (!sorted) } while (!sorted);
} }
__global__ void binatoms_kernel(Atom a, int* bincount, int* bins, int atoms_per_bin, Neighbor_params np, int *resize_needed){ __global__ void binatoms_kernel(Atom a, int* bincount, int* bins, int atoms_per_bin, Neighbor_params np, int *resize_needed){

View File

@ -120,7 +120,6 @@ void updatePbc_cuda(Atom *atom, Parameter *param, Atom *c_atom, bool doReneighbo
checkCUDAError( "updatePbc c_atom->border_map memcpy", cudaMemcpy(c_atom->border_map, atom->border_map, NmaxGhost * sizeof(int), cudaMemcpyHostToDevice) ); checkCUDAError( "updatePbc c_atom->border_map memcpy", cudaMemcpy(c_atom->border_map, atom->border_map, NmaxGhost * sizeof(int), cudaMemcpyHostToDevice) );
} }
int nlocal = atom->Nlocal;
MD_FLOAT xprd = param->xprd; MD_FLOAT xprd = param->xprd;
MD_FLOAT yprd = param->yprd; MD_FLOAT yprd = param->yprd;
MD_FLOAT zprd = param->zprd; MD_FLOAT zprd = param->zprd;
@ -133,9 +132,6 @@ void updatePbc_cuda(Atom *atom, Parameter *param, Atom *c_atom, bool doReneighbo
computePbcUpdate<<<num_blocks, num_threads_per_block>>>(*c_atom, c_PBCx, c_PBCy, c_PBCz, xprd, yprd, zprd); computePbcUpdate<<<num_blocks, num_threads_per_block>>>(*c_atom, c_PBCx, c_PBCy, c_PBCz, xprd, yprd, zprd);
checkCUDAError( "PeekAtLastError UpdatePbc", cudaPeekAtLastError() ); checkCUDAError( "PeekAtLastError UpdatePbc", cudaPeekAtLastError() );
checkCUDAError( "DeviceSync UpdatePbc", cudaDeviceSynchronize() ); checkCUDAError( "DeviceSync UpdatePbc", cudaDeviceSynchronize() );
if(doReneighbor){
checkCUDAError( "updatePbc atom->x memcpy back", cudaMemcpy(atom->x, c_atom->x, atom->Nmax * sizeof(MD_FLOAT) * 3, cudaMemcpyDeviceToHost) );
}
} }
/* relocate atoms that have left domain according /* relocate atoms that have left domain according