Make integrate kernels aware of neighbour list update
parent 7b592b5fc7
commit c4080e866e

 src/force.cu | 12
@@ -157,7 +157,6 @@ void cuda_final_integrate(bool doReneighbour, Parameter *param, Atom *atom) {
     if(doReneighbour) {
         checkCUDAError( "FinalIntegrate: velocity memcpy", cudaMemcpy(atom->vx, c_atom.vx, sizeof(MD_FLOAT) * atom->Nlocal * 3, cudaMemcpyDeviceToHost) );
-        checkCUDAError( "FinalIntegrate: position memcpy", cudaMemcpy(atom->x, c_atom.x, sizeof(MD_FLOAT) * atom->Nlocal * 3, cudaMemcpyDeviceToHost) );
    }
 }
 
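Note: cuda_final_integrate only launches the second velocity half-kick of the integrator, so device-side positions are untouched and the position copy-back was dead weight. A minimal sketch of the kernel this wrapper plausibly launches (hypothetical name and signature, velocity-Verlet and the AOS layout with interleaved x,y,z triples assumed; the actual MD-Bench kernel may differ):

    // Hypothetical sketch of the device kernel behind cuda_final_integrate.
    // Only velocities are written, which is why the host no longer needs
    // to copy positions back after this step.
    __global__ void kernel_final_integrate(MD_FLOAT dtforce, int Nlocal,
                                           MD_FLOAT *vx, const MD_FLOAT *fx) {
        const int i = blockIdx.x * blockDim.x + threadIdx.x;
        if(i < Nlocal) {
            // Second velocity-Verlet half-kick: v(t + dt) += dtforce * f(t + dt)
            vx[3 * i + 0] += dtforce * fx[3 * i + 0];
            vx[3 * i + 1] += dtforce * fx[3 * i + 1];
            vx[3 * i + 2] += dtforce * fx[3 * i + 2];
        }
    }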
@@ -174,7 +173,9 @@ void cuda_initial_integrate(bool doReneighbour, Parameter *param, Atom *atom) {
     checkCUDAError( "PeekAtLastError InitialIntegrate", cudaPeekAtLastError() );
     checkCUDAError( "DeviceSync InitialIntegrate", cudaDeviceSynchronize() );
 
-    checkCUDAError( "InitialIntegrate: velocity memcpy", cudaMemcpy(atom->vx, c_atom.vx, sizeof(MD_FLOAT) * atom->Nlocal * 3, cudaMemcpyDeviceToHost) );
+    if(doReneighbour) {
+        checkCUDAError( "InitialIntegrate: velocity memcpy", cudaMemcpy(atom->vx, c_atom.vx, sizeof(MD_FLOAT) * atom->Nlocal * 3, cudaMemcpyDeviceToHost) );
+    }
     checkCUDAError( "InitialIntegrate: position memcpy", cudaMemcpy(atom->x, c_atom.x, sizeof(MD_FLOAT) * atom->Nlocal * 3, cudaMemcpyDeviceToHost) );
 }
 
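The first half-step updates both velocities and positions, so positions are still copied back unconditionally (downstream host code reads them every step), while the host only consumes velocities when the neighbour list is rebuilt; hence the new doReneighbour guard. A hedged sketch of the corresponding kernel (hypothetical name, same assumptions as above):

    // Hypothetical sketch of the kernel behind cuda_initial_integrate:
    // first half-kick on the velocities, then the position drift.
    __global__ void kernel_initial_integrate(MD_FLOAT dtforce, MD_FLOAT dt,
                                             int Nlocal, MD_FLOAT *x,
                                             MD_FLOAT *vx, const MD_FLOAT *fx) {
        const int i = blockIdx.x * blockDim.x + threadIdx.x;
        if(i < Nlocal) {
            for(int d = 0; d < 3; d++) {
                vx[3 * i + d] += dtforce * fx[3 * i + d]; // half-kick
                x[3 * i + d]  += dt * vx[3 * i + d];      // drift
            }
        }
    }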
@@ -183,9 +184,13 @@ void initCudaAtom(Atom *atom, Neighbor *neighbor) {
     const int Nlocal = atom->Nlocal;
 
     checkCUDAError( "c_atom.x malloc", cudaMalloc((void**)&(c_atom.x), sizeof(MD_FLOAT) * atom->Nmax * 3) );
+    checkCUDAError( "c_atom.x memcpy", cudaMemcpy(c_atom.x, atom->x, sizeof(MD_FLOAT) * atom->Nmax * 3, cudaMemcpyHostToDevice) );
+
     checkCUDAError( "c_atom.fx malloc", cudaMalloc((void**)&(c_atom.fx), sizeof(MD_FLOAT) * Nlocal * 3) );
+
     checkCUDAError( "c_atom.vx malloc", cudaMalloc((void**)&(c_atom.vx), sizeof(MD_FLOAT) * Nlocal * 3) );
     checkCUDAError( "c_atom.vx memcpy", cudaMemcpy(c_atom.vx, atom->vx, sizeof(MD_FLOAT) * Nlocal * 3, cudaMemcpyHostToDevice) );
+
     checkCUDAError( "c_atom.type malloc", cudaMalloc((void**)&(c_atom.type), sizeof(int) * atom->Nmax) );
     checkCUDAError( "c_atom.epsilon malloc", cudaMalloc((void**)&(c_atom.epsilon), sizeof(MD_FLOAT) * atom->ntypes * atom->ntypes) );
     checkCUDAError( "c_atom.sigma6 malloc", cudaMalloc((void**)&(c_atom.sigma6), sizeof(MD_FLOAT) * atom->ntypes * atom->ntypes) );
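Every allocation and copy above is routed through checkCUDAError. Its definition is not part of this diff; a minimal sketch of such a helper matching the call sites (an assumption, not the file's actual code):

    #include <cstdio>
    #include <cstdlib>
    #include <cuda_runtime.h>

    // Minimal sketch of a checkCUDAError helper: report the failing action
    // together with CUDA's error string, then abort.
    void checkCUDAError(const char *action, cudaError_t err) {
        if(err != cudaSuccess) {
            fprintf(stderr, "CUDA error during \"%s\": %s\n",
                    action, cudaGetErrorString(err));
            exit(EXIT_FAILURE);
        }
    }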
@@ -235,13 +240,8 @@ double computeForce(
     }
     */
 
-    // Choose GPU where you want to execute code on
-    // It is possible to execute the same kernel on multiple GPUs but you have to copy the data multiple times
-    // Executing `cudaSetDevice(N)` before cudaMalloc / cudaMemcpy / calc_force <<< >>> will set the GPU accordingly
-
     // HINT: Run with cuda-memcheck ./MDBench-NVCC in case of error
-    // HINT: Only works for data layout = AOS!!!
 
     // checkCUDAError( "c_atom.fx memset", cudaMemset(c_atom.fx, 0, sizeof(MD_FLOAT) * Nlocal * 3) );
 
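The deleted comments documented multi-GPU device selection. For reference, a sketch of the pattern they described, using only the standard CUDA runtime API:

    // cudaSetDevice(N) must precede the cudaMalloc / cudaMemcpy calls and
    // kernel launches that should target GPU N.
    int ndevices = 0;
    checkCUDAError( "device count", cudaGetDeviceCount(&ndevices) );
    if(ndevices > 1) {
        checkCUDAError( "set device", cudaSetDevice(1) ); // use the second GPU
    }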
@@ -280,7 +280,7 @@ int main(int argc, char** argv)
 
         const bool doReneighbour = (n + 1) % param.every == 0;
 
-        cuda_initial_integrate(doReneighbour, &param, &atom); // initialIntegrate(&param, &atom);
+        cuda_initial_integrate(doReneighbour, &param, &atom);
 
         if(doReneighbour) {
             timer[NEIGH] += reneighbour(&param, &atom, &neighbor);
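As a worked example of the cadence: with param.every = 20, the guard (n + 1) % param.every == 0 holds at n = 19, 39, 59, ..., so the neighbour list is rebuilt on every 20th timestep counting from 1; on all other steps the integrate wrappers now skip the device-to-host velocity copies entirely.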
@@ -297,7 +297,8 @@ int main(int argc, char** argv)
             timer[FORCE] += computeForce(doReneighbour, &param, &atom, &neighbor);
 #endif
         }
-        cuda_final_integrate(doReneighbour, &param, &atom); // finalIntegrate(&param, &atom);
+
+        cuda_final_integrate(doReneighbour, &param, &atom);
 
         if(!((n + 1) % param.nstat) && (n+1) < param.ntimes) {
             computeThermo(n + 1, &param, &atom);
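Taken together, the per-timestep data flow after this commit, condensed from the hunks above (names taken from the diff; surrounding setup elided):

    // Per step: initial-integrate kernel plus position copy-back, then the
    // force kernel. Only on reneighbour steps: velocity copy-backs and the
    // host-side neighbour-list rebuild.
    for(int n = 0; n < param.ntimes; n++) {
        const bool doReneighbour = (n + 1) % param.every == 0;
        cuda_initial_integrate(doReneighbour, &param, &atom); // x D2H always, vx D2H if reneighbour
        if(doReneighbour) {
            timer[NEIGH] += reneighbour(&param, &atom, &neighbor);
        }
        timer[FORCE] += computeForce(doReneighbour, &param, &atom, &neighbor);
        cuda_final_integrate(doReneighbour, &param, &atom);   // vx D2H only if reneighbour
    }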