The program now does the binning on the GPU via the binatoms_cuda method
parent fa409c016c
commit 0967e8f671
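Context for the diff that follows: binatoms_cuda bins the local and ghost atoms on the device and, whenever a bin overflows, signals the host through a device-resident "resize needed" flag so the bin arrays can be regrown and the kernel re-launched. The self-contained sketch below illustrates only that host/device handshake; fill_kernel, d_count, d_resize and capacity are hypothetical names invented for the illustration, not code from this commit.

    // Illustrative sketch of the overflow-flag pattern used by binatoms_cuda.
    #include <cuda_runtime.h>
    #include <cstdio>

    __global__ void fill_kernel(int* count, int capacity, int n, int* resize_needed)
    {
        int i = blockIdx.x * blockDim.x + threadIdx.x;
        if(i >= n) return;
        int slot = atomicAdd(count, 1);     // claim a slot in the (single) bin
        if(slot >= capacity) {
            *resize_needed = 1;             // bin is full: ask the host to regrow it
        }
    }

    int main()
    {
        int n = 10000, capacity = 1024;
        int *d_count, *d_resize;
        cudaMalloc(&d_count, sizeof(int));
        cudaMalloc(&d_resize, sizeof(int));

        int resize = 1;
        while(resize) {                     // same retry shape as binatoms_cuda's while(resize > 0)
            resize = 0;
            cudaMemset(d_count, 0, sizeof(int));
            cudaMemset(d_resize, 0, sizeof(int));
            int threads = 256;
            int blocks = (n + threads - 1) / threads;
            fill_kernel<<<blocks, threads>>>(d_count, capacity, n, d_resize);
            cudaMemcpy(&resize, d_resize, sizeof(int), cudaMemcpyDeviceToHost);
            if(resize) capacity *= 2;       // regrow and re-run, as the real code regrows its bins
        }
        printf("final capacity: %d\n", capacity);
        cudaFree(d_count);
        cudaFree(d_resize);
        return 0;
    }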
@@ -53,6 +53,6 @@ extern void setupNeighbor();
 extern void binatoms(Atom*);
 extern void buildNeighbor(Atom*, Neighbor*);
 extern void sortAtom(Atom*);
-extern void binatoms_cuda(Atom*, Binning*, int**, Neighbor_params*, const int);
+extern void binatoms_cuda(Atom*, Binning*, int*, Neighbor_params*, const int);
 extern void buildNeighbor_cuda(Atom*, Neighbor*, Atom*, Neighbor*, const int);
 #endif
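The declaration change above removes a level of indirection: the caller now allocates the device-side resize flag itself and passes binatoms_cuda a plain device int*. A hedged caller-side sketch, using the identifiers (c_atom, c_binning, np, num_threads_per_block) that appear in the src/neighbor.cu hunks below:

    int* c_resize_needed;
    checkCUDAError("buildNeighbor c_resize_needed malloc", cudaMalloc((void**)&c_resize_needed, sizeof(int)) );

    /* bin local & ghost atoms on the device */
    binatoms_cuda(c_atom, &c_binning, c_resize_needed, &np, num_threads_per_block);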
103 src/neighbor.cu
@@ -539,7 +539,7 @@ void sortAtom(Atom* atom) {
 #endif
 }
 
-void binatoms_cuda(Atom* c_atom, Binning* c_binning, int** c_resize_needed, NeighborParams *np, const int threads_per_block)
+void binatoms_cuda(Atom* c_atom, Binning* c_binning, int* c_resize_needed, NeighborParams *np, const int threads_per_block)
 {
     int nall = c_atom->Nlocal + c_atom->Nghost;
     int resize = 1;
@@ -549,21 +549,18 @@ void binatoms_cuda(Atom* c_atom, Binning* c_binning, int** c_resize_needed, Neig
     if(c_binning->bins == NULL){
         checkCUDAError("binatoms_cuda c_binning->bins malloc", cudaMalloc((void**)(&c_binning->bins), c_binning->mbins * c_binning->atoms_per_bin * sizeof(int)) );
     }
-    if(*c_resize_needed == NULL){
-        checkCUDAError("binatoms_cuda c_resize_needed malloc", cudaMalloc(&c_resize_needed, sizeof(int)) );
-    }
 
     const int num_blocks = ceil((float)nall / (float)num_threads_per_block);
 
     while(resize > 0) {
         resize = 0;
         checkCUDAError("binatoms_cuda c_binning->bincount memset", cudaMemset(c_binning->bincount, 0, c_binning->mbins * sizeof(int)));
-        checkCUDAError("binatoms_cuda c_resize_needed memset", cudaMemset(*c_resize_needed, 0, sizeof(int)) );
+        checkCUDAError("binatoms_cuda c_resize_needed memset", cudaMemset(c_resize_needed, 0, sizeof(int)) );
 
         /*binatoms_kernel(Atom a, int* bincount, int* bins, int c_binning->atoms_per_bin, Neighbor_params np, int *resize_needed) */
-        binatoms_kernel<<<num_blocks, threads_per_block>>>(*c_atom, c_binning->bincount, c_binning->bins, c_binning->atoms_per_bin, *np, *c_resize_needed);
+        binatoms_kernel<<<num_blocks, threads_per_block>>>(*c_atom, c_binning->bincount, c_binning->bins, c_binning->atoms_per_bin, *np, c_resize_needed);
 
-        checkCUDAError("binatoms_cuda c_resize_needed memcpy back", cudaMemcpy(&resize, *c_resize_needed, sizeof(int), cudaMemcpyDeviceToHost) );
+        checkCUDAError("binatoms_cuda c_resize_needed memcpy back", cudaMemcpy(&resize, c_resize_needed, sizeof(int), cudaMemcpyDeviceToHost) );
 
         if(resize) {
             cudaFree(c_binning->bins);
@@ -576,9 +573,50 @@ void binatoms_cuda(Atom* c_atom, Binning* c_binning, int** c_resize_needed, Neig
 void buildNeighbor_cuda(Atom *atom, Neighbor *neighbor, Atom *c_atom, Neighbor *c_neighbor, const int num_threads_per_block)
 {
     int nall = atom->Nlocal + atom->Nghost;
 
     c_neighbor->maxneighs = neighbor->maxneighs;
 
+    cudaProfilerStart();
+    /* upload stencil */
+    int* c_stencil;
+    // TODO move this to be done once at the start
+    checkCUDAError( "buildNeighbor c_n_stencil malloc", cudaMalloc((void**)&c_stencil, nstencil * sizeof(int)) );
+    checkCUDAError( "buildNeighbor c_n_stencil memcpy", cudaMemcpy(c_stencil, stencil, nstencil * sizeof(int), cudaMemcpyHostToDevice ));
+
+    Binning c_binning;
+    c_binning->mbins = mbins;
+    c_binning->atoms_per_bin = atoms_per_bin;
+    checkCUDAError( "buildNeighbor c_binning->bincount malloc", cudaMalloc((void**)&(c_binning->bincount), mbins * sizeof(int)) );
+    checkCUDAError( "buidlNeighbor c_binning->bins malloc", cudaMalloc((void**)&(c_binning->bins), c_binning->mbins * c_binning->atoms_per_bin * sizeof(int)) );
+
+    Neighbor_params np{
+        .xprd = xprd,
+        .yprd = yprd,
+        .zprd = zprd,
+        .bininvx = bininvx,
+        .bininvy = bininvy,
+        .bininvz = bininvz,
+        .mbinxlo = mbinxlo,
+        .mbinylo = mbinylo,
+        .mbinzlo = mbinzlo,
+        .nbinx = nbinx,
+        .nbiny = nbiny,
+        .nbinz = nbinz,
+        .mbinx = mbinx,
+        .mbiny = mbiny,
+        .mbinz = mbinz
+    };
+
+    int* c_resize_needed;
+    checkCUDAError("buildNeighbor c_resize_needed malloc", cudaMalloc((void**)&c_resize_needed, sizeof(int)) );
+
+    /* bin local & ghost atoms */
+    binatoms_cuda(c_atom, &c_binning, c_resize_needed, &np, num_threads_per_block);
+
+    int* c_new_maxneighs;
+    checkCUDAError("c_new_maxneighs malloc", cudaMalloc((void**)&c_new_maxneighs, sizeof(int) ));
+
+    int resize = 1;
+
     /* extend c_neighbor arrays if necessary */
     if(nall > nmax) {
         nmax = nall;
@@ -588,49 +626,6 @@ void buildNeighbor_cuda(Atom *atom, Neighbor *neighbor, Atom *c_atom, Neighbor *
         checkCUDAError( "buildNeighbor c_neighbors malloc", cudaMalloc((void**)&(c_neighbor->neighbors), nmax * c_neighbor->maxneighs * sizeof(int)) );
     }
 
-    checkCUDAError( "buildNeighbor c_atom->x memcpy back", cudaMemcpy(atom->x, c_atom->x, sizeof(MD_FLOAT) * 3 * c_atom->Nmax, cudaMemcpyDeviceToHost) );
-
-    /* bin local & ghost atoms */
-    binatoms(atom);
-    int resize = 1;
-
-    cudaProfilerStart();
-
-    /* upload stencil */
-    int* c_stencil;
-    // TODO move this to be done once at the start
-    checkCUDAError( "buildNeighbor c_n_stencil malloc", cudaMalloc((void**)&c_stencil, nstencil * sizeof(int)) );
-    checkCUDAError( "buildNeighbor c_n_stencil memcpy", cudaMemcpy(c_stencil, stencil, nstencil * sizeof(int), cudaMemcpyHostToDevice ));
-
-    int *c_bincount;
-    checkCUDAError( "buildNeighbor c_bincount malloc", cudaMalloc((void**)&c_bincount, mbins * sizeof(int)) );
-    checkCUDAError( "buildNeighbor c_bincount memcpy", cudaMemcpy(c_bincount, bincount, mbins * sizeof(int), cudaMemcpyHostToDevice) );
-
-    int *c_bins;
-    checkCUDAError( "buidlNeighbor c_bins malloc", cudaMalloc((void**)&c_bins, mbins * atoms_per_bin * sizeof(int)) );
-    checkCUDAError( "buildNeighbor c_bins memcpy", cudaMemcpy(c_bins, bins, mbins * atoms_per_bin * sizeof(int), cudaMemcpyHostToDevice ) );
-
-    Neighbor_params np{
-        .xprd = xprd,
-        .yprd = yprd,
-        .zprd = zprd,
-        .bininvx = bininvx,
-        .bininvy = bininvy,
-        .bininvz = bininvz,
-        .mbinxlo = mbinxlo,
-        .mbinylo = mbinylo,
-        .mbinzlo = mbinzlo,
-        .nbinx = nbinx,
-        .nbiny = nbiny,
-        .nbinz = nbinz,
-        .mbinx = mbinx,
-        .mbiny = mbiny,
-        .mbinz = mbinz
-    };
-
-    int* c_new_maxneighs;
-    checkCUDAError("c_new_maxneighs malloc", cudaMalloc((void**)&c_new_maxneighs, sizeof(int) ));
-
     /* loop over each atom, storing neighbors */
     while(resize) {
         resize = 0;
@@ -644,7 +639,7 @@ void buildNeighbor_cuda(Atom *atom, Neighbor *neighbor, Atom *c_atom, Neighbor *
          * */
         compute_neighborhood<<<num_blocks, num_threads_per_block>>>(*c_atom, *c_neighbor,
                                                                      np, nstencil, c_stencil,
-                                                                     c_bins, atoms_per_bin, c_bincount,
+                                                                     c_binning->bins, atoms_per_bin, c_binning->bincount,
                                                                      c_new_maxneighs,
                                                                      cutneighsq);
 
@@ -675,7 +670,7 @@ void buildNeighbor_cuda(Atom *atom, Neighbor *neighbor, Atom *c_atom, Neighbor *
 
     cudaFree(c_new_maxneighs);
     cudaFree(c_stencil);
-    cudaFree(c_bincount);
-    cudaFree(c_bins);
+    cudaFree(c_binning->bincount);
+    cudaFree(c_binning->bins);
     }
 }