Fix GPU version
Signed-off-by: Rafael Ravedutti <rafaelravedutti@gmail.com>
This commit is contained in:
		| @@ -59,14 +59,15 @@ void *reallocate(void* ptr, int alignment, size_t newBytesize, size_t oldBytesiz | ||||
|  | ||||
|     return newarray; | ||||
| } | ||||
|  | ||||
| #ifndef CUDA_TARGET | ||||
| #ifdef CUDA_TARGET | ||||
| void *allocate_gpu(int alignment, size_t bytesize) { return NULL; } | ||||
| void *reallocate_gpu(void *ptr, int alignment, size_t newBytesize, size_t oldBytesize) { return NULL; } | ||||
| #else | ||||
| #include <cuda_runtime.h> | ||||
| #include <cuda_atom.h> | ||||
| void *allocate_gpu(int alignment, size_t bytesize) { | ||||
|     void *ptr; | ||||
|     checkCUDAError("allocate_gpu", cudaMallocHost((void **) &ptr, bytesize)); | ||||
|     checkCUDAError("allocate", cudaMallocHost((void **) &ptr, bytesize)); | ||||
|     return ptr; | ||||
| } | ||||
|  | ||||
|   | ||||
| @@ -38,17 +38,17 @@ extern "C" { | ||||
|  | ||||
| } | ||||
|  | ||||
| static MD_FLOAT xprd, yprd, zprd; | ||||
| static MD_FLOAT bininvx, bininvy, bininvz; | ||||
| static int mbinxlo, mbinylo, mbinzlo; | ||||
| static int nbinx, nbiny, nbinz; | ||||
| static int mbinx, mbiny, mbinz; // n bins in x, y, z | ||||
| static int mbins; //total number of bins | ||||
| static int atoms_per_bin;  // max atoms per bin | ||||
| static MD_FLOAT cutneighsq;  // neighbor cutoff squared | ||||
| static int nmax; | ||||
| static int nstencil;      // # of bins in stencil | ||||
| static int* stencil;      // stencil list of bin offsets | ||||
| extern MD_FLOAT xprd, yprd, zprd; | ||||
| extern MD_FLOAT bininvx, bininvy, bininvz; | ||||
| extern int mbinxlo, mbinylo, mbinzlo; | ||||
| extern int nbinx, nbiny, nbinz; | ||||
| extern int mbinx, mbiny, mbinz; // n bins in x, y, z | ||||
| extern int mbins; //total number of bins | ||||
| extern int atoms_per_bin;  // max atoms per bin | ||||
| extern MD_FLOAT cutneighsq;  // neighbor cutoff squared | ||||
| extern int nmax; | ||||
| extern int nstencil;      // # of bins in stencil | ||||
| extern int* stencil;      // stencil list of bin offsets | ||||
| static int* c_stencil = NULL; | ||||
| static int* c_resize_needed = NULL; | ||||
| static int* c_new_maxneighs = NULL; | ||||
| @@ -59,7 +59,6 @@ static Binning c_binning { | ||||
|     .atoms_per_bin = 0 | ||||
| }; | ||||
|  | ||||
|  | ||||
| __device__ int coord2bin_device(MD_FLOAT xin, MD_FLOAT yin, MD_FLOAT zin, Neighbor_params np) { | ||||
|     int ix, iy, iz; | ||||
|  | ||||
| @@ -115,7 +114,7 @@ __global__ void sort_bin_contents_kernel(int* bincount, int* bins, int mbins, in | ||||
|     } while (!sorted); | ||||
| } | ||||
|  | ||||
| __global__ void binatoms_kernel(Atom a, int* bincount, int* bins, int atoms_per_bin, Neighbor_params np, int *resize_needed){ | ||||
| __global__ void binatoms_kernel(Atom a, int* bincount, int* bins, int atoms_per_bin, Neighbor_params np, int *resize_needed) { | ||||
|     Atom* atom = &a; | ||||
|     const int i = blockIdx.x * blockDim.x + threadIdx.x; | ||||
|     int nall = atom->Nlocal + atom->Nghost; | ||||
| @@ -127,7 +126,6 @@ __global__ void binatoms_kernel(Atom a, int* bincount, int* bins, int atoms_per_ | ||||
|     MD_FLOAT y = atom_y(i); | ||||
|     MD_FLOAT z = atom_z(i); | ||||
|     int ibin = coord2bin_device(x, y, z, np); | ||||
|  | ||||
|     int ac = atomicAdd(&bincount[ibin], 1); | ||||
|              | ||||
|     if(ac < atoms_per_bin){ | ||||
| @@ -138,7 +136,7 @@ __global__ void binatoms_kernel(Atom a, int* bincount, int* bins, int atoms_per_ | ||||
| } | ||||
|  | ||||
| __global__ void compute_neighborhood(Atom a, Neighbor neigh, Neighbor_params np, int nstencil, int* stencil, | ||||
|                                      int* bins, int atoms_per_bin, int *bincount, int *new_maxneighs, MD_FLOAT cutneighsq){ | ||||
|                                      int* bins, int atoms_per_bin, int *bincount, int *new_maxneighs, MD_FLOAT cutneighsq) { | ||||
|     const int i = blockIdx.x * blockDim.x + threadIdx.x; | ||||
|     const int Nlocal = a.Nlocal; | ||||
|     if( i >= Nlocal ) { | ||||
| @@ -189,7 +187,6 @@ __global__ void compute_neighborhood(Atom a, Neighbor neigh, Neighbor_params np, | ||||
|     } | ||||
|  | ||||
|     neighbor->numneigh[i] = n; | ||||
|  | ||||
|     if(n > neighbor->maxneighs) { | ||||
|         atomicMax(new_maxneighs, n); | ||||
|     } | ||||
| @@ -304,8 +301,8 @@ void buildNeighbor_cuda(Atom *atom, Neighbor *neighbor, Atom *c_atom, Neighbor * | ||||
|                                                                     c_new_maxneighs, | ||||
| 								                                    cutneighsq); | ||||
|  | ||||
| 	checkCUDAError( "PeekAtLastError ComputeNeighbor", cudaPeekAtLastError() ); | ||||
| 	checkCUDAError( "DeviceSync ComputeNeighbor", cudaDeviceSynchronize() ); | ||||
|         checkCUDAError( "PeekAtLastError ComputeNeighbor", cudaPeekAtLastError() ); | ||||
|         checkCUDAError( "DeviceSync ComputeNeighbor", cudaDeviceSynchronize() ); | ||||
|  | ||||
|         // TODO copy the value of c_new_maxneighs back to host and check if it has been modified | ||||
|         int new_maxneighs; | ||||
| @@ -323,7 +320,7 @@ void buildNeighbor_cuda(Atom *atom, Neighbor *neighbor, Atom *c_atom, Neighbor * | ||||
|         } | ||||
|  | ||||
|     } | ||||
|     neighbor->maxneighs = c_neighbor->maxneighs; | ||||
|  | ||||
|     neighbor->maxneighs = c_neighbor->maxneighs; | ||||
|     cudaProfilerStop(); | ||||
| } | ||||
|   | ||||
| @@ -34,16 +34,15 @@ extern "C" { | ||||
|  | ||||
| } | ||||
|  | ||||
| static int NmaxGhost; | ||||
| static int *PBCx, *PBCy, *PBCz; | ||||
| static int c_NmaxGhost = 0; | ||||
| static int *c_PBCx = NULL, *c_PBCy = NULL, *c_PBCz = NULL; | ||||
|  | ||||
| extern int NmaxGhost; | ||||
| extern int *PBCx, *PBCy, *PBCz; | ||||
| static int c_NmaxGhost; | ||||
| static int *c_PBCx, *c_PBCy, *c_PBCz; | ||||
|  | ||||
| __global__ void computeAtomsPbcUpdate(Atom a, MD_FLOAT xprd, MD_FLOAT yprd, MD_FLOAT zprd){ | ||||
|     const int i = blockIdx.x * blockDim.x + threadIdx.x; | ||||
|     Atom* atom = &a; | ||||
|     if( i >= atom->Nlocal ){ | ||||
|     if(i >= atom->Nlocal) { | ||||
|         return; | ||||
|     } | ||||
|  | ||||
| @@ -69,9 +68,10 @@ __global__ void computeAtomsPbcUpdate(Atom a, MD_FLOAT xprd, MD_FLOAT yprd, MD_F | ||||
| __global__ void computePbcUpdate(Atom a, int* PBCx, int* PBCy, int* PBCz, MD_FLOAT xprd, MD_FLOAT yprd, MD_FLOAT zprd){ | ||||
|     const int i = blockIdx.x * blockDim.x + threadIdx.x; | ||||
|     const int Nghost = a.Nghost; | ||||
|     if( i >= Nghost ) { | ||||
|     if(i >= Nghost) { | ||||
|         return; | ||||
|     } | ||||
|  | ||||
|     Atom* atom = &a; | ||||
|     int *border_map = atom->border_map; | ||||
|     int nlocal = atom->Nlocal; | ||||
| @@ -86,7 +86,7 @@ __global__ void computePbcUpdate(Atom a, int* PBCx, int* PBCy, int* PBCz, MD_FLO | ||||
| void updatePbc_cuda(Atom *atom, Atom *c_atom, Parameter *param, bool doReneighbor) { | ||||
|     const int num_threads_per_block = get_num_threads(); | ||||
|  | ||||
|     if (doReneighbor){ | ||||
|     if (doReneighbor) { | ||||
|         c_atom->Natoms = atom->Natoms; | ||||
|         c_atom->Nlocal = atom->Nlocal; | ||||
|         c_atom->Nghost = atom->Nghost; | ||||
| @@ -146,6 +146,5 @@ void updateAtomsPbc_cuda(Atom* atom, Atom *c_atom, Parameter *param){ | ||||
|  | ||||
|     checkCUDAError( "PeekAtLastError UpdateAtomsPbc", cudaPeekAtLastError() ); | ||||
|     checkCUDAError( "DeviceSync UpdateAtomsPbc", cudaDeviceSynchronize() ); | ||||
|  | ||||
|     checkCUDAError( "updateAtomsPbc position memcpy back", cudaMemcpy(atom->x, c_atom->x, sizeof(MD_FLOAT) * atom->Nlocal * 3, cudaMemcpyDeviceToHost) ); | ||||
| } | ||||
|   | ||||
| @@ -31,21 +31,21 @@ | ||||
| #define SMALL 1.0e-6 | ||||
| #define FACTOR 0.999 | ||||
|  | ||||
| static MD_FLOAT xprd, yprd, zprd; | ||||
| static MD_FLOAT bininvx, bininvy, bininvz; | ||||
| static int mbinxlo, mbinylo, mbinzlo; | ||||
| static int nbinx, nbiny, nbinz; | ||||
| static int mbinx, mbiny, mbinz; // n bins in x, y, z | ||||
| static int *bincount; | ||||
| static int *bins; | ||||
| static int mbins; //total number of bins | ||||
| static int atoms_per_bin;  // max atoms per bin | ||||
| static MD_FLOAT cutneigh; | ||||
| static MD_FLOAT cutneighsq;  // neighbor cutoff squared | ||||
| static int nmax; | ||||
| static int nstencil;      // # of bins in stencil | ||||
| static int* stencil;      // stencil list of bin offsets | ||||
| static MD_FLOAT binsizex, binsizey, binsizez; | ||||
| MD_FLOAT xprd, yprd, zprd; | ||||
| MD_FLOAT bininvx, bininvy, bininvz; | ||||
| int mbinxlo, mbinylo, mbinzlo; | ||||
| int nbinx, nbiny, nbinz; | ||||
| int mbinx, mbiny, mbinz; // n bins in x, y, z | ||||
| int *bincount; | ||||
| int *bins; | ||||
| int mbins; //total number of bins | ||||
| int atoms_per_bin;  // max atoms per bin | ||||
| MD_FLOAT cutneigh; | ||||
| MD_FLOAT cutneighsq;  // neighbor cutoff squared | ||||
| int nmax; | ||||
| int nstencil;      // # of bins in stencil | ||||
| int* stencil;      // stencil list of bin offsets | ||||
| MD_FLOAT binsizex, binsizey, binsizez; | ||||
| static int coord2bin(MD_FLOAT, MD_FLOAT , MD_FLOAT); | ||||
| static MD_FLOAT bindist(int, int, int); | ||||
|  | ||||
|   | ||||
| @@ -30,8 +30,8 @@ | ||||
|  | ||||
| #define DELTA 20000 | ||||
|  | ||||
| static int NmaxGhost; | ||||
| static int *PBCx, *PBCy, *PBCz; | ||||
| int NmaxGhost; | ||||
| int *PBCx, *PBCy, *PBCz; | ||||
|  | ||||
| static void growPbc(Atom*); | ||||
|  | ||||
| @@ -66,7 +66,6 @@ void updateAtomsPbc_cpu(Atom *atom, Atom *c_atom, Parameter *param) { | ||||
|     MD_FLOAT zprd = param->zprd; | ||||
|  | ||||
|     for(int i = 0; i < atom->Nlocal; i++) { | ||||
|  | ||||
|         if(atom_x(i) < 0.0) { | ||||
|             atom_x(i) += xprd; | ||||
|         } else if(atom_x(i) >= xprd) { | ||||
| @@ -177,8 +176,7 @@ void setupPbc(Atom *atom, Parameter *param) { | ||||
| } | ||||
|  | ||||
| /* internal subroutines */ | ||||
| void growPbc(Atom* atom) | ||||
| { | ||||
| void growPbc(Atom* atom) { | ||||
|     int nold = NmaxGhost; | ||||
|     NmaxGhost += DELTA; | ||||
|  | ||||
|   | ||||
		Reference in New Issue
	
	Block a user