Fix GPU version

Signed-off-by: Rafael Ravedutti <rafaelravedutti@gmail.com>
This commit is contained in:
Rafael Ravedutti 2022-08-11 16:42:41 +02:00
parent 3d95ec4b0a
commit 87d006d418
5 changed files with 46 additions and 51 deletions

View File

@ -59,14 +59,15 @@ void *reallocate(void* ptr, int alignment, size_t newBytesize, size_t oldBytesiz
return newarray; return newarray;
} }
#ifdef CUDA_TARGET
#ifndef CUDA_TARGET
void *allocate_gpu(int alignment, size_t bytesize) { return NULL; } void *allocate_gpu(int alignment, size_t bytesize) { return NULL; }
void *reallocate_gpu(void *ptr, int alignment, size_t newBytesize, size_t oldBytesize) { return NULL; } void *reallocate_gpu(void *ptr, int alignment, size_t newBytesize, size_t oldBytesize) { return NULL; }
#else #else
#include <cuda_runtime.h>
#include <cuda_atom.h>
void *allocate_gpu(int alignment, size_t bytesize) { void *allocate_gpu(int alignment, size_t bytesize) {
void *ptr; void *ptr;
checkCUDAError("allocate_gpu", cudaMallocHost((void **) &ptr, bytesize)); checkCUDAError("allocate", cudaMallocHost((void **) &ptr, bytesize));
return ptr; return ptr;
} }

View File

@ -38,17 +38,17 @@ extern "C" {
} }
static MD_FLOAT xprd, yprd, zprd; extern MD_FLOAT xprd, yprd, zprd;
static MD_FLOAT bininvx, bininvy, bininvz; extern MD_FLOAT bininvx, bininvy, bininvz;
static int mbinxlo, mbinylo, mbinzlo; extern int mbinxlo, mbinylo, mbinzlo;
static int nbinx, nbiny, nbinz; extern int nbinx, nbiny, nbinz;
static int mbinx, mbiny, mbinz; // n bins in x, y, z extern int mbinx, mbiny, mbinz; // n bins in x, y, z
static int mbins; //total number of bins extern int mbins; //total number of bins
static int atoms_per_bin; // max atoms per bin extern int atoms_per_bin; // max atoms per bin
static MD_FLOAT cutneighsq; // neighbor cutoff squared extern MD_FLOAT cutneighsq; // neighbor cutoff squared
static int nmax; extern int nmax;
static int nstencil; // # of bins in stencil extern int nstencil; // # of bins in stencil
static int* stencil; // stencil list of bin offsets extern int* stencil; // stencil list of bin offsets
static int* c_stencil = NULL; static int* c_stencil = NULL;
static int* c_resize_needed = NULL; static int* c_resize_needed = NULL;
static int* c_new_maxneighs = NULL; static int* c_new_maxneighs = NULL;
@ -59,7 +59,6 @@ static Binning c_binning {
.atoms_per_bin = 0 .atoms_per_bin = 0
}; };
__device__ int coord2bin_device(MD_FLOAT xin, MD_FLOAT yin, MD_FLOAT zin, Neighbor_params np) { __device__ int coord2bin_device(MD_FLOAT xin, MD_FLOAT yin, MD_FLOAT zin, Neighbor_params np) {
int ix, iy, iz; int ix, iy, iz;
@ -115,7 +114,7 @@ __global__ void sort_bin_contents_kernel(int* bincount, int* bins, int mbins, in
} while (!sorted); } while (!sorted);
} }
__global__ void binatoms_kernel(Atom a, int* bincount, int* bins, int atoms_per_bin, Neighbor_params np, int *resize_needed){ __global__ void binatoms_kernel(Atom a, int* bincount, int* bins, int atoms_per_bin, Neighbor_params np, int *resize_needed) {
Atom* atom = &a; Atom* atom = &a;
const int i = blockIdx.x * blockDim.x + threadIdx.x; const int i = blockIdx.x * blockDim.x + threadIdx.x;
int nall = atom->Nlocal + atom->Nghost; int nall = atom->Nlocal + atom->Nghost;
@ -127,7 +126,6 @@ __global__ void binatoms_kernel(Atom a, int* bincount, int* bins, int atoms_per_
MD_FLOAT y = atom_y(i); MD_FLOAT y = atom_y(i);
MD_FLOAT z = atom_z(i); MD_FLOAT z = atom_z(i);
int ibin = coord2bin_device(x, y, z, np); int ibin = coord2bin_device(x, y, z, np);
int ac = atomicAdd(&bincount[ibin], 1); int ac = atomicAdd(&bincount[ibin], 1);
if(ac < atoms_per_bin){ if(ac < atoms_per_bin){
@ -138,7 +136,7 @@ __global__ void binatoms_kernel(Atom a, int* bincount, int* bins, int atoms_per_
} }
__global__ void compute_neighborhood(Atom a, Neighbor neigh, Neighbor_params np, int nstencil, int* stencil, __global__ void compute_neighborhood(Atom a, Neighbor neigh, Neighbor_params np, int nstencil, int* stencil,
int* bins, int atoms_per_bin, int *bincount, int *new_maxneighs, MD_FLOAT cutneighsq){ int* bins, int atoms_per_bin, int *bincount, int *new_maxneighs, MD_FLOAT cutneighsq) {
const int i = blockIdx.x * blockDim.x + threadIdx.x; const int i = blockIdx.x * blockDim.x + threadIdx.x;
const int Nlocal = a.Nlocal; const int Nlocal = a.Nlocal;
if( i >= Nlocal ) { if( i >= Nlocal ) {
@ -189,7 +187,6 @@ __global__ void compute_neighborhood(Atom a, Neighbor neigh, Neighbor_params np,
} }
neighbor->numneigh[i] = n; neighbor->numneigh[i] = n;
if(n > neighbor->maxneighs) { if(n > neighbor->maxneighs) {
atomicMax(new_maxneighs, n); atomicMax(new_maxneighs, n);
} }
@ -304,8 +301,8 @@ void buildNeighbor_cuda(Atom *atom, Neighbor *neighbor, Atom *c_atom, Neighbor *
c_new_maxneighs, c_new_maxneighs,
cutneighsq); cutneighsq);
checkCUDAError( "PeekAtLastError ComputeNeighbor", cudaPeekAtLastError() ); checkCUDAError( "PeekAtLastError ComputeNeighbor", cudaPeekAtLastError() );
checkCUDAError( "DeviceSync ComputeNeighbor", cudaDeviceSynchronize() ); checkCUDAError( "DeviceSync ComputeNeighbor", cudaDeviceSynchronize() );
// TODO copy the value of c_new_maxneighs back to host and check if it has been modified // TODO copy the value of c_new_maxneighs back to host and check if it has been modified
int new_maxneighs; int new_maxneighs;
@ -323,7 +320,7 @@ void buildNeighbor_cuda(Atom *atom, Neighbor *neighbor, Atom *c_atom, Neighbor *
} }
} }
neighbor->maxneighs = c_neighbor->maxneighs;
neighbor->maxneighs = c_neighbor->maxneighs;
cudaProfilerStop(); cudaProfilerStop();
} }

View File

@ -34,16 +34,15 @@ extern "C" {
} }
static int NmaxGhost; extern int NmaxGhost;
static int *PBCx, *PBCy, *PBCz; extern int *PBCx, *PBCy, *PBCz;
static int c_NmaxGhost = 0; static int c_NmaxGhost;
static int *c_PBCx = NULL, *c_PBCy = NULL, *c_PBCz = NULL; static int *c_PBCx, *c_PBCy, *c_PBCz;
__global__ void computeAtomsPbcUpdate(Atom a, MD_FLOAT xprd, MD_FLOAT yprd, MD_FLOAT zprd){ __global__ void computeAtomsPbcUpdate(Atom a, MD_FLOAT xprd, MD_FLOAT yprd, MD_FLOAT zprd){
const int i = blockIdx.x * blockDim.x + threadIdx.x; const int i = blockIdx.x * blockDim.x + threadIdx.x;
Atom* atom = &a; Atom* atom = &a;
if( i >= atom->Nlocal ){ if(i >= atom->Nlocal) {
return; return;
} }
@ -69,9 +68,10 @@ __global__ void computeAtomsPbcUpdate(Atom a, MD_FLOAT xprd, MD_FLOAT yprd, MD_F
__global__ void computePbcUpdate(Atom a, int* PBCx, int* PBCy, int* PBCz, MD_FLOAT xprd, MD_FLOAT yprd, MD_FLOAT zprd){ __global__ void computePbcUpdate(Atom a, int* PBCx, int* PBCy, int* PBCz, MD_FLOAT xprd, MD_FLOAT yprd, MD_FLOAT zprd){
const int i = blockIdx.x * blockDim.x + threadIdx.x; const int i = blockIdx.x * blockDim.x + threadIdx.x;
const int Nghost = a.Nghost; const int Nghost = a.Nghost;
if( i >= Nghost ) { if(i >= Nghost) {
return; return;
} }
Atom* atom = &a; Atom* atom = &a;
int *border_map = atom->border_map; int *border_map = atom->border_map;
int nlocal = atom->Nlocal; int nlocal = atom->Nlocal;
@ -86,7 +86,7 @@ __global__ void computePbcUpdate(Atom a, int* PBCx, int* PBCy, int* PBCz, MD_FLO
void updatePbc_cuda(Atom *atom, Atom *c_atom, Parameter *param, bool doReneighbor) { void updatePbc_cuda(Atom *atom, Atom *c_atom, Parameter *param, bool doReneighbor) {
const int num_threads_per_block = get_num_threads(); const int num_threads_per_block = get_num_threads();
if (doReneighbor){ if (doReneighbor) {
c_atom->Natoms = atom->Natoms; c_atom->Natoms = atom->Natoms;
c_atom->Nlocal = atom->Nlocal; c_atom->Nlocal = atom->Nlocal;
c_atom->Nghost = atom->Nghost; c_atom->Nghost = atom->Nghost;
@ -146,6 +146,5 @@ void updateAtomsPbc_cuda(Atom* atom, Atom *c_atom, Parameter *param){
checkCUDAError( "PeekAtLastError UpdateAtomsPbc", cudaPeekAtLastError() ); checkCUDAError( "PeekAtLastError UpdateAtomsPbc", cudaPeekAtLastError() );
checkCUDAError( "DeviceSync UpdateAtomsPbc", cudaDeviceSynchronize() ); checkCUDAError( "DeviceSync UpdateAtomsPbc", cudaDeviceSynchronize() );
checkCUDAError( "updateAtomsPbc position memcpy back", cudaMemcpy(atom->x, c_atom->x, sizeof(MD_FLOAT) * atom->Nlocal * 3, cudaMemcpyDeviceToHost) ); checkCUDAError( "updateAtomsPbc position memcpy back", cudaMemcpy(atom->x, c_atom->x, sizeof(MD_FLOAT) * atom->Nlocal * 3, cudaMemcpyDeviceToHost) );
} }

View File

@ -31,21 +31,21 @@
#define SMALL 1.0e-6 #define SMALL 1.0e-6
#define FACTOR 0.999 #define FACTOR 0.999
static MD_FLOAT xprd, yprd, zprd; MD_FLOAT xprd, yprd, zprd;
static MD_FLOAT bininvx, bininvy, bininvz; MD_FLOAT bininvx, bininvy, bininvz;
static int mbinxlo, mbinylo, mbinzlo; int mbinxlo, mbinylo, mbinzlo;
static int nbinx, nbiny, nbinz; int nbinx, nbiny, nbinz;
static int mbinx, mbiny, mbinz; // n bins in x, y, z int mbinx, mbiny, mbinz; // n bins in x, y, z
static int *bincount; int *bincount;
static int *bins; int *bins;
static int mbins; //total number of bins int mbins; //total number of bins
static int atoms_per_bin; // max atoms per bin int atoms_per_bin; // max atoms per bin
static MD_FLOAT cutneigh; MD_FLOAT cutneigh;
static MD_FLOAT cutneighsq; // neighbor cutoff squared MD_FLOAT cutneighsq; // neighbor cutoff squared
static int nmax; int nmax;
static int nstencil; // # of bins in stencil int nstencil; // # of bins in stencil
static int* stencil; // stencil list of bin offsets int* stencil; // stencil list of bin offsets
static MD_FLOAT binsizex, binsizey, binsizez; MD_FLOAT binsizex, binsizey, binsizez;
static int coord2bin(MD_FLOAT, MD_FLOAT , MD_FLOAT); static int coord2bin(MD_FLOAT, MD_FLOAT , MD_FLOAT);
static MD_FLOAT bindist(int, int, int); static MD_FLOAT bindist(int, int, int);

View File

@ -30,8 +30,8 @@
#define DELTA 20000 #define DELTA 20000
static int NmaxGhost; int NmaxGhost;
static int *PBCx, *PBCy, *PBCz; int *PBCx, *PBCy, *PBCz;
static void growPbc(Atom*); static void growPbc(Atom*);
@ -66,7 +66,6 @@ void updateAtomsPbc_cpu(Atom *atom, Atom *c_atom, Parameter *param) {
MD_FLOAT zprd = param->zprd; MD_FLOAT zprd = param->zprd;
for(int i = 0; i < atom->Nlocal; i++) { for(int i = 0; i < atom->Nlocal; i++) {
if(atom_x(i) < 0.0) { if(atom_x(i) < 0.0) {
atom_x(i) += xprd; atom_x(i) += xprd;
} else if(atom_x(i) >= xprd) { } else if(atom_x(i) >= xprd) {
@ -177,8 +176,7 @@ void setupPbc(Atom *atom, Parameter *param) {
} }
/* internal subroutines */ /* internal subroutines */
void growPbc(Atom* atom) void growPbc(Atom* atom) {
{
int nold = NmaxGhost; int nold = NmaxGhost;
NmaxGhost += DELTA; NmaxGhost += DELTA;