Fix GPU version
Signed-off-by: Rafael Ravedutti <rafaelravedutti@gmail.com>
This commit is contained in:
parent
3d95ec4b0a
commit
87d006d418
@ -59,14 +59,15 @@ void *reallocate(void* ptr, int alignment, size_t newBytesize, size_t oldBytesiz
|
|||||||
|
|
||||||
return newarray;
|
return newarray;
|
||||||
}
|
}
|
||||||
|
#ifdef CUDA_TARGET
|
||||||
#ifndef CUDA_TARGET
|
|
||||||
void *allocate_gpu(int alignment, size_t bytesize) { return NULL; }
|
void *allocate_gpu(int alignment, size_t bytesize) { return NULL; }
|
||||||
void *reallocate_gpu(void *ptr, int alignment, size_t newBytesize, size_t oldBytesize) { return NULL; }
|
void *reallocate_gpu(void *ptr, int alignment, size_t newBytesize, size_t oldBytesize) { return NULL; }
|
||||||
#else
|
#else
|
||||||
|
#include <cuda_runtime.h>
|
||||||
|
#include <cuda_atom.h>
|
||||||
void *allocate_gpu(int alignment, size_t bytesize) {
|
void *allocate_gpu(int alignment, size_t bytesize) {
|
||||||
void *ptr;
|
void *ptr;
|
||||||
checkCUDAError("allocate_gpu", cudaMallocHost((void **) &ptr, bytesize));
|
checkCUDAError("allocate", cudaMallocHost((void **) &ptr, bytesize));
|
||||||
return ptr;
|
return ptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -38,17 +38,17 @@ extern "C" {
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static MD_FLOAT xprd, yprd, zprd;
|
extern MD_FLOAT xprd, yprd, zprd;
|
||||||
static MD_FLOAT bininvx, bininvy, bininvz;
|
extern MD_FLOAT bininvx, bininvy, bininvz;
|
||||||
static int mbinxlo, mbinylo, mbinzlo;
|
extern int mbinxlo, mbinylo, mbinzlo;
|
||||||
static int nbinx, nbiny, nbinz;
|
extern int nbinx, nbiny, nbinz;
|
||||||
static int mbinx, mbiny, mbinz; // n bins in x, y, z
|
extern int mbinx, mbiny, mbinz; // n bins in x, y, z
|
||||||
static int mbins; //total number of bins
|
extern int mbins; //total number of bins
|
||||||
static int atoms_per_bin; // max atoms per bin
|
extern int atoms_per_bin; // max atoms per bin
|
||||||
static MD_FLOAT cutneighsq; // neighbor cutoff squared
|
extern MD_FLOAT cutneighsq; // neighbor cutoff squared
|
||||||
static int nmax;
|
extern int nmax;
|
||||||
static int nstencil; // # of bins in stencil
|
extern int nstencil; // # of bins in stencil
|
||||||
static int* stencil; // stencil list of bin offsets
|
extern int* stencil; // stencil list of bin offsets
|
||||||
static int* c_stencil = NULL;
|
static int* c_stencil = NULL;
|
||||||
static int* c_resize_needed = NULL;
|
static int* c_resize_needed = NULL;
|
||||||
static int* c_new_maxneighs = NULL;
|
static int* c_new_maxneighs = NULL;
|
||||||
@ -59,7 +59,6 @@ static Binning c_binning {
|
|||||||
.atoms_per_bin = 0
|
.atoms_per_bin = 0
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
__device__ int coord2bin_device(MD_FLOAT xin, MD_FLOAT yin, MD_FLOAT zin, Neighbor_params np) {
|
__device__ int coord2bin_device(MD_FLOAT xin, MD_FLOAT yin, MD_FLOAT zin, Neighbor_params np) {
|
||||||
int ix, iy, iz;
|
int ix, iy, iz;
|
||||||
|
|
||||||
@ -115,7 +114,7 @@ __global__ void sort_bin_contents_kernel(int* bincount, int* bins, int mbins, in
|
|||||||
} while (!sorted);
|
} while (!sorted);
|
||||||
}
|
}
|
||||||
|
|
||||||
__global__ void binatoms_kernel(Atom a, int* bincount, int* bins, int atoms_per_bin, Neighbor_params np, int *resize_needed){
|
__global__ void binatoms_kernel(Atom a, int* bincount, int* bins, int atoms_per_bin, Neighbor_params np, int *resize_needed) {
|
||||||
Atom* atom = &a;
|
Atom* atom = &a;
|
||||||
const int i = blockIdx.x * blockDim.x + threadIdx.x;
|
const int i = blockIdx.x * blockDim.x + threadIdx.x;
|
||||||
int nall = atom->Nlocal + atom->Nghost;
|
int nall = atom->Nlocal + atom->Nghost;
|
||||||
@ -127,7 +126,6 @@ __global__ void binatoms_kernel(Atom a, int* bincount, int* bins, int atoms_per_
|
|||||||
MD_FLOAT y = atom_y(i);
|
MD_FLOAT y = atom_y(i);
|
||||||
MD_FLOAT z = atom_z(i);
|
MD_FLOAT z = atom_z(i);
|
||||||
int ibin = coord2bin_device(x, y, z, np);
|
int ibin = coord2bin_device(x, y, z, np);
|
||||||
|
|
||||||
int ac = atomicAdd(&bincount[ibin], 1);
|
int ac = atomicAdd(&bincount[ibin], 1);
|
||||||
|
|
||||||
if(ac < atoms_per_bin){
|
if(ac < atoms_per_bin){
|
||||||
@ -138,7 +136,7 @@ __global__ void binatoms_kernel(Atom a, int* bincount, int* bins, int atoms_per_
|
|||||||
}
|
}
|
||||||
|
|
||||||
__global__ void compute_neighborhood(Atom a, Neighbor neigh, Neighbor_params np, int nstencil, int* stencil,
|
__global__ void compute_neighborhood(Atom a, Neighbor neigh, Neighbor_params np, int nstencil, int* stencil,
|
||||||
int* bins, int atoms_per_bin, int *bincount, int *new_maxneighs, MD_FLOAT cutneighsq){
|
int* bins, int atoms_per_bin, int *bincount, int *new_maxneighs, MD_FLOAT cutneighsq) {
|
||||||
const int i = blockIdx.x * blockDim.x + threadIdx.x;
|
const int i = blockIdx.x * blockDim.x + threadIdx.x;
|
||||||
const int Nlocal = a.Nlocal;
|
const int Nlocal = a.Nlocal;
|
||||||
if( i >= Nlocal ) {
|
if( i >= Nlocal ) {
|
||||||
@ -189,7 +187,6 @@ __global__ void compute_neighborhood(Atom a, Neighbor neigh, Neighbor_params np,
|
|||||||
}
|
}
|
||||||
|
|
||||||
neighbor->numneigh[i] = n;
|
neighbor->numneigh[i] = n;
|
||||||
|
|
||||||
if(n > neighbor->maxneighs) {
|
if(n > neighbor->maxneighs) {
|
||||||
atomicMax(new_maxneighs, n);
|
atomicMax(new_maxneighs, n);
|
||||||
}
|
}
|
||||||
@ -323,7 +320,7 @@ void buildNeighbor_cuda(Atom *atom, Neighbor *neighbor, Atom *c_atom, Neighbor *
|
|||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
neighbor->maxneighs = c_neighbor->maxneighs;
|
|
||||||
|
|
||||||
|
neighbor->maxneighs = c_neighbor->maxneighs;
|
||||||
cudaProfilerStop();
|
cudaProfilerStop();
|
||||||
}
|
}
|
||||||
|
@ -34,16 +34,15 @@ extern "C" {
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static int NmaxGhost;
|
extern int NmaxGhost;
|
||||||
static int *PBCx, *PBCy, *PBCz;
|
extern int *PBCx, *PBCy, *PBCz;
|
||||||
static int c_NmaxGhost = 0;
|
static int c_NmaxGhost;
|
||||||
static int *c_PBCx = NULL, *c_PBCy = NULL, *c_PBCz = NULL;
|
static int *c_PBCx, *c_PBCy, *c_PBCz;
|
||||||
|
|
||||||
|
|
||||||
__global__ void computeAtomsPbcUpdate(Atom a, MD_FLOAT xprd, MD_FLOAT yprd, MD_FLOAT zprd){
|
__global__ void computeAtomsPbcUpdate(Atom a, MD_FLOAT xprd, MD_FLOAT yprd, MD_FLOAT zprd){
|
||||||
const int i = blockIdx.x * blockDim.x + threadIdx.x;
|
const int i = blockIdx.x * blockDim.x + threadIdx.x;
|
||||||
Atom* atom = &a;
|
Atom* atom = &a;
|
||||||
if( i >= atom->Nlocal ){
|
if(i >= atom->Nlocal) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -69,9 +68,10 @@ __global__ void computeAtomsPbcUpdate(Atom a, MD_FLOAT xprd, MD_FLOAT yprd, MD_F
|
|||||||
__global__ void computePbcUpdate(Atom a, int* PBCx, int* PBCy, int* PBCz, MD_FLOAT xprd, MD_FLOAT yprd, MD_FLOAT zprd){
|
__global__ void computePbcUpdate(Atom a, int* PBCx, int* PBCy, int* PBCz, MD_FLOAT xprd, MD_FLOAT yprd, MD_FLOAT zprd){
|
||||||
const int i = blockIdx.x * blockDim.x + threadIdx.x;
|
const int i = blockIdx.x * blockDim.x + threadIdx.x;
|
||||||
const int Nghost = a.Nghost;
|
const int Nghost = a.Nghost;
|
||||||
if( i >= Nghost ) {
|
if(i >= Nghost) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
Atom* atom = &a;
|
Atom* atom = &a;
|
||||||
int *border_map = atom->border_map;
|
int *border_map = atom->border_map;
|
||||||
int nlocal = atom->Nlocal;
|
int nlocal = atom->Nlocal;
|
||||||
@ -86,7 +86,7 @@ __global__ void computePbcUpdate(Atom a, int* PBCx, int* PBCy, int* PBCz, MD_FLO
|
|||||||
void updatePbc_cuda(Atom *atom, Atom *c_atom, Parameter *param, bool doReneighbor) {
|
void updatePbc_cuda(Atom *atom, Atom *c_atom, Parameter *param, bool doReneighbor) {
|
||||||
const int num_threads_per_block = get_num_threads();
|
const int num_threads_per_block = get_num_threads();
|
||||||
|
|
||||||
if (doReneighbor){
|
if (doReneighbor) {
|
||||||
c_atom->Natoms = atom->Natoms;
|
c_atom->Natoms = atom->Natoms;
|
||||||
c_atom->Nlocal = atom->Nlocal;
|
c_atom->Nlocal = atom->Nlocal;
|
||||||
c_atom->Nghost = atom->Nghost;
|
c_atom->Nghost = atom->Nghost;
|
||||||
@ -146,6 +146,5 @@ void updateAtomsPbc_cuda(Atom* atom, Atom *c_atom, Parameter *param){
|
|||||||
|
|
||||||
checkCUDAError( "PeekAtLastError UpdateAtomsPbc", cudaPeekAtLastError() );
|
checkCUDAError( "PeekAtLastError UpdateAtomsPbc", cudaPeekAtLastError() );
|
||||||
checkCUDAError( "DeviceSync UpdateAtomsPbc", cudaDeviceSynchronize() );
|
checkCUDAError( "DeviceSync UpdateAtomsPbc", cudaDeviceSynchronize() );
|
||||||
|
|
||||||
checkCUDAError( "updateAtomsPbc position memcpy back", cudaMemcpy(atom->x, c_atom->x, sizeof(MD_FLOAT) * atom->Nlocal * 3, cudaMemcpyDeviceToHost) );
|
checkCUDAError( "updateAtomsPbc position memcpy back", cudaMemcpy(atom->x, c_atom->x, sizeof(MD_FLOAT) * atom->Nlocal * 3, cudaMemcpyDeviceToHost) );
|
||||||
}
|
}
|
||||||
|
@ -31,21 +31,21 @@
|
|||||||
#define SMALL 1.0e-6
|
#define SMALL 1.0e-6
|
||||||
#define FACTOR 0.999
|
#define FACTOR 0.999
|
||||||
|
|
||||||
static MD_FLOAT xprd, yprd, zprd;
|
MD_FLOAT xprd, yprd, zprd;
|
||||||
static MD_FLOAT bininvx, bininvy, bininvz;
|
MD_FLOAT bininvx, bininvy, bininvz;
|
||||||
static int mbinxlo, mbinylo, mbinzlo;
|
int mbinxlo, mbinylo, mbinzlo;
|
||||||
static int nbinx, nbiny, nbinz;
|
int nbinx, nbiny, nbinz;
|
||||||
static int mbinx, mbiny, mbinz; // n bins in x, y, z
|
int mbinx, mbiny, mbinz; // n bins in x, y, z
|
||||||
static int *bincount;
|
int *bincount;
|
||||||
static int *bins;
|
int *bins;
|
||||||
static int mbins; //total number of bins
|
int mbins; //total number of bins
|
||||||
static int atoms_per_bin; // max atoms per bin
|
int atoms_per_bin; // max atoms per bin
|
||||||
static MD_FLOAT cutneigh;
|
MD_FLOAT cutneigh;
|
||||||
static MD_FLOAT cutneighsq; // neighbor cutoff squared
|
MD_FLOAT cutneighsq; // neighbor cutoff squared
|
||||||
static int nmax;
|
int nmax;
|
||||||
static int nstencil; // # of bins in stencil
|
int nstencil; // # of bins in stencil
|
||||||
static int* stencil; // stencil list of bin offsets
|
int* stencil; // stencil list of bin offsets
|
||||||
static MD_FLOAT binsizex, binsizey, binsizez;
|
MD_FLOAT binsizex, binsizey, binsizez;
|
||||||
static int coord2bin(MD_FLOAT, MD_FLOAT , MD_FLOAT);
|
static int coord2bin(MD_FLOAT, MD_FLOAT , MD_FLOAT);
|
||||||
static MD_FLOAT bindist(int, int, int);
|
static MD_FLOAT bindist(int, int, int);
|
||||||
|
|
||||||
|
@ -30,8 +30,8 @@
|
|||||||
|
|
||||||
#define DELTA 20000
|
#define DELTA 20000
|
||||||
|
|
||||||
static int NmaxGhost;
|
int NmaxGhost;
|
||||||
static int *PBCx, *PBCy, *PBCz;
|
int *PBCx, *PBCy, *PBCz;
|
||||||
|
|
||||||
static void growPbc(Atom*);
|
static void growPbc(Atom*);
|
||||||
|
|
||||||
@ -66,7 +66,6 @@ void updateAtomsPbc_cpu(Atom *atom, Atom *c_atom, Parameter *param) {
|
|||||||
MD_FLOAT zprd = param->zprd;
|
MD_FLOAT zprd = param->zprd;
|
||||||
|
|
||||||
for(int i = 0; i < atom->Nlocal; i++) {
|
for(int i = 0; i < atom->Nlocal; i++) {
|
||||||
|
|
||||||
if(atom_x(i) < 0.0) {
|
if(atom_x(i) < 0.0) {
|
||||||
atom_x(i) += xprd;
|
atom_x(i) += xprd;
|
||||||
} else if(atom_x(i) >= xprd) {
|
} else if(atom_x(i) >= xprd) {
|
||||||
@ -177,8 +176,7 @@ void setupPbc(Atom *atom, Parameter *param) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/* internal subroutines */
|
/* internal subroutines */
|
||||||
void growPbc(Atom* atom)
|
void growPbc(Atom* atom) {
|
||||||
{
|
|
||||||
int nold = NmaxGhost;
|
int nold = NmaxGhost;
|
||||||
NmaxGhost += DELTA;
|
NmaxGhost += DELTA;
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user