Fix GPU version
Signed-off-by: Rafael Ravedutti <rafaelravedutti@gmail.com>
This commit is contained in:
		@@ -59,14 +59,15 @@ void *reallocate(void* ptr, int alignment, size_t newBytesize, size_t oldBytesiz
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
    return newarray;
 | 
					    return newarray;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					#ifdef CUDA_TARGET
 | 
				
			||||||
#ifndef CUDA_TARGET
 | 
					 | 
				
			||||||
void *allocate_gpu(int alignment, size_t bytesize) { return NULL; }
 | 
					void *allocate_gpu(int alignment, size_t bytesize) { return NULL; }
 | 
				
			||||||
void *reallocate_gpu(void *ptr, int alignment, size_t newBytesize, size_t oldBytesize) { return NULL; }
 | 
					void *reallocate_gpu(void *ptr, int alignment, size_t newBytesize, size_t oldBytesize) { return NULL; }
 | 
				
			||||||
#else
 | 
					#else
 | 
				
			||||||
 | 
					#include <cuda_runtime.h>
 | 
				
			||||||
 | 
					#include <cuda_atom.h>
 | 
				
			||||||
void *allocate_gpu(int alignment, size_t bytesize) {
 | 
					void *allocate_gpu(int alignment, size_t bytesize) {
 | 
				
			||||||
    void *ptr;
 | 
					    void *ptr;
 | 
				
			||||||
    checkCUDAError("allocate_gpu", cudaMallocHost((void **) &ptr, bytesize));
 | 
					    checkCUDAError("allocate", cudaMallocHost((void **) &ptr, bytesize));
 | 
				
			||||||
    return ptr;
 | 
					    return ptr;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -38,17 +38,17 @@ extern "C" {
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static MD_FLOAT xprd, yprd, zprd;
 | 
					extern MD_FLOAT xprd, yprd, zprd;
 | 
				
			||||||
static MD_FLOAT bininvx, bininvy, bininvz;
 | 
					extern MD_FLOAT bininvx, bininvy, bininvz;
 | 
				
			||||||
static int mbinxlo, mbinylo, mbinzlo;
 | 
					extern int mbinxlo, mbinylo, mbinzlo;
 | 
				
			||||||
static int nbinx, nbiny, nbinz;
 | 
					extern int nbinx, nbiny, nbinz;
 | 
				
			||||||
static int mbinx, mbiny, mbinz; // n bins in x, y, z
 | 
					extern int mbinx, mbiny, mbinz; // n bins in x, y, z
 | 
				
			||||||
static int mbins; //total number of bins
 | 
					extern int mbins; //total number of bins
 | 
				
			||||||
static int atoms_per_bin;  // max atoms per bin
 | 
					extern int atoms_per_bin;  // max atoms per bin
 | 
				
			||||||
static MD_FLOAT cutneighsq;  // neighbor cutoff squared
 | 
					extern MD_FLOAT cutneighsq;  // neighbor cutoff squared
 | 
				
			||||||
static int nmax;
 | 
					extern int nmax;
 | 
				
			||||||
static int nstencil;      // # of bins in stencil
 | 
					extern int nstencil;      // # of bins in stencil
 | 
				
			||||||
static int* stencil;      // stencil list of bin offsets
 | 
					extern int* stencil;      // stencil list of bin offsets
 | 
				
			||||||
static int* c_stencil = NULL;
 | 
					static int* c_stencil = NULL;
 | 
				
			||||||
static int* c_resize_needed = NULL;
 | 
					static int* c_resize_needed = NULL;
 | 
				
			||||||
static int* c_new_maxneighs = NULL;
 | 
					static int* c_new_maxneighs = NULL;
 | 
				
			||||||
@@ -59,7 +59,6 @@ static Binning c_binning {
 | 
				
			|||||||
    .atoms_per_bin = 0
 | 
					    .atoms_per_bin = 0
 | 
				
			||||||
};
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					 | 
				
			||||||
__device__ int coord2bin_device(MD_FLOAT xin, MD_FLOAT yin, MD_FLOAT zin, Neighbor_params np) {
 | 
					__device__ int coord2bin_device(MD_FLOAT xin, MD_FLOAT yin, MD_FLOAT zin, Neighbor_params np) {
 | 
				
			||||||
    int ix, iy, iz;
 | 
					    int ix, iy, iz;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -115,7 +114,7 @@ __global__ void sort_bin_contents_kernel(int* bincount, int* bins, int mbins, in
 | 
				
			|||||||
    } while (!sorted);
 | 
					    } while (!sorted);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
__global__ void binatoms_kernel(Atom a, int* bincount, int* bins, int atoms_per_bin, Neighbor_params np, int *resize_needed){
 | 
					__global__ void binatoms_kernel(Atom a, int* bincount, int* bins, int atoms_per_bin, Neighbor_params np, int *resize_needed) {
 | 
				
			||||||
    Atom* atom = &a;
 | 
					    Atom* atom = &a;
 | 
				
			||||||
    const int i = blockIdx.x * blockDim.x + threadIdx.x;
 | 
					    const int i = blockIdx.x * blockDim.x + threadIdx.x;
 | 
				
			||||||
    int nall = atom->Nlocal + atom->Nghost;
 | 
					    int nall = atom->Nlocal + atom->Nghost;
 | 
				
			||||||
@@ -127,7 +126,6 @@ __global__ void binatoms_kernel(Atom a, int* bincount, int* bins, int atoms_per_
 | 
				
			|||||||
    MD_FLOAT y = atom_y(i);
 | 
					    MD_FLOAT y = atom_y(i);
 | 
				
			||||||
    MD_FLOAT z = atom_z(i);
 | 
					    MD_FLOAT z = atom_z(i);
 | 
				
			||||||
    int ibin = coord2bin_device(x, y, z, np);
 | 
					    int ibin = coord2bin_device(x, y, z, np);
 | 
				
			||||||
 | 
					 | 
				
			||||||
    int ac = atomicAdd(&bincount[ibin], 1);
 | 
					    int ac = atomicAdd(&bincount[ibin], 1);
 | 
				
			||||||
            
 | 
					            
 | 
				
			||||||
    if(ac < atoms_per_bin){
 | 
					    if(ac < atoms_per_bin){
 | 
				
			||||||
@@ -138,7 +136,7 @@ __global__ void binatoms_kernel(Atom a, int* bincount, int* bins, int atoms_per_
 | 
				
			|||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
__global__ void compute_neighborhood(Atom a, Neighbor neigh, Neighbor_params np, int nstencil, int* stencil,
 | 
					__global__ void compute_neighborhood(Atom a, Neighbor neigh, Neighbor_params np, int nstencil, int* stencil,
 | 
				
			||||||
                                     int* bins, int atoms_per_bin, int *bincount, int *new_maxneighs, MD_FLOAT cutneighsq){
 | 
					                                     int* bins, int atoms_per_bin, int *bincount, int *new_maxneighs, MD_FLOAT cutneighsq) {
 | 
				
			||||||
    const int i = blockIdx.x * blockDim.x + threadIdx.x;
 | 
					    const int i = blockIdx.x * blockDim.x + threadIdx.x;
 | 
				
			||||||
    const int Nlocal = a.Nlocal;
 | 
					    const int Nlocal = a.Nlocal;
 | 
				
			||||||
    if( i >= Nlocal ) {
 | 
					    if( i >= Nlocal ) {
 | 
				
			||||||
@@ -189,7 +187,6 @@ __global__ void compute_neighborhood(Atom a, Neighbor neigh, Neighbor_params np,
 | 
				
			|||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    neighbor->numneigh[i] = n;
 | 
					    neighbor->numneigh[i] = n;
 | 
				
			||||||
 | 
					 | 
				
			||||||
    if(n > neighbor->maxneighs) {
 | 
					    if(n > neighbor->maxneighs) {
 | 
				
			||||||
        atomicMax(new_maxneighs, n);
 | 
					        atomicMax(new_maxneighs, n);
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
@@ -323,7 +320,7 @@ void buildNeighbor_cuda(Atom *atom, Neighbor *neighbor, Atom *c_atom, Neighbor *
 | 
				
			|||||||
        }
 | 
					        }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
    neighbor->maxneighs = c_neighbor->maxneighs;
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    neighbor->maxneighs = c_neighbor->maxneighs;
 | 
				
			||||||
    cudaProfilerStop();
 | 
					    cudaProfilerStop();
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -34,16 +34,15 @@ extern "C" {
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static int NmaxGhost;
 | 
					extern int NmaxGhost;
 | 
				
			||||||
static int *PBCx, *PBCy, *PBCz;
 | 
					extern int *PBCx, *PBCy, *PBCz;
 | 
				
			||||||
static int c_NmaxGhost = 0;
 | 
					static int c_NmaxGhost;
 | 
				
			||||||
static int *c_PBCx = NULL, *c_PBCy = NULL, *c_PBCz = NULL;
 | 
					static int *c_PBCx, *c_PBCy, *c_PBCz;
 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
__global__ void computeAtomsPbcUpdate(Atom a, MD_FLOAT xprd, MD_FLOAT yprd, MD_FLOAT zprd){
 | 
					__global__ void computeAtomsPbcUpdate(Atom a, MD_FLOAT xprd, MD_FLOAT yprd, MD_FLOAT zprd){
 | 
				
			||||||
    const int i = blockIdx.x * blockDim.x + threadIdx.x;
 | 
					    const int i = blockIdx.x * blockDim.x + threadIdx.x;
 | 
				
			||||||
    Atom* atom = &a;
 | 
					    Atom* atom = &a;
 | 
				
			||||||
    if( i >= atom->Nlocal ){
 | 
					    if(i >= atom->Nlocal) {
 | 
				
			||||||
        return;
 | 
					        return;
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -69,9 +68,10 @@ __global__ void computeAtomsPbcUpdate(Atom a, MD_FLOAT xprd, MD_FLOAT yprd, MD_F
 | 
				
			|||||||
__global__ void computePbcUpdate(Atom a, int* PBCx, int* PBCy, int* PBCz, MD_FLOAT xprd, MD_FLOAT yprd, MD_FLOAT zprd){
 | 
					__global__ void computePbcUpdate(Atom a, int* PBCx, int* PBCy, int* PBCz, MD_FLOAT xprd, MD_FLOAT yprd, MD_FLOAT zprd){
 | 
				
			||||||
    const int i = blockIdx.x * blockDim.x + threadIdx.x;
 | 
					    const int i = blockIdx.x * blockDim.x + threadIdx.x;
 | 
				
			||||||
    const int Nghost = a.Nghost;
 | 
					    const int Nghost = a.Nghost;
 | 
				
			||||||
    if( i >= Nghost ) {
 | 
					    if(i >= Nghost) {
 | 
				
			||||||
        return;
 | 
					        return;
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    Atom* atom = &a;
 | 
					    Atom* atom = &a;
 | 
				
			||||||
    int *border_map = atom->border_map;
 | 
					    int *border_map = atom->border_map;
 | 
				
			||||||
    int nlocal = atom->Nlocal;
 | 
					    int nlocal = atom->Nlocal;
 | 
				
			||||||
@@ -86,7 +86,7 @@ __global__ void computePbcUpdate(Atom a, int* PBCx, int* PBCy, int* PBCz, MD_FLO
 | 
				
			|||||||
void updatePbc_cuda(Atom *atom, Atom *c_atom, Parameter *param, bool doReneighbor) {
 | 
					void updatePbc_cuda(Atom *atom, Atom *c_atom, Parameter *param, bool doReneighbor) {
 | 
				
			||||||
    const int num_threads_per_block = get_num_threads();
 | 
					    const int num_threads_per_block = get_num_threads();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    if (doReneighbor){
 | 
					    if (doReneighbor) {
 | 
				
			||||||
        c_atom->Natoms = atom->Natoms;
 | 
					        c_atom->Natoms = atom->Natoms;
 | 
				
			||||||
        c_atom->Nlocal = atom->Nlocal;
 | 
					        c_atom->Nlocal = atom->Nlocal;
 | 
				
			||||||
        c_atom->Nghost = atom->Nghost;
 | 
					        c_atom->Nghost = atom->Nghost;
 | 
				
			||||||
@@ -146,6 +146,5 @@ void updateAtomsPbc_cuda(Atom* atom, Atom *c_atom, Parameter *param){
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
    checkCUDAError( "PeekAtLastError UpdateAtomsPbc", cudaPeekAtLastError() );
 | 
					    checkCUDAError( "PeekAtLastError UpdateAtomsPbc", cudaPeekAtLastError() );
 | 
				
			||||||
    checkCUDAError( "DeviceSync UpdateAtomsPbc", cudaDeviceSynchronize() );
 | 
					    checkCUDAError( "DeviceSync UpdateAtomsPbc", cudaDeviceSynchronize() );
 | 
				
			||||||
 | 
					 | 
				
			||||||
    checkCUDAError( "updateAtomsPbc position memcpy back", cudaMemcpy(atom->x, c_atom->x, sizeof(MD_FLOAT) * atom->Nlocal * 3, cudaMemcpyDeviceToHost) );
 | 
					    checkCUDAError( "updateAtomsPbc position memcpy back", cudaMemcpy(atom->x, c_atom->x, sizeof(MD_FLOAT) * atom->Nlocal * 3, cudaMemcpyDeviceToHost) );
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -31,21 +31,21 @@
 | 
				
			|||||||
#define SMALL 1.0e-6
 | 
					#define SMALL 1.0e-6
 | 
				
			||||||
#define FACTOR 0.999
 | 
					#define FACTOR 0.999
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static MD_FLOAT xprd, yprd, zprd;
 | 
					MD_FLOAT xprd, yprd, zprd;
 | 
				
			||||||
static MD_FLOAT bininvx, bininvy, bininvz;
 | 
					MD_FLOAT bininvx, bininvy, bininvz;
 | 
				
			||||||
static int mbinxlo, mbinylo, mbinzlo;
 | 
					int mbinxlo, mbinylo, mbinzlo;
 | 
				
			||||||
static int nbinx, nbiny, nbinz;
 | 
					int nbinx, nbiny, nbinz;
 | 
				
			||||||
static int mbinx, mbiny, mbinz; // n bins in x, y, z
 | 
					int mbinx, mbiny, mbinz; // n bins in x, y, z
 | 
				
			||||||
static int *bincount;
 | 
					int *bincount;
 | 
				
			||||||
static int *bins;
 | 
					int *bins;
 | 
				
			||||||
static int mbins; //total number of bins
 | 
					int mbins; //total number of bins
 | 
				
			||||||
static int atoms_per_bin;  // max atoms per bin
 | 
					int atoms_per_bin;  // max atoms per bin
 | 
				
			||||||
static MD_FLOAT cutneigh;
 | 
					MD_FLOAT cutneigh;
 | 
				
			||||||
static MD_FLOAT cutneighsq;  // neighbor cutoff squared
 | 
					MD_FLOAT cutneighsq;  // neighbor cutoff squared
 | 
				
			||||||
static int nmax;
 | 
					int nmax;
 | 
				
			||||||
static int nstencil;      // # of bins in stencil
 | 
					int nstencil;      // # of bins in stencil
 | 
				
			||||||
static int* stencil;      // stencil list of bin offsets
 | 
					int* stencil;      // stencil list of bin offsets
 | 
				
			||||||
static MD_FLOAT binsizex, binsizey, binsizez;
 | 
					MD_FLOAT binsizex, binsizey, binsizez;
 | 
				
			||||||
static int coord2bin(MD_FLOAT, MD_FLOAT , MD_FLOAT);
 | 
					static int coord2bin(MD_FLOAT, MD_FLOAT , MD_FLOAT);
 | 
				
			||||||
static MD_FLOAT bindist(int, int, int);
 | 
					static MD_FLOAT bindist(int, int, int);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -30,8 +30,8 @@
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
#define DELTA 20000
 | 
					#define DELTA 20000
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static int NmaxGhost;
 | 
					int NmaxGhost;
 | 
				
			||||||
static int *PBCx, *PBCy, *PBCz;
 | 
					int *PBCx, *PBCy, *PBCz;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static void growPbc(Atom*);
 | 
					static void growPbc(Atom*);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -66,7 +66,6 @@ void updateAtomsPbc_cpu(Atom *atom, Atom *c_atom, Parameter *param) {
 | 
				
			|||||||
    MD_FLOAT zprd = param->zprd;
 | 
					    MD_FLOAT zprd = param->zprd;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    for(int i = 0; i < atom->Nlocal; i++) {
 | 
					    for(int i = 0; i < atom->Nlocal; i++) {
 | 
				
			||||||
 | 
					 | 
				
			||||||
        if(atom_x(i) < 0.0) {
 | 
					        if(atom_x(i) < 0.0) {
 | 
				
			||||||
            atom_x(i) += xprd;
 | 
					            atom_x(i) += xprd;
 | 
				
			||||||
        } else if(atom_x(i) >= xprd) {
 | 
					        } else if(atom_x(i) >= xprd) {
 | 
				
			||||||
@@ -177,8 +176,7 @@ void setupPbc(Atom *atom, Parameter *param) {
 | 
				
			|||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
/* internal subroutines */
 | 
					/* internal subroutines */
 | 
				
			||||||
void growPbc(Atom* atom)
 | 
					void growPbc(Atom* atom) {
 | 
				
			||||||
{
 | 
					 | 
				
			||||||
    int nold = NmaxGhost;
 | 
					    int nold = NmaxGhost;
 | 
				
			||||||
    NmaxGhost += DELTA;
 | 
					    NmaxGhost += DELTA;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user