diff --git a/lammps/allocate.c b/lammps/allocate.c index a690deb..4c64794 100644 --- a/lammps/allocate.c +++ b/lammps/allocate.c @@ -27,8 +27,8 @@ #include void *allocate(int alignment, size_t bytesize) { + void *ptr; int errorCode; - void* ptr; errorCode = posix_memalign(&ptr, alignment, bytesize); if(errorCode == EINVAL) { @@ -58,52 +58,3 @@ void *reallocate(void* ptr, int alignment, size_t new_bytesize, size_t old_bytes return newarray; } - -#ifndef CUDA_TARGET -void *allocateGPU(size_t bytesize) { return NULL; } -void *reallocateGPU(void *ptr, size_t new_bytesize) { return NULL; } -void memcpyToGPU(void *d_ptr, void *h_ptr, size_t bytesize) {} -void memcpyFromGPU(void *h_ptr, void *d_ptr, size_t bytesize) {} -void memsetGPU(void *d_ptr, int value, size_t bytesize) {} -#else -#include -#include -void *allocateGPU(size_t bytesize) { - void *ptr; - #ifdef CUDA_HOST_MEMORY - cuda_assert("allocateGPU", cudaMallocHost((void **) &ptr, bytesize)); - #else - cuda_assert("allocateGPU", cudaMalloc((void **) &ptr, bytesize)); - #endif - return ptr; -} - -// Data is not preserved -void *reallocateGPU(void *ptr, size_t new_bytesize) { - if(ptr != NULL) { - #ifdef CUDA_HOST_MEMORY - cudaFreeHost(ptr); - #else - cudaFree(ptr); - #endif - } - - return allocateGPU(new_bytesize); -} - -void memcpyToGPU(void *d_ptr, void *h_ptr, size_t bytesize) { - #ifndef CUDA_HOST_MEMORY - cuda_assert("memcpyToGPU", cudaMemcpy(d_ptr, h_ptr, bytesize, cudaMemcpyHostToDevice)); - #endif -} - -void memcpyFromGPU(void *h_ptr, void *d_ptr, size_t bytesize) { - #ifndef CUDA_HOST_MEMORY - cuda_assert("memcpyFromGPU", cudaMemcpy(h_ptr, d_ptr, bytesize, cudaMemcpyDeviceToHost)); - #endif -} - -void memsetGPU(void *d_ptr, int value, size_t bytesize) { - cuda_assert("memsetGPU", cudaMemset(d_ptr, value, bytesize)); -} -#endif diff --git a/lammps/atom.c b/lammps/atom.c index ce133b8..0e0a3fe 100644 --- a/lammps/atom.c +++ b/lammps/atom.c @@ -29,12 +29,9 @@ #include #include +#include #include -#ifdef CUDA_TARGET -#include -#endif - #define DELTA 20000 #ifndef MAXLINE diff --git a/lammps/cuda/atom.cu b/lammps/cuda.c similarity index 61% rename from lammps/cuda/atom.cu rename to lammps/cuda.c index 7a9d4d3..d37885f 100644 --- a/lammps/cuda/atom.cu +++ b/lammps/cuda.c @@ -20,34 +20,13 @@ * with MD-Bench. If not, see . * ======================================================================================= */ - -extern "C" { - #include -#include +#include //--- -#include -#include -#include -#include +#include -void initCuda(Atom *atom, Neighbor *neighbor) { - DeviceAtom *d_atom = &(atom->d_atom); - DeviceNeighbor *d_neighbor = &(neighbor->d_neighbor); - - d_atom->epsilon = (MD_FLOAT *) allocateGPU(sizeof(MD_FLOAT) * atom->ntypes * atom->ntypes); - d_atom->sigma6 = (MD_FLOAT *) allocateGPU(sizeof(MD_FLOAT) * atom->ntypes * atom->ntypes); - d_atom->cutforcesq = (MD_FLOAT *) allocateGPU(sizeof(MD_FLOAT) * atom->ntypes * atom->ntypes); - d_neighbor->neighbors = (int *) allocateGPU(sizeof(int) * atom->Nmax * neighbor->maxneighs); - d_neighbor->numneigh = (int *) allocateGPU(sizeof(int) * atom->Nmax); - - memcpyToGPU(d_atom->x, atom->x, sizeof(MD_FLOAT) * atom->Nmax * 3); - memcpyToGPU(d_atom->vx, atom->vx, sizeof(MD_FLOAT) * atom->Nmax * 3); - memcpyToGPU(d_atom->sigma6, atom->sigma6, sizeof(MD_FLOAT) * atom->ntypes * atom->ntypes); - memcpyToGPU(d_atom->epsilon, atom->epsilon, sizeof(MD_FLOAT) * atom->ntypes * atom->ntypes); - memcpyToGPU(d_atom->cutforcesq, atom->cutforcesq, sizeof(MD_FLOAT) * atom->ntypes * atom->ntypes); - memcpyToGPU(d_atom->type, atom->type, sizeof(int) * atom->Nmax); -} +#ifdef CUDA_TARGET +#include void cuda_assert(const char *label, cudaError_t err) { if (err != cudaSuccess) { @@ -56,4 +35,68 @@ void cuda_assert(const char *label, cudaError_t err) { } } +void *allocateGPU(size_t bytesize) { + void *ptr; + #ifdef CUDA_HOST_MEMORY + cuda_assert("allocateGPU", cudaMallocHost((void **) &ptr, bytesize)); + #else + cuda_assert("allocateGPU", cudaMalloc((void **) &ptr, bytesize)); + #endif + return ptr; } + +// Data is not preserved +void *reallocateGPU(void *ptr, size_t new_bytesize) { + if(ptr != NULL) { + #ifdef CUDA_HOST_MEMORY + cudaFreeHost(ptr); + #else + cudaFree(ptr); + #endif + } + + return allocateGPU(new_bytesize); +} + +void memcpyToGPU(void *d_ptr, void *h_ptr, size_t bytesize) { + #ifndef CUDA_HOST_MEMORY + cuda_assert("memcpyToGPU", cudaMemcpy(d_ptr, h_ptr, bytesize, cudaMemcpyHostToDevice)); + #endif +} + +void memcpyFromGPU(void *h_ptr, void *d_ptr, size_t bytesize) { + #ifndef CUDA_HOST_MEMORY + cuda_assert("memcpyFromGPU", cudaMemcpy(h_ptr, d_ptr, bytesize, cudaMemcpyDeviceToHost)); + #endif +} + +void memsetGPU(void *d_ptr, int value, size_t bytesize) { + cuda_assert("memsetGPU", cudaMemset(d_ptr, value, bytesize)); +} + +void initDevice(Atom *atom, Neighbor *neighbor) { + DeviceAtom *d_atom = &(atom->d_atom); + DeviceNeighbor *d_neighbor = &(neighbor->d_neighbor); + + d_atom->epsilon = (MD_FLOAT *) allocateGPU(sizeof(MD_FLOAT) * atom->ntypes * atom->ntypes); + d_atom->sigma6 = (MD_FLOAT *) allocateGPU(sizeof(MD_FLOAT) * atom->ntypes * atom->ntypes); + d_atom->cutforcesq = (MD_FLOAT *) allocateGPU(sizeof(MD_FLOAT) * atom->ntypes * atom->ntypes); + d_neighbor->neighbors = (int *) allocateGPU(sizeof(int) * atom->Nmax * neighbor->maxneighs); + d_neighbor->numneigh = (int *) allocateGPU(sizeof(int) * atom->Nmax); + + memcpyToGPU(d_atom->x, atom->x, sizeof(MD_FLOAT) * atom->Nmax * 3); + memcpyToGPU(d_atom->vx, atom->vx, sizeof(MD_FLOAT) * atom->Nmax * 3); + memcpyToGPU(d_atom->sigma6, atom->sigma6, sizeof(MD_FLOAT) * atom->ntypes * atom->ntypes); + memcpyToGPU(d_atom->epsilon, atom->epsilon, sizeof(MD_FLOAT) * atom->ntypes * atom->ntypes); + memcpyToGPU(d_atom->cutforcesq, atom->cutforcesq, sizeof(MD_FLOAT) * atom->ntypes * atom->ntypes); + memcpyToGPU(d_atom->type, atom->type, sizeof(int) * atom->Nmax); +} + +#else +void initDevice(Atom *atom, Neighbor *neighbor) {} +void *allocateGPU(size_t bytesize) { return NULL; } +void *reallocateGPU(void *ptr, size_t new_bytesize) { return NULL; } +void memcpyToGPU(void *d_ptr, void *h_ptr, size_t bytesize) {} +void memcpyFromGPU(void *h_ptr, void *d_ptr, size_t bytesize) {} +void memsetGPU(void *d_ptr, int value, size_t bytesize) {} +#endif diff --git a/lammps/cuda/force.cu b/lammps/cuda/force.cu index bdd55da..3d5ad2d 100644 --- a/lammps/cuda/force.cu +++ b/lammps/cuda/force.cu @@ -35,8 +35,8 @@ extern "C" { #include #include -#include #include +#include #include #include #include diff --git a/lammps/cuda/neighbor.cu b/lammps/cuda/neighbor.cu index ad73d77..4a033e5 100644 --- a/lammps/cuda/neighbor.cu +++ b/lammps/cuda/neighbor.cu @@ -31,7 +31,7 @@ extern "C" { #include -#include +#include #include #include #include diff --git a/lammps/cuda/pbc.cu b/lammps/cuda/pbc.cu index 90e4c7f..f413ddd 100644 --- a/lammps/cuda/pbc.cu +++ b/lammps/cuda/pbc.cu @@ -28,7 +28,7 @@ extern "C" { #include #include -#include +#include #include #include diff --git a/lammps/includes/cuda_atom.h b/lammps/includes/device.h similarity index 79% rename from lammps/includes/cuda_atom.h rename to lammps/includes/device.h index 9f986bb..e3e5a6a 100644 --- a/lammps/includes/cuda_atom.h +++ b/lammps/includes/device.h @@ -1,12 +1,15 @@ -#include -//--- #include #include -#ifndef __CUDA_ATOM_H_ -#define __CUDA_ATOM_H_ -extern void initCuda(Atom*, Neighbor*); +#ifndef __DEVICE_H_ +#define __DEVICE_H_ + +#ifdef CUDA_TARGET +#include extern void cuda_assert(const char *msg, cudaError_t err); +#endif + +extern void initDevice(Atom*, Neighbor*); extern void *allocateGPU(size_t bytesize); extern void *reallocateGPU(void *ptr, size_t new_bytesize); extern void memcpyToGPU(void *d_ptr, void *h_ptr, size_t bytesize); diff --git a/lammps/includes/integrate.h b/lammps/includes/integrate.h index 42d62cf..a0e0301 100644 --- a/lammps/includes/integrate.h +++ b/lammps/includes/integrate.h @@ -20,6 +20,8 @@ * with MD-Bench. If not, see . * ======================================================================================= */ +#include +//--- #include #include diff --git a/lammps/main.c b/lammps/main.c index 7de2312..465f7a2 100644 --- a/lammps/main.c +++ b/lammps/main.c @@ -30,19 +30,20 @@ #include -#include #include +#include +#include +#include +#include +#include +#include #include #include -#include -#include -#include #include +#include #include -#include -#include #include -#include +#include #define HLINE "----------------------------------------------------------------------------\n" @@ -53,7 +54,6 @@ extern double computeForceEam(Eam*, Parameter*, Atom*, Neighbor*, Stats*); extern double computeForceDemFullNeigh(Parameter*, Atom*, Neighbor*, Stats*); #ifdef CUDA_TARGET -#include extern double computeForceLJFullNeigh_cuda(Parameter*, Atom*, Neighbor*); #endif @@ -80,9 +80,7 @@ double setup(Parameter *param, Eam *eam, Atom *atom, Neighbor *neighbor, Stats * setupThermo(param, atom->Natoms); if(param->input_file == NULL) { adjustThermo(param, atom); } setupPbc(atom, param); - #ifdef CUDA_TARGET - initCuda(atom, neighbor); - #endif + initDevice(atom, neighbor); updatePbc(atom, param, true); buildNeighbor(atom, neighbor); E = getTimeStamp();