Adjust file structure for CUDA

Signed-off-by: Rafael Ravedutti <rafaelravedutti@gmail.com>
This commit is contained in:
Rafael Ravedutti 2022-08-12 18:12:29 +02:00
parent 939197a785
commit 90609a2b5f
9 changed files with 92 additions and 98 deletions

View File

@ -27,8 +27,8 @@
#include <util.h> #include <util.h>
void *allocate(int alignment, size_t bytesize) { void *allocate(int alignment, size_t bytesize) {
void *ptr;
int errorCode; int errorCode;
void* ptr;
errorCode = posix_memalign(&ptr, alignment, bytesize); errorCode = posix_memalign(&ptr, alignment, bytesize);
if(errorCode == EINVAL) { if(errorCode == EINVAL) {
@ -58,52 +58,3 @@ void *reallocate(void* ptr, int alignment, size_t new_bytesize, size_t old_bytes
return newarray; return newarray;
} }
#ifndef CUDA_TARGET
void *allocateGPU(size_t bytesize) { return NULL; }
void *reallocateGPU(void *ptr, size_t new_bytesize) { return NULL; }
void memcpyToGPU(void *d_ptr, void *h_ptr, size_t bytesize) {}
void memcpyFromGPU(void *h_ptr, void *d_ptr, size_t bytesize) {}
void memsetGPU(void *d_ptr, int value, size_t bytesize) {}
#else
#include <cuda_runtime.h>
#include <cuda_atom.h>
void *allocateGPU(size_t bytesize) {
void *ptr;
#ifdef CUDA_HOST_MEMORY
cuda_assert("allocateGPU", cudaMallocHost((void **) &ptr, bytesize));
#else
cuda_assert("allocateGPU", cudaMalloc((void **) &ptr, bytesize));
#endif
return ptr;
}
// Data is not preserved
void *reallocateGPU(void *ptr, size_t new_bytesize) {
if(ptr != NULL) {
#ifdef CUDA_HOST_MEMORY
cudaFreeHost(ptr);
#else
cudaFree(ptr);
#endif
}
return allocateGPU(new_bytesize);
}
void memcpyToGPU(void *d_ptr, void *h_ptr, size_t bytesize) {
#ifndef CUDA_HOST_MEMORY
cuda_assert("memcpyToGPU", cudaMemcpy(d_ptr, h_ptr, bytesize, cudaMemcpyHostToDevice));
#endif
}
void memcpyFromGPU(void *h_ptr, void *d_ptr, size_t bytesize) {
#ifndef CUDA_HOST_MEMORY
cuda_assert("memcpyFromGPU", cudaMemcpy(h_ptr, d_ptr, bytesize, cudaMemcpyDeviceToHost));
#endif
}
void memsetGPU(void *d_ptr, int value, size_t bytesize) {
cuda_assert("memsetGPU", cudaMemset(d_ptr, value, bytesize));
}
#endif

View File

@ -29,12 +29,9 @@
#include <atom.h> #include <atom.h>
#include <allocate.h> #include <allocate.h>
#include <device.h>
#include <util.h> #include <util.h>
#ifdef CUDA_TARGET
#include <cuda_atom.h>
#endif
#define DELTA 20000 #define DELTA 20000
#ifndef MAXLINE #ifndef MAXLINE

View File

@ -20,34 +20,13 @@
* with MD-Bench. If not, see <https://www.gnu.org/licenses/>. * with MD-Bench. If not, see <https://www.gnu.org/licenses/>.
* ======================================================================================= * =======================================================================================
*/ */
extern "C" {
#include <stdio.h> #include <stdio.h>
#include <cuda_runtime.h> #include <stdlib.h>
//--- //---
#include <allocate.h> #include <device.h>
#include <atom.h>
#include <cuda_atom.h>
#include <neighbor.h>
void initCuda(Atom *atom, Neighbor *neighbor) { #ifdef CUDA_TARGET
DeviceAtom *d_atom = &(atom->d_atom); #include <cuda_runtime.h>
DeviceNeighbor *d_neighbor = &(neighbor->d_neighbor);
d_atom->epsilon = (MD_FLOAT *) allocateGPU(sizeof(MD_FLOAT) * atom->ntypes * atom->ntypes);
d_atom->sigma6 = (MD_FLOAT *) allocateGPU(sizeof(MD_FLOAT) * atom->ntypes * atom->ntypes);
d_atom->cutforcesq = (MD_FLOAT *) allocateGPU(sizeof(MD_FLOAT) * atom->ntypes * atom->ntypes);
d_neighbor->neighbors = (int *) allocateGPU(sizeof(int) * atom->Nmax * neighbor->maxneighs);
d_neighbor->numneigh = (int *) allocateGPU(sizeof(int) * atom->Nmax);
memcpyToGPU(d_atom->x, atom->x, sizeof(MD_FLOAT) * atom->Nmax * 3);
memcpyToGPU(d_atom->vx, atom->vx, sizeof(MD_FLOAT) * atom->Nmax * 3);
memcpyToGPU(d_atom->sigma6, atom->sigma6, sizeof(MD_FLOAT) * atom->ntypes * atom->ntypes);
memcpyToGPU(d_atom->epsilon, atom->epsilon, sizeof(MD_FLOAT) * atom->ntypes * atom->ntypes);
memcpyToGPU(d_atom->cutforcesq, atom->cutforcesq, sizeof(MD_FLOAT) * atom->ntypes * atom->ntypes);
memcpyToGPU(d_atom->type, atom->type, sizeof(int) * atom->Nmax);
}
void cuda_assert(const char *label, cudaError_t err) { void cuda_assert(const char *label, cudaError_t err) {
if (err != cudaSuccess) { if (err != cudaSuccess) {
@ -56,4 +35,68 @@ void cuda_assert(const char *label, cudaError_t err) {
} }
} }
void *allocateGPU(size_t bytesize) {
void *ptr;
#ifdef CUDA_HOST_MEMORY
cuda_assert("allocateGPU", cudaMallocHost((void **) &ptr, bytesize));
#else
cuda_assert("allocateGPU", cudaMalloc((void **) &ptr, bytesize));
#endif
return ptr;
} }
// Data is not preserved
void *reallocateGPU(void *ptr, size_t new_bytesize) {
if(ptr != NULL) {
#ifdef CUDA_HOST_MEMORY
cudaFreeHost(ptr);
#else
cudaFree(ptr);
#endif
}
return allocateGPU(new_bytesize);
}
void memcpyToGPU(void *d_ptr, void *h_ptr, size_t bytesize) {
#ifndef CUDA_HOST_MEMORY
cuda_assert("memcpyToGPU", cudaMemcpy(d_ptr, h_ptr, bytesize, cudaMemcpyHostToDevice));
#endif
}
void memcpyFromGPU(void *h_ptr, void *d_ptr, size_t bytesize) {
#ifndef CUDA_HOST_MEMORY
cuda_assert("memcpyFromGPU", cudaMemcpy(h_ptr, d_ptr, bytesize, cudaMemcpyDeviceToHost));
#endif
}
void memsetGPU(void *d_ptr, int value, size_t bytesize) {
cuda_assert("memsetGPU", cudaMemset(d_ptr, value, bytesize));
}
void initDevice(Atom *atom, Neighbor *neighbor) {
DeviceAtom *d_atom = &(atom->d_atom);
DeviceNeighbor *d_neighbor = &(neighbor->d_neighbor);
d_atom->epsilon = (MD_FLOAT *) allocateGPU(sizeof(MD_FLOAT) * atom->ntypes * atom->ntypes);
d_atom->sigma6 = (MD_FLOAT *) allocateGPU(sizeof(MD_FLOAT) * atom->ntypes * atom->ntypes);
d_atom->cutforcesq = (MD_FLOAT *) allocateGPU(sizeof(MD_FLOAT) * atom->ntypes * atom->ntypes);
d_neighbor->neighbors = (int *) allocateGPU(sizeof(int) * atom->Nmax * neighbor->maxneighs);
d_neighbor->numneigh = (int *) allocateGPU(sizeof(int) * atom->Nmax);
memcpyToGPU(d_atom->x, atom->x, sizeof(MD_FLOAT) * atom->Nmax * 3);
memcpyToGPU(d_atom->vx, atom->vx, sizeof(MD_FLOAT) * atom->Nmax * 3);
memcpyToGPU(d_atom->sigma6, atom->sigma6, sizeof(MD_FLOAT) * atom->ntypes * atom->ntypes);
memcpyToGPU(d_atom->epsilon, atom->epsilon, sizeof(MD_FLOAT) * atom->ntypes * atom->ntypes);
memcpyToGPU(d_atom->cutforcesq, atom->cutforcesq, sizeof(MD_FLOAT) * atom->ntypes * atom->ntypes);
memcpyToGPU(d_atom->type, atom->type, sizeof(int) * atom->Nmax);
}
#else
void initDevice(Atom *atom, Neighbor *neighbor) {}
void *allocateGPU(size_t bytesize) { return NULL; }
void *reallocateGPU(void *ptr, size_t new_bytesize) { return NULL; }
void memcpyToGPU(void *d_ptr, void *h_ptr, size_t bytesize) {}
void memcpyFromGPU(void *h_ptr, void *d_ptr, size_t bytesize) {}
void memsetGPU(void *d_ptr, int value, size_t bytesize) {}
#endif

View File

@ -35,8 +35,8 @@ extern "C" {
#include <allocate.h> #include <allocate.h>
#include <atom.h> #include <atom.h>
#include <cuda_atom.h>
#include <allocate.h> #include <allocate.h>
#include <device.h>
#include <neighbor.h> #include <neighbor.h>
#include <parameter.h> #include <parameter.h>
#include <timing.h> #include <timing.h>

View File

@ -31,7 +31,7 @@
extern "C" { extern "C" {
#include <atom.h> #include <atom.h>
#include <cuda_atom.h> #include <device.h>
#include <parameter.h> #include <parameter.h>
#include <neighbor.h> #include <neighbor.h>
#include <util.h> #include <util.h>

View File

@ -28,7 +28,7 @@ extern "C" {
#include <allocate.h> #include <allocate.h>
#include <atom.h> #include <atom.h>
#include <cuda_atom.h> #include <device.h>
#include <pbc.h> #include <pbc.h>
#include <util.h> #include <util.h>

View File

@ -1,12 +1,15 @@
#include <cuda_runtime.h>
//---
#include <atom.h> #include <atom.h>
#include <neighbor.h> #include <neighbor.h>
#ifndef __CUDA_ATOM_H_ #ifndef __DEVICE_H_
#define __CUDA_ATOM_H_ #define __DEVICE_H_
extern void initCuda(Atom*, Neighbor*);
#ifdef CUDA_TARGET
#include <cuda_runtime.h>
extern void cuda_assert(const char *msg, cudaError_t err); extern void cuda_assert(const char *msg, cudaError_t err);
#endif
extern void initDevice(Atom*, Neighbor*);
extern void *allocateGPU(size_t bytesize); extern void *allocateGPU(size_t bytesize);
extern void *reallocateGPU(void *ptr, size_t new_bytesize); extern void *reallocateGPU(void *ptr, size_t new_bytesize);
extern void memcpyToGPU(void *d_ptr, void *h_ptr, size_t bytesize); extern void memcpyToGPU(void *d_ptr, void *h_ptr, size_t bytesize);

View File

@ -20,6 +20,8 @@
* with MD-Bench. If not, see <https://www.gnu.org/licenses/>. * with MD-Bench. If not, see <https://www.gnu.org/licenses/>.
* ======================================================================================= * =======================================================================================
*/ */
#include <stdbool.h>
//---
#include <parameter.h> #include <parameter.h>
#include <atom.h> #include <atom.h>

View File

@ -30,19 +30,20 @@
#include <likwid-marker.h> #include <likwid-marker.h>
#include <timing.h>
#include <allocate.h> #include <allocate.h>
#include <atom.h>
#include <device.h>
#include <eam.h>
#include <integrate.h>
#include <thermo.h>
#include <timing.h>
#include <neighbor.h> #include <neighbor.h>
#include <parameter.h> #include <parameter.h>
#include <atom.h>
#include <stats.h>
#include <thermo.h>
#include <pbc.h> #include <pbc.h>
#include <stats.h>
#include <timers.h> #include <timers.h>
#include <eam.h>
#include <vtk.h>
#include <util.h> #include <util.h>
#include <integrate.h> #include <vtk.h>
#define HLINE "----------------------------------------------------------------------------\n" #define HLINE "----------------------------------------------------------------------------\n"
@ -53,7 +54,6 @@ extern double computeForceEam(Eam*, Parameter*, Atom*, Neighbor*, Stats*);
extern double computeForceDemFullNeigh(Parameter*, Atom*, Neighbor*, Stats*); extern double computeForceDemFullNeigh(Parameter*, Atom*, Neighbor*, Stats*);
#ifdef CUDA_TARGET #ifdef CUDA_TARGET
#include <cuda_atom.h>
extern double computeForceLJFullNeigh_cuda(Parameter*, Atom*, Neighbor*); extern double computeForceLJFullNeigh_cuda(Parameter*, Atom*, Neighbor*);
#endif #endif
@ -80,9 +80,7 @@ double setup(Parameter *param, Eam *eam, Atom *atom, Neighbor *neighbor, Stats *
setupThermo(param, atom->Natoms); setupThermo(param, atom->Natoms);
if(param->input_file == NULL) { adjustThermo(param, atom); } if(param->input_file == NULL) { adjustThermo(param, atom); }
setupPbc(atom, param); setupPbc(atom, param);
#ifdef CUDA_TARGET initDevice(atom, neighbor);
initCuda(atom, neighbor);
#endif
updatePbc(atom, param, true); updatePbc(atom, param, true);
buildNeighbor(atom, neighbor); buildNeighbor(atom, neighbor);
E = getTimeStamp(); E = getTimeStamp();