Adjust file structure for CUDA
Signed-off-by: Rafael Ravedutti <rafaelravedutti@gmail.com>
This commit is contained in:
parent
939197a785
commit
90609a2b5f
@ -27,8 +27,8 @@
|
||||
#include <util.h>
|
||||
|
||||
void *allocate(int alignment, size_t bytesize) {
|
||||
void *ptr;
|
||||
int errorCode;
|
||||
void* ptr;
|
||||
|
||||
errorCode = posix_memalign(&ptr, alignment, bytesize);
|
||||
if(errorCode == EINVAL) {
|
||||
@ -58,52 +58,3 @@ void *reallocate(void* ptr, int alignment, size_t new_bytesize, size_t old_bytes
|
||||
|
||||
return newarray;
|
||||
}
|
||||
|
||||
#ifndef CUDA_TARGET
|
||||
void *allocateGPU(size_t bytesize) { return NULL; }
|
||||
void *reallocateGPU(void *ptr, size_t new_bytesize) { return NULL; }
|
||||
void memcpyToGPU(void *d_ptr, void *h_ptr, size_t bytesize) {}
|
||||
void memcpyFromGPU(void *h_ptr, void *d_ptr, size_t bytesize) {}
|
||||
void memsetGPU(void *d_ptr, int value, size_t bytesize) {}
|
||||
#else
|
||||
#include <cuda_runtime.h>
|
||||
#include <cuda_atom.h>
|
||||
void *allocateGPU(size_t bytesize) {
|
||||
void *ptr;
|
||||
#ifdef CUDA_HOST_MEMORY
|
||||
cuda_assert("allocateGPU", cudaMallocHost((void **) &ptr, bytesize));
|
||||
#else
|
||||
cuda_assert("allocateGPU", cudaMalloc((void **) &ptr, bytesize));
|
||||
#endif
|
||||
return ptr;
|
||||
}
|
||||
|
||||
// Data is not preserved
|
||||
void *reallocateGPU(void *ptr, size_t new_bytesize) {
|
||||
if(ptr != NULL) {
|
||||
#ifdef CUDA_HOST_MEMORY
|
||||
cudaFreeHost(ptr);
|
||||
#else
|
||||
cudaFree(ptr);
|
||||
#endif
|
||||
}
|
||||
|
||||
return allocateGPU(new_bytesize);
|
||||
}
|
||||
|
||||
void memcpyToGPU(void *d_ptr, void *h_ptr, size_t bytesize) {
|
||||
#ifndef CUDA_HOST_MEMORY
|
||||
cuda_assert("memcpyToGPU", cudaMemcpy(d_ptr, h_ptr, bytesize, cudaMemcpyHostToDevice));
|
||||
#endif
|
||||
}
|
||||
|
||||
void memcpyFromGPU(void *h_ptr, void *d_ptr, size_t bytesize) {
|
||||
#ifndef CUDA_HOST_MEMORY
|
||||
cuda_assert("memcpyFromGPU", cudaMemcpy(h_ptr, d_ptr, bytesize, cudaMemcpyDeviceToHost));
|
||||
#endif
|
||||
}
|
||||
|
||||
void memsetGPU(void *d_ptr, int value, size_t bytesize) {
|
||||
cuda_assert("memsetGPU", cudaMemset(d_ptr, value, bytesize));
|
||||
}
|
||||
#endif
|
||||
|
@ -29,12 +29,9 @@
|
||||
|
||||
#include <atom.h>
|
||||
#include <allocate.h>
|
||||
#include <device.h>
|
||||
#include <util.h>
|
||||
|
||||
#ifdef CUDA_TARGET
|
||||
#include <cuda_atom.h>
|
||||
#endif
|
||||
|
||||
#define DELTA 20000
|
||||
|
||||
#ifndef MAXLINE
|
||||
|
@ -20,34 +20,13 @@
|
||||
* with MD-Bench. If not, see <https://www.gnu.org/licenses/>.
|
||||
* =======================================================================================
|
||||
*/
|
||||
|
||||
extern "C" {
|
||||
|
||||
#include <stdio.h>
|
||||
#include <cuda_runtime.h>
|
||||
#include <stdlib.h>
|
||||
//---
|
||||
#include <allocate.h>
|
||||
#include <atom.h>
|
||||
#include <cuda_atom.h>
|
||||
#include <neighbor.h>
|
||||
#include <device.h>
|
||||
|
||||
void initCuda(Atom *atom, Neighbor *neighbor) {
|
||||
DeviceAtom *d_atom = &(atom->d_atom);
|
||||
DeviceNeighbor *d_neighbor = &(neighbor->d_neighbor);
|
||||
|
||||
d_atom->epsilon = (MD_FLOAT *) allocateGPU(sizeof(MD_FLOAT) * atom->ntypes * atom->ntypes);
|
||||
d_atom->sigma6 = (MD_FLOAT *) allocateGPU(sizeof(MD_FLOAT) * atom->ntypes * atom->ntypes);
|
||||
d_atom->cutforcesq = (MD_FLOAT *) allocateGPU(sizeof(MD_FLOAT) * atom->ntypes * atom->ntypes);
|
||||
d_neighbor->neighbors = (int *) allocateGPU(sizeof(int) * atom->Nmax * neighbor->maxneighs);
|
||||
d_neighbor->numneigh = (int *) allocateGPU(sizeof(int) * atom->Nmax);
|
||||
|
||||
memcpyToGPU(d_atom->x, atom->x, sizeof(MD_FLOAT) * atom->Nmax * 3);
|
||||
memcpyToGPU(d_atom->vx, atom->vx, sizeof(MD_FLOAT) * atom->Nmax * 3);
|
||||
memcpyToGPU(d_atom->sigma6, atom->sigma6, sizeof(MD_FLOAT) * atom->ntypes * atom->ntypes);
|
||||
memcpyToGPU(d_atom->epsilon, atom->epsilon, sizeof(MD_FLOAT) * atom->ntypes * atom->ntypes);
|
||||
memcpyToGPU(d_atom->cutforcesq, atom->cutforcesq, sizeof(MD_FLOAT) * atom->ntypes * atom->ntypes);
|
||||
memcpyToGPU(d_atom->type, atom->type, sizeof(int) * atom->Nmax);
|
||||
}
|
||||
#ifdef CUDA_TARGET
|
||||
#include <cuda_runtime.h>
|
||||
|
||||
void cuda_assert(const char *label, cudaError_t err) {
|
||||
if (err != cudaSuccess) {
|
||||
@ -56,4 +35,68 @@ void cuda_assert(const char *label, cudaError_t err) {
|
||||
}
|
||||
}
|
||||
|
||||
void *allocateGPU(size_t bytesize) {
|
||||
void *ptr;
|
||||
#ifdef CUDA_HOST_MEMORY
|
||||
cuda_assert("allocateGPU", cudaMallocHost((void **) &ptr, bytesize));
|
||||
#else
|
||||
cuda_assert("allocateGPU", cudaMalloc((void **) &ptr, bytesize));
|
||||
#endif
|
||||
return ptr;
|
||||
}
|
||||
|
||||
// Data is not preserved
|
||||
void *reallocateGPU(void *ptr, size_t new_bytesize) {
|
||||
if(ptr != NULL) {
|
||||
#ifdef CUDA_HOST_MEMORY
|
||||
cudaFreeHost(ptr);
|
||||
#else
|
||||
cudaFree(ptr);
|
||||
#endif
|
||||
}
|
||||
|
||||
return allocateGPU(new_bytesize);
|
||||
}
|
||||
|
||||
void memcpyToGPU(void *d_ptr, void *h_ptr, size_t bytesize) {
|
||||
#ifndef CUDA_HOST_MEMORY
|
||||
cuda_assert("memcpyToGPU", cudaMemcpy(d_ptr, h_ptr, bytesize, cudaMemcpyHostToDevice));
|
||||
#endif
|
||||
}
|
||||
|
||||
void memcpyFromGPU(void *h_ptr, void *d_ptr, size_t bytesize) {
|
||||
#ifndef CUDA_HOST_MEMORY
|
||||
cuda_assert("memcpyFromGPU", cudaMemcpy(h_ptr, d_ptr, bytesize, cudaMemcpyDeviceToHost));
|
||||
#endif
|
||||
}
|
||||
|
||||
void memsetGPU(void *d_ptr, int value, size_t bytesize) {
|
||||
cuda_assert("memsetGPU", cudaMemset(d_ptr, value, bytesize));
|
||||
}
|
||||
|
||||
void initDevice(Atom *atom, Neighbor *neighbor) {
|
||||
DeviceAtom *d_atom = &(atom->d_atom);
|
||||
DeviceNeighbor *d_neighbor = &(neighbor->d_neighbor);
|
||||
|
||||
d_atom->epsilon = (MD_FLOAT *) allocateGPU(sizeof(MD_FLOAT) * atom->ntypes * atom->ntypes);
|
||||
d_atom->sigma6 = (MD_FLOAT *) allocateGPU(sizeof(MD_FLOAT) * atom->ntypes * atom->ntypes);
|
||||
d_atom->cutforcesq = (MD_FLOAT *) allocateGPU(sizeof(MD_FLOAT) * atom->ntypes * atom->ntypes);
|
||||
d_neighbor->neighbors = (int *) allocateGPU(sizeof(int) * atom->Nmax * neighbor->maxneighs);
|
||||
d_neighbor->numneigh = (int *) allocateGPU(sizeof(int) * atom->Nmax);
|
||||
|
||||
memcpyToGPU(d_atom->x, atom->x, sizeof(MD_FLOAT) * atom->Nmax * 3);
|
||||
memcpyToGPU(d_atom->vx, atom->vx, sizeof(MD_FLOAT) * atom->Nmax * 3);
|
||||
memcpyToGPU(d_atom->sigma6, atom->sigma6, sizeof(MD_FLOAT) * atom->ntypes * atom->ntypes);
|
||||
memcpyToGPU(d_atom->epsilon, atom->epsilon, sizeof(MD_FLOAT) * atom->ntypes * atom->ntypes);
|
||||
memcpyToGPU(d_atom->cutforcesq, atom->cutforcesq, sizeof(MD_FLOAT) * atom->ntypes * atom->ntypes);
|
||||
memcpyToGPU(d_atom->type, atom->type, sizeof(int) * atom->Nmax);
|
||||
}
|
||||
|
||||
#else
|
||||
void initDevice(Atom *atom, Neighbor *neighbor) {}
|
||||
void *allocateGPU(size_t bytesize) { return NULL; }
|
||||
void *reallocateGPU(void *ptr, size_t new_bytesize) { return NULL; }
|
||||
void memcpyToGPU(void *d_ptr, void *h_ptr, size_t bytesize) {}
|
||||
void memcpyFromGPU(void *h_ptr, void *d_ptr, size_t bytesize) {}
|
||||
void memsetGPU(void *d_ptr, int value, size_t bytesize) {}
|
||||
#endif
|
@ -35,8 +35,8 @@ extern "C" {
|
||||
|
||||
#include <allocate.h>
|
||||
#include <atom.h>
|
||||
#include <cuda_atom.h>
|
||||
#include <allocate.h>
|
||||
#include <device.h>
|
||||
#include <neighbor.h>
|
||||
#include <parameter.h>
|
||||
#include <timing.h>
|
||||
|
@ -31,7 +31,7 @@
|
||||
extern "C" {
|
||||
|
||||
#include <atom.h>
|
||||
#include <cuda_atom.h>
|
||||
#include <device.h>
|
||||
#include <parameter.h>
|
||||
#include <neighbor.h>
|
||||
#include <util.h>
|
||||
|
@ -28,7 +28,7 @@ extern "C" {
|
||||
|
||||
#include <allocate.h>
|
||||
#include <atom.h>
|
||||
#include <cuda_atom.h>
|
||||
#include <device.h>
|
||||
#include <pbc.h>
|
||||
#include <util.h>
|
||||
|
||||
|
@ -1,12 +1,15 @@
|
||||
#include <cuda_runtime.h>
|
||||
//---
|
||||
#include <atom.h>
|
||||
#include <neighbor.h>
|
||||
|
||||
#ifndef __CUDA_ATOM_H_
|
||||
#define __CUDA_ATOM_H_
|
||||
extern void initCuda(Atom*, Neighbor*);
|
||||
#ifndef __DEVICE_H_
|
||||
#define __DEVICE_H_
|
||||
|
||||
#ifdef CUDA_TARGET
|
||||
#include <cuda_runtime.h>
|
||||
extern void cuda_assert(const char *msg, cudaError_t err);
|
||||
#endif
|
||||
|
||||
extern void initDevice(Atom*, Neighbor*);
|
||||
extern void *allocateGPU(size_t bytesize);
|
||||
extern void *reallocateGPU(void *ptr, size_t new_bytesize);
|
||||
extern void memcpyToGPU(void *d_ptr, void *h_ptr, size_t bytesize);
|
@ -20,6 +20,8 @@
|
||||
* with MD-Bench. If not, see <https://www.gnu.org/licenses/>.
|
||||
* =======================================================================================
|
||||
*/
|
||||
#include <stdbool.h>
|
||||
//---
|
||||
#include <parameter.h>
|
||||
#include <atom.h>
|
||||
|
||||
|
@ -30,19 +30,20 @@
|
||||
|
||||
#include <likwid-marker.h>
|
||||
|
||||
#include <timing.h>
|
||||
#include <allocate.h>
|
||||
#include <atom.h>
|
||||
#include <device.h>
|
||||
#include <eam.h>
|
||||
#include <integrate.h>
|
||||
#include <thermo.h>
|
||||
#include <timing.h>
|
||||
#include <neighbor.h>
|
||||
#include <parameter.h>
|
||||
#include <atom.h>
|
||||
#include <stats.h>
|
||||
#include <thermo.h>
|
||||
#include <pbc.h>
|
||||
#include <stats.h>
|
||||
#include <timers.h>
|
||||
#include <eam.h>
|
||||
#include <vtk.h>
|
||||
#include <util.h>
|
||||
#include <integrate.h>
|
||||
#include <vtk.h>
|
||||
|
||||
#define HLINE "----------------------------------------------------------------------------\n"
|
||||
|
||||
@ -53,7 +54,6 @@ extern double computeForceEam(Eam*, Parameter*, Atom*, Neighbor*, Stats*);
|
||||
extern double computeForceDemFullNeigh(Parameter*, Atom*, Neighbor*, Stats*);
|
||||
|
||||
#ifdef CUDA_TARGET
|
||||
#include <cuda_atom.h>
|
||||
extern double computeForceLJFullNeigh_cuda(Parameter*, Atom*, Neighbor*);
|
||||
#endif
|
||||
|
||||
@ -80,9 +80,7 @@ double setup(Parameter *param, Eam *eam, Atom *atom, Neighbor *neighbor, Stats *
|
||||
setupThermo(param, atom->Natoms);
|
||||
if(param->input_file == NULL) { adjustThermo(param, atom); }
|
||||
setupPbc(atom, param);
|
||||
#ifdef CUDA_TARGET
|
||||
initCuda(atom, neighbor);
|
||||
#endif
|
||||
initDevice(atom, neighbor);
|
||||
updatePbc(atom, param, true);
|
||||
buildNeighbor(atom, neighbor);
|
||||
E = getTimeStamp();
|
||||
|
Loading…
Reference in New Issue
Block a user