Adjust file structure for CUDA

Signed-off-by: Rafael Ravedutti <rafaelravedutti@gmail.com>
2022-08-12 18:12:29 +02:00
parent 939197a785
commit 90609a2b5f
9 changed files with 92 additions and 98 deletions
--- a/lammps/allocate.c
+++ b/lammps/allocate.c
@@ -27,8 +27,8 @@
 #include <util.h>

 void *allocate(int alignment, size_t bytesize) {
+    void *ptr;
    int errorCode;
-    void* ptr;

    errorCode = posix_memalign(&ptr, alignment, bytesize);
    if(errorCode == EINVAL) {
@@ -58,52 +58,3 @@ void *reallocate(void* ptr, int alignment, size_t new_bytesize, size_t old_bytes

    return newarray;
 }
-
-#ifndef CUDA_TARGET
-void *allocateGPU(size_t bytesize) { return NULL; }
-void *reallocateGPU(void *ptr, size_t new_bytesize) { return NULL; }
-void memcpyToGPU(void *d_ptr, void *h_ptr, size_t bytesize) {}
-void memcpyFromGPU(void *h_ptr, void *d_ptr, size_t bytesize) {}
-void memsetGPU(void *d_ptr, int value, size_t bytesize) {}
-#else
-#include <cuda_runtime.h>
-#include <cuda_atom.h>
-void *allocateGPU(size_t bytesize) {
-    void *ptr;
-    #ifdef CUDA_HOST_MEMORY
-    cuda_assert("allocateGPU", cudaMallocHost((void **) &ptr, bytesize));
-    #else
-    cuda_assert("allocateGPU", cudaMalloc((void **) &ptr, bytesize));
-    #endif
-    return ptr;
-}
-
-// Data is not preserved
-void *reallocateGPU(void *ptr, size_t new_bytesize) {
-    if(ptr != NULL) {
-        #ifdef CUDA_HOST_MEMORY
-        cudaFreeHost(ptr);
-        #else
-        cudaFree(ptr);
-        #endif
-    }
-
-    return allocateGPU(new_bytesize);
-}
-
-void memcpyToGPU(void *d_ptr, void *h_ptr, size_t bytesize) {
-    #ifndef CUDA_HOST_MEMORY
-    cuda_assert("memcpyToGPU", cudaMemcpy(d_ptr, h_ptr, bytesize, cudaMemcpyHostToDevice));
-    #endif
-}
-
-void memcpyFromGPU(void *h_ptr, void *d_ptr, size_t bytesize) {
-    #ifndef CUDA_HOST_MEMORY
-    cuda_assert("memcpyFromGPU", cudaMemcpy(h_ptr, d_ptr, bytesize, cudaMemcpyDeviceToHost));
-    #endif
-}
-
-void memsetGPU(void *d_ptr, int value, size_t bytesize) {
-    cuda_assert("memsetGPU", cudaMemset(d_ptr, value, bytesize));
-}
-#endif
--- a/lammps/atom.c
+++ b/lammps/atom.c
@@ -29,12 +29,9 @@

 #include <atom.h>
 #include <allocate.h>
+#include <device.h>
 #include <util.h>

-#ifdef CUDA_TARGET
-#include <cuda_atom.h>
-#endif
-
 #define DELTA 20000

 #ifndef MAXLINE
--- a/lammps/cuda/atom.cu
+++ b/lammps/cuda/atom.cu
@@ -20,34 +20,13 @@
 *   with MD-Bench.  If not, see <https://www.gnu.org/licenses/>.
 * =======================================================================================
 */
-
-extern "C" {
-
 #include <stdio.h>
-#include <cuda_runtime.h>
+#include <stdlib.h>
 //---
-#include <allocate.h>
-#include <atom.h>
-#include <cuda_atom.h>
-#include <neighbor.h>
+#include <device.h>

-void initCuda(Atom *atom, Neighbor *neighbor) {
-    DeviceAtom *d_atom = &(atom->d_atom);
-    DeviceNeighbor *d_neighbor = &(neighbor->d_neighbor);
-
-    d_atom->epsilon         =   (MD_FLOAT *) allocateGPU(sizeof(MD_FLOAT) * atom->ntypes * atom->ntypes);
-    d_atom->sigma6          =   (MD_FLOAT *) allocateGPU(sizeof(MD_FLOAT) * atom->ntypes * atom->ntypes);
-    d_atom->cutforcesq      =   (MD_FLOAT *) allocateGPU(sizeof(MD_FLOAT) * atom->ntypes * atom->ntypes);
-    d_neighbor->neighbors   =   (int *) allocateGPU(sizeof(int) * atom->Nmax * neighbor->maxneighs);
-    d_neighbor->numneigh    =   (int *) allocateGPU(sizeof(int) * atom->Nmax);
-
-    memcpyToGPU(d_atom->x,              atom->x,          sizeof(MD_FLOAT) * atom->Nmax * 3);
-    memcpyToGPU(d_atom->vx,             atom->vx,         sizeof(MD_FLOAT) * atom->Nmax * 3);
-    memcpyToGPU(d_atom->sigma6,         atom->sigma6,     sizeof(MD_FLOAT) * atom->ntypes * atom->ntypes);
-    memcpyToGPU(d_atom->epsilon,        atom->epsilon,    sizeof(MD_FLOAT) * atom->ntypes * atom->ntypes);
-    memcpyToGPU(d_atom->cutforcesq,     atom->cutforcesq, sizeof(MD_FLOAT) * atom->ntypes * atom->ntypes);
-    memcpyToGPU(d_atom->type,           atom->type,       sizeof(int) * atom->Nmax);
-}
+#ifdef CUDA_TARGET
+#include <cuda_runtime.h>

 void cuda_assert(const char *label, cudaError_t err) {
    if (err != cudaSuccess) {
@@ -56,4 +35,68 @@ void cuda_assert(const char *label, cudaError_t err) {
    }
 }

+void *allocateGPU(size_t bytesize) {
+    void *ptr;
+    #ifdef CUDA_HOST_MEMORY
+    cuda_assert("allocateGPU", cudaMallocHost((void **) &ptr, bytesize));
+    #else
+    cuda_assert("allocateGPU", cudaMalloc((void **) &ptr, bytesize));
+    #endif
+    return ptr;
 }
+
+// Data is not preserved
+void *reallocateGPU(void *ptr, size_t new_bytesize) {
+    if(ptr != NULL) {
+        #ifdef CUDA_HOST_MEMORY
+        cudaFreeHost(ptr);
+        #else
+        cudaFree(ptr);
+        #endif
+    }
+
+    return allocateGPU(new_bytesize);
+}
+
+void memcpyToGPU(void *d_ptr, void *h_ptr, size_t bytesize) {
+    #ifndef CUDA_HOST_MEMORY
+    cuda_assert("memcpyToGPU", cudaMemcpy(d_ptr, h_ptr, bytesize, cudaMemcpyHostToDevice));
+    #endif
+}
+
+void memcpyFromGPU(void *h_ptr, void *d_ptr, size_t bytesize) {
+    #ifndef CUDA_HOST_MEMORY
+    cuda_assert("memcpyFromGPU", cudaMemcpy(h_ptr, d_ptr, bytesize, cudaMemcpyDeviceToHost));
+    #endif
+}
+
+void memsetGPU(void *d_ptr, int value, size_t bytesize) {
+    cuda_assert("memsetGPU", cudaMemset(d_ptr, value, bytesize));
+}
+
+void initDevice(Atom *atom, Neighbor *neighbor) {
+    DeviceAtom *d_atom = &(atom->d_atom);
+    DeviceNeighbor *d_neighbor = &(neighbor->d_neighbor);
+    
+    d_atom->epsilon         =   (MD_FLOAT *) allocateGPU(sizeof(MD_FLOAT) * atom->ntypes * atom->ntypes);
+    d_atom->sigma6          =   (MD_FLOAT *) allocateGPU(sizeof(MD_FLOAT) * atom->ntypes * atom->ntypes);
+    d_atom->cutforcesq      =   (MD_FLOAT *) allocateGPU(sizeof(MD_FLOAT) * atom->ntypes * atom->ntypes);
+    d_neighbor->neighbors   =   (int *) allocateGPU(sizeof(int) * atom->Nmax * neighbor->maxneighs);
+    d_neighbor->numneigh    =   (int *) allocateGPU(sizeof(int) * atom->Nmax);
+    
+    memcpyToGPU(d_atom->x,              atom->x,          sizeof(MD_FLOAT) * atom->Nmax * 3);
+    memcpyToGPU(d_atom->vx,             atom->vx,         sizeof(MD_FLOAT) * atom->Nmax * 3);
+    memcpyToGPU(d_atom->sigma6,         atom->sigma6,     sizeof(MD_FLOAT) * atom->ntypes * atom->ntypes);
+    memcpyToGPU(d_atom->epsilon,        atom->epsilon,    sizeof(MD_FLOAT) * atom->ntypes * atom->ntypes);
+    memcpyToGPU(d_atom->cutforcesq,     atom->cutforcesq, sizeof(MD_FLOAT) * atom->ntypes * atom->ntypes);
+    memcpyToGPU(d_atom->type,           atom->type,       sizeof(int) * atom->Nmax);
+}
+
+#else
+void initDevice(Atom *atom, Neighbor *neighbor) {}
+void *allocateGPU(size_t bytesize) { return NULL; }
+void *reallocateGPU(void *ptr, size_t new_bytesize) { return NULL; }
+void memcpyToGPU(void *d_ptr, void *h_ptr, size_t bytesize) {}
+void memcpyFromGPU(void *h_ptr, void *d_ptr, size_t bytesize) {}
+void memsetGPU(void *d_ptr, int value, size_t bytesize) {}
+#endif
--- a/lammps/cuda/force.cu
+++ b/lammps/cuda/force.cu
@@ -35,8 +35,8 @@ extern "C" {

 #include <allocate.h>
 #include <atom.h>
-#include <cuda_atom.h>
 #include <allocate.h>
+#include <device.h>
 #include <neighbor.h>
 #include <parameter.h>
 #include <timing.h>
--- a/lammps/cuda/neighbor.cu
+++ b/lammps/cuda/neighbor.cu
@@ -31,7 +31,7 @@
 extern "C" {

 #include <atom.h>
-#include <cuda_atom.h>
+#include <device.h>
 #include <parameter.h>
 #include <neighbor.h>
 #include <util.h>
--- a/lammps/cuda/pbc.cu
+++ b/lammps/cuda/pbc.cu
@@ -28,7 +28,7 @@ extern "C" {

 #include <allocate.h>
 #include <atom.h>
-#include <cuda_atom.h>
+#include <device.h>
 #include <pbc.h>
 #include <util.h>

--- a/lammps/includes/cuda_atom.h
+++ b/lammps/includes/cuda_atom.h
@@ -1,12 +1,15 @@
-#include <cuda_runtime.h>
-//---
 #include <atom.h>
 #include <neighbor.h>

-#ifndef __CUDA_ATOM_H_
-#define __CUDA_ATOM_H_
-extern void initCuda(Atom*, Neighbor*);
+#ifndef __DEVICE_H_
+#define __DEVICE_H_
+
+#ifdef CUDA_TARGET
+#include <cuda_runtime.h>
 extern void cuda_assert(const char *msg, cudaError_t err);
+#endif
+
+extern void initDevice(Atom*, Neighbor*);
 extern void *allocateGPU(size_t bytesize);
 extern void *reallocateGPU(void *ptr, size_t new_bytesize);
 extern void memcpyToGPU(void *d_ptr, void *h_ptr, size_t bytesize);
--- a/lammps/includes/integrate.h
+++ b/lammps/includes/integrate.h
@@ -20,6 +20,8 @@
 *   with MD-Bench.  If not, see <https://www.gnu.org/licenses/>.
 * =======================================================================================
 */
+#include <stdbool.h>
+//---
 #include <parameter.h>
 #include <atom.h>

--- a/lammps/main.c
+++ b/lammps/main.c
@@ -30,19 +30,20 @@

 #include <likwid-marker.h>

-#include <timing.h>
 #include <allocate.h>
+#include <atom.h>
+#include <device.h>
+#include <eam.h>
+#include <integrate.h>
+#include <thermo.h>
+#include <timing.h>
 #include <neighbor.h>
 #include <parameter.h>
-#include <atom.h>
-#include <stats.h>
-#include <thermo.h>
 #include <pbc.h>
+#include <stats.h>
 #include <timers.h>
-#include <eam.h>
-#include <vtk.h>
 #include <util.h>
-#include <integrate.h>
+#include <vtk.h>

 #define HLINE "----------------------------------------------------------------------------\n"

@@ -53,7 +54,6 @@ extern double computeForceEam(Eam*, Parameter*, Atom*, Neighbor*, Stats*);
 extern double computeForceDemFullNeigh(Parameter*, Atom*, Neighbor*, Stats*);

 #ifdef CUDA_TARGET
-#include <cuda_atom.h>
 extern double computeForceLJFullNeigh_cuda(Parameter*, Atom*, Neighbor*);
 #endif

@@ -80,9 +80,7 @@ double setup(Parameter *param, Eam *eam, Atom *atom, Neighbor *neighbor, Stats *
    setupThermo(param, atom->Natoms);
    if(param->input_file == NULL) { adjustThermo(param, atom); }
    setupPbc(atom, param);
-    #ifdef CUDA_TARGET
-    initCuda(atom, neighbor);
-    #endif
+    initDevice(atom, neighbor);
    updatePbc(atom, param, true);
    buildNeighbor(atom, neighbor);
    E = getTimeStamp();