Adjust file structure for CUDA

Signed-off-by: Rafael Ravedutti <rafaelravedutti@gmail.com>
2022-08-12 18:12:29 +02:00
parent 939197a785
commit 90609a2b5f
9 changed files with 92 additions and 98 deletions
--- a/lammps/allocate.c
+++ b/lammps/allocate.c
@@ -27,8 +27,8 @@
 #include <util.h>
 void *allocate(int alignment, size_t bytesize) {
    void *ptr;
    int errorCode;
    void* ptr;
    errorCode = posix_memalign(&ptr, alignment, bytesize);
    if(errorCode == EINVAL) {
@@ -58,52 +58,3 @@ void *reallocate(void* ptr, int alignment, size_t new_bytesize, size_t old_bytes
    return newarray;
 }
 #ifndef CUDA_TARGET
 // Stand-ins used when CUDA_TARGET is not defined: there is no device, so
 // allocation yields NULL and the transfer/fill routines do nothing.
 void *allocateGPU(size_t bytesize) {
    return NULL;
 }

 void *reallocateGPU(void *ptr, size_t new_bytesize) {
    return NULL;
 }

 void memcpyToGPU(void *d_ptr, void *h_ptr, size_t bytesize) {
    // intentionally empty
 }

 void memcpyFromGPU(void *h_ptr, void *d_ptr, size_t bytesize) {
    // intentionally empty
 }

 void memsetGPU(void *d_ptr, int value, size_t bytesize) {
    // intentionally empty
 }
 #else
 #include <cuda_runtime.h>
 #include <cuda_atom.h>
 void *allocateGPU(size_t bytesize) {
    void *ptr;
    #ifdef CUDA_HOST_MEMORY
    cuda_assert("allocateGPU", cudaMallocHost((void **) &ptr, bytesize));
    #else
    cuda_assert("allocateGPU", cudaMalloc((void **) &ptr, bytesize));
    #endif
    return ptr;
 }
 // Releases `ptr` (when it is not NULL) and returns a fresh allocation of
 // `new_bytesize` bytes obtained through allocateGPU().
 // Data is not preserved: nothing is copied from the old buffer.
 // The status of the free call is passed to cuda_assert() so that a failing
 // release is reported instead of being silently dropped.
 void *reallocateGPU(void *ptr, size_t new_bytesize) {
    if(ptr != NULL) {
        #ifdef CUDA_HOST_MEMORY
        cuda_assert("reallocateGPU", cudaFreeHost(ptr));
        #else
        cuda_assert("reallocateGPU", cudaFree(ptr));
        #endif
    }
    return allocateGPU(new_bytesize);
 }
 void memcpyToGPU(void *d_ptr, void *h_ptr, size_t bytesize) {
    #ifndef CUDA_HOST_MEMORY
    cuda_assert("memcpyToGPU", cudaMemcpy(d_ptr, h_ptr, bytesize, cudaMemcpyHostToDevice));
    #endif
 }
 void memcpyFromGPU(void *h_ptr, void *d_ptr, size_t bytesize) {
    #ifndef CUDA_HOST_MEMORY
    cuda_assert("memcpyFromGPU", cudaMemcpy(h_ptr, d_ptr, bytesize, cudaMemcpyDeviceToHost));
    #endif
 }
 void memsetGPU(void *d_ptr, int value, size_t bytesize) {
    cuda_assert("memsetGPU", cudaMemset(d_ptr, value, bytesize));
 }
 #endif
--- a/lammps/atom.c
+++ b/lammps/atom.c
@@ -29,12 +29,9 @@
 #include <atom.h>
 #include <allocate.h>
 #include <device.h>
 #include <util.h>
 #ifdef CUDA_TARGET
 #include <cuda_atom.h>
 #endif
 #define DELTA 20000
 #ifndef MAXLINE
--- a/lammps/cuda/atom.cu
+++ b/lammps/cuda/atom.cu
@@ -20,18 +20,61 @@
 *   with MD-Bench.  If not, see <https://www.gnu.org/licenses/>.
 * =======================================================================================
 */
 extern "C" {
 #include <stdio.h>
-#include <cuda_runtime.h>
+#include <stdlib.h>
 //---
-#include <allocate.h>
+#include <device.h>
 #include <atom.h>
 #include <cuda_atom.h>
 #include <neighbor.h>
-void initCuda(Atom *atom, Neighbor *neighbor) {
+#ifdef CUDA_TARGET
 #include <cuda_runtime.h>
 // Checks a CUDA status code. Returns normally for cudaSuccess; otherwise
 // prints `label` together with the CUDA error string and terminates the
 // process with exit(-1).
 void cuda_assert(const char *label, cudaError_t err) {
    if (err == cudaSuccess) {
        return;
    }

    const char *reason = cudaGetErrorString(err);
    printf("[CUDA Error]: %s: %s\r\n", label, reason);
    exit(-1);
 }
 // Allocates `bytesize` bytes and returns the resulting pointer.
 // With CUDA_HOST_MEMORY defined the buffer comes from cudaMallocHost,
 // otherwise from cudaMalloc. A failing call ends the program through
 // cuda_assert().
 void *allocateGPU(size_t bytesize) {
    void *buffer;
    cudaError_t status;
    #ifdef CUDA_HOST_MEMORY
    status = cudaMallocHost((void **) &buffer, bytesize);
    #else
    status = cudaMalloc((void **) &buffer, bytesize);
    #endif
    cuda_assert("allocateGPU", status);
    return buffer;
 }
 // Releases `ptr` (when it is not NULL) and returns a fresh allocation of
 // `new_bytesize` bytes obtained through allocateGPU().
 // Data is not preserved: nothing is copied from the old buffer.
 // The status of the free call is checked with cuda_assert() so that a
 // failing release is reported instead of being silently dropped.
 void *reallocateGPU(void *ptr, size_t new_bytesize) {
    if(ptr != NULL) {
        #ifdef CUDA_HOST_MEMORY
        cuda_assert("reallocateGPU", cudaFreeHost(ptr));
        #else
        cuda_assert("reallocateGPU", cudaFree(ptr));
        #endif
    }
    return allocateGPU(new_bytesize);
 }
 // Copies `bytesize` bytes from `h_ptr` to `d_ptr` with cudaMemcpy
 // (cudaMemcpyHostToDevice). Compiles to an empty function when
 // CUDA_HOST_MEMORY is defined.
 void memcpyToGPU(void *d_ptr, void *h_ptr, size_t bytesize) {
    #ifndef CUDA_HOST_MEMORY
    cudaError_t status = cudaMemcpy(d_ptr, h_ptr, bytesize, cudaMemcpyHostToDevice);
    cuda_assert("memcpyToGPU", status);
    #endif
 }
 // Copies `bytesize` bytes from `d_ptr` to `h_ptr` with cudaMemcpy
 // (cudaMemcpyDeviceToHost). Compiles to an empty function when
 // CUDA_HOST_MEMORY is defined.
 void memcpyFromGPU(void *h_ptr, void *d_ptr, size_t bytesize) {
    #ifndef CUDA_HOST_MEMORY
    cudaError_t status = cudaMemcpy(h_ptr, d_ptr, bytesize, cudaMemcpyDeviceToHost);
    cuda_assert("memcpyFromGPU", status);
    #endif
 }
 // Fills `bytesize` bytes starting at `d_ptr` with `value` via cudaMemset;
 // a failing call ends the program through cuda_assert().
 void memsetGPU(void *d_ptr, int value, size_t bytesize) {
    cudaError_t status = cudaMemset(d_ptr, value, bytesize);
    cuda_assert("memsetGPU", status);
 }
 void initDevice(Atom *atom, Neighbor *neighbor) {
    DeviceAtom *d_atom = &(atom->d_atom);
    DeviceNeighbor *d_neighbor = &(neighbor->d_neighbor);
@@ -49,11 +92,11 @@ void initCuda(Atom *atom, Neighbor *neighbor) {
    memcpyToGPU(d_atom->type,           atom->type,       sizeof(int) * atom->Nmax);
 }
-void cuda_assert(const char *label, cudaError_t err) {
+#else
-    if (err != cudaSuccess) {
+void initDevice(Atom *atom, Neighbor *neighbor) {}
-        printf("[CUDA Error]: %s: %s\r\n", label, cudaGetErrorString(err));
+void *allocateGPU(size_t bytesize) { return NULL; }
-        exit(-1);
+void *reallocateGPU(void *ptr, size_t new_bytesize) { return NULL; }
-    }
+void memcpyToGPU(void *d_ptr, void *h_ptr, size_t bytesize) {}
-}
+void memcpyFromGPU(void *h_ptr, void *d_ptr, size_t bytesize) {}
-
+void memsetGPU(void *d_ptr, int value, size_t bytesize) {}
-}
+#endif
--- a/lammps/cuda/force.cu
+++ b/lammps/cuda/force.cu
@@ -35,8 +35,8 @@ extern "C" {
 #include <allocate.h>
 #include <atom.h>
 #include <cuda_atom.h>
 #include <allocate.h>
 #include <device.h>
 #include <neighbor.h>
 #include <parameter.h>
 #include <timing.h>
--- a/lammps/cuda/neighbor.cu
+++ b/lammps/cuda/neighbor.cu
@@ -31,7 +31,7 @@
 extern "C" {
 #include <atom.h>
-#include <cuda_atom.h>
+#include <device.h>
 #include <parameter.h>
 #include <neighbor.h>
 #include <util.h>
--- a/lammps/cuda/pbc.cu
+++ b/lammps/cuda/pbc.cu
@@ -28,7 +28,7 @@ extern "C" {
 #include <allocate.h>
 #include <atom.h>
-#include <cuda_atom.h>
+#include <device.h>
 #include <pbc.h>
 #include <util.h>
--- a/lammps/includes/cuda_atom.h
+++ b/lammps/includes/cuda_atom.h
@@ -1,12 +1,15 @@
 #include <cuda_runtime.h>
 //---
 #include <atom.h>
 #include <neighbor.h>
-#ifndef __CUDA_ATOM_H_
+#ifndef __DEVICE_H_
-#define __CUDA_ATOM_H_
+#define __DEVICE_H_
-extern void initCuda(Atom*, Neighbor*);
+
 // cuda_assert() is declared only when CUDA_TARGET is defined; its
 // signature uses cudaError_t from <cuda_runtime.h>.
 #ifdef CUDA_TARGET
 #include <cuda_runtime.h>
 extern void cuda_assert(const char *msg, cudaError_t err);
 #endif
 // Device setup entry point, declared regardless of CUDA_TARGET.
 extern void initDevice(Atom*, Neighbor*);
 // Buffer management helpers, declared regardless of CUDA_TARGET.
 extern void *allocateGPU(size_t bytesize);
 extern void *reallocateGPU(void *ptr, size_t new_bytesize);
 // Transfer helper; the destination (d_ptr) comes first, then the source (h_ptr).
 extern void memcpyToGPU(void *d_ptr, void *h_ptr, size_t bytesize);
--- a/lammps/includes/integrate.h
+++ b/lammps/includes/integrate.h
@@ -20,6 +20,8 @@
 *   with MD-Bench.  If not, see <https://www.gnu.org/licenses/>.
 * =======================================================================================
 */
 #include <stdbool.h>
 //---
 #include <parameter.h>
 #include <atom.h>
--- a/lammps/main.c
+++ b/lammps/main.c
@@ -30,19 +30,20 @@
 #include <likwid-marker.h>
 #include <timing.h>
 #include <allocate.h>
 #include <atom.h>
 #include <device.h>
 #include <eam.h>
 #include <integrate.h>
 #include <thermo.h>
 #include <timing.h>
 #include <neighbor.h>
 #include <parameter.h>
 #include <atom.h>
 #include <stats.h>
 #include <thermo.h>
 #include <pbc.h>
 #include <stats.h>
 #include <timers.h>
 #include <eam.h>
 #include <vtk.h>
 #include <util.h>
-#include <integrate.h>
+#include <vtk.h>
 #define HLINE "----------------------------------------------------------------------------\n"
@@ -53,7 +54,6 @@ extern double computeForceEam(Eam*, Parameter*, Atom*, Neighbor*, Stats*);
 extern double computeForceDemFullNeigh(Parameter*, Atom*, Neighbor*, Stats*);
 #ifdef CUDA_TARGET
 #include <cuda_atom.h>
 extern double computeForceLJFullNeigh_cuda(Parameter*, Atom*, Neighbor*);
 #endif
@@ -80,9 +80,7 @@ double setup(Parameter *param, Eam *eam, Atom *atom, Neighbor *neighbor, Stats *
    setupThermo(param, atom->Natoms);
    if(param->input_file == NULL) { adjustThermo(param, atom); }
    setupPbc(atom, param);
-    #ifdef CUDA_TARGET
+    initDevice(atom, neighbor);
    initCuda(atom, neighbor);
    #endif
    updatePbc(atom, param, true);
    buildNeighbor(atom, neighbor);
    E = getTimeStamp();