diff --git a/lammps/allocate.c b/lammps/allocate.c
index a690deb..4c64794 100644
--- a/lammps/allocate.c
+++ b/lammps/allocate.c
@@ -27,8 +27,8 @@
 #include <util.h>
 
 void *allocate(int alignment, size_t bytesize) {
+    void *ptr;
     int errorCode;
-    void* ptr;
 
     errorCode = posix_memalign(&ptr, alignment, bytesize);
     if(errorCode == EINVAL) {
@@ -58,52 +58,3 @@ void *reallocate(void* ptr, int alignment, size_t new_bytesize, size_t old_bytes
 
     return newarray;
 }
-
-#ifndef CUDA_TARGET
-void *allocateGPU(size_t bytesize) { return NULL; }
-void *reallocateGPU(void *ptr, size_t new_bytesize) { return NULL; }
-void memcpyToGPU(void *d_ptr, void *h_ptr, size_t bytesize) {}
-void memcpyFromGPU(void *h_ptr, void *d_ptr, size_t bytesize) {}
-void memsetGPU(void *d_ptr, int value, size_t bytesize) {}
-#else
-#include <cuda_runtime.h>
-#include <cuda_atom.h>
-void *allocateGPU(size_t bytesize) {
-    void *ptr;
-    #ifdef CUDA_HOST_MEMORY
-    cuda_assert("allocateGPU", cudaMallocHost((void **) &ptr, bytesize));
-    #else
-    cuda_assert("allocateGPU", cudaMalloc((void **) &ptr, bytesize));
-    #endif
-    return ptr;
-}
-
-// Data is not preserved
-void *reallocateGPU(void *ptr, size_t new_bytesize) {
-    if(ptr != NULL) {
-        #ifdef CUDA_HOST_MEMORY
-        cudaFreeHost(ptr);
-        #else
-        cudaFree(ptr);
-        #endif
-    }
-
-    return allocateGPU(new_bytesize);
-}
-
-void memcpyToGPU(void *d_ptr, void *h_ptr, size_t bytesize) {
-    #ifndef CUDA_HOST_MEMORY
-    cuda_assert("memcpyToGPU", cudaMemcpy(d_ptr, h_ptr, bytesize, cudaMemcpyHostToDevice));
-    #endif
-}
-
-void memcpyFromGPU(void *h_ptr, void *d_ptr, size_t bytesize) {
-    #ifndef CUDA_HOST_MEMORY
-    cuda_assert("memcpyFromGPU", cudaMemcpy(h_ptr, d_ptr, bytesize, cudaMemcpyDeviceToHost));
-    #endif
-}
-
-void memsetGPU(void *d_ptr, int value, size_t bytesize) {
-    cuda_assert("memsetGPU", cudaMemset(d_ptr, value, bytesize));
-}
-#endif
diff --git a/lammps/atom.c b/lammps/atom.c
index ce133b8..0e0a3fe 100644
--- a/lammps/atom.c
+++ b/lammps/atom.c
@@ -29,12 +29,9 @@
 
 #include <atom.h>
 #include <allocate.h>
+#include <device.h>
 #include <util.h>
 
-#ifdef CUDA_TARGET
-#include <cuda_atom.h>
-#endif
-
 #define DELTA 20000
 
 #ifndef MAXLINE
diff --git a/lammps/cuda/atom.cu b/lammps/cuda.c
similarity index 61%
rename from lammps/cuda/atom.cu
rename to lammps/cuda.c
index 7a9d4d3..d37885f 100644
--- a/lammps/cuda/atom.cu
+++ b/lammps/cuda.c
@@ -20,34 +20,13 @@
  *   with MD-Bench.  If not, see <https://www.gnu.org/licenses/>.
  * =======================================================================================
  */
-
-extern "C" {
-
 #include <stdio.h>
-#include <cuda_runtime.h>
+#include <stdlib.h>
 //---
-#include <allocate.h>
-#include <atom.h>
-#include <cuda_atom.h>
-#include <neighbor.h>
+#include <device.h>
 
-void initCuda(Atom *atom, Neighbor *neighbor) {
-    DeviceAtom *d_atom = &(atom->d_atom);
-    DeviceNeighbor *d_neighbor = &(neighbor->d_neighbor);
-
-    d_atom->epsilon         =   (MD_FLOAT *) allocateGPU(sizeof(MD_FLOAT) * atom->ntypes * atom->ntypes);
-    d_atom->sigma6          =   (MD_FLOAT *) allocateGPU(sizeof(MD_FLOAT) * atom->ntypes * atom->ntypes);
-    d_atom->cutforcesq      =   (MD_FLOAT *) allocateGPU(sizeof(MD_FLOAT) * atom->ntypes * atom->ntypes);
-    d_neighbor->neighbors   =   (int *) allocateGPU(sizeof(int) * atom->Nmax * neighbor->maxneighs);
-    d_neighbor->numneigh    =   (int *) allocateGPU(sizeof(int) * atom->Nmax);
-
-    memcpyToGPU(d_atom->x,              atom->x,          sizeof(MD_FLOAT) * atom->Nmax * 3);
-    memcpyToGPU(d_atom->vx,             atom->vx,         sizeof(MD_FLOAT) * atom->Nmax * 3);
-    memcpyToGPU(d_atom->sigma6,         atom->sigma6,     sizeof(MD_FLOAT) * atom->ntypes * atom->ntypes);
-    memcpyToGPU(d_atom->epsilon,        atom->epsilon,    sizeof(MD_FLOAT) * atom->ntypes * atom->ntypes);
-    memcpyToGPU(d_atom->cutforcesq,     atom->cutforcesq, sizeof(MD_FLOAT) * atom->ntypes * atom->ntypes);
-    memcpyToGPU(d_atom->type,           atom->type,       sizeof(int) * atom->Nmax);
-}
+#ifdef CUDA_TARGET
+#include <cuda_runtime.h>
 
 void cuda_assert(const char *label, cudaError_t err) {
     if (err != cudaSuccess) {
@@ -56,4 +35,68 @@ void cuda_assert(const char *label, cudaError_t err) {
     }
 }
 
+void *allocateGPU(size_t bytesize) {
+    void *ptr;
+    #ifdef CUDA_HOST_MEMORY
+    cuda_assert("allocateGPU", cudaMallocHost((void **) &ptr, bytesize));
+    #else
+    cuda_assert("allocateGPU", cudaMalloc((void **) &ptr, bytesize));
+    #endif
+    return ptr;
 }
+
+// Data is not preserved
+void *reallocateGPU(void *ptr, size_t new_bytesize) {
+    if(ptr != NULL) {
+        #ifdef CUDA_HOST_MEMORY
+        cudaFreeHost(ptr);
+        #else
+        cudaFree(ptr);
+        #endif
+    }
+
+    return allocateGPU(new_bytesize);
+}
+
+void memcpyToGPU(void *d_ptr, void *h_ptr, size_t bytesize) {
+    #ifndef CUDA_HOST_MEMORY
+    cuda_assert("memcpyToGPU", cudaMemcpy(d_ptr, h_ptr, bytesize, cudaMemcpyHostToDevice));
+    #endif
+}
+
+void memcpyFromGPU(void *h_ptr, void *d_ptr, size_t bytesize) {
+    #ifndef CUDA_HOST_MEMORY
+    cuda_assert("memcpyFromGPU", cudaMemcpy(h_ptr, d_ptr, bytesize, cudaMemcpyDeviceToHost));
+    #endif
+}
+
+void memsetGPU(void *d_ptr, int value, size_t bytesize) {
+    cuda_assert("memsetGPU", cudaMemset(d_ptr, value, bytesize));
+}
+
+void initDevice(Atom *atom, Neighbor *neighbor) {
+    DeviceAtom *d_atom = &(atom->d_atom);
+    DeviceNeighbor *d_neighbor = &(neighbor->d_neighbor);
+    
+    d_atom->epsilon         =   (MD_FLOAT *) allocateGPU(sizeof(MD_FLOAT) * atom->ntypes * atom->ntypes);
+    d_atom->sigma6          =   (MD_FLOAT *) allocateGPU(sizeof(MD_FLOAT) * atom->ntypes * atom->ntypes);
+    d_atom->cutforcesq      =   (MD_FLOAT *) allocateGPU(sizeof(MD_FLOAT) * atom->ntypes * atom->ntypes);
+    d_neighbor->neighbors   =   (int *) allocateGPU(sizeof(int) * atom->Nmax * neighbor->maxneighs);
+    d_neighbor->numneigh    =   (int *) allocateGPU(sizeof(int) * atom->Nmax);
+    
+    memcpyToGPU(d_atom->x,              atom->x,          sizeof(MD_FLOAT) * atom->Nmax * 3);
+    memcpyToGPU(d_atom->vx,             atom->vx,         sizeof(MD_FLOAT) * atom->Nmax * 3);
+    memcpyToGPU(d_atom->sigma6,         atom->sigma6,     sizeof(MD_FLOAT) * atom->ntypes * atom->ntypes);
+    memcpyToGPU(d_atom->epsilon,        atom->epsilon,    sizeof(MD_FLOAT) * atom->ntypes * atom->ntypes);
+    memcpyToGPU(d_atom->cutforcesq,     atom->cutforcesq, sizeof(MD_FLOAT) * atom->ntypes * atom->ntypes);
+    memcpyToGPU(d_atom->type,           atom->type,       sizeof(int) * atom->Nmax);
+}
+
+#else
+void initDevice(Atom *atom, Neighbor *neighbor) {}
+void *allocateGPU(size_t bytesize) { return NULL; }
+void *reallocateGPU(void *ptr, size_t new_bytesize) { return NULL; }
+void memcpyToGPU(void *d_ptr, void *h_ptr, size_t bytesize) {}
+void memcpyFromGPU(void *h_ptr, void *d_ptr, size_t bytesize) {}
+void memsetGPU(void *d_ptr, int value, size_t bytesize) {}
+#endif
diff --git a/lammps/cuda/force.cu b/lammps/cuda/force.cu
index bdd55da..3d5ad2d 100644
--- a/lammps/cuda/force.cu
+++ b/lammps/cuda/force.cu
@@ -35,8 +35,8 @@ extern "C" {
 
 #include <allocate.h>
 #include <atom.h>
-#include <cuda_atom.h>
 #include <allocate.h>
+#include <device.h>
 #include <neighbor.h>
 #include <parameter.h>
 #include <timing.h>
diff --git a/lammps/cuda/neighbor.cu b/lammps/cuda/neighbor.cu
index ad73d77..4a033e5 100644
--- a/lammps/cuda/neighbor.cu
+++ b/lammps/cuda/neighbor.cu
@@ -31,7 +31,7 @@
 extern "C" {
 
 #include <atom.h>
-#include <cuda_atom.h>
+#include <device.h>
 #include <parameter.h>
 #include <neighbor.h>
 #include <util.h>
diff --git a/lammps/cuda/pbc.cu b/lammps/cuda/pbc.cu
index 90e4c7f..f413ddd 100644
--- a/lammps/cuda/pbc.cu
+++ b/lammps/cuda/pbc.cu
@@ -28,7 +28,7 @@ extern "C" {
 
 #include <allocate.h>
 #include <atom.h>
-#include <cuda_atom.h>
+#include <device.h>
 #include <pbc.h>
 #include <util.h>
 
diff --git a/lammps/includes/cuda_atom.h b/lammps/includes/device.h
similarity index 79%
rename from lammps/includes/cuda_atom.h
rename to lammps/includes/device.h
index 9f986bb..e3e5a6a 100644
--- a/lammps/includes/cuda_atom.h
+++ b/lammps/includes/device.h
@@ -1,12 +1,15 @@
-#include <cuda_runtime.h>
-//---
 #include <atom.h>
 #include <neighbor.h>
 
-#ifndef __CUDA_ATOM_H_
-#define __CUDA_ATOM_H_
-extern void initCuda(Atom*, Neighbor*);
+#ifndef __DEVICE_H_
+#define __DEVICE_H_
+
+#ifdef CUDA_TARGET
+#include <cuda_runtime.h>
 extern void cuda_assert(const char *msg, cudaError_t err);
+#endif
+
+extern void initDevice(Atom*, Neighbor*);
 extern void *allocateGPU(size_t bytesize);
 extern void *reallocateGPU(void *ptr, size_t new_bytesize);
 extern void memcpyToGPU(void *d_ptr, void *h_ptr, size_t bytesize);
diff --git a/lammps/includes/integrate.h b/lammps/includes/integrate.h
index 42d62cf..a0e0301 100644
--- a/lammps/includes/integrate.h
+++ b/lammps/includes/integrate.h
@@ -20,6 +20,8 @@
  *   with MD-Bench.  If not, see <https://www.gnu.org/licenses/>.
  * =======================================================================================
  */
+#include <stdbool.h>
+//---
 #include <parameter.h>
 #include <atom.h>
 
diff --git a/lammps/main.c b/lammps/main.c
index 7de2312..465f7a2 100644
--- a/lammps/main.c
+++ b/lammps/main.c
@@ -30,19 +30,20 @@
 
 #include <likwid-marker.h>
 
-#include <timing.h>
 #include <allocate.h>
+#include <atom.h>
+#include <device.h>
+#include <eam.h>
+#include <integrate.h>
+#include <thermo.h>
+#include <timing.h>
 #include <neighbor.h>
 #include <parameter.h>
-#include <atom.h>
-#include <stats.h>
-#include <thermo.h>
 #include <pbc.h>
+#include <stats.h>
 #include <timers.h>
-#include <eam.h>
-#include <vtk.h>
 #include <util.h>
-#include <integrate.h>
+#include <vtk.h>
 
 #define HLINE "----------------------------------------------------------------------------\n"
 
@@ -53,7 +54,6 @@ extern double computeForceEam(Eam*, Parameter*, Atom*, Neighbor*, Stats*);
 extern double computeForceDemFullNeigh(Parameter*, Atom*, Neighbor*, Stats*);
 
 #ifdef CUDA_TARGET
-#include <cuda_atom.h>
 extern double computeForceLJFullNeigh_cuda(Parameter*, Atom*, Neighbor*);
 #endif
 
@@ -80,9 +80,7 @@ double setup(Parameter *param, Eam *eam, Atom *atom, Neighbor *neighbor, Stats *
     setupThermo(param, atom->Natoms);
     if(param->input_file == NULL) { adjustThermo(param, atom); }
     setupPbc(atom, param);
-    #ifdef CUDA_TARGET
-    initCuda(atom, neighbor);
-    #endif
+    initDevice(atom, neighbor);
     updatePbc(atom, param, true);
     buildNeighbor(atom, neighbor);
     E = getTimeStamp();