Integrate GROMACS GPU implementation into master branch

Signed-off-by: Rafael Ravedutti <rafaelravedutti@gmail.com>
2022-11-08 18:33:23 +01:00
parent 493915fe95
commit c70ebce4c1
9 changed files with 480 additions and 84 deletions
--- a/gromacs/includes/atom.h
+++ b/gromacs/includes/atom.h
@@ -11,29 +11,43 @@

 #define DELTA 20000

-#define CLUSTER_M 4
-
 // Nbnxn layouts (as of GROMACS):
 // Simd4xN: M=4, N=VECTOR_WIDTH
 // Simd2xNN: M=4, N=(VECTOR_WIDTH/2)
+// Cuda: M=8, N=VECTOR_WIDTH (VECTOR_WIDTH is redefined to 8 when CUDA_TARGET is set)

-// Simd2xNN (here used for single-precision)
-#if VECTOR_WIDTH > CLUSTER_M * 2
-#   define KERNEL_NAME              "Simd2xNN"
-#   define CLUSTER_N                (VECTOR_WIDTH / 2)
-#   define computeForceLJ           computeForceLJ_2xnn
-// Simd4xN
-#else
-#   define KERNEL_NAME              "Simd4xN"
+#ifdef CUDA_TARGET
+#   undef VECTOR_WIDTH
+#   define VECTOR_WIDTH             8
+#   define KERNEL_NAME              "CUDA"
+#   define CLUSTER_M                8
 #   define CLUSTER_N                VECTOR_WIDTH
-#   define computeForceLJ           computeForceLJ_4xn
-#endif
-
-#ifdef USE_REFERENCE_VERSION
-#   undef KERNEL_NAME
-#   undef computeForceLJ
-#   define KERNEL_NAME              "Reference"
-#   define computeForceLJ           computeForceLJ_ref
+#   define computeForceLJ           computeForceLJ_cuda
+#   define initialIntegrate         cudaInitialIntegrate
+#   define finalIntegrate           cudaFinalIntegrate
+#   define updatePbc                cudaUpdatePbc
+#else
+#   define CLUSTER_M                4
+// Simd2xNN (here used for single-precision)
+#   if VECTOR_WIDTH > CLUSTER_M * 2
+#       define KERNEL_NAME          "Simd2xNN"
+#       define CLUSTER_N            (VECTOR_WIDTH / 2)
+#       define computeForceLJ       computeForceLJ_2xnn
+// Simd4xN
+#   else
+#       define KERNEL_NAME          "Simd4xN"
+#       define CLUSTER_N            VECTOR_WIDTH
+#       define computeForceLJ       computeForceLJ_4xn
+#   endif
+#   ifdef USE_REFERENCE_VERSION
+#       undef KERNEL_NAME
+#       undef computeForceLJ
+#       define KERNEL_NAME          "Reference"
+#       define computeForceLJ       computeForceLJ_ref
+#   endif
+#   define initialIntegrate         cpuInitialIntegrate
+#   define finalIntegrate           cpuFinalIntegrate
+#   define updatePbc                cpuUpdatePbc
 #endif

 #if CLUSTER_M == CLUSTER_N
--- a/gromacs/includes/integrate.h
+++ b/gromacs/includes/integrate.h
@@ -0,0 +1,56 @@
+/*
+ * Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
+ * All rights reserved. This file is part of MD-Bench.
+ * Use of this source code is governed by a LGPL-3.0
+ * license that can be found in the LICENSE file.
+ */
+#include <stdbool.h>
+//---
+#include <atom.h>
+#include <parameter.h>
+#include <util.h>
+
+/*
+ * CPU version of the first integration stage. For every atom in each of the
+ * atom->Nclusters_local clusters, the velocity is advanced by
+ * param->dtforce * force and the position is then advanced by
+ * param->dt * (the freshly updated velocity).
+ *
+ * param: provides the dtforce and dt factors (only read here).
+ * atom:  cl_v and cl_x are modified in place; cl_f and iclusters are read.
+ *
+ * NOTE(review): this is an external (non-static) definition placed in a
+ * header; including the header from more than one translation unit would
+ * yield duplicate definitions -- confirm it is included only once.
+ */
+void cpuInitialIntegrate(Parameter *param, Atom *atom) {
+    DEBUG_MESSAGE("cpuInitialIntegrate start\n");
+
+    for(int cluster = 0; cluster < atom->Nclusters_local; cluster++) {
+        const int base = CI_VECTOR_BASE_INDEX(cluster);
+        MD_FLOAT *pos   = atom->cl_x + base;
+        MD_FLOAT *vel   = atom->cl_v + base;
+        MD_FLOAT *force = atom->cl_f + base;
+
+        for(int i = 0; i < atom->iclusters[cluster].natoms; i++) {
+            const int ix = CL_X_OFFSET + i;
+            const int iy = CL_Y_OFFSET + i;
+            const int iz = CL_Z_OFFSET + i;
+
+            // Velocities first: the position update below uses the new values.
+            vel[ix] += param->dtforce * force[ix];
+            vel[iy] += param->dtforce * force[iy];
+            vel[iz] += param->dtforce * force[iz];
+            pos[ix] += param->dt * vel[ix];
+            pos[iy] += param->dt * vel[iy];
+            pos[iz] += param->dt * vel[iz];
+        }
+    }
+
+    DEBUG_MESSAGE("cpuInitialIntegrate end\n");
+}
+
+/*
+ * CPU version of the second integration stage. For every atom in each of the
+ * atom->Nclusters_local clusters, the velocity is advanced by
+ * param->dtforce * force. Positions are not touched.
+ *
+ * param: provides the dtforce factor (only read here).
+ * atom:  cl_v is modified in place; cl_f and iclusters are read.
+ *
+ * NOTE(review): this is an external (non-static) definition placed in a
+ * header; including the header from more than one translation unit would
+ * yield duplicate definitions -- confirm it is included only once.
+ */
+void cpuFinalIntegrate(Parameter *param, Atom *atom) {
+    DEBUG_MESSAGE("cpuFinalIntegrate start\n");
+
+    for(int cluster = 0; cluster < atom->Nclusters_local; cluster++) {
+        const int base = CI_VECTOR_BASE_INDEX(cluster);
+        MD_FLOAT *vel   = atom->cl_v + base;
+        MD_FLOAT *force = atom->cl_f + base;
+
+        for(int i = 0; i < atom->iclusters[cluster].natoms; i++) {
+            const int ix = CL_X_OFFSET + i;
+            const int iy = CL_Y_OFFSET + i;
+            const int iz = CL_Z_OFFSET + i;
+
+            vel[ix] += param->dtforce * force[ix];
+            vel[iy] += param->dtforce * force[iy];
+            vel[iz] += param->dtforce * force[iz];
+        }
+    }
+
+    DEBUG_MESSAGE("cpuFinalIntegrate end\n");
+}
+
+// Prototypes of the CUDA integration routines, visible only when CUDA_TARGET
+// is defined. Their definitions are not part of this header.
+#ifdef CUDA_TARGET
+void cudaInitialIntegrate(Parameter*, Atom*);
+void cudaFinalIntegrate(Parameter*, Atom*);
+#endif
--- a/gromacs/includes/pbc.h
+++ b/gromacs/includes/pbc.h
@@ -10,7 +10,11 @@
 #ifndef __PBC_H_
 #define __PBC_H_
 extern void initPbc();
-extern void updatePbc(Atom*, Parameter*, int);
+extern void cpuUpdatePbc(Atom*, Parameter*, int);
 extern void updateAtomsPbc(Atom*, Parameter*);
 extern void setupPbc(Atom*, Parameter*);
+
+// CUDA variant of cpuUpdatePbc with the same parameter list; it is only
+// declared when CUDA_TARGET is defined.
+#ifdef CUDA_TARGET
+extern void cudaUpdatePbc(Atom*, Parameter*, int);
+#endif
 #endif