From 7691b23d6785d34181e29293c6e4a3f4962f233b Mon Sep 17 00:00:00 2001
From: Maximilian Gaul
Date: Wed, 1 Dec 2021 17:16:32 +0100
Subject: [PATCH] Measure memory transfer from CPU to GPU, add explanation of
 how to distribute the calculation among multiple GPUs

---
 src/force.cu | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/src/force.cu b/src/force.cu
index c7ed26e..8724342 100644
--- a/src/force.cu
+++ b/src/force.cu
@@ -144,10 +144,15 @@ double computeForce(
     for(int i = 0; i < nDevices; ++i) {
         cudaDeviceProp prop;
         cudaGetDeviceProperties(&prop, i);
-        printf("DEVICE NAME: %s\r\n", prop.name);
+        printf("DEVICE %d/%d NAME: %s\r\n", i + 1, nDevices, prop.name);
     }
+
+    // Choose the GPU you want to execute the code on
+    // It is possible to execute the same kernel on multiple GPUs, but you have to copy the data multiple times
+    // Executing `cudaSetDevice(N)` before cudaMalloc / cudaMemcpy / calc_force <<< >>> will set the GPU accordingly
+
     // HINT: Run with cuda-memcheck ./MDBench-NVCC in case of error
     // HINT: Only works for data layout = AOS!!!
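
The following standalone program is not part of the patch; it is a minimal sketch of what the added comments describe, namely selecting a device with cudaSetDevice before cudaMalloc / cudaMemcpy / the kernel launch and timing the host-to-device transfer. The dummy kernel `calc_force_dummy`, the flat float arrays, and the even index-range split are illustrative assumptions, not MD-Bench's real force kernel or atom data structures.

// Minimal multi-GPU sketch (illustrative only, not MD-Bench code).
// Assumption: a dummy kernel `calc_force_dummy` over a flat float array
// stands in for the benchmark's force kernel and atom data.
#include <cstdio>
#include <cstdlib>
#include <cuda_runtime.h>

__global__ void calc_force_dummy(const float *x, float *f, int n) {
    int i = blockIdx.x * blockDim.x + threadIdx.x;
    if(i < n) {
        f[i] = 2.0f * x[i];    // placeholder "force" computation
    }
}

int main() {
    const int N = 1 << 20;
    float *x = (float*) malloc(N * sizeof(float));
    float *f = (float*) malloc(N * sizeof(float));
    for(int i = 0; i < N; ++i) { x[i] = (float) i; }

    int nDevices = 0;
    cudaGetDeviceCount(&nDevices);
    if(nDevices < 1) { fprintf(stderr, "No CUDA device found\r\n"); return 1; }

    // Split the index range evenly; every GPU gets its own copy of its chunk,
    // which is the "copy the data multiple times" cost mentioned in the patch.
    int chunk = (N + nDevices - 1) / nDevices;

    for(int dev = 0; dev < nDevices; ++dev) {
        int begin = dev * chunk;
        int count = (N - begin < chunk) ? (N - begin) : chunk;
        if(count <= 0) { break; }

        // Select the GPU *before* cudaMalloc / cudaMemcpy / the kernel launch,
        // as the comment added by the patch suggests.
        cudaSetDevice(dev);

        float *d_x, *d_f;
        cudaMalloc(&d_x, count * sizeof(float));
        cudaMalloc(&d_f, count * sizeof(float));

        // Time the CPU-to-GPU transfer with CUDA events.
        cudaEvent_t start, stop;
        cudaEventCreate(&start);
        cudaEventCreate(&stop);
        cudaEventRecord(start);
        cudaMemcpy(d_x, x + begin, count * sizeof(float), cudaMemcpyHostToDevice);
        cudaEventRecord(stop);
        cudaEventSynchronize(stop);
        float ms = 0.0f;
        cudaEventElapsedTime(&ms, start, stop);
        printf("DEVICE %d/%d: copied %zu bytes host->device in %f ms\r\n",
               dev + 1, nDevices, count * sizeof(float), ms);
        cudaEventDestroy(start);
        cudaEventDestroy(stop);

        const int blockSize = 256;
        const int gridSize = (count + blockSize - 1) / blockSize;
        calc_force_dummy<<<gridSize, blockSize>>>(d_x, d_f, count);

        // Blocking copy back; this also waits for the kernel on the default stream.
        cudaMemcpy(f + begin, d_f, count * sizeof(float), cudaMemcpyDeviceToHost);
        cudaFree(d_x);
        cudaFree(d_f);
    }

    printf("f[0] = %f, f[N-1] = %f\r\n", f[0], f[N - 1]);
    free(x);
    free(f);
    return 0;
}

Because each device iteration runs sequentially and uses blocking copies, this sketch only illustrates the cudaSetDevice ordering; overlapping the per-GPU work would additionally require separate host threads or asynchronous copies on per-device streams.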