Measure memory transfer from CPU to GPU; add an explanation of how to distribute the calculation among multiple GPUs

2021-12-01 17:16:32 +01:00
parent da90466f98
commit 7691b23d67
1 changed files with 6 additions and 1 deletions
@@ -144,10 +144,15 @@ double computeForce(
    for(int i = 0; i < nDevices; ++i) {
        cudaDeviceProp prop;
        cudaGetDeviceProperties(&prop, i);
-        printf("DEVICE NAME: %s\r\n", prop.name);
+        printf("DEVICE %d/%d NAME: %s\r\n", i + 1, nDevices, prop.name);
    }
+
+    // Choose the GPU on which you want to execute the code
+    // It is possible to execute the same kernel on multiple GPUs, but the data has to be copied to each GPU separately
+    // Calling `cudaSetDevice(N)` before cudaMalloc / cudaMemcpy / calc_force <<< >>> selects the GPU that those calls will use
    */

+
    // HINT: Run with cuda-memcheck ./MDBench-NVCC in case of error
    // HINT: Only works for data layout = AOS!!!