Measure memory transfer from CPU to GPU, add an explanation of how to distribute the calculation among multiple GPUs
This commit is contained in:
parent
da90466f98
commit
7691b23d67
@ -144,10 +144,15 @@ double computeForce(
|
|||||||
for(int i = 0; i < nDevices; ++i) {
|
for(int i = 0; i < nDevices; ++i) {
|
||||||
cudaDeviceProp prop;
|
cudaDeviceProp prop;
|
||||||
cudaGetDeviceProperties(&prop, i);
|
cudaGetDeviceProperties(&prop, i);
|
||||||
printf("DEVICE NAME: %s\r\n", prop.name);
|
printf("DEVICE %d/%d NAME: %s\r\n", i + 1, nDevices, prop.name);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Choose the GPU on which you want to execute the code
|
||||||
|
// It is possible to execute the same kernel on multiple GPUs, but the data then has to be copied to each GPU separately
|
||||||
|
// Calling `cudaSetDevice(N)` before cudaMalloc / cudaMemcpy / calc_force <<< >>> makes GPU N the target of those calls
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
|
||||||
// HINT: Run with cuda-memcheck ./MDBench-NVCC in case of error
|
// HINT: Run with cuda-memcheck ./MDBench-NVCC in case of error
|
||||||
// HINT: Only works for data layout = AOS!!!
|
// HINT: Only works for data layout = AOS!!!
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user