Measure memory transfer from CPU to GPU; add an explanation of how to distribute the calculation among multiple GPUs
@@ -144,10 +144,15 @@ double computeForce(
     for(int i = 0; i < nDevices; ++i) {
         cudaDeviceProp prop;
         cudaGetDeviceProperties(&prop, i);
-        printf("DEVICE NAME: %s\r\n", prop.name);
+        printf("DEVICE %d/%d NAME: %s\r\n", i + 1, nDevices, prop.name);
     }
 
+    // Choose the GPU on which you want to execute the code.
+    // It is possible to execute the same kernel on multiple GPUs, but the data then has to be copied to each device.
+    // Calling `cudaSetDevice(N)` before cudaMalloc / cudaMemcpy / calc_force <<< >>> directs those calls to the selected GPU.
     */
 
 
     // HINT: Run with cuda-memcheck ./MDBench-NVCC in case of error
     // HINT: Only works for data layout = AOS!!!
 
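Below is a minimal sketch of how the distribution described in the new comments could look. It assumes a force kernel calc_force(pos, force, first, last) defined elsewhere in the benchmark and an AOS layout of 3 doubles per atom; the kernel name, its signature, and the partitioning are illustrative assumptions, not the repository's actual code.

#include <cuda_runtime.h>
#include <stdlib.h>

// Assumed to be defined elsewhere (e.g. the benchmark's force kernel);
// the signature is a placeholder for illustration only.
extern __global__ void calc_force(const double *pos, double *force, int first, int last);

// Split the atom range over all visible GPUs. cudaSetDevice(i) makes every
// following cudaMalloc / cudaMemcpy / kernel launch on this thread target GPU i.
void computeForceMultiGPU(const double *h_pos, double *h_force, int natoms) {
    int nDevices = 0;
    cudaGetDeviceCount(&nDevices);
    if(nDevices < 1) return;
    int chunk = (natoms + nDevices - 1) / nDevices;

    double **d_pos   = (double **) malloc(nDevices * sizeof(double *));
    double **d_force = (double **) malloc(nDevices * sizeof(double *));

    // Phase 1: copy the input to every GPU and launch one kernel per device.
    // Kernel launches are asynchronous, so the devices compute concurrently.
    for(int i = 0; i < nDevices; ++i) {
        cudaSetDevice(i);
        cudaMalloc(&d_pos[i],   3 * natoms * sizeof(double));
        cudaMalloc(&d_force[i], 3 * natoms * sizeof(double));
        cudaMemcpy(d_pos[i], h_pos, 3 * natoms * sizeof(double), cudaMemcpyHostToDevice);

        int first = i * chunk;
        int last  = (first + chunk < natoms) ? (first + chunk) : natoms;
        if(last > first) {
            int threads = 256;
            int blocks  = (last - first + threads - 1) / threads;
            calc_force<<<blocks, threads>>>(d_pos[i], d_force[i], first, last);
        }
    }

    // Phase 2: collect the slice of forces each GPU computed and clean up.
    for(int i = 0; i < nDevices; ++i) {
        cudaSetDevice(i);
        int first = i * chunk;
        int last  = (first + chunk < natoms) ? (first + chunk) : natoms;
        if(last > first) {
            cudaMemcpy(h_force + 3 * first, d_force[i] + 3 * first,
                       3 * (last - first) * sizeof(double), cudaMemcpyDeviceToHost);
        }
        cudaFree(d_pos[i]);
        cudaFree(d_force[i]);
    }
    free(d_pos);
    free(d_force);
}

The cost of this scheme is exactly what the new comment states: every GPU needs its own copy of the positions, so the host-to-device traffic grows with the number of devices while only the kernel work is split.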
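The first part of the commit title, measuring the memory transfer from CPU to GPU, is commonly done by bracketing the cudaMemcpy with CUDA events. The sketch below is an assumed, self-contained example of that technique (buffer size and variable names are placeholders), not the timing code this commit actually adds.

#include <cuda_runtime.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

int main(void) {
    size_t nbytes = 256UL * 1024 * 1024;               // 256 MiB test buffer (placeholder size)
    double *h_buf = (double *) malloc(nbytes);
    memset(h_buf, 0, nbytes);
    double *d_buf;
    cudaMalloc(&d_buf, nbytes);

    cudaEvent_t start, stop;
    cudaEventCreate(&start);
    cudaEventCreate(&stop);

    // Time the host-to-device copy with events recorded on the same stream.
    cudaEventRecord(start);
    cudaMemcpy(d_buf, h_buf, nbytes, cudaMemcpyHostToDevice);
    cudaEventRecord(stop);
    cudaEventSynchronize(stop);

    float ms = 0.0f;
    cudaEventElapsedTime(&ms, start, stop);            // elapsed time in milliseconds
    printf("CPU -> GPU: %zu bytes in %.3f ms (%.2f GB/s)\r\n",
           nbytes, ms, (nbytes / 1.0e9) / (ms / 1.0e3));

    cudaEventDestroy(start);
    cudaEventDestroy(stop);
    cudaFree(d_buf);
    free(h_buf);
    return 0;
}

Pageable host memory from plain malloc usually transfers more slowly than pinned memory allocated with cudaMallocHost, so measuring both variants gives a better picture of the achievable bandwidth.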