From 0586ef150a3f37baba4edac7af452059475610d5 Mon Sep 17 00:00:00 2001 From: Maximilian Gaul Date: Mon, 15 Nov 2021 19:39:09 +0100 Subject: [PATCH] Fix num of threads instead of num of blocks, add logbook template --- src/force.cu | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/force.cu b/src/force.cu index 72ceb0a..dfd8792 100644 --- a/src/force.cu +++ b/src/force.cu @@ -170,8 +170,8 @@ double computeForce( checkError( "c_neigh_numneigh malloc", cudaMalloc((void**)&c_neigh_numneigh, sizeof(int) * Nlocal) ); checkError( "c_neigh_numneigh memcpy", cudaMemcpy(c_neigh_numneigh, neighbor->numneigh, sizeof(int) * Nlocal, cudaMemcpyHostToDevice) ); - const int num_blocks = 1024; - const int num_threads_per_block = ceil((float)Nlocal / (float)num_blocks); + const int num_threads_per_block = 32; // this should be multiple of 32 as operations are performed at the level of warps + const int num_blocks = ceil((float)Nlocal / (float)num_threads_per_block); double S = getTimeStamp(); LIKWID_MARKER_START("force");