Hold the number of threads per block fixed instead of the number of blocks; add logbook template

This commit is contained in:
Maximilian Gaul 2021-11-15 19:39:09 +01:00
parent 2e5d973f7d
commit 0586ef150a

View File

@@ -170,8 +170,8 @@ double computeForce(
checkError( "c_neigh_numneigh malloc", cudaMalloc((void**)&c_neigh_numneigh, sizeof(int) * Nlocal) ); checkError( "c_neigh_numneigh malloc", cudaMalloc((void**)&c_neigh_numneigh, sizeof(int) * Nlocal) );
checkError( "c_neigh_numneigh memcpy", cudaMemcpy(c_neigh_numneigh, neighbor->numneigh, sizeof(int) * Nlocal, cudaMemcpyHostToDevice) ); checkError( "c_neigh_numneigh memcpy", cudaMemcpy(c_neigh_numneigh, neighbor->numneigh, sizeof(int) * Nlocal, cudaMemcpyHostToDevice) );
const int num_blocks = 1024; const int num_threads_per_block = 32; // this should be multiple of 32 as operations are performed at the level of warps
const int num_threads_per_block = ceil((float)Nlocal / (float)num_blocks); const int num_blocks = ceil((float)Nlocal / (float)num_threads_per_block);
double S = getTimeStamp(); double S = getTimeStamp();
LIKWID_MARKER_START("force"); LIKWID_MARKER_START("force");