From 176de0525b96e56a2bfe5341ffe989ec3e3f6a04 Mon Sep 17 00:00:00 2001 From: Martin Bauernfeind Date: Sun, 17 Jul 2022 18:34:17 +0200 Subject: [PATCH] Instrumented the reneighbour function with timers (via getTimeStamp()) to measure the runtime of its different components/methods --- src/includes/timers.h | 4 ++++ src/main.c | 30 +++++++++++++++++++++++++----- 2 files changed, 29 insertions(+), 5 deletions(-) diff --git a/src/includes/timers.h b/src/includes/timers.h index 2400c01..9938f50 100644 --- a/src/includes/timers.h +++ b/src/includes/timers.h @@ -5,6 +5,10 @@ typedef enum { TOTAL = 0, NEIGH, FORCE, + NEIGH_UPDATE_ATOMS_PBC, + NEIGH_SETUP_PBC, + NEIGH_UPDATE_PBC, + NEIGH_BUILD_NEIGHBOR, NUMTIMER } timertype; diff --git a/src/main.c b/src/main.c index 54c9645..8e0c2bf 100644 --- a/src/main.c +++ b/src/main.c @@ -158,19 +158,32 @@ double reneighbour( Neighbor *neighbor, Atom *c_atom, Neighbor *c_neighbor, - const int num_threads_per_block) + const int num_threads_per_block, + double* timers) { - double S, E; + double S, E, beforeEvent, afterEvent; S = getTimeStamp(); + beforeEvent = S; LIKWID_MARKER_START("reneighbour"); updateAtomsPbc_cuda(atom, param, c_atom, num_threads_per_block); + afterEvent = getTimeStamp(); + timers[NEIGH_UPDATE_ATOMS_PBC] += afterEvent - beforeEvent; + beforeEvent = afterEvent; setupPbc(atom, param); + afterEvent = getTimeStamp(); + timers[NEIGH_SETUP_PBC] += afterEvent - beforeEvent; + beforeEvent = afterEvent; updatePbc_cuda(atom, param, c_atom, true, num_threads_per_block); + afterEvent = getTimeStamp(); + timers[NEIGH_UPDATE_PBC] += afterEvent - beforeEvent; + beforeEvent = afterEvent; //sortAtom(atom); buildNeighbor_cuda(atom, neighbor, c_atom, c_neighbor, num_threads_per_block); LIKWID_MARKER_STOP("reneighbour"); E = getTimeStamp(); + afterEvent = E; + timers[NEIGH_BUILD_NEIGHBOR] += afterEvent - beforeEvent; return E-S; } @@ -333,6 +346,10 @@ int main(int argc, char** argv) timer[FORCE] = 0.0; timer[NEIGH] = 0.0; timer[TOTAL] = 
getTimeStamp(); + timer[NEIGH_UPDATE_ATOMS_PBC] = 0.0; + timer[NEIGH_SETUP_PBC] = 0.0; + timer[NEIGH_UPDATE_PBC] = 0.0; + timer[NEIGH_BUILD_NEIGHBOR] = 0.0; if(param.vtk_file != NULL) { write_atoms_to_vtk_file(param.vtk_file, &atom, 0); @@ -345,9 +362,12 @@ int main(int argc, char** argv) cuda_initial_integrate(doReneighbour, &param, &atom, &c_atom, num_threads_per_block); if(doReneighbour) { - timer[NEIGH] += reneighbour(&param, &atom, &neighbor, &c_atom, &c_neighbor, num_threads_per_block); + timer[NEIGH] += reneighbour(&param, &atom, &neighbor, &c_atom, &c_neighbor, num_threads_per_block, &timer); } else { + double before = getTimeStamp(); updatePbc_cuda(&atom, &param, &c_atom, false, num_threads_per_block); + double after = getTimeStamp(); + timer[NEIGH_UPDATE_PBC] += after - before; } if(param.force_field == FF_EAM) { @@ -385,8 +405,8 @@ int main(int argc, char** argv) #endif printf(HLINE); printf("System: %d atoms %d ghost atoms, Steps: %d\n", atom.Natoms, atom.Nghost, param.ntimes); - printf("TOTAL %.2fs FORCE %.2fs NEIGH %.2fs REST %.2fs\n", - timer[TOTAL], timer[FORCE], timer[NEIGH], timer[TOTAL]-timer[FORCE]-timer[NEIGH]); + printf("TOTAL %.2fs FORCE %.2fs NEIGH %.2fs REST %.2fs NEIGH_TIMERS: UPD_AT: %.2fs SETUP_PBC %.2fs UPDATE_PBC %.2fs BUILD_NEIGHBOR %.2fs\n", + timer[TOTAL], timer[FORCE], timer[NEIGH], timer[TOTAL]-timer[FORCE]-timer[NEIGH], timer[NEIGH_UPDATE_ATOMS_PBC], timer[NEIGH_SETUP_PBC], timer[NEIGH_UPDATE_PBC], timer[NEIGH_BUILD_NEIGHBOR]); printf(HLINE); printf("Performance: %.2f million atom updates per second\n", 1e-6 * (double) atom.Natoms * param.ntimes / timer[TOTAL]);