Instrumented the reneighbor function with timers (via getTimestamp()) to measure the runtime of its different components/methods

This commit is contained in:
Martin Bauernfeind 2022-07-17 18:34:17 +02:00
parent 7bad7e84b6
commit 176de0525b
2 changed files with 29 additions and 5 deletions

View File

@ -5,6 +5,10 @@ typedef enum {
TOTAL = 0, TOTAL = 0,
NEIGH, NEIGH,
FORCE, FORCE,
NEIGH_UPDATE_ATOMS_PBC,
NEIGH_SETUP_PBC,
NEIGH_UPDATE_PBC,
NEIGH_BUILD_NEIGHBOR,
NUMTIMER NUMTIMER
} timertype; } timertype;

View File

@ -158,19 +158,32 @@ double reneighbour(
Neighbor *neighbor, Neighbor *neighbor,
Atom *c_atom, Atom *c_atom,
Neighbor *c_neighbor, Neighbor *c_neighbor,
const int num_threads_per_block) const int num_threads_per_block,
double* timers)
{ {
double S, E; double S, E, beforeEvent, afterEvent;
S = getTimeStamp(); S = getTimeStamp();
beforeEvent = S;
LIKWID_MARKER_START("reneighbour"); LIKWID_MARKER_START("reneighbour");
updateAtomsPbc_cuda(atom, param, c_atom, num_threads_per_block); updateAtomsPbc_cuda(atom, param, c_atom, num_threads_per_block);
afterEvent = getTimeStamp();
timers[NEIGH_UPDATE_ATOMS_PBC] += afterEvent - beforeEvent;
beforeEvent = afterEvent;
setupPbc(atom, param); setupPbc(atom, param);
afterEvent = getTimeStamp();
timers[NEIGH_SETUP_PBC] += afterEvent - beforeEvent;
beforeEvent = afterEvent;
updatePbc_cuda(atom, param, c_atom, true, num_threads_per_block); updatePbc_cuda(atom, param, c_atom, true, num_threads_per_block);
afterEvent = getTimeStamp();
timers[NEIGH_UPDATE_PBC] += afterEvent - beforeEvent;
beforeEvent = afterEvent;
//sortAtom(atom); //sortAtom(atom);
buildNeighbor_cuda(atom, neighbor, c_atom, c_neighbor, num_threads_per_block); buildNeighbor_cuda(atom, neighbor, c_atom, c_neighbor, num_threads_per_block);
LIKWID_MARKER_STOP("reneighbour"); LIKWID_MARKER_STOP("reneighbour");
E = getTimeStamp(); E = getTimeStamp();
afterEvent = E;
timers[NEIGH_BUILD_NEIGHBOR] += afterEvent - beforeEvent;
return E-S; return E-S;
} }
@ -333,6 +346,10 @@ int main(int argc, char** argv)
timer[FORCE] = 0.0; timer[FORCE] = 0.0;
timer[NEIGH] = 0.0; timer[NEIGH] = 0.0;
timer[TOTAL] = getTimeStamp(); timer[TOTAL] = getTimeStamp();
timer[NEIGH_UPDATE_ATOMS_PBC] = 0.0;
timer[NEIGH_SETUP_PBC] = 0.0;
timer[NEIGH_UPDATE_PBC] = 0.0;
timer[NEIGH_BUILD_NEIGHBOR] = 0.0;
if(param.vtk_file != NULL) { if(param.vtk_file != NULL) {
write_atoms_to_vtk_file(param.vtk_file, &atom, 0); write_atoms_to_vtk_file(param.vtk_file, &atom, 0);
@ -345,9 +362,12 @@ int main(int argc, char** argv)
cuda_initial_integrate(doReneighbour, &param, &atom, &c_atom, num_threads_per_block); cuda_initial_integrate(doReneighbour, &param, &atom, &c_atom, num_threads_per_block);
if(doReneighbour) { if(doReneighbour) {
timer[NEIGH] += reneighbour(&param, &atom, &neighbor, &c_atom, &c_neighbor, num_threads_per_block); timer[NEIGH] += reneighbour(&param, &atom, &neighbor, &c_atom, &c_neighbor, num_threads_per_block, &timer);
} else { } else {
double before = getTimeStamp();
updatePbc_cuda(&atom, &param, &c_atom, false, num_threads_per_block); updatePbc_cuda(&atom, &param, &c_atom, false, num_threads_per_block);
double after = getTimeStamp();
timer[NEIGH_UPDATE_PBC] += after - before;
} }
if(param.force_field == FF_EAM) { if(param.force_field == FF_EAM) {
@ -385,8 +405,8 @@ int main(int argc, char** argv)
#endif #endif
printf(HLINE); printf(HLINE);
printf("System: %d atoms %d ghost atoms, Steps: %d\n", atom.Natoms, atom.Nghost, param.ntimes); printf("System: %d atoms %d ghost atoms, Steps: %d\n", atom.Natoms, atom.Nghost, param.ntimes);
printf("TOTAL %.2fs FORCE %.2fs NEIGH %.2fs REST %.2fs\n", printf("TOTAL %.2fs FORCE %.2fs NEIGH %.2fs REST %.2fs NEIGH_TIMERS: UPD_AT: %.2fs SETUP_PBC %.2fs UPDATE_PBC %.2fs BUILD_NEIGHBOR %.2fs\n",
timer[TOTAL], timer[FORCE], timer[NEIGH], timer[TOTAL]-timer[FORCE]-timer[NEIGH]); timer[TOTAL], timer[FORCE], timer[NEIGH], timer[TOTAL]-timer[FORCE]-timer[NEIGH], timer[NEIGH_UPDATE_ATOMS_PBC], timer[NEIGH_SETUP_PBC], timer[NEIGH_UPDATE_PBC], timer[NEIGH_BUILD_NEIGHBOR]);
printf(HLINE); printf(HLINE);
printf("Performance: %.2f million atom updates per second\n", printf("Performance: %.2f million atom updates per second\n",
1e-6 * (double) atom.Natoms * param.ntimes / timer[TOTAL]); 1e-6 * (double) atom.Natoms * param.ntimes / timer[TOTAL]);