Instrumented the reneighbour function with timers (via getTimeStamp()) to measure the runtime of each of its stages: updateAtomsPbc_cuda, setupPbc, updatePbc_cuda and buildNeighbor_cuda
This commit is contained in:
parent
7bad7e84b6
commit
176de0525b
@ -5,6 +5,10 @@ typedef enum {
|
|||||||
TOTAL = 0,
|
TOTAL = 0,
|
||||||
NEIGH,
|
NEIGH,
|
||||||
FORCE,
|
FORCE,
|
||||||
|
NEIGH_UPDATE_ATOMS_PBC,
|
||||||
|
NEIGH_SETUP_PBC,
|
||||||
|
NEIGH_UPDATE_PBC,
|
||||||
|
NEIGH_BUILD_NEIGHBOR,
|
||||||
NUMTIMER
|
NUMTIMER
|
||||||
} timertype;
|
} timertype;
|
||||||
|
|
||||||
|
30
src/main.c
30
src/main.c
@ -158,19 +158,32 @@ double reneighbour(
|
|||||||
Neighbor *neighbor,
|
Neighbor *neighbor,
|
||||||
Atom *c_atom,
|
Atom *c_atom,
|
||||||
Neighbor *c_neighbor,
|
Neighbor *c_neighbor,
|
||||||
const int num_threads_per_block)
|
const int num_threads_per_block,
|
||||||
|
double* timers)
|
||||||
{
|
{
|
||||||
double S, E;
|
double S, E, beforeEvent, afterEvent;
|
||||||
|
|
||||||
S = getTimeStamp();
|
S = getTimeStamp();
|
||||||
|
beforeEvent = S;
|
||||||
LIKWID_MARKER_START("reneighbour");
|
LIKWID_MARKER_START("reneighbour");
|
||||||
updateAtomsPbc_cuda(atom, param, c_atom, num_threads_per_block);
|
updateAtomsPbc_cuda(atom, param, c_atom, num_threads_per_block);
|
||||||
|
afterEvent = getTimeStamp();
|
||||||
|
timers[NEIGH_UPDATE_ATOMS_PBC] += afterEvent - beforeEvent;
|
||||||
|
beforeEvent = afterEvent;
|
||||||
setupPbc(atom, param);
|
setupPbc(atom, param);
|
||||||
|
afterEvent = getTimeStamp();
|
||||||
|
timers[NEIGH_SETUP_PBC] += afterEvent - beforeEvent;
|
||||||
|
beforeEvent = afterEvent;
|
||||||
updatePbc_cuda(atom, param, c_atom, true, num_threads_per_block);
|
updatePbc_cuda(atom, param, c_atom, true, num_threads_per_block);
|
||||||
|
afterEvent = getTimeStamp();
|
||||||
|
timers[NEIGH_UPDATE_PBC] += afterEvent - beforeEvent;
|
||||||
|
beforeEvent = afterEvent;
|
||||||
//sortAtom(atom);
|
//sortAtom(atom);
|
||||||
buildNeighbor_cuda(atom, neighbor, c_atom, c_neighbor, num_threads_per_block);
|
buildNeighbor_cuda(atom, neighbor, c_atom, c_neighbor, num_threads_per_block);
|
||||||
LIKWID_MARKER_STOP("reneighbour");
|
LIKWID_MARKER_STOP("reneighbour");
|
||||||
E = getTimeStamp();
|
E = getTimeStamp();
|
||||||
|
afterEvent = E;
|
||||||
|
timers[NEIGH_BUILD_NEIGHBOR] += afterEvent - beforeEvent;
|
||||||
|
|
||||||
return E-S;
|
return E-S;
|
||||||
}
|
}
|
||||||
@ -333,6 +346,10 @@ int main(int argc, char** argv)
|
|||||||
timer[FORCE] = 0.0;
|
timer[FORCE] = 0.0;
|
||||||
timer[NEIGH] = 0.0;
|
timer[NEIGH] = 0.0;
|
||||||
timer[TOTAL] = getTimeStamp();
|
timer[TOTAL] = getTimeStamp();
|
||||||
|
timer[NEIGH_UPDATE_ATOMS_PBC] = 0.0;
|
||||||
|
timer[NEIGH_SETUP_PBC] = 0.0;
|
||||||
|
timer[NEIGH_UPDATE_PBC] = 0.0;
|
||||||
|
timer[NEIGH_BUILD_NEIGHBOR] = 0.0;
|
||||||
|
|
||||||
if(param.vtk_file != NULL) {
|
if(param.vtk_file != NULL) {
|
||||||
write_atoms_to_vtk_file(param.vtk_file, &atom, 0);
|
write_atoms_to_vtk_file(param.vtk_file, &atom, 0);
|
||||||
@ -345,9 +362,12 @@ int main(int argc, char** argv)
|
|||||||
cuda_initial_integrate(doReneighbour, &param, &atom, &c_atom, num_threads_per_block);
|
cuda_initial_integrate(doReneighbour, &param, &atom, &c_atom, num_threads_per_block);
|
||||||
|
|
||||||
if(doReneighbour) {
|
if(doReneighbour) {
|
||||||
timer[NEIGH] += reneighbour(&param, &atom, &neighbor, &c_atom, &c_neighbor, num_threads_per_block);
|
timer[NEIGH] += reneighbour(&param, &atom, &neighbor, &c_atom, &c_neighbor, num_threads_per_block, &timer);
|
||||||
} else {
|
} else {
|
||||||
|
double before = getTimeStamp();
|
||||||
updatePbc_cuda(&atom, &param, &c_atom, false, num_threads_per_block);
|
updatePbc_cuda(&atom, &param, &c_atom, false, num_threads_per_block);
|
||||||
|
double after = getTimeStamp();
|
||||||
|
timer[NEIGH_UPDATE_PBC] += after - before;
|
||||||
}
|
}
|
||||||
|
|
||||||
if(param.force_field == FF_EAM) {
|
if(param.force_field == FF_EAM) {
|
||||||
@ -385,8 +405,8 @@ int main(int argc, char** argv)
|
|||||||
#endif
|
#endif
|
||||||
printf(HLINE);
|
printf(HLINE);
|
||||||
printf("System: %d atoms %d ghost atoms, Steps: %d\n", atom.Natoms, atom.Nghost, param.ntimes);
|
printf("System: %d atoms %d ghost atoms, Steps: %d\n", atom.Natoms, atom.Nghost, param.ntimes);
|
||||||
printf("TOTAL %.2fs FORCE %.2fs NEIGH %.2fs REST %.2fs\n",
|
printf("TOTAL %.2fs FORCE %.2fs NEIGH %.2fs REST %.2fs NEIGH_TIMERS: UPD_AT: %.2fs SETUP_PBC %.2fs UPDATE_PBC %.2fs BUILD_NEIGHBOR %.2fs\n",
|
||||||
timer[TOTAL], timer[FORCE], timer[NEIGH], timer[TOTAL]-timer[FORCE]-timer[NEIGH]);
|
timer[TOTAL], timer[FORCE], timer[NEIGH], timer[TOTAL]-timer[FORCE]-timer[NEIGH], timer[NEIGH_UPDATE_ATOMS_PBC], timer[NEIGH_SETUP_PBC], timer[NEIGH_UPDATE_PBC], timer[NEIGH_BUILD_NEIGHBOR]);
|
||||||
printf(HLINE);
|
printf(HLINE);
|
||||||
printf("Performance: %.2f million atom updates per second\n",
|
printf("Performance: %.2f million atom updates per second\n",
|
||||||
1e-6 * (double) atom.Natoms * param.ntimes / timer[TOTAL]);
|
1e-6 * (double) atom.Natoms * param.ntimes / timer[TOTAL]);
|
||||||
|
Loading…
Reference in New Issue
Block a user