Separate tracing from force computation and fix stubbed version

Signed-off-by: Rafael Ravedutti <rafaelravedutti@gmail.com>
This commit is contained in:
Rafael Ravedutti 2021-12-01 00:07:45 +01:00
parent bb21a885a1
commit 35c110155e
10 changed files with 168 additions and 169 deletions

View File

@ -32,7 +32,7 @@
#include <eam.h> #include <eam.h>
#include <util.h> #include <util.h>
double computeForceEam(Eam* eam, Parameter* param, Atom *atom, Neighbor *neighbor, Stats *stats, int first_exec, int timestep) { double computeForceEam(Eam* eam, Parameter* param, Atom *atom, Neighbor *neighbor, Stats *stats) {
if(eam->nmax < atom->Nmax) { if(eam->nmax < atom->Nmax) {
eam->nmax = atom->Nmax; eam->nmax = atom->Nmax;
if(eam->fp != NULL) { free(eam->fp); } if(eam->fp != NULL) { free(eam->fp); }

View File

@ -26,13 +26,9 @@
#include <neighbor.h> #include <neighbor.h>
#include <parameter.h> #include <parameter.h>
#include <atom.h> #include <atom.h>
#include <stats.h>
double computeForce( double computeForceLJ(Parameter *param, Atom *atom, Neighbor *neighbor, Stats *stats) {
Parameter *param,
Atom *atom,
Neighbor *neighbor
)
{
int Nlocal = atom->Nlocal; int Nlocal = atom->Nlocal;
int* neighs; int* neighs;
MD_FLOAT* fx = atom->fx; MD_FLOAT* fx = atom->fx;
@ -97,10 +93,12 @@ double computeForce(
fx[i] += fix; fx[i] += fix;
fy[i] += fiy; fy[i] += fiy;
fz[i] += fiz; fz[i] += fiz;
addStat(stats->total_force_neighs, numneighs);
addStat(stats->total_force_iters, (numneighs + VECTOR_WIDTH - 1) / VECTOR_WIDTH);
} }
LIKWID_MARKER_STOP("force"); LIKWID_MARKER_STOP("force");
double E = getTimeStamp(); double E = getTimeStamp();
return E-S; return E-S;
} }

View File

@ -23,9 +23,6 @@
#ifndef __PARAMETER_H_ #ifndef __PARAMETER_H_
#define __PARAMETER_H_ #define __PARAMETER_H_
#define FF_LJ 0
#define FF_EAM 1
#if PRECISION == 1 #if PRECISION == 1
#define MD_FLOAT float #define MD_FLOAT float
#else #else

View File

@ -20,13 +20,9 @@
* with MD-Bench. If not, see <https://www.gnu.org/licenses/>. * with MD-Bench. If not, see <https://www.gnu.org/licenses/>.
* ======================================================================================= * =======================================================================================
*/ */
#include <likwid-marker.h>
#include <timing.h>
#include <neighbor.h> #include <neighbor.h>
#include <parameter.h> #include <parameter.h>
#include <atom.h> #include <atom.h>
#include <stats.h>
#if defined(MEM_TRACER) || defined(INDEX_TRACER) #if defined(MEM_TRACER) || defined(INDEX_TRACER)
#include <stdio.h> #include <stdio.h>
@ -119,114 +115,4 @@
# define DIST_TRACE(l, e) # define DIST_TRACE(l, e)
#endif #endif
double computeForceTracing(Parameter *param, Atom *atom, Neighbor *neighbor, Stats *stats, int first_exec, int timestep) { extern void traceAddresses(Parameter *param, Atom *atom, Neighbor *neighbor, int timestep);
MEM_TRACER_INIT;
INDEX_TRACER_INIT;
int Nlocal = atom->Nlocal;
int* neighs;
MD_FLOAT* fx = atom->fx; MD_FLOAT* fy = atom->fy; MD_FLOAT* fz = atom->fz;
#ifndef EXPLICIT_TYPES
MD_FLOAT cutforcesq = param->cutforce * param->cutforce;
MD_FLOAT sigma6 = param->sigma6;
MD_FLOAT epsilon = param->epsilon;
#endif
for(int i = 0; i < Nlocal; i++) {
fx[i] = 0.0;
fy[i] = 0.0;
fz[i] = 0.0;
}
INDEX_TRACE_NATOMS(Nlocal, atom->Nghost, neighbor->maxneighs);
double S = getTimeStamp();
LIKWID_MARKER_START("force");
for(int na = 0; na < (first_exec ? 1 : ATOMS_LOOP_RUNS); na++) {
#pragma omp parallel for
for(int i = 0; i < Nlocal; i++) {
neighs = &neighbor->neighbors[i * neighbor->maxneighs];
int numneighs = neighbor->numneigh[i];
MD_FLOAT xtmp = atom_x(i);
MD_FLOAT ytmp = atom_y(i);
MD_FLOAT ztmp = atom_z(i);
MD_FLOAT fix = 0;
MD_FLOAT fiy = 0;
MD_FLOAT fiz = 0;
MEM_TRACE(atom_x(i), 'R');
MEM_TRACE(atom_y(i), 'R');
MEM_TRACE(atom_z(i), 'R');
INDEX_TRACE_ATOM(i);
#ifdef EXPLICIT_TYPES
const int type_i = atom->type[i];
MEM_TRACE(atom->type(i), 'R');
#endif
#if defined(VARIANT) && VARIANT == stub && defined(NEIGHBORS_LOOP_RUNS) && NEIGHBORS_LOOP_RUNS > 1
#define REPEAT_NEIGHBORS_LOOP
int nmax = first_exec ? 1 : NEIGHBORS_LOOP_RUNS;
for(int nn = 0; nn < (first_exec ? 1 : NEIGHBORS_LOOP_RUNS); nn++) {
#endif
//DIST_TRACE_SORT(neighs, numneighs);
INDEX_TRACE(neighs, numneighs);
//DIST_TRACE(neighs, numneighs);
for(int k = 0; k < numneighs; k++) {
int j = neighs[k];
MD_FLOAT delx = xtmp - atom_x(j);
MD_FLOAT dely = ytmp - atom_y(j);
MD_FLOAT delz = ztmp - atom_z(j);
MD_FLOAT rsq = delx * delx + dely * dely + delz * delz;
MEM_TRACE(neighs[k], 'R');
MEM_TRACE(atom_x(j), 'R');
MEM_TRACE(atom_y(j), 'R');
MEM_TRACE(atom_z(j), 'R');
#ifdef EXPLICIT_TYPES
const int type_j = atom->type[j];
const int type_ij = type_i * atom->ntypes + type_j;
const MD_FLOAT cutforcesq = atom->cutforcesq[type_ij];
const MD_FLOAT sigma6 = atom->sigma6[type_ij];
const MD_FLOAT epsilon = atom->epsilon[type_ij];
MEM_TRACE(atom->type(j), 'R');
#endif
if(rsq < cutforcesq) {
MD_FLOAT sr2 = 1.0 / rsq;
MD_FLOAT sr6 = sr2 * sr2 * sr2 * sigma6;
MD_FLOAT force = 48.0 * sr6 * (sr6 - 0.5) * sr2 * epsilon;
fix += delx * force;
fiy += dely * force;
fiz += delz * force;
}
}
#ifdef REPEAT_NEIGHBORS_LOOP
}
#endif
fx[i] += fix;
fy[i] += fiy;
fz[i] += fiz;
addStat(stats->total_force_neighs, numneighs);
addStat(stats->total_force_iters, (numneighs + VECTOR_WIDTH - 1) / VECTOR_WIDTH);
MEM_TRACE(fx[i], 'R');
MEM_TRACE(fx[i], 'W');
MEM_TRACE(fy[i], 'R');
MEM_TRACE(fy[i], 'W');
MEM_TRACE(fz[i], 'R');
MEM_TRACE(fz[i], 'W');
}
}
LIKWID_MARKER_STOP("force");
double E = getTimeStamp();
INDEX_TRACER_END;
MEM_TRACER_END;
return E-S;
}

View File

@ -33,5 +33,11 @@
#define ABS(a) ((a) >= 0 ? (a) : -(a)) #define ABS(a) ((a) >= 0 ? (a) : -(a))
#endif #endif
#define FF_LJ 0
#define FF_EAM 1
extern double myrandom(int*); extern double myrandom(int*);
extern void random_reset(int *seed, int ibase, double *coord);
extern int str2ff(const char *string);
extern const char* ff2str(int ff);
#endif #endif

View File

@ -10,17 +10,21 @@
#include <atom.h> #include <atom.h>
#include <stats.h> #include <stats.h>
#include <thermo.h> #include <thermo.h>
#include <eam.h>
#include <pbc.h> #include <pbc.h>
#include <timers.h> #include <timers.h>
#include <util.h>
#define HLINE "----------------------------------------------------------------------------\n" #define HLINE "----------------------------------------------------------------------------\n"
#define LATTICE_DISTANCE 10.0 #define LATTICE_DISTANCE 10.0
#define NEIGH_DISTANCE 1.0 #define NEIGH_DISTANCE 1.0
extern double computeForce(Parameter*, Atom*, Neighbor*, Stats*, int, int); extern double computeForceLJ(Parameter*, Atom*, Neighbor*, Stats*);
extern double computeForceEam(Eam*, Parameter*, Atom*, Neighbor*, Stats*);
void init(Parameter *param) { void init(Parameter *param) {
param->input_file = NULL;
param->epsilon = 1.0; param->epsilon = 1.0;
param->sigma6 = 1.0; param->sigma6 = 1.0;
param->rho = 0.8442; param->rho = 0.8442;
@ -39,13 +43,14 @@ void init(Parameter *param) {
param->nstat = 100; param->nstat = 100;
param->temp = 1.44; param->temp = 1.44;
param->every = 20; param->every = 20;
param->proc_freq = 0.0; param->proc_freq = 2.4;
param->eam_file = NULL;
} }
// Show debug messages // Show debug messages
//#define DEBUG(msg) printf(msg) #define DEBUG(msg) printf(msg)
// Do not show debug messages // Do not show debug messages
#define DEBUG(msg) //#define DEBUG(msg)
#define ADD_ATOM(x, y, z, vx, vy, vz) atom_x(atom->Nlocal) = base_x + x * NEIGH_DISTANCE; \ #define ADD_ATOM(x, y, z, vx, vy, vz) atom_x(atom->Nlocal) = base_x + x * NEIGH_DISTANCE; \
atom_y(atom->Nlocal) = base_y + y * NEIGH_DISTANCE; \ atom_y(atom->Nlocal) = base_y + y * NEIGH_DISTANCE; \
@ -56,6 +61,7 @@ void init(Parameter *param) {
atom->Nlocal++ atom->Nlocal++
int main(int argc, const char *argv[]) { int main(int argc, const char *argv[]) {
Eam eam;
Atom atom_data; Atom atom_data;
Atom *atom = (Atom *)(&atom_data); Atom *atom = (Atom *)(&atom_data);
Neighbor neighbor; Neighbor neighbor;
@ -71,6 +77,19 @@ int main(int argc, const char *argv[]) {
for(int i = 0; i < argc; i++) for(int i = 0; i < argc; i++)
{ {
if((strcmp(argv[i], "-f") == 0))
{
if((param.force_field = str2ff(argv[++i])) < 0) {
fprintf(stderr, "Invalid force field!\n");
exit(-1);
}
continue;
}
if((strcmp(argv[i], "-e") == 0))
{
param.eam_file = strdup(argv[++i]);
continue;
}
if((strcmp(argv[i], "-n") == 0) || (strcmp(argv[i], "--nsteps") == 0)) if((strcmp(argv[i], "-n") == 0) || (strcmp(argv[i], "--nsteps") == 0))
{ {
param.ntimes = atoi(argv[++i]); param.ntimes = atoi(argv[++i]);
@ -96,12 +115,12 @@ int main(int argc, const char *argv[]) {
atoms_per_unit_cell = atoi(argv[++i]); atoms_per_unit_cell = atoi(argv[++i]);
continue; continue;
} }
if((strcmp(argv[i], "-f") == 0)) if((strcmp(argv[i], "--freq") == 0))
{ {
param.proc_freq = atof(argv[++i]); param.proc_freq = atof(argv[++i]);
continue; continue;
} }
if((strcmp(argv[i], "-csv") == 0)) if((strcmp(argv[i], "--csv") == 0))
{ {
csv = 1; csv = 1;
continue; continue;
@ -110,16 +129,23 @@ int main(int argc, const char *argv[]) {
{ {
printf("MD Bench: A minimalistic re-implementation of miniMD\n"); printf("MD Bench: A minimalistic re-implementation of miniMD\n");
printf(HLINE); printf(HLINE);
printf("-f <string>: force field (lj or eam), default lj\n");
printf("-n / --nsteps <int>: set number of timesteps for simulation\n"); printf("-n / --nsteps <int>: set number of timesteps for simulation\n");
printf("-nx/-ny/-nz <int>: set linear dimension of systembox in x/y/z direction\n"); printf("-nx/-ny/-nz <int>: set linear dimension of systembox in x/y/z direction\n");
printf("-na <int>: set number of atoms per unit cell\n"); printf("-na <int>: set number of atoms per unit cell\n");
printf("-f <real>: set CPU frequency (GHz) and display average cycles per atom and neighbors\n"); printf("--freq <real>: set CPU frequency (GHz) and display average cycles per atom and neighbors\n");
printf("-csv: set output as CSV style\n"); printf("--csv: set output as CSV style\n");
printf(HLINE); printf(HLINE);
exit(EXIT_SUCCESS); exit(EXIT_SUCCESS);
} }
} }
if(param.force_field == FF_EAM) {
DEBUG("Initializing EAM parameters...\n");
initEam(&eam, &param);
}
param.xprd = param.nx * LATTICE_DISTANCE; param.xprd = param.nx * LATTICE_DISTANCE;
param.yprd = param.ny * LATTICE_DISTANCE; param.yprd = param.ny * LATTICE_DISTANCE;
param.zprd = param.nz * LATTICE_DISTANCE; param.zprd = param.nz * LATTICE_DISTANCE;
@ -204,16 +230,29 @@ int main(int argc, const char *argv[]) {
DEBUG("Initializing neighbor lists...\n"); DEBUG("Initializing neighbor lists...\n");
initNeighbor(&neighbor, &param); initNeighbor(&neighbor, &param);
DEBUG("Setting up neighbor lists...\n"); DEBUG("Setting up neighbor lists...\n");
setupNeighbor(); setupNeighbor(&param);
DEBUG("Building neighbor lists...\n"); DEBUG("Building neighbor lists...\n");
buildNeighbor(atom, &neighbor); buildNeighbor(atom, &neighbor);
DEBUG("Computing forces...\n"); DEBUG("Computing forces...\n");
computeForce(&param, atom, &neighbor, &stats, 1, 1); if(param.force_field == FF_EAM) {
computeForceEam(&eam, &param, atom, &neighbor, &stats);
} else {
computeForceLJ(&param, atom, &neighbor, &stats);
}
double S, E; double S, E;
S = getTimeStamp(); S = getTimeStamp();
for(int i = 0; i < param.ntimes; i++) { for(int i = 0; i < param.ntimes; i++) {
computeForce(&param, atom, &neighbor, &stats, 0, i + 1);
#if defined(MEM_TRACER) || defined(INDEX_TRACER)
traceAddresses(&param, atom, &neighbor, i + 1);
#endif
if(param.force_field == FF_EAM) {
computeForceEam(&eam, &param, atom, &neighbor, &stats);
} else {
computeForceLJ(&param, atom, &neighbor, &stats);
}
} }
E = getTimeStamp(); E = getTimeStamp();
double T_accum = E-S; double T_accum = E-S;

View File

@ -41,12 +41,12 @@
#include <timers.h> #include <timers.h>
#include <eam.h> #include <eam.h>
#include <vtk.h> #include <vtk.h>
#include <util.h>
#define HLINE "----------------------------------------------------------------------------\n" #define HLINE "----------------------------------------------------------------------------\n"
extern double computeForce(Parameter*, Atom*, Neighbor*); extern double computeForceLJ(Parameter*, Atom*, Neighbor*, Stats*);
extern double computeForceTracing(Parameter*, Atom*, Neighbor*, Stats*, int, int); extern double computeForceEam(Eam*, Parameter*, Atom*, Neighbor*, Stats*);
extern double computeForceEam(Eam* eam, Parameter*, Atom *atom, Neighbor *neighbor, Stats *stats, int first_exec, int timestep);
void init(Parameter *param) void init(Parameter *param)
{ {
@ -166,20 +166,6 @@ void printAtomState(Atom *atom)
/* } */ /* } */
} }
int str2ff(const char *string)
{
if(strncmp(string, "lj", 2) == 0) return FF_LJ;
if(strncmp(string, "eam", 3) == 0) return FF_EAM;
return -1;
}
const char* ff2str(int ff)
{
if(ff == FF_LJ) { return "lj"; }
if(ff == FF_EAM) { return "eam"; }
return "invalid";
}
int main(int argc, char** argv) int main(int argc, char** argv)
{ {
double timer[NUMTIMER]; double timer[NUMTIMER];
@ -266,17 +252,15 @@ int main(int argc, char** argv)
setup(&param, &eam, &atom, &neighbor, &stats); setup(&param, &eam, &atom, &neighbor, &stats);
computeThermo(0, &param, &atom); computeThermo(0, &param, &atom);
if(param.force_field == FF_EAM) { #if defined(MEM_TRACER) || defined(INDEX_TRACER)
computeForceEam(&eam, &param, &atom, &neighbor, &stats, 1, 0); traceAddresses(&param, &atom, &neighbor, n + 1);
} else {
#if defined(MEM_TRACER) || defined(INDEX_TRACER) || defined(COMPUTE_STATS)
computeForceTracing(&param, &atom, &neighbor, &stats, 1, 0);
#else
computeForce(&param, &atom, &neighbor);
#endif #endif
if(param.force_field == FF_EAM) {
timer[FORCE] = computeForceEam(&eam, &param, &atom, &neighbor, &stats);
} else {
timer[FORCE] = computeForceLJ(&param, &atom, &neighbor, &stats);
} }
timer[FORCE] = 0.0;
timer[NEIGH] = 0.0; timer[NEIGH] = 0.0;
timer[TOTAL] = getTimeStamp(); timer[TOTAL] = getTimeStamp();
@ -293,15 +277,16 @@ int main(int argc, char** argv)
timer[NEIGH] += reneighbour(&param, &atom, &neighbor); timer[NEIGH] += reneighbour(&param, &atom, &neighbor);
} }
if(param.force_field == FF_EAM) { #if defined(MEM_TRACER) || defined(INDEX_TRACER)
timer[FORCE] += computeForceEam(&eam, &param, &atom, &neighbor, &stats, 0, n + 1); traceAddresses(&param, &atom, &neighbor, n + 1);
} else {
#if defined(MEM_TRACER) || defined(INDEX_TRACER) || defined(COMPUTE_STATS)
timer[FORCE] += computeForceTracing(&param, &atom, &neighbor, &stats, 0, n + 1);
#else
timer[FORCE] += computeForce(&param, &atom, &neighbor);
#endif #endif
if(param.force_field == FF_EAM) {
timer[FORCE] += computeForceEam(&eam, &param, &atom, &neighbor, &stats);
} else {
timer[FORCE] += computeForceLJ(&param, &atom, &neighbor, &stats);
} }
finalIntegrate(&param, &atom); finalIntegrate(&param, &atom);
if(!((n + 1) % param.nstat) && (n+1) < param.ntimes) { if(!((n + 1) % param.nstat) && (n+1) < param.ntimes) {

View File

@ -25,6 +25,7 @@
#include <math.h> #include <math.h>
#include <thermo.h> #include <thermo.h>
#include <util.h>
static int *steparr; static int *steparr;
static MD_FLOAT *tmparr; static MD_FLOAT *tmparr;

73
src/tracing.c Normal file
View File

@ -0,0 +1,73 @@
/*
* =======================================================================================
*
* Author: Jan Eitzinger (je), jan.eitzinger@fau.de
* Copyright (c) 2021 RRZE, University Erlangen-Nuremberg
*
* This file is part of MD-Bench.
*
* MD-Bench is free software: you can redistribute it and/or modify it
* under the terms of the GNU Lesser General Public License as published
* by the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* MD-Bench is distributed in the hope that it will be useful, but WITHOUT ANY
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
* PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
* details.
*
* You should have received a copy of the GNU Lesser General Public License along
* with MD-Bench. If not, see <https://www.gnu.org/licenses/>.
* =======================================================================================
*/
#include <neighbor.h>
#include <parameter.h>
#include <atom.h>
#include <tracing.h>
void traceAddresses(Parameter *param, Atom *atom, Neighbor *neighbor, int timestep) {
MEM_TRACER_INIT;
INDEX_TRACER_INIT;
int Nlocal = atom->Nlocal;
int* neighs;
MD_FLOAT* fx = atom->fx; MD_FLOAT* fy = atom->fy; MD_FLOAT* fz = atom->fz;
INDEX_TRACE_NATOMS(Nlocal, atom->Nghost, neighbor->maxneighs);
for(int i = 0; i < Nlocal; i++) {
neighs = &neighbor->neighbors[i * neighbor->maxneighs];
int numneighs = neighbor->numneigh[i];
MEM_TRACE(atom_x(i), 'R');
MEM_TRACE(atom_y(i), 'R');
MEM_TRACE(atom_z(i), 'R');
INDEX_TRACE_ATOM(i);
#ifdef EXPLICIT_TYPES
MEM_TRACE(atom->type[i], 'R');
#endif
DIST_TRACE_SORT(neighs, numneighs);
INDEX_TRACE(neighs, numneighs);
DIST_TRACE(neighs, numneighs);
for(int k = 0; k < numneighs; k++) {
MEM_TRACE(neighs[k], 'R');
MEM_TRACE(atom_x(j), 'R');
MEM_TRACE(atom_y(j), 'R');
MEM_TRACE(atom_z(j), 'R');
#ifdef EXPLICIT_TYPES
MEM_TRACE(atom->type[j], 'R');
#endif
}
MEM_TRACE(fx[i], 'R');
MEM_TRACE(fx[i], 'W');
MEM_TRACE(fy[i], 'R');
MEM_TRACE(fy[i], 'W');
MEM_TRACE(fz[i], 'R');
MEM_TRACE(fz[i], 'W');
}
INDEX_TRACER_END;
MEM_TRACER_END;
}

View File

@ -77,3 +77,17 @@ void random_reset(int *seed, int ibase, double *coord)
for (i = 0; i < 5; i++) myrandom(seed); for (i = 0; i < 5; i++) myrandom(seed);
//save = 0; //save = 0;
} }
int str2ff(const char *string)
{
if(strncmp(string, "lj", 2) == 0) return FF_LJ;
if(strncmp(string, "eam", 3) == 0) return FF_EAM;
return -1;
}
const char* ff2str(int ff)
{
if(ff == FF_LJ) { return "lj"; }
if(ff == FF_EAM) { return "eam"; }
return "invalid";
}