Add memory tracer and update config.mk with all options

Signed-off-by: Rafael Ravedutti <rafaelravedutti@gmail.com>
This commit is contained in:
Rafael Ravedutti 2021-06-16 00:56:00 +02:00
parent 933f7c7bba
commit 0a2ec6376c
6 changed files with 65 additions and 17 deletions

View File

@ -21,14 +21,18 @@ else
DEFINES += -DPRECISION=2 DEFINES += -DPRECISION=2
endif endif
ifneq ($(INTERNAL_LOOP_NTIMES),) ifneq ($(NEIGHBORS_LOOP_RUNS),)
DEFINES += -DINTERNAL_LOOP_NTIMES=$(INTERNAL_LOOP_NTIMES) DEFINES += -DNEIGHBORS_LOOP_RUNS=$(NEIGHBORS_LOOP_RUNS)
endif endif
ifneq ($(EXPLICIT_TYPES),) ifeq ($(strip $(EXPLICIT_TYPES)),true)
DEFINES += -DEXPLICIT_TYPES DEFINES += -DEXPLICIT_TYPES
endif endif
# Pass -DMEM_TRACER to the compiler only when MEM_TRACER is exactly "true";
# $(strip ...) removes surrounding whitespace before the comparison, so any
# other value (including the default "false") leaves the define off.
ifeq ($(strip $(MEM_TRACER)),true)
DEFINES += -DMEM_TRACER
endif
VPATH = $(SRC_DIR) $(ASM_DIR) VPATH = $(SRC_DIR) $(ASM_DIR)
ASM = $(patsubst $(SRC_DIR)/%.c, $(BUILD_DIR)/%.s,$(wildcard $(SRC_DIR)/*.c)) ASM = $(patsubst $(SRC_DIR)/%.c, $(BUILD_DIR)/%.s,$(wildcard $(SRC_DIR)/*.c))
OVERWRITE:= $(patsubst $(ASM_DIR)/%-new.s, $(BUILD_DIR)/%.o,$(wildcard $(ASM_DIR)/*-new.s)) OVERWRITE:= $(patsubst $(ASM_DIR)/%-new.s, $(BUILD_DIR)/%.o,$(wildcard $(ASM_DIR)/*-new.s))
@ -40,6 +44,7 @@ CPPFLAGS := $(CPPFLAGS) $(DEFINES) $(OPTIONS) $(INCLUDES)
ifneq ($(VARIANT),) ifneq ($(VARIANT),)
.DEFAULT_GOAL := ${TARGET}-$(VARIANT) .DEFAULT_GOAL := ${TARGET}-$(VARIANT)
DEFINES += -DVARIANT=$(VARIANT)
endif endif
${TARGET}: $(BUILD_DIR) $(OBJ) $(SRC_DIR)/main.c ${TARGET}: $(BUILD_DIR) $(OBJ) $(SRC_DIR)/main.c

View File

@ -6,6 +6,13 @@ DATA_TYPE ?= DP
# AOS or SOA # AOS or SOA
DATA_LAYOUT ?= AOS DATA_LAYOUT ?= AOS
# Number of times to run the neighbors loop
NEIGHBORS_LOOP_RUNS ?= 1
# Explicitly store and load atom types (true or false)
EXPLICIT_TYPES ?= false
# Trace memory addresses for the cache simulator (true or false)
MEM_TRACER ?= false
#Feature options #Feature options
OPTIONS = -DALIGNMENT=64 OPTIONS = -DALIGNMENT=64
#OPTIONS += More options #OPTIONS += More options

View File

@ -27,7 +27,22 @@
#include <parameter.h> #include <parameter.h>
#include <atom.h> #include <atom.h>
double computeForce(Parameter *param, Atom *atom, Neighbor *neighbor, int ntimes) { #ifndef TRACER_PRINT
# include <stdio.h>
# ifdef MEM_TRACER
/*
 * Memory tracer hooks (active when compiled with -DMEM_TRACER).
 *
 * All three macros expect an integer named `first_exec` to be in scope at the
 * point of use; tracing only happens when it is non-zero.
 *
 *   TRACER_INIT         declares `tracer_fp` and, when first_exec is set,
 *                       opens "mem_tracer.out" for writing. Must be used at a
 *                       point where a declaration is allowed.
 *   TRACER_PRINT(a, op) appends one "<op>: <address>" line to the trace.
 *   TRACER_END          closes the trace file if it was opened.
 *
 * tracer_fp starts out NULL and every later use is guarded by it, so a failed
 * fopen() (reported through perror) simply disables tracing instead of
 * handing a NULL stream to fprintf()/fclose(). The statement-like macros are
 * wrapped in do { } while(0) so they behave as a single statement, e.g. inside
 * an unbraced if/else.
 */
#   define TRACER_INIT FILE *tracer_fp = NULL; \
        if(first_exec) { \
            tracer_fp = fopen("mem_tracer.out", "w"); \
            if(tracer_fp == NULL) { perror("mem_tracer.out"); } \
        }
#   define TRACER_END \
        do { if(tracer_fp != NULL) { fclose(tracer_fp); tracer_fp = NULL; } } while(0)
#   define TRACER_PRINT(addr, op) \
        do { if(tracer_fp != NULL) { fprintf(tracer_fp, "%c: %p\n", op, (void *)(addr)); } } while(0)
# else
/* Tracing disabled: the hooks expand to nothing and their arguments are not evaluated. */
#   define TRACER_INIT
#   define TRACER_END
#   define TRACER_PRINT(addr, op)
# endif
#endif
double computeForce(Parameter *param, Atom *atom, Neighbor *neighbor, int first_exec) {
TRACER_INIT;
double S = getTimeStamp(); double S = getTimeStamp();
int Nlocal = atom->Nlocal; int Nlocal = atom->Nlocal;
int* neighs; int* neighs;
@ -54,23 +69,41 @@ double computeForce(Parameter *param, Atom *atom, Neighbor *neighbor, int ntimes
MD_FLOAT fix = 0; MD_FLOAT fix = 0;
MD_FLOAT fiy = 0; MD_FLOAT fiy = 0;
MD_FLOAT fiz = 0; MD_FLOAT fiz = 0;
TRACER_PRINT(&atom_x(i), 'R');
TRACER_PRINT(&atom_y(i), 'R');
TRACER_PRINT(&atom_z(i), 'R');
#ifdef EXPLICIT_TYPES #ifdef EXPLICIT_TYPES
const int type_i = atom->type[i]; const int type_i = atom->type[i];
TRACER_PRINT(&atom->type(i), 'R');
#endif
#if VARIANT == stub && defined(NEIGHBORS_LOOP_RUNS) && NEIGHBORS_LOOP_RUNS > 1
#define REPEAT_NEIGHBORS_LOOP
int nmax = first_exec ? 1 : NEIGHBORS_LOOP_RUNS;
for(int n = 0; n < nmax; n++) {
#endif #endif
for(int n = 0; n < ntimes; n++) {
for(int k = 0; k < numneighs; k++) { for(int k = 0; k < numneighs; k++) {
int j = neighs[k]; int j = neighs[k];
MD_FLOAT delx = xtmp - atom_x(j); MD_FLOAT delx = xtmp - atom_x(j);
MD_FLOAT dely = ytmp - atom_y(j); MD_FLOAT dely = ytmp - atom_y(j);
MD_FLOAT delz = ztmp - atom_z(j); MD_FLOAT delz = ztmp - atom_z(j);
MD_FLOAT rsq = delx * delx + dely * dely + delz * delz; MD_FLOAT rsq = delx * delx + dely * dely + delz * delz;
TRACER_PRINT(&neighs[k], 'R');
TRACER_PRINT(&atom_x(j), 'R');
TRACER_PRINT(&atom_y(j), 'R');
TRACER_PRINT(&atom_z(j), 'R');
#ifdef EXPLICIT_TYPES #ifdef EXPLICIT_TYPES
const int type_j = atom->type[j]; const int type_j = atom->type[j];
const int type_ij = type_i * atom->ntypes + type_j; const int type_ij = type_i * atom->ntypes + type_j;
const MD_FLOAT cutforcesq = atom->cutforcesq[type_ij]; const MD_FLOAT cutforcesq = atom->cutforcesq[type_ij];
const MD_FLOAT sigma6 = atom->sigma6[type_ij]; const MD_FLOAT sigma6 = atom->sigma6[type_ij];
const MD_FLOAT epsilon = atom->epsilon[type_ij]; const MD_FLOAT epsilon = atom->epsilon[type_ij];
TRACER_PRINT(&atom->type(j), 'R');
#endif #endif
if(rsq < cutforcesq) { if(rsq < cutforcesq) {
@ -82,13 +115,21 @@ double computeForce(Parameter *param, Atom *atom, Neighbor *neighbor, int ntimes
fiz += delz * force; fiz += delz * force;
} }
} }
#ifdef REPEAT_NEIGHBORS_LOOP
} }
#endif
fx[i] += fix; fx[i] += fix;
fy[i] += fiy; fy[i] += fiy;
fz[i] += fiz; fz[i] += fiz;
TRACER_PRINT(&fx[i], 'W');
TRACER_PRINT(&fy[i], 'W');
TRACER_PRINT(&fz[i], 'W');
} }
double E = getTimeStamp(); double E = getTimeStamp();
TRACER_END;
return E-S; return E-S;
} }

View File

@ -29,11 +29,6 @@
#define MD_FLOAT double #define MD_FLOAT double
#endif #endif
// Number of times to compute the most internal loop
#ifndef INTERNAL_LOOP_NTIMES
#define INTERNAL_LOOP_NTIMES 1
#endif
typedef struct { typedef struct {
MD_FLOAT epsilon; MD_FLOAT epsilon;
MD_FLOAT sigma6; MD_FLOAT sigma6;

View File

@ -16,7 +16,7 @@
#define LATTICE_DISTANCE 10.0 #define LATTICE_DISTANCE 10.0
#define NEIGH_DISTANCE 1.0 #define NEIGH_DISTANCE 1.0
extern double computeForce( Parameter*, Atom*, Neighbor*, int); extern double computeForce(Parameter*, Atom*, Neighbor*, int);
void init(Parameter *param) { void init(Parameter *param) {
param->epsilon = 1.0; param->epsilon = 1.0;
@ -191,7 +191,7 @@ int main(int argc, const char *argv[]) {
if(!csv) { if(!csv) {
printf("Number of timesteps: %d\n", param.ntimes); printf("Number of timesteps: %d\n", param.ntimes);
printf("Number of times to compute the most internal loop: %d\n", INTERNAL_LOOP_NTIMES); printf("Number of times to compute the neighbors loop: %d\n", NEIGHBORS_LOOP_RUNS);
printf("System size (unit cells): %dx%dx%d\n", param.nx, param.ny, param.nz); printf("System size (unit cells): %dx%dx%d\n", param.nx, param.ny, param.nz);
printf("Atoms per unit cell: %d\n", atoms_per_unit_cell); printf("Atoms per unit cell: %d\n", atoms_per_unit_cell);
printf("Total number of atoms: %d\n", atom->Nlocal); printf("Total number of atoms: %d\n", atom->Nlocal);
@ -213,13 +213,13 @@ int main(int argc, const char *argv[]) {
S = getTimeStamp(); S = getTimeStamp();
LIKWID_MARKER_START("force"); LIKWID_MARKER_START("force");
for(int i = 0; i < param.ntimes; i++) { for(int i = 0; i < param.ntimes; i++) {
computeForce(&param, atom, &neighbor, INTERNAL_LOOP_NTIMES); computeForce(&param, atom, &neighbor, 0);
} }
LIKWID_MARKER_STOP("force"); LIKWID_MARKER_STOP("force");
E = getTimeStamp(); E = getTimeStamp();
double T_accum = E-S; double T_accum = E-S;
const double atoms_updates_per_sec = (double)(atom->Nlocal) / T_accum * (double)(param.ntimes * INTERNAL_LOOP_NTIMES); const double atoms_updates_per_sec = (double)(atom->Nlocal) / T_accum * (double)(param.ntimes * NEIGHBORS_LOOP_RUNS);
const double cycles_per_atom = T_accum / (double)(atom->Nlocal) / (double)(param.ntimes * INTERNAL_LOOP_NTIMES) * freq; const double cycles_per_atom = T_accum / (double)(atom->Nlocal) / (double)(param.ntimes * NEIGHBORS_LOOP_RUNS) * freq;
const double cycles_per_neigh = cycles_per_atom / (double)(atoms_per_unit_cell - 1); const double cycles_per_neigh = cycles_per_atom / (double)(atoms_per_unit_cell - 1);
if(!csv) { if(!csv) {

View File

@ -47,7 +47,7 @@ typedef enum {
NUMTIMER NUMTIMER
} timertype; } timertype;
extern double computeForce( Parameter*, Atom*, Neighbor*, int); extern double computeForce(Parameter*, Atom*, Neighbor*, int);
void init(Parameter *param) void init(Parameter *param)
{ {
@ -222,7 +222,7 @@ int main (int argc, char** argv)
timer[NEIGH] += reneighbour(&param, &atom, &neighbor); timer[NEIGH] += reneighbour(&param, &atom, &neighbor);
} }
timer[FORCE] += computeForce(&param, &atom, &neighbor, 1); timer[FORCE] += computeForce(&param, &atom, &neighbor, 0);
finalIntegrate(&param, &atom); finalIntegrate(&param, &atom);
if(!((n + 1) % param.nstat) && (n+1) < param.ntimes) { if(!((n + 1) % param.nstat) && (n+1) < param.ntimes) {