Add memory tracer and update config.mk with all options
Signed-off-by: Rafael Ravedutti <rafaelravedutti@gmail.com>
This commit is contained in:
parent
933f7c7bba
commit
0a2ec6376c
11
Makefile
11
Makefile
@ -21,14 +21,18 @@ else
|
||||
DEFINES += -DPRECISION=2
|
||||
endif
|
||||
|
||||
ifneq ($(INTERNAL_LOOP_NTIMES),)
|
||||
DEFINES += -DINTERNAL_LOOP_NTIMES=$(INTERNAL_LOOP_NTIMES)
|
||||
ifneq ($(NEIGHBORS_LOOP_RUNS),)
|
||||
DEFINES += -DNEIGHBORS_LOOP_RUNS=$(NEIGHBORS_LOOP_RUNS)
|
||||
endif
|
||||
|
||||
ifneq ($(EXPLICIT_TYPES),)
|
||||
ifeq ($(strip $(EXPLICIT_TYPES)),true)
|
||||
DEFINES += -DEXPLICIT_TYPES
|
||||
endif
|
||||
|
||||
ifeq ($(strip $(MEM_TRACER)),true)
|
||||
DEFINES += -DMEM_TRACER
|
||||
endif
|
||||
|
||||
VPATH = $(SRC_DIR) $(ASM_DIR)
|
||||
ASM = $(patsubst $(SRC_DIR)/%.c, $(BUILD_DIR)/%.s,$(wildcard $(SRC_DIR)/*.c))
|
||||
OVERWRITE:= $(patsubst $(ASM_DIR)/%-new.s, $(BUILD_DIR)/%.o,$(wildcard $(ASM_DIR)/*-new.s))
|
||||
@ -40,6 +44,7 @@ CPPFLAGS := $(CPPFLAGS) $(DEFINES) $(OPTIONS) $(INCLUDES)
|
||||
|
||||
ifneq ($(VARIANT),)
|
||||
.DEFAULT_GOAL := ${TARGET}-$(VARIANT)
|
||||
DEFINES += -DVARIANT=$(VARIANT)
|
||||
endif
|
||||
|
||||
${TARGET}: $(BUILD_DIR) $(OBJ) $(SRC_DIR)/main.c
|
||||
|
@ -6,6 +6,13 @@ DATA_TYPE ?= DP
|
||||
# AOS or SOA
|
||||
DATA_LAYOUT ?= AOS
|
||||
|
||||
# Number of times to run the neighbors loop
|
||||
NEIGHBORS_LOOP_RUNS ?= 1
|
||||
# Explicitly store and load atom types
|
||||
EXPLICIT_TYPES ?= false
|
||||
# Trace memory addresses for cache simulator
|
||||
MEM_TRACER ?= false
|
||||
|
||||
#Feature options
|
||||
OPTIONS = -DALIGNMENT=64
|
||||
#OPTIONS += More options
|
||||
|
45
src/force.c
45
src/force.c
@ -27,7 +27,22 @@
|
||||
#include <parameter.h>
|
||||
#include <atom.h>
|
||||
|
||||
double computeForce(Parameter *param, Atom *atom, Neighbor *neighbor, int ntimes) {
|
||||
#ifndef TRACER_PRINT
|
||||
# include <stdio.h>
|
||||
# ifdef MEM_TRACER
|
||||
# define TRACER_INIT FILE *tracer_fp; \
|
||||
if(first_exec) { tracer_fp = fopen("mem_tracer.out", "w"); }
|
||||
# define TRACER_END if(first_exec) { fclose(tracer_fp); }
|
||||
# define TRACER_PRINT(addr, op) if(first_exec) { fprintf(tracer_fp, "%c: %p\n", op, (void *)(addr)); }
|
||||
# else
|
||||
# define TRACER_INIT
|
||||
# define TRACER_END
|
||||
# define TRACER_PRINT(addr, op)
|
||||
# endif
|
||||
#endif
|
||||
|
||||
double computeForce(Parameter *param, Atom *atom, Neighbor *neighbor, int first_exec) {
|
||||
TRACER_INIT;
|
||||
double S = getTimeStamp();
|
||||
int Nlocal = atom->Nlocal;
|
||||
int* neighs;
|
||||
@ -54,23 +69,41 @@ double computeForce(Parameter *param, Atom *atom, Neighbor *neighbor, int ntimes
|
||||
MD_FLOAT fix = 0;
|
||||
MD_FLOAT fiy = 0;
|
||||
MD_FLOAT fiz = 0;
|
||||
|
||||
TRACER_PRINT(&atom_x(i), 'R');
|
||||
TRACER_PRINT(&atom_y(i), 'R');
|
||||
TRACER_PRINT(&atom_z(i), 'R');
|
||||
|
||||
#ifdef EXPLICIT_TYPES
|
||||
const int type_i = atom->type[i];
|
||||
TRACER_PRINT(&atom->type(i), 'R');
|
||||
#endif
|
||||
|
||||
#if VARIANT == stub && defined(NEIGHBORS_LOOP_RUNS) && NEIGHBORS_LOOP_RUNS > 1
|
||||
#define REPEAT_NEIGHBORS_LOOP
|
||||
int nmax = first_exec ? 1 : NEIGHBORS_LOOP_RUNS;
|
||||
for(int n = 0; n < nmax; n++) {
|
||||
#endif
|
||||
|
||||
for(int n = 0; n < ntimes; n++) {
|
||||
for(int k = 0; k < numneighs; k++) {
|
||||
int j = neighs[k];
|
||||
MD_FLOAT delx = xtmp - atom_x(j);
|
||||
MD_FLOAT dely = ytmp - atom_y(j);
|
||||
MD_FLOAT delz = ztmp - atom_z(j);
|
||||
MD_FLOAT rsq = delx * delx + dely * dely + delz * delz;
|
||||
|
||||
TRACER_PRINT(&neighs[k], 'R');
|
||||
TRACER_PRINT(&atom_x(j), 'R');
|
||||
TRACER_PRINT(&atom_y(j), 'R');
|
||||
TRACER_PRINT(&atom_z(j), 'R');
|
||||
|
||||
#ifdef EXPLICIT_TYPES
|
||||
const int type_j = atom->type[j];
|
||||
const int type_ij = type_i * atom->ntypes + type_j;
|
||||
const MD_FLOAT cutforcesq = atom->cutforcesq[type_ij];
|
||||
const MD_FLOAT sigma6 = atom->sigma6[type_ij];
|
||||
const MD_FLOAT epsilon = atom->epsilon[type_ij];
|
||||
TRACER_PRINT(&atom->type(j), 'R');
|
||||
#endif
|
||||
|
||||
if(rsq < cutforcesq) {
|
||||
@ -82,13 +115,21 @@ double computeForce(Parameter *param, Atom *atom, Neighbor *neighbor, int ntimes
|
||||
fiz += delz * force;
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef REPEAT_NEIGHBORS_LOOP
|
||||
}
|
||||
#endif
|
||||
|
||||
fx[i] += fix;
|
||||
fy[i] += fiy;
|
||||
fz[i] += fiz;
|
||||
|
||||
TRACER_PRINT(&fx[i], 'W');
|
||||
TRACER_PRINT(&fy[i], 'W');
|
||||
TRACER_PRINT(&fz[i], 'W');
|
||||
}
|
||||
|
||||
double E = getTimeStamp();
|
||||
TRACER_END;
|
||||
return E-S;
|
||||
}
|
||||
|
@ -29,11 +29,6 @@
|
||||
#define MD_FLOAT double
|
||||
#endif
|
||||
|
||||
// Number of times to compute the most internal loop
|
||||
#ifndef INTERNAL_LOOP_NTIMES
|
||||
#define INTERNAL_LOOP_NTIMES 1
|
||||
#endif
|
||||
|
||||
typedef struct {
|
||||
MD_FLOAT epsilon;
|
||||
MD_FLOAT sigma6;
|
||||
|
@ -191,7 +191,7 @@ int main(int argc, const char *argv[]) {
|
||||
|
||||
if(!csv) {
|
||||
printf("Number of timesteps: %d\n", param.ntimes);
|
||||
printf("Number of times to compute the most internal loop: %d\n", INTERNAL_LOOP_NTIMES);
|
||||
printf("Number of times to compute the neighbors loop: %d\n", NEIGHBORS_LOOP_RUNS);
|
||||
printf("System size (unit cells): %dx%dx%d\n", param.nx, param.ny, param.nz);
|
||||
printf("Atoms per unit cell: %d\n", atoms_per_unit_cell);
|
||||
printf("Total number of atoms: %d\n", atom->Nlocal);
|
||||
@ -213,13 +213,13 @@ int main(int argc, const char *argv[]) {
|
||||
S = getTimeStamp();
|
||||
LIKWID_MARKER_START("force");
|
||||
for(int i = 0; i < param.ntimes; i++) {
|
||||
computeForce(&param, atom, &neighbor, INTERNAL_LOOP_NTIMES);
|
||||
computeForce(&param, atom, &neighbor, 0);
|
||||
}
|
||||
LIKWID_MARKER_STOP("force");
|
||||
E = getTimeStamp();
|
||||
double T_accum = E-S;
|
||||
const double atoms_updates_per_sec = (double)(atom->Nlocal) / T_accum * (double)(param.ntimes * INTERNAL_LOOP_NTIMES);
|
||||
const double cycles_per_atom = T_accum / (double)(atom->Nlocal) / (double)(param.ntimes * INTERNAL_LOOP_NTIMES) * freq;
|
||||
const double atoms_updates_per_sec = (double)(atom->Nlocal) / T_accum * (double)(param.ntimes * NEIGHBORS_LOOP_RUNS);
|
||||
const double cycles_per_atom = T_accum / (double)(atom->Nlocal) / (double)(param.ntimes * NEIGHBORS_LOOP_RUNS) * freq;
|
||||
const double cycles_per_neigh = cycles_per_atom / (double)(atoms_per_unit_cell - 1);
|
||||
|
||||
if(!csv) {
|
||||
|
@ -222,7 +222,7 @@ int main (int argc, char** argv)
|
||||
timer[NEIGH] += reneighbour(&param, &atom, &neighbor);
|
||||
}
|
||||
|
||||
timer[FORCE] += computeForce(&param, &atom, &neighbor, 1);
|
||||
timer[FORCE] += computeForce(&param, &atom, &neighbor, 0);
|
||||
finalIntegrate(&param, &atom);
|
||||
|
||||
if(!((n + 1) % param.nstat) && (n+1) < param.ntimes) {
|
||||
|
Loading…
Reference in New Issue
Block a user