Add memory tracer and update config.mk with all options
Signed-off-by: Rafael Ravedutti <rafaelravedutti@gmail.com>
This commit is contained in:
parent
933f7c7bba
commit
0a2ec6376c
11
Makefile
11
Makefile
@ -21,14 +21,18 @@ else
|
|||||||
DEFINES += -DPRECISION=2
|
DEFINES += -DPRECISION=2
|
||||||
endif
|
endif
|
||||||
|
|
||||||
ifneq ($(INTERNAL_LOOP_NTIMES),)
|
ifneq ($(NEIGHBORS_LOOP_RUNS),)
|
||||||
DEFINES += -DINTERNAL_LOOP_NTIMES=$(INTERNAL_LOOP_NTIMES)
|
DEFINES += -DNEIGHBORS_LOOP_RUNS=$(NEIGHBORS_LOOP_RUNS)
|
||||||
endif
|
endif
|
||||||
|
|
||||||
ifneq ($(EXPLICIT_TYPES),)
|
ifeq ($(strip $(EXPLICIT_TYPES)),true)
|
||||||
DEFINES += -DEXPLICIT_TYPES
|
DEFINES += -DEXPLICIT_TYPES
|
||||||
endif
|
endif
|
||||||
|
|
||||||
|
ifeq ($(strip $(MEM_TRACER)),true)
|
||||||
|
DEFINES += -DMEM_TRACER
|
||||||
|
endif
|
||||||
|
|
||||||
VPATH = $(SRC_DIR) $(ASM_DIR)
|
VPATH = $(SRC_DIR) $(ASM_DIR)
|
||||||
ASM = $(patsubst $(SRC_DIR)/%.c, $(BUILD_DIR)/%.s,$(wildcard $(SRC_DIR)/*.c))
|
ASM = $(patsubst $(SRC_DIR)/%.c, $(BUILD_DIR)/%.s,$(wildcard $(SRC_DIR)/*.c))
|
||||||
OVERWRITE:= $(patsubst $(ASM_DIR)/%-new.s, $(BUILD_DIR)/%.o,$(wildcard $(ASM_DIR)/*-new.s))
|
OVERWRITE:= $(patsubst $(ASM_DIR)/%-new.s, $(BUILD_DIR)/%.o,$(wildcard $(ASM_DIR)/*-new.s))
|
||||||
@ -40,6 +44,7 @@ CPPFLAGS := $(CPPFLAGS) $(DEFINES) $(OPTIONS) $(INCLUDES)
|
|||||||
|
|
||||||
ifneq ($(VARIANT),)
|
ifneq ($(VARIANT),)
|
||||||
.DEFAULT_GOAL := ${TARGET}-$(VARIANT)
|
.DEFAULT_GOAL := ${TARGET}-$(VARIANT)
|
||||||
|
DEFINES += -DVARIANT=$(VARIANT)
|
||||||
endif
|
endif
|
||||||
|
|
||||||
${TARGET}: $(BUILD_DIR) $(OBJ) $(SRC_DIR)/main.c
|
${TARGET}: $(BUILD_DIR) $(OBJ) $(SRC_DIR)/main.c
|
||||||
|
@ -6,6 +6,13 @@ DATA_TYPE ?= DP
|
|||||||
# AOS or SOA
|
# AOS or SOA
|
||||||
DATA_LAYOUT ?= AOS
|
DATA_LAYOUT ?= AOS
|
||||||
|
|
||||||
|
# Number of times to run the neighbors loop
|
||||||
|
NEIGHBORS_LOOP_RUNS ?= 1
|
||||||
|
# Explicitly store and load atom types (true or false)
|
||||||
|
EXPLICIT_TYPES ?= false
|
||||||
|
# Trace memory addresses for cache simulator (true or false)
|
||||||
|
MEM_TRACER ?= false
|
||||||
|
|
||||||
#Feature options
|
#Feature options
|
||||||
OPTIONS = -DALIGNMENT=64
|
OPTIONS = -DALIGNMENT=64
|
||||||
#OPTIONS += More options
|
#OPTIONS += More options
|
||||||
|
45
src/force.c
45
src/force.c
@ -27,7 +27,22 @@
|
|||||||
#include <parameter.h>
|
#include <parameter.h>
|
||||||
#include <atom.h>
|
#include <atom.h>
|
||||||
|
|
||||||
double computeForce(Parameter *param, Atom *atom, Neighbor *neighbor, int ntimes) {
|
#ifndef TRACER_PRINT
#   include <stdio.h>
#   ifdef MEM_TRACER
/*
 * Memory-access tracer, compiled in when MEM_TRACER is defined.
 *
 * All three macros are meant to be used as statements terminated by ';'
 * inside a function that has an integer `first_exec` in scope:
 *
 *   TRACER_INIT;            declares `FILE *tracer_fp` and, when first_exec is
 *                           non-zero, opens "mem_tracer.out" for writing.
 *                           It expands to a declaration plus a statement, so it
 *                           must appear where declarations are allowed.
 *   TRACER_PRINT(addr, op); appends one line "<op>: <addr>" (format "%c: %p")
 *                           to the trace file.
 *   TRACER_END;             closes the trace file.
 *
 * tracer_fp starts out as NULL and stays NULL when tracing is not requested
 * or when the file could not be opened (an error is reported via perror);
 * TRACER_PRINT and TRACER_END test the handle itself, so they never touch an
 * uninitialized or NULL stream.
 */
#       define TRACER_INIT \
            FILE *tracer_fp = NULL; \
            if(first_exec) { \
                tracer_fp = fopen("mem_tracer.out", "w"); \
                if(tracer_fp == NULL) { perror("mem_tracer.out"); } \
            }
#       define TRACER_END \
            do { \
                if(tracer_fp != NULL) { fclose(tracer_fp); tracer_fp = NULL; } \
            } while(0)
#       define TRACER_PRINT(addr, op) \
            do { \
                if(tracer_fp != NULL) { fprintf(tracer_fp, "%c: %p\n", (op), (void *)(addr)); } \
            } while(0)
#   else
/* Tracing disabled: the macros expand to nothing and their arguments are discarded. */
#       define TRACER_INIT
#       define TRACER_END
#       define TRACER_PRINT(addr, op)
#   endif
#endif
|
||||||
|
|
||||||
|
double computeForce(Parameter *param, Atom *atom, Neighbor *neighbor, int first_exec) {
|
||||||
|
TRACER_INIT;
|
||||||
double S = getTimeStamp();
|
double S = getTimeStamp();
|
||||||
int Nlocal = atom->Nlocal;
|
int Nlocal = atom->Nlocal;
|
||||||
int* neighs;
|
int* neighs;
|
||||||
@ -54,23 +69,41 @@ double computeForce(Parameter *param, Atom *atom, Neighbor *neighbor, int ntimes
|
|||||||
MD_FLOAT fix = 0;
|
MD_FLOAT fix = 0;
|
||||||
MD_FLOAT fiy = 0;
|
MD_FLOAT fiy = 0;
|
||||||
MD_FLOAT fiz = 0;
|
MD_FLOAT fiz = 0;
|
||||||
|
|
||||||
|
TRACER_PRINT(&atom_x(i), 'R');
|
||||||
|
TRACER_PRINT(&atom_y(i), 'R');
|
||||||
|
TRACER_PRINT(&atom_z(i), 'R');
|
||||||
|
|
||||||
#ifdef EXPLICIT_TYPES
|
#ifdef EXPLICIT_TYPES
|
||||||
const int type_i = atom->type[i];
|
const int type_i = atom->type[i];
|
||||||
|
TRACER_PRINT(&atom->type(i), 'R');
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if VARIANT == stub && defined(NEIGHBORS_LOOP_RUNS) && NEIGHBORS_LOOP_RUNS > 1
|
||||||
|
#define REPEAT_NEIGHBORS_LOOP
|
||||||
|
int nmax = first_exec ? 1 : NEIGHBORS_LOOP_RUNS;
|
||||||
|
for(int n = 0; n < nmax; n++) {
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
for(int n = 0; n < ntimes; n++) {
|
|
||||||
for(int k = 0; k < numneighs; k++) {
|
for(int k = 0; k < numneighs; k++) {
|
||||||
int j = neighs[k];
|
int j = neighs[k];
|
||||||
MD_FLOAT delx = xtmp - atom_x(j);
|
MD_FLOAT delx = xtmp - atom_x(j);
|
||||||
MD_FLOAT dely = ytmp - atom_y(j);
|
MD_FLOAT dely = ytmp - atom_y(j);
|
||||||
MD_FLOAT delz = ztmp - atom_z(j);
|
MD_FLOAT delz = ztmp - atom_z(j);
|
||||||
MD_FLOAT rsq = delx * delx + dely * dely + delz * delz;
|
MD_FLOAT rsq = delx * delx + dely * dely + delz * delz;
|
||||||
|
|
||||||
|
TRACER_PRINT(&neighs[k], 'R');
|
||||||
|
TRACER_PRINT(&atom_x(j), 'R');
|
||||||
|
TRACER_PRINT(&atom_y(j), 'R');
|
||||||
|
TRACER_PRINT(&atom_z(j), 'R');
|
||||||
|
|
||||||
#ifdef EXPLICIT_TYPES
|
#ifdef EXPLICIT_TYPES
|
||||||
const int type_j = atom->type[j];
|
const int type_j = atom->type[j];
|
||||||
const int type_ij = type_i * atom->ntypes + type_j;
|
const int type_ij = type_i * atom->ntypes + type_j;
|
||||||
const MD_FLOAT cutforcesq = atom->cutforcesq[type_ij];
|
const MD_FLOAT cutforcesq = atom->cutforcesq[type_ij];
|
||||||
const MD_FLOAT sigma6 = atom->sigma6[type_ij];
|
const MD_FLOAT sigma6 = atom->sigma6[type_ij];
|
||||||
const MD_FLOAT epsilon = atom->epsilon[type_ij];
|
const MD_FLOAT epsilon = atom->epsilon[type_ij];
|
||||||
|
TRACER_PRINT(&atom->type(j), 'R');
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
if(rsq < cutforcesq) {
|
if(rsq < cutforcesq) {
|
||||||
@ -82,13 +115,21 @@ double computeForce(Parameter *param, Atom *atom, Neighbor *neighbor, int ntimes
|
|||||||
fiz += delz * force;
|
fiz += delz * force;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef REPEAT_NEIGHBORS_LOOP
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
fx[i] += fix;
|
fx[i] += fix;
|
||||||
fy[i] += fiy;
|
fy[i] += fiy;
|
||||||
fz[i] += fiz;
|
fz[i] += fiz;
|
||||||
|
|
||||||
|
TRACER_PRINT(&fx[i], 'W');
|
||||||
|
TRACER_PRINT(&fy[i], 'W');
|
||||||
|
TRACER_PRINT(&fz[i], 'W');
|
||||||
}
|
}
|
||||||
|
|
||||||
double E = getTimeStamp();
|
double E = getTimeStamp();
|
||||||
|
TRACER_END;
|
||||||
return E-S;
|
return E-S;
|
||||||
}
|
}
|
||||||
|
@ -29,11 +29,6 @@
|
|||||||
#define MD_FLOAT double
|
#define MD_FLOAT double
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// Number of times to compute the most internal loop
|
|
||||||
#ifndef INTERNAL_LOOP_NTIMES
|
|
||||||
#define INTERNAL_LOOP_NTIMES 1
|
|
||||||
#endif
|
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
MD_FLOAT epsilon;
|
MD_FLOAT epsilon;
|
||||||
MD_FLOAT sigma6;
|
MD_FLOAT sigma6;
|
||||||
|
@ -191,7 +191,7 @@ int main(int argc, const char *argv[]) {
|
|||||||
|
|
||||||
if(!csv) {
|
if(!csv) {
|
||||||
printf("Number of timesteps: %d\n", param.ntimes);
|
printf("Number of timesteps: %d\n", param.ntimes);
|
||||||
printf("Number of times to compute the most internal loop: %d\n", INTERNAL_LOOP_NTIMES);
|
printf("Number of times to compute the neighbors loop: %d\n", NEIGHBORS_LOOP_RUNS);
|
||||||
printf("System size (unit cells): %dx%dx%d\n", param.nx, param.ny, param.nz);
|
printf("System size (unit cells): %dx%dx%d\n", param.nx, param.ny, param.nz);
|
||||||
printf("Atoms per unit cell: %d\n", atoms_per_unit_cell);
|
printf("Atoms per unit cell: %d\n", atoms_per_unit_cell);
|
||||||
printf("Total number of atoms: %d\n", atom->Nlocal);
|
printf("Total number of atoms: %d\n", atom->Nlocal);
|
||||||
@ -213,13 +213,13 @@ int main(int argc, const char *argv[]) {
|
|||||||
S = getTimeStamp();
|
S = getTimeStamp();
|
||||||
LIKWID_MARKER_START("force");
|
LIKWID_MARKER_START("force");
|
||||||
for(int i = 0; i < param.ntimes; i++) {
|
for(int i = 0; i < param.ntimes; i++) {
|
||||||
computeForce(¶m, atom, &neighbor, INTERNAL_LOOP_NTIMES);
|
computeForce(¶m, atom, &neighbor, 0);
|
||||||
}
|
}
|
||||||
LIKWID_MARKER_STOP("force");
|
LIKWID_MARKER_STOP("force");
|
||||||
E = getTimeStamp();
|
E = getTimeStamp();
|
||||||
double T_accum = E-S;
|
double T_accum = E-S;
|
||||||
const double atoms_updates_per_sec = (double)(atom->Nlocal) / T_accum * (double)(param.ntimes * INTERNAL_LOOP_NTIMES);
|
const double atoms_updates_per_sec = (double)(atom->Nlocal) / T_accum * (double)(param.ntimes * NEIGHBORS_LOOP_RUNS);
|
||||||
const double cycles_per_atom = T_accum / (double)(atom->Nlocal) / (double)(param.ntimes * INTERNAL_LOOP_NTIMES) * freq;
|
const double cycles_per_atom = T_accum / (double)(atom->Nlocal) / (double)(param.ntimes * NEIGHBORS_LOOP_RUNS) * freq;
|
||||||
const double cycles_per_neigh = cycles_per_atom / (double)(atoms_per_unit_cell - 1);
|
const double cycles_per_neigh = cycles_per_atom / (double)(atoms_per_unit_cell - 1);
|
||||||
|
|
||||||
if(!csv) {
|
if(!csv) {
|
||||||
|
@ -222,7 +222,7 @@ int main (int argc, char** argv)
|
|||||||
timer[NEIGH] += reneighbour(¶m, &atom, &neighbor);
|
timer[NEIGH] += reneighbour(¶m, &atom, &neighbor);
|
||||||
}
|
}
|
||||||
|
|
||||||
timer[FORCE] += computeForce(¶m, &atom, &neighbor, 1);
|
timer[FORCE] += computeForce(¶m, &atom, &neighbor, 0);
|
||||||
finalIntegrate(¶m, &atom);
|
finalIntegrate(¶m, &atom);
|
||||||
|
|
||||||
if(!((n + 1) % param.nstat) && (n+1) < param.ntimes) {
|
if(!((n + 1) % param.nstat) && (n+1) < param.ntimes) {
|
||||||
|
Loading…
Reference in New Issue
Block a user