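Restructure timing and instrumentation: move the timestamps and the LIKWID "force" marker out of computeForce() and around the force loop in main(). Add a performance metric (mega atom updates/s).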

This commit is contained in:
Jan Eitzinger 2021-04-15 14:55:02 +02:00
parent 35273c491b
commit 06ba3b2726
6 changed files with 33 additions and 21 deletions

View File

@ -68,5 +68,5 @@ clean:
distclean: clean distclean: clean
@echo "===> DIST CLEAN" @echo "===> DIST CLEAN"
@rm -f $(TARGET) @rm -f $(TARGET)*
@rm -f tags @rm -f tags

View File

@ -1,9 +1,9 @@
# Supported: GCC, CLANG, ICC # Supported: GCC, CLANG, ICC
TAG ?= GCC TAG ?= ICC
# SP or DP # SP or DP
DATA_TYPE ?= DP DATA_TYPE ?= DP
# AOS or SOA # AOS or SOA
DATA_LAYOUT ?= SOA DATA_LAYOUT ?= AOS
#Feature options #Feature options
OPTIONS += -DALIGNMENT=64 OPTIONS += -DALIGNMENT=64

View File

@ -10,6 +10,6 @@ ANSI_CFLAGS += -Wextra
CFLAGS = -O3 -march=znver1 -ffast-math -funroll-loops # -fopenmp CFLAGS = -O3 -march=znver1 -ffast-math -funroll-loops # -fopenmp
ASFLAGS = -masm=intel ASFLAGS = -masm=intel
LFLAGS = LFLAGS =
DEFINES = -D_GNU_SOURCE DEFINES = -D_GNU_SOURCE -DLIKWID_PERFMON
INCLUDES = INCLUDES = $(LIKWID_INC)
LIBS = -lm LIBS = -lm $(LIKWID_LIB) -llikwid

View File

@ -3,15 +3,15 @@ LINKER = $(CC)
OPENMP = #-qopenmp OPENMP = #-qopenmp
PROFILE = #-profile-functions -g -pg PROFILE = #-profile-functions -g -pg
# OPTS = -fast -xCORE-AVX512 -qopt-zmm-usage=high $(PROFILE) OPTS = -Ofast -xCORE-AVX512 -qopt-zmm-usage=high $(PROFILE)
#OPTS = -fast -xCORE-AVX2 $(PROFILE) #OPTS = -fast -xCORE-AVX2 $(PROFILE)
#OPTS = -fast -xAVX $(PROFILE) #OPTS = -fast -xAVX $(PROFILE)
#OPTS = -fast -xSSE4.2 $(PROFILE) #OPTS = -fast -xSSE4.2 $(PROFILE)
#OPTS = -fast -no-vec $(PROFILE) #OPTS = -fast -no-vec $(PROFILE)
OPTS = -fast -xHost $(PROFILE) #OPTS = -fast -xHost $(PROFILE)
CFLAGS = $(PROFILE) -restrict $(OPENMP) $(OPTS) CFLAGS = $(PROFILE) -restrict $(OPENMP) $(OPTS)
ASFLAGS = -masm=intel ASFLAGS = #-masm=intel
LFLAGS = $(PROFILE) $(OPTS) $(OPENMP) LFLAGS = $(PROFILE) $(OPTS) $(OPENMP)
DEFINES = -D_GNU_SOURCE # -DALIGNMENT=64 -DLIKWID_PERFMON -DPRECISION=1 DEFINES = -D_GNU_SOURCE #-DLIKWID_PERFMON
INCLUDES = #$(LIKWID_INC) INCLUDES = #$(LIKWID_INC)
LIBS = -lm #$(LIKWID_LIB) -llikwid LIBS = -lm #$(LIKWID_LIB) -llikwid

View File

@ -39,9 +39,8 @@ double computeForce(
MD_FLOAT sigma6 = param->sigma6; MD_FLOAT sigma6 = param->sigma6;
MD_FLOAT epsilon = param->epsilon; MD_FLOAT epsilon = param->epsilon;
MD_FLOAT* fx = atom->fx; MD_FLOAT* fy = atom->fy; MD_FLOAT* fz = atom->fz; MD_FLOAT* fx = atom->fx; MD_FLOAT* fy = atom->fy; MD_FLOAT* fz = atom->fz;
MD_FLOAT S, E; double S, E;
S = getTimeStamp();
for(int i = 0; i < Nlocal; i++) { for(int i = 0; i < Nlocal; i++) {
fx[i] = 0.0; fx[i] = 0.0;
fy[i] = 0.0; fy[i] = 0.0;
@ -49,7 +48,7 @@ double computeForce(
} }
if(profile) { if(profile) {
LIKWID_MARKER_START("force"); // LIKWID_MARKER_START("force");
} }
#pragma omp parallel for #pragma omp parallel for
@ -64,6 +63,8 @@ double computeForce(
MD_FLOAT fiy = 0; MD_FLOAT fiy = 0;
MD_FLOAT fiz = 0; MD_FLOAT fiz = 0;
// printf("%d: %d\n", i, numneighs);
for(int k = 0; k < numneighs; k++) { for(int k = 0; k < numneighs; k++) {
int j = neighs[k]; int j = neighs[k];
MD_FLOAT delx = xtmp - atom_x(j); MD_FLOAT delx = xtmp - atom_x(j);
@ -87,9 +88,8 @@ double computeForce(
} }
if(profile) { if(profile) {
LIKWID_MARKER_STOP("force"); // LIKWID_MARKER_STOP("force");
} }
E = getTimeStamp(); return 0.0;
return E-S;
} }

View File

@ -158,11 +158,17 @@ int main(int argc, const char *argv[]) {
} }
} }
const double estim_volume = (double)(atom->Nlocal * 6 * sizeof(MD_FLOAT) + (atoms_per_unit_cell - 1 + 2) * sizeof(int)) / 1000.0; const double estim_volume = (double)
(atom->Nlocal * 6 * sizeof(MD_FLOAT) +
atom->Nlocal * (atoms_per_unit_cell - 1 + 2) * sizeof(int)) / 1000.0;
printf("System size (unit cells): %dx%dx%d\n", param.nx, param.ny, param.nz); printf("System size (unit cells): %dx%dx%d\n", param.nx, param.ny, param.nz);
printf("Atoms per unit cell: %d\n", atoms_per_unit_cell); printf("Atoms per unit cell: %d\n", atoms_per_unit_cell);
printf("Total number of atoms: %d\n", atom->Nlocal); printf("Total number of atoms: %d\n", atom->Nlocal);
printf("Estimated memory volume (kB): %.4f\n", estim_volume); printf("Estimated total data volume (kB): %.4f\n", estim_volume );
printf("Estimated atom data volume (kB): %.4f\n",
(double)(atom->Nlocal * 3 * sizeof(MD_FLOAT) / 1000.0));
printf("Estimated neighborlist data volume (kB): %.4f\n",
(double)(atom->Nlocal * (atoms_per_unit_cell - 1 + 2) * sizeof(int)) / 1000.0);
DEBUG("Initializing neighbor lists...\n"); DEBUG("Initializing neighbor lists...\n");
initNeighbor(&neighbor, &param); initNeighbor(&neighbor, &param);
@ -173,12 +179,18 @@ int main(int argc, const char *argv[]) {
DEBUG("Computing forces...\n"); DEBUG("Computing forces...\n");
computeForce(&param, atom, &neighbor, 0); computeForce(&param, atom, &neighbor, 0);
double T_accum = 0.0; double S, E;
S = getTimeStamp();
LIKWID_MARKER_START("force");
for(int i = 0; i < param.ntimes; i++) { for(int i = 0; i < param.ntimes; i++) {
T_accum += computeForce(&param, atom, &neighbor, 1); computeForce(&param, atom, &neighbor, 1);
} }
LIKWID_MARKER_STOP("force");
E = getTimeStamp();
double T_accum = E-S;
printf("Total time: %.4f, Time/force: %.4f\n", T_accum, T_accum / param.ntimes); printf("Total time: %.4f, Mega atom updates/s: %.4f\n",
T_accum, atom->Nlocal * param.ntimes/T_accum/1.E6);
LIKWID_MARKER_CLOSE; LIKWID_MARKER_CLOSE;
return EXIT_SUCCESS; return EXIT_SUCCESS;
} }