Add optional force-kernel statistics (COMPUTE_STATS) to MD-Bench.

(Free text before the first "diff --git" header is skipped by patch tools.)

What this patch does:
  * Makefile / config.mk: new COMPUTE_STATS switch, forwarded to the compiler
    as -DCOMPUTE_STATS.
  * src/includes/stats.h, src/stats.c: new Stats counters
    (total_force_neighs, total_force_iters), initStats(), and the
    addStat()/beginStatTimer()/endStatTimer() macros, which expand to nothing
    unless COMPUTE_STATS is defined.
  * src/force.c: computeForce() takes a Stats* and, per atom, adds numneighs
    and numneighs rounded up to whole VECTOR_WIDTH chunks.
  * src/main.c: new "-f" option (processor frequency in GHz, default 2.4) and
    a statistics report that is compiled only with COMPUTE_STATS.
  * Neighbor.totalneighs is removed; Stats.total_force_neighs replaces it.

Review notes:
  * The operands of several #include directives and the "<...>" placeholders
    in the help strings were lost before this patch reached review. Bare
    "#include" lines are left as found and must be restored from the
    repository; the same goes for leading indentation of context lines.
  * NOTE(review): config.mk also flips the ENABLE_LIKWID and ASM_SYNTAX
    defaults - confirm this is intended and not a local setting.
  * NOTE(review): "-f" reads argv[++i] without checking that a value follows,
    like the existing options do.
  * NOTE(review): beginStatTimer()/endStatTimer() still carry a trailing
    semicolon in their expansion; no caller is part of this patch.

diff --git a/Makefile b/Makefile
index ba3a7b9..e1cfa29 100644
--- a/Makefile
+++ b/Makefile
@@ -41,6 +41,10 @@ ifeq ($(strip $(INDEX_TRACER)),true)
     DEFINES += -DINDEX_TRACER
 endif
 
+ifeq ($(strip $(COMPUTE_STATS)),true)
+    DEFINES += -DCOMPUTE_STATS
+endif
+
 ifneq ($(VECTOR_WIDTH),)
     DEFINES += -DVECTOR_WIDTH=$(VECTOR_WIDTH)
 endif
diff --git a/config.mk b/config.mk
index 2aaff83..ef43016 100644
--- a/config.mk
+++ b/config.mk
@@ -1,13 +1,13 @@
 # Compiler tag (GCC/CLANG/ICC)
 TAG ?= ICC
 # Enable likwid (true or false)
-ENABLE_LIKWID ?= false
+ENABLE_LIKWID ?= true
 # SP or DP
 DATA_TYPE ?= DP
 # AOS or SOA
 DATA_LAYOUT ?= AOS
 # Assembly syntax to generate (ATT/INTEL)
-ASM_SYNTAX ?= INTEL
+ASM_SYNTAX ?= ATT
 # Number of times to run the neighbors loop on stubbed variant
 NEIGHBORS_LOOP_RUNS ?= 1
 
@@ -19,6 +19,8 @@ MEM_TRACER ?= false
 INDEX_TRACER ?= false
 # Vector width (elements) for index and distance tracer
 VECTOR_WIDTH ?= 8
+# Compute statistics
+COMPUTE_STATS ?= true
 
 #Feature options
 OPTIONS = -DALIGNMENT=64
diff --git a/src/force.c b/src/force.c
index 4cc9a0a..9dee844 100644
--- a/src/force.c
+++ b/src/force.c
@@ -26,12 +26,17 @@
 #include
 #include
 #include
+#include <stats.h>
 
 #if defined(MEM_TRACER) || defined(INDEX_TRACER)
 #include
 #include
 #endif
 
+#ifndef VECTOR_WIDTH
+#   define VECTOR_WIDTH 8
+#endif
+
 #ifndef TRACER_CONDITION
 #   define TRACER_CONDITION (!(timestep % param->every))
 #endif
@@ -53,10 +58,6 @@
 #endif
 
 #ifdef INDEX_TRACER
-#   ifndef VECTOR_WIDTH
-#       define VECTOR_WIDTH 8
-#   endif
-
 #   define INDEX_TRACER_INIT    FILE *index_tracer_fp; \
                                 if(TRACER_CONDITION) { \
                                     char index_tracer_fn[128]; \
@@ -118,7 +119,7 @@
 #   define DIST_TRACE(l, e)
 #endif
 
-double computeForce(Parameter *param, Atom *atom, Neighbor *neighbor, int first_exec, int timestep) {
+double computeForce(Parameter *param, Atom *atom, Neighbor *neighbor, Stats *stats, int first_exec, int timestep) {
     MEM_TRACER_INIT;
     INDEX_TRACER_INIT;
     int Nlocal = atom->Nlocal;
@@ -143,7 +144,6 @@ double computeForce(Parameter *param, Atom *atom, Neighbor *neighbor, int first_
     for(int i = 0; i < Nlocal; i++) {
         neighs = &neighbor->neighbors[i * neighbor->maxneighs];
         int numneighs = neighbor->numneigh[i];
-        neighbor->totalneighs += numneighs; // Maybe remove this for real time measurements
         MD_FLOAT xtmp = atom_x(i);
         MD_FLOAT ytmp = atom_y(i);
         MD_FLOAT ztmp = atom_z(i);
@@ -210,6 +210,8 @@ double computeForce(Parameter *param, Atom *atom, Neighbor *neighbor, int first_
         fy[i] += fiy;
         fz[i] += fiz;
 
+        addStat(stats->total_force_neighs, numneighs);
+        addStat(stats->total_force_iters, (numneighs + VECTOR_WIDTH - 1) / VECTOR_WIDTH);
         MEM_TRACE(fx[i], 'R');
         MEM_TRACE(fx[i], 'W');
         MEM_TRACE(fy[i], 'R');
diff --git a/src/includes/neighbor.h b/src/includes/neighbor.h
index 9c35e8c..c9bad95 100644
--- a/src/includes/neighbor.h
+++ b/src/includes/neighbor.h
@@ -31,7 +31,6 @@ typedef struct {
     int* neighbors;
     int maxneighs;
     int* numneigh;
-    long long int totalneighs;
 } Neighbor;
 
 extern void initNeighbor(Neighbor*, Parameter*);
diff --git a/src/includes/parameter.h b/src/includes/parameter.h
index fb5d8b4..e95e1e8 100644
--- a/src/includes/parameter.h
+++ b/src/includes/parameter.h
@@ -46,5 +46,6 @@ typedef struct {
     int nx, ny, nz;
     MD_FLOAT lattice;
     MD_FLOAT xprd, yprd, zprd;
+    double proc_freq;
 } Parameter;
 #endif
diff --git a/src/includes/stats.h b/src/includes/stats.h
new file mode 100644
index 0000000..d0cde3c
--- /dev/null
+++ b/src/includes/stats.h
@@ -0,0 +1,57 @@
+/*
+ * =======================================================================================
+ *
+ *      Author:   Jan Eitzinger (je), jan.eitzinger@fau.de
+ *      Copyright (c) 2020 RRZE, University Erlangen-Nuremberg
+ *
+ *      This file is part of MD-Bench.
+ *
+ *   MD-Bench is free software: you can redistribute it and/or modify it
+ *   under the terms of the GNU Lesser General Public License as published
+ *   by the Free Software Foundation, either version 3 of the License, or
+ *   (at your option) any later version.
+ *
+ *   MD-Bench is distributed in the hope that it will be useful, but WITHOUT ANY
+ *   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+ *   PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
+ *   details.
+ *
+ *   You should have received a copy of the GNU Lesser General Public License along
+ *   with MD-Bench. If not, see <https://www.gnu.org/licenses/>.
+ * =======================================================================================
+ */
+#include
+#include
+
+#ifndef __STATS_H_
+#define __STATS_H_
+
+/* Fallback so every user of this header (not only force.c) sees VECTOR_WIDTH. */
+#ifndef VECTOR_WIDTH
+#   define VECTOR_WIDTH 8
+#endif
+
+/* Counters accumulated by computeForce() when COMPUTE_STATS is defined. */
+typedef struct {
+    long long int total_force_neighs;   /* sum of numneighs over all processed atoms */
+    long long int total_force_iters;    /* sum of numneighs rounded up to VECTOR_WIDTH chunks */
+} Stats;
+
+/* Reset all counters to zero. */
+void initStats(Stats *s);
+
+/*
+ * Statistics macros; all of them expand to nothing unless COMPUTE_STATS is defined.
+ * addStat() is wrapped in do/while(0) so it behaves as a single statement.
+ */
+#ifdef COMPUTE_STATS
+#   define addStat(stat, value)     do { (stat) += (value); } while (0)
+#   define beginStatTimer()         double Si = getTimeStamp();
+#   define endStatTimer(stat)       stat += getTimeStamp() - Si;
+#else
+#   define addStat(stat, value)
+#   define beginStatTimer()
+#   define endStatTimer(stat)
+#endif
+
+#endif
diff --git a/src/main.c b/src/main.c
index 9ac72e2..ff2b2ad 100644
--- a/src/main.c
+++ b/src/main.c
@@ -35,6 +35,7 @@
 #include
 #include
 #include
+#include <stats.h>
 #include
 #include
 
@@ -47,7 +48,7 @@ typedef enum {
     NUMTIMER
 } timertype;
 
-extern double computeForce(Parameter*, Atom*, Neighbor*, int, int);
+extern double computeForce(Parameter*, Atom*, Neighbor*, Stats*, int, int);
 
 void init(Parameter *param)
 {
@@ -67,12 +68,14 @@ void init(Parameter *param)
     param->mass = 1.0;
     param->dtforce = 0.5 * param->dt;
     param->every = 20;
+    param->proc_freq = 2.4;
 }
 
 double setup(
         Parameter *param,
         Atom *atom,
-        Neighbor *neighbor)
+        Neighbor *neighbor,
+        Stats *stats)
 {
     double S, E;
     param->lattice = pow((4.0 / param->rho), (1.0 / 3.0));
@@ -84,6 +87,7 @@ double setup(
     initAtom(atom);
     initNeighbor(neighbor, param);
     initPbc();
+    initStats(stats);
     setupNeighbor();
     createAtom(atom, param);
     setupThermo(param, atom->Natoms);
@@ -160,6 +164,7 @@ int main (int argc, char** argv)
     double timer[NUMTIMER];
     Atom atom;
     Neighbor neighbor;
+    Stats stats;
     Parameter param;
 
     LIKWID_MARKER_INIT;
@@ -193,20 +198,26 @@ int main (int argc, char** argv)
             param.nz = atoi(argv[++i]);
             continue;
         }
+        if((strcmp(argv[i], "-f") == 0))
+        {
+            param.proc_freq = atof(argv[++i]);
+            continue;
+        }
         if((strcmp(argv[i], "-h") == 0) || (strcmp(argv[i], "--help") == 0))
         {
             printf("MD Bench: A minimalistic re-implementation of miniMD\n");
             printf(HLINE);
             printf("-n / --nsteps : set number of timesteps for simulation\n");
             printf("-nx/-ny/-nz : set linear dimension of systembox in x/y/z direction\n");
+            printf("-f : processor frequency (GHz)\n");
             printf(HLINE);
             exit(EXIT_SUCCESS);
         }
     }
 
-    setup(&param, &atom, &neighbor);
+    setup(&param, &atom, &neighbor, &stats);
     computeThermo(0, &param, &atom);
-    computeForce(&param, &atom, &neighbor, 1, 0);
+    computeForce(&param, &atom, &neighbor, &stats, 1, 0);
 
     timer[FORCE] = 0.0;
     timer[NEIGH] = 0.0;
@@ -221,7 +232,7 @@ int main (int argc, char** argv)
             timer[NEIGH] += reneighbour(&param, &atom, &neighbor);
         }
 
-        timer[FORCE] += computeForce(&param, &atom, &neighbor, 0, n + 1);
+        timer[FORCE] += computeForce(&param, &atom, &neighbor, &stats, 0, n + 1);
         finalIntegrate(&param, &atom);
 
         if(!((n + 1) % param.nstat) && (n+1) < param.ntimes) {
@@ -246,13 +257,21 @@ int main (int argc, char** argv)
     printf(HLINE);
     printf("Performance: %.2f million atom updates per second\n",
             1e-6 * (double) atom.Natoms * param.ntimes / timer[TOTAL]);
+
+#ifdef COMPUTE_STATS
     double force_useful_volume = 1e-9 * ( (double)(atom.Nlocal * (param.ntimes + 1)) * (sizeof(MD_FLOAT) * 6 + sizeof(int)) +
-                                          (double)(neighbor.totalneighs) * (sizeof(MD_FLOAT) * 3 + sizeof(int)) );
+                                          (double)(stats.total_force_neighs) * (sizeof(MD_FLOAT) * 3 + sizeof(int)) );
 #ifdef EXPLICIT_TYPES
-    force_useful_volume += 1e-9 * (double)((atom.Nlocal * (param.ntimes + 1)) + neighbor.totalneighs) * sizeof(int);
+    force_useful_volume += 1e-9 * (double)((atom.Nlocal * (param.ntimes + 1)) + stats.total_force_neighs) * sizeof(int);
 #endif
-    printf("total_neighs = %lld/%.2f\n", neighbor.totalneighs, (double)(neighbor.totalneighs));
-    printf("Useful read data volume for force computation: %.2fGB\n", force_useful_volume);
+    printf("Statistics:\n");
+    printf("\tVector width: %d, Processor frequency: %.4f GHz\n", VECTOR_WIDTH, param.proc_freq);
+    printf("\tTotal number of computed pair interactions: %lld\n", stats.total_force_neighs);
+    printf("\tTotal number of SIMD iterations: %lld\n", stats.total_force_iters);
+    printf("\tUseful read data volume for force computation: %.2fGB\n", force_useful_volume);
+    printf("\tCycles/SIMD iteration: %.4f\n", timer[FORCE] * param.proc_freq * 1e9 / stats.total_force_iters);
+#endif
+
     LIKWID_MARKER_CLOSE;
     return EXIT_SUCCESS;
 }
diff --git a/src/neighbor.c b/src/neighbor.c
index 451342c..91c7ab8 100644
--- a/src/neighbor.c
+++ b/src/neighbor.c
@@ -69,7 +69,6 @@ void initNeighbor(Neighbor *neighbor, Parameter *param)
     neighbor->maxneighs = 100;
     neighbor->numneigh = NULL;
     neighbor->neighbors = NULL;
-    neighbor->totalneighs = 0;
 }
 
 void setupNeighbor()
diff --git a/src/stats.c b/src/stats.c
new file mode 100644
index 0000000..bae23ee
--- /dev/null
+++ b/src/stats.c
@@ -0,0 +1,7 @@
+#include <stats.h>
+
+/* Reset all statistics counters to zero. */
+void initStats(Stats *s) {
+    s->total_force_neighs = 0;
+    s->total_force_iters = 0;
+}