Compute statistics, useful data volume and cycles per SIMD iteration

Signed-off-by: Rafael Ravedutti <rafaelravedutti@gmail.com>
This commit is contained in:
Rafael Ravedutti 2021-10-12 15:04:08 +02:00
parent f295f54fca
commit 55d346510e
9 changed files with 96 additions and 19 deletions

View File

@ -41,6 +41,10 @@ ifeq ($(strip $(INDEX_TRACER)),true)
DEFINES += -DINDEX_TRACER DEFINES += -DINDEX_TRACER
endif endif
ifeq ($(strip $(COMPUTE_STATS)),true)
DEFINES += -DCOMPUTE_STATS
endif
ifneq ($(VECTOR_WIDTH),) ifneq ($(VECTOR_WIDTH),)
DEFINES += -DVECTOR_WIDTH=$(VECTOR_WIDTH) DEFINES += -DVECTOR_WIDTH=$(VECTOR_WIDTH)
endif endif

View File

@ -1,13 +1,13 @@
# Compiler tag (GCC/CLANG/ICC) # Compiler tag (GCC/CLANG/ICC)
TAG ?= ICC TAG ?= ICC
# Enable likwid (true or false) # Enable likwid (true or false)
ENABLE_LIKWID ?= false ENABLE_LIKWID ?= true
# SP or DP # SP or DP
DATA_TYPE ?= DP DATA_TYPE ?= DP
# AOS or SOA # AOS or SOA
DATA_LAYOUT ?= AOS DATA_LAYOUT ?= AOS
# Assembly syntax to generate (ATT/INTEL) # Assembly syntax to generate (ATT/INTEL)
ASM_SYNTAX ?= INTEL ASM_SYNTAX ?= ATT
# Number of times to run the neighbors loop on stubbed variant # Number of times to run the neighbors loop on stubbed variant
NEIGHBORS_LOOP_RUNS ?= 1 NEIGHBORS_LOOP_RUNS ?= 1
@ -19,6 +19,8 @@ MEM_TRACER ?= false
INDEX_TRACER ?= false INDEX_TRACER ?= false
# Vector width (elements) for index and distance tracer # Vector width (elements) for index and distance tracer
VECTOR_WIDTH ?= 8 VECTOR_WIDTH ?= 8
# Compute statistics
COMPUTE_STATS ?= true
#Feature options #Feature options
OPTIONS = -DALIGNMENT=64 OPTIONS = -DALIGNMENT=64

View File

@ -26,12 +26,17 @@
#include <neighbor.h> #include <neighbor.h>
#include <parameter.h> #include <parameter.h>
#include <atom.h> #include <atom.h>
#include <stats.h>
#if defined(MEM_TRACER) || defined(INDEX_TRACER) #if defined(MEM_TRACER) || defined(INDEX_TRACER)
#include <stdio.h> #include <stdio.h>
#include <stdlib.h> #include <stdlib.h>
#endif #endif
#ifndef VECTOR_WIDTH
# define VECTOR_WIDTH 8
#endif
#ifndef TRACER_CONDITION #ifndef TRACER_CONDITION
# define TRACER_CONDITION (!(timestep % param->every)) # define TRACER_CONDITION (!(timestep % param->every))
#endif #endif
@ -53,10 +58,6 @@
#endif #endif
#ifdef INDEX_TRACER #ifdef INDEX_TRACER
# ifndef VECTOR_WIDTH
# define VECTOR_WIDTH 8
# endif
# define INDEX_TRACER_INIT FILE *index_tracer_fp; \ # define INDEX_TRACER_INIT FILE *index_tracer_fp; \
if(TRACER_CONDITION) { \ if(TRACER_CONDITION) { \
char index_tracer_fn[128]; \ char index_tracer_fn[128]; \
@ -118,7 +119,7 @@
# define DIST_TRACE(l, e) # define DIST_TRACE(l, e)
#endif #endif
double computeForce(Parameter *param, Atom *atom, Neighbor *neighbor, int first_exec, int timestep) { double computeForce(Parameter *param, Atom *atom, Neighbor *neighbor, Stats *stats, int first_exec, int timestep) {
MEM_TRACER_INIT; MEM_TRACER_INIT;
INDEX_TRACER_INIT; INDEX_TRACER_INIT;
int Nlocal = atom->Nlocal; int Nlocal = atom->Nlocal;
@ -143,7 +144,6 @@ double computeForce(Parameter *param, Atom *atom, Neighbor *neighbor, int first_
for(int i = 0; i < Nlocal; i++) { for(int i = 0; i < Nlocal; i++) {
neighs = &neighbor->neighbors[i * neighbor->maxneighs]; neighs = &neighbor->neighbors[i * neighbor->maxneighs];
int numneighs = neighbor->numneigh[i]; int numneighs = neighbor->numneigh[i];
neighbor->totalneighs += numneighs; // Maybe remove this for real time measurements
MD_FLOAT xtmp = atom_x(i); MD_FLOAT xtmp = atom_x(i);
MD_FLOAT ytmp = atom_y(i); MD_FLOAT ytmp = atom_y(i);
MD_FLOAT ztmp = atom_z(i); MD_FLOAT ztmp = atom_z(i);
@ -210,6 +210,8 @@ double computeForce(Parameter *param, Atom *atom, Neighbor *neighbor, int first_
fy[i] += fiy; fy[i] += fiy;
fz[i] += fiz; fz[i] += fiz;
addStat(stats->total_force_neighs, numneighs);
addStat(stats->total_force_iters, (numneighs + VECTOR_WIDTH - 1) / VECTOR_WIDTH);
MEM_TRACE(fx[i], 'R'); MEM_TRACE(fx[i], 'R');
MEM_TRACE(fx[i], 'W'); MEM_TRACE(fx[i], 'W');
MEM_TRACE(fy[i], 'R'); MEM_TRACE(fy[i], 'R');

View File

@ -31,7 +31,6 @@ typedef struct {
int* neighbors; int* neighbors;
int maxneighs; int maxneighs;
int* numneigh; int* numneigh;
long long int totalneighs;
} Neighbor; } Neighbor;
extern void initNeighbor(Neighbor*, Parameter*); extern void initNeighbor(Neighbor*, Parameter*);

View File

@ -46,5 +46,6 @@ typedef struct {
int nx, ny, nz; int nx, ny, nz;
MD_FLOAT lattice; MD_FLOAT lattice;
MD_FLOAT xprd, yprd, zprd; MD_FLOAT xprd, yprd, zprd;
double proc_freq;
} Parameter; } Parameter;
#endif #endif

45
src/includes/stats.h Normal file
View File

@ -0,0 +1,45 @@
/*
* =======================================================================================
*
* Author: Jan Eitzinger (je), jan.eitzinger@fau.de
* Copyright (c) 2020 RRZE, University Erlangen-Nuremberg
*
* This file is part of MD-Bench.
*
* MD-Bench is free software: you can redistribute it and/or modify it
* under the terms of the GNU Lesser General Public License as published
* by the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* MD-Bench is distributed in the hope that it will be useful, but WITHOUT ANY
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
* PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
* details.
*
* You should have received a copy of the GNU Lesser General Public License along
* with MD-Bench. If not, see <https://www.gnu.org/licenses/>.
* =======================================================================================
*/
#include <atom.h>
#include <parameter.h>
#ifndef __STATS_H_
#define __STATS_H_

/*
 * Counters collected while the benchmark runs.
 * Both are zeroed by initStats() and grown through addStat().
 */
typedef struct {
    /* The force kernel adds each processed atom's neighbor count here. */
    long long int total_force_neighs;
    /* The force kernel adds ceil(numneighs / VECTOR_WIDTH) per atom here. */
    long long int total_force_iters;
} Stats;

/* Reset all counters in *s to zero. */
void initStats(Stats *s);

/*
 * Instrumentation macros.
 *
 * With COMPUTE_STATS defined they update the given counter; without it they
 * do no work, so call sites need no #ifdef of their own.
 *
 * addStat() and endStatTimer() expand to exactly one statement and must be
 * followed by a semicolon at the call site. The do/while(0) wrapper keeps
 * them safe inside unbraced if/else bodies, where a semicolon baked into
 * the macro would detach the `else`.
 *
 * beginStatTimer() declares a local variable named `Si` in the enclosing
 * scope, so it cannot be wrapped in its own block: use it at most once per
 * scope and pair it with endStatTimer() in that same scope.
 * NOTE(review): getTimeStamp() is not declared by this header; the including
 * file has to bring its declaration into scope before using the timers.
 */
#ifdef COMPUTE_STATS
#   define addStat(stat, value)     do { (stat) += (value); } while (0)
#   define beginStatTimer()         double Si = getTimeStamp();
#   define endStatTimer(stat)       do { (stat) += getTimeStamp() - Si; } while (0)
#else
#   define addStat(stat, value)     do { } while (0)
#   define beginStatTimer()
#   define endStatTimer(stat)       do { } while (0)
#endif

#endif

View File

@ -35,6 +35,7 @@
#include <neighbor.h> #include <neighbor.h>
#include <parameter.h> #include <parameter.h>
#include <atom.h> #include <atom.h>
#include <stats.h>
#include <thermo.h> #include <thermo.h>
#include <pbc.h> #include <pbc.h>
@ -47,7 +48,7 @@ typedef enum {
NUMTIMER NUMTIMER
} timertype; } timertype;
extern double computeForce(Parameter*, Atom*, Neighbor*, int, int); extern double computeForce(Parameter*, Atom*, Neighbor*, Stats*, int, int);
void init(Parameter *param) void init(Parameter *param)
{ {
@ -67,12 +68,14 @@ void init(Parameter *param)
param->mass = 1.0; param->mass = 1.0;
param->dtforce = 0.5 * param->dt; param->dtforce = 0.5 * param->dt;
param->every = 20; param->every = 20;
param->proc_freq = 2.4;
} }
double setup( double setup(
Parameter *param, Parameter *param,
Atom *atom, Atom *atom,
Neighbor *neighbor) Neighbor *neighbor,
Stats *stats)
{ {
double S, E; double S, E;
param->lattice = pow((4.0 / param->rho), (1.0 / 3.0)); param->lattice = pow((4.0 / param->rho), (1.0 / 3.0));
@ -84,6 +87,7 @@ double setup(
initAtom(atom); initAtom(atom);
initNeighbor(neighbor, param); initNeighbor(neighbor, param);
initPbc(); initPbc();
initStats(stats);
setupNeighbor(); setupNeighbor();
createAtom(atom, param); createAtom(atom, param);
setupThermo(param, atom->Natoms); setupThermo(param, atom->Natoms);
@ -160,6 +164,7 @@ int main (int argc, char** argv)
double timer[NUMTIMER]; double timer[NUMTIMER];
Atom atom; Atom atom;
Neighbor neighbor; Neighbor neighbor;
Stats stats;
Parameter param; Parameter param;
LIKWID_MARKER_INIT; LIKWID_MARKER_INIT;
@ -193,20 +198,26 @@ int main (int argc, char** argv)
param.nz = atoi(argv[++i]); param.nz = atoi(argv[++i]);
continue; continue;
} }
if((strcmp(argv[i], "-f") == 0))
{
param.proc_freq = atof(argv[++i]);
continue;
}
if((strcmp(argv[i], "-h") == 0) || (strcmp(argv[i], "--help") == 0)) if((strcmp(argv[i], "-h") == 0) || (strcmp(argv[i], "--help") == 0))
{ {
printf("MD Bench: A minimalistic re-implementation of miniMD\n"); printf("MD Bench: A minimalistic re-implementation of miniMD\n");
printf(HLINE); printf(HLINE);
printf("-n / --nsteps <int>: set number of timesteps for simulation\n"); printf("-n / --nsteps <int>: set number of timesteps for simulation\n");
printf("-nx/-ny/-nz <int>: set linear dimension of systembox in x/y/z direction\n"); printf("-nx/-ny/-nz <int>: set linear dimension of systembox in x/y/z direction\n");
printf("-f <real>: processor frequency (GHz)\n");
printf(HLINE); printf(HLINE);
exit(EXIT_SUCCESS); exit(EXIT_SUCCESS);
} }
} }
setup(&param, &atom, &neighbor); setup(&param, &atom, &neighbor, &stats);
computeThermo(0, &param, &atom); computeThermo(0, &param, &atom);
computeForce(&param, &atom, &neighbor, 1, 0); computeForce(&param, &atom, &neighbor, &stats, 1, 0);
timer[FORCE] = 0.0; timer[FORCE] = 0.0;
timer[NEIGH] = 0.0; timer[NEIGH] = 0.0;
@ -221,7 +232,7 @@ int main (int argc, char** argv)
timer[NEIGH] += reneighbour(&param, &atom, &neighbor); timer[NEIGH] += reneighbour(&param, &atom, &neighbor);
} }
timer[FORCE] += computeForce(&param, &atom, &neighbor, 0, n + 1); timer[FORCE] += computeForce(&param, &atom, &neighbor, &stats, 0, n + 1);
finalIntegrate(&param, &atom); finalIntegrate(&param, &atom);
if(!((n + 1) % param.nstat) && (n+1) < param.ntimes) { if(!((n + 1) % param.nstat) && (n+1) < param.ntimes) {
@ -246,13 +257,21 @@ int main (int argc, char** argv)
printf(HLINE); printf(HLINE);
printf("Performance: %.2f million atom updates per second\n", printf("Performance: %.2f million atom updates per second\n",
1e-6 * (double) atom.Natoms * param.ntimes / timer[TOTAL]); 1e-6 * (double) atom.Natoms * param.ntimes / timer[TOTAL]);
#ifdef COMPUTE_STATS
double force_useful_volume = 1e-9 * ( (double)(atom.Nlocal * (param.ntimes + 1)) * (sizeof(MD_FLOAT) * 6 + sizeof(int)) + double force_useful_volume = 1e-9 * ( (double)(atom.Nlocal * (param.ntimes + 1)) * (sizeof(MD_FLOAT) * 6 + sizeof(int)) +
(double)(neighbor.totalneighs) * (sizeof(MD_FLOAT) * 3 + sizeof(int)) ); (double)(stats.total_force_neighs) * (sizeof(MD_FLOAT) * 3 + sizeof(int)) );
#ifdef EXPLICIT_TYPES #ifdef EXPLICIT_TYPES
force_useful_volume += 1e-9 * (double)((atom.Nlocal * (param.ntimes + 1)) + neighbor.totalneighs) * sizeof(int); force_useful_volume += 1e-9 * (double)((atom.Nlocal * (param.ntimes + 1)) + stats.total_force_neighs) * sizeof(int);
#endif #endif
printf("total_neighs = %lld/%.2f\n", neighbor.totalneighs, (double)(neighbor.totalneighs)); printf("Statistics:\n");
printf("Useful read data volume for force computation: %.2fGB\n", force_useful_volume); printf("\tVector width: %d, Processor frequency: %.4f GHz\n", VECTOR_WIDTH, param.proc_freq);
printf("\tTotal number of computed pair interactions: %lld\n", stats.total_force_neighs);
printf("\tTotal number of most SIMD iterations: %lld\n", stats.total_force_iters);
printf("\tUseful read data volume for force computation: %.2fGB\n", force_useful_volume);
printf("\tCycles/SIMD iteration: %.4f\n", timer[FORCE] * param.proc_freq * 1e9 / stats.total_force_iters);
#endif
LIKWID_MARKER_CLOSE; LIKWID_MARKER_CLOSE;
return EXIT_SUCCESS; return EXIT_SUCCESS;
} }

View File

@ -69,7 +69,6 @@ void initNeighbor(Neighbor *neighbor, Parameter *param)
neighbor->maxneighs = 100; neighbor->maxneighs = 100;
neighbor->numneigh = NULL; neighbor->numneigh = NULL;
neighbor->neighbors = NULL; neighbor->neighbors = NULL;
neighbor->totalneighs = 0;
} }
void setupNeighbor() void setupNeighbor()

6
src/stats.c Normal file
View File

@ -0,0 +1,6 @@
#include <stats.h>
/*
 * Put a Stats record into its starting state.
 *
 * s: record to reset; both of its counters are set to zero.
 */
void initStats(Stats *s)
{
    s->total_force_iters = 0;
    s->total_force_neighs = 0;
}