Compute statistics, useful data volume and cycles per SIMD iteration
Signed-off-by: Rafael Ravedutti <rafaelravedutti@gmail.com>
This commit is contained in:
parent
f295f54fca
commit
55d346510e
4
Makefile
4
Makefile
@ -41,6 +41,10 @@ ifeq ($(strip $(INDEX_TRACER)),true)
|
|||||||
DEFINES += -DINDEX_TRACER
|
DEFINES += -DINDEX_TRACER
|
||||||
endif
|
endif
|
||||||
|
|
||||||
|
ifeq ($(strip $(COMPUTE_STATS)),true)
|
||||||
|
DEFINES += -DCOMPUTE_STATS
|
||||||
|
endif
|
||||||
|
|
||||||
ifneq ($(VECTOR_WIDTH),)
|
ifneq ($(VECTOR_WIDTH),)
|
||||||
DEFINES += -DVECTOR_WIDTH=$(VECTOR_WIDTH)
|
DEFINES += -DVECTOR_WIDTH=$(VECTOR_WIDTH)
|
||||||
endif
|
endif
|
||||||
|
@ -1,13 +1,13 @@
|
|||||||
# Compiler tag (GCC/CLANG/ICC)
|
# Compiler tag (GCC/CLANG/ICC)
|
||||||
TAG ?= ICC
|
TAG ?= ICC
|
||||||
# Enable likwid (true or false)
|
# Enable likwid (true or false)
|
||||||
ENABLE_LIKWID ?= false
|
ENABLE_LIKWID ?= true
|
||||||
# SP or DP
|
# SP or DP
|
||||||
DATA_TYPE ?= DP
|
DATA_TYPE ?= DP
|
||||||
# AOS or SOA
|
# AOS or SOA
|
||||||
DATA_LAYOUT ?= AOS
|
DATA_LAYOUT ?= AOS
|
||||||
# Assembly syntax to generate (ATT/INTEL)
|
# Assembly syntax to generate (ATT/INTEL)
|
||||||
ASM_SYNTAX ?= INTEL
|
ASM_SYNTAX ?= ATT
|
||||||
|
|
||||||
# Number of times to run the neighbors loop on stubbed variant
|
# Number of times to run the neighbors loop on stubbed variant
|
||||||
NEIGHBORS_LOOP_RUNS ?= 1
|
NEIGHBORS_LOOP_RUNS ?= 1
|
||||||
@ -19,6 +19,8 @@ MEM_TRACER ?= false
|
|||||||
INDEX_TRACER ?= false
|
INDEX_TRACER ?= false
|
||||||
# Vector width (elements) for index and distance tracer
|
# Vector width (elements) for index and distance tracer
|
||||||
VECTOR_WIDTH ?= 8
|
VECTOR_WIDTH ?= 8
|
||||||
|
# Compute statistics
|
||||||
|
COMPUTE_STATS ?= true
|
||||||
|
|
||||||
#Feature options
|
#Feature options
|
||||||
OPTIONS = -DALIGNMENT=64
|
OPTIONS = -DALIGNMENT=64
|
||||||
|
14
src/force.c
14
src/force.c
@ -26,12 +26,17 @@
|
|||||||
#include <neighbor.h>
|
#include <neighbor.h>
|
||||||
#include <parameter.h>
|
#include <parameter.h>
|
||||||
#include <atom.h>
|
#include <atom.h>
|
||||||
|
#include <stats.h>
|
||||||
|
|
||||||
#if defined(MEM_TRACER) || defined(INDEX_TRACER)
|
#if defined(MEM_TRACER) || defined(INDEX_TRACER)
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#ifndef VECTOR_WIDTH
|
||||||
|
# define VECTOR_WIDTH 8
|
||||||
|
#endif
|
||||||
|
|
||||||
#ifndef TRACER_CONDITION
|
#ifndef TRACER_CONDITION
|
||||||
# define TRACER_CONDITION (!(timestep % param->every))
|
# define TRACER_CONDITION (!(timestep % param->every))
|
||||||
#endif
|
#endif
|
||||||
@ -53,10 +58,6 @@
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef INDEX_TRACER
|
#ifdef INDEX_TRACER
|
||||||
# ifndef VECTOR_WIDTH
|
|
||||||
# define VECTOR_WIDTH 8
|
|
||||||
# endif
|
|
||||||
|
|
||||||
# define INDEX_TRACER_INIT FILE *index_tracer_fp; \
|
# define INDEX_TRACER_INIT FILE *index_tracer_fp; \
|
||||||
if(TRACER_CONDITION) { \
|
if(TRACER_CONDITION) { \
|
||||||
char index_tracer_fn[128]; \
|
char index_tracer_fn[128]; \
|
||||||
@ -118,7 +119,7 @@
|
|||||||
# define DIST_TRACE(l, e)
|
# define DIST_TRACE(l, e)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
double computeForce(Parameter *param, Atom *atom, Neighbor *neighbor, int first_exec, int timestep) {
|
double computeForce(Parameter *param, Atom *atom, Neighbor *neighbor, Stats *stats, int first_exec, int timestep) {
|
||||||
MEM_TRACER_INIT;
|
MEM_TRACER_INIT;
|
||||||
INDEX_TRACER_INIT;
|
INDEX_TRACER_INIT;
|
||||||
int Nlocal = atom->Nlocal;
|
int Nlocal = atom->Nlocal;
|
||||||
@ -143,7 +144,6 @@ double computeForce(Parameter *param, Atom *atom, Neighbor *neighbor, int first_
|
|||||||
for(int i = 0; i < Nlocal; i++) {
|
for(int i = 0; i < Nlocal; i++) {
|
||||||
neighs = &neighbor->neighbors[i * neighbor->maxneighs];
|
neighs = &neighbor->neighbors[i * neighbor->maxneighs];
|
||||||
int numneighs = neighbor->numneigh[i];
|
int numneighs = neighbor->numneigh[i];
|
||||||
neighbor->totalneighs += numneighs; // Maybe remove this for real time measurements
|
|
||||||
MD_FLOAT xtmp = atom_x(i);
|
MD_FLOAT xtmp = atom_x(i);
|
||||||
MD_FLOAT ytmp = atom_y(i);
|
MD_FLOAT ytmp = atom_y(i);
|
||||||
MD_FLOAT ztmp = atom_z(i);
|
MD_FLOAT ztmp = atom_z(i);
|
||||||
@ -210,6 +210,8 @@ double computeForce(Parameter *param, Atom *atom, Neighbor *neighbor, int first_
|
|||||||
fy[i] += fiy;
|
fy[i] += fiy;
|
||||||
fz[i] += fiz;
|
fz[i] += fiz;
|
||||||
|
|
||||||
|
addStat(stats->total_force_neighs, numneighs);
|
||||||
|
addStat(stats->total_force_iters, (numneighs + VECTOR_WIDTH - 1) / VECTOR_WIDTH);
|
||||||
MEM_TRACE(fx[i], 'R');
|
MEM_TRACE(fx[i], 'R');
|
||||||
MEM_TRACE(fx[i], 'W');
|
MEM_TRACE(fx[i], 'W');
|
||||||
MEM_TRACE(fy[i], 'R');
|
MEM_TRACE(fy[i], 'R');
|
||||||
|
@ -31,7 +31,6 @@ typedef struct {
|
|||||||
int* neighbors;
|
int* neighbors;
|
||||||
int maxneighs;
|
int maxneighs;
|
||||||
int* numneigh;
|
int* numneigh;
|
||||||
long long int totalneighs;
|
|
||||||
} Neighbor;
|
} Neighbor;
|
||||||
|
|
||||||
extern void initNeighbor(Neighbor*, Parameter*);
|
extern void initNeighbor(Neighbor*, Parameter*);
|
||||||
|
@ -46,5 +46,6 @@ typedef struct {
|
|||||||
int nx, ny, nz;
|
int nx, ny, nz;
|
||||||
MD_FLOAT lattice;
|
MD_FLOAT lattice;
|
||||||
MD_FLOAT xprd, yprd, zprd;
|
MD_FLOAT xprd, yprd, zprd;
|
||||||
|
double proc_freq;
|
||||||
} Parameter;
|
} Parameter;
|
||||||
#endif
|
#endif
|
||||||
|
45
src/includes/stats.h
Normal file
45
src/includes/stats.h
Normal file
@ -0,0 +1,45 @@
|
|||||||
|
/*
|
||||||
|
* =======================================================================================
|
||||||
|
*
|
||||||
|
* Author: Jan Eitzinger (je), jan.eitzinger@fau.de
|
||||||
|
* Copyright (c) 2020 RRZE, University Erlangen-Nuremberg
|
||||||
|
*
|
||||||
|
* This file is part of MD-Bench.
|
||||||
|
*
|
||||||
|
* MD-Bench is free software: you can redistribute it and/or modify it
|
||||||
|
* under the terms of the GNU Lesser General Public License as published
|
||||||
|
* by the Free Software Foundation, either version 3 of the License, or
|
||||||
|
* (at your option) any later version.
|
||||||
|
*
|
||||||
|
* MD-Bench is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||||
|
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
|
||||||
|
* PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
|
||||||
|
* details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU Lesser General Public License along
|
||||||
|
* with MD-Bench. If not, see <https://www.gnu.org/licenses/>.
|
||||||
|
* =======================================================================================
|
||||||
|
*/
|
||||||
|
#include <atom.h>
|
||||||
|
#include <parameter.h>
|
||||||
|
|
||||||
|
#ifndef __STATS_H_
|
||||||
|
#define __STATS_H_
|
||||||
|
typedef struct {
|
||||||
|
long long int total_force_neighs;
|
||||||
|
long long int total_force_iters;
|
||||||
|
} Stats;
|
||||||
|
|
||||||
|
void initStats(Stats *s);
|
||||||
|
|
||||||
|
#ifdef COMPUTE_STATS
|
||||||
|
# define addStat(stat, value) stat += value;
|
||||||
|
# define beginStatTimer() double Si = getTimeStamp();
|
||||||
|
# define endStatTimer(stat) stat += getTimeStamp() - Si;
|
||||||
|
#else
|
||||||
|
# define addStat(stat, value)
|
||||||
|
# define beginStatTimer()
|
||||||
|
# define endStatTimer(stat)
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#endif
|
37
src/main.c
37
src/main.c
@ -35,6 +35,7 @@
|
|||||||
#include <neighbor.h>
|
#include <neighbor.h>
|
||||||
#include <parameter.h>
|
#include <parameter.h>
|
||||||
#include <atom.h>
|
#include <atom.h>
|
||||||
|
#include <stats.h>
|
||||||
#include <thermo.h>
|
#include <thermo.h>
|
||||||
#include <pbc.h>
|
#include <pbc.h>
|
||||||
|
|
||||||
@ -47,7 +48,7 @@ typedef enum {
|
|||||||
NUMTIMER
|
NUMTIMER
|
||||||
} timertype;
|
} timertype;
|
||||||
|
|
||||||
extern double computeForce(Parameter*, Atom*, Neighbor*, int, int);
|
extern double computeForce(Parameter*, Atom*, Neighbor*, Stats*, int, int);
|
||||||
|
|
||||||
void init(Parameter *param)
|
void init(Parameter *param)
|
||||||
{
|
{
|
||||||
@ -67,12 +68,14 @@ void init(Parameter *param)
|
|||||||
param->mass = 1.0;
|
param->mass = 1.0;
|
||||||
param->dtforce = 0.5 * param->dt;
|
param->dtforce = 0.5 * param->dt;
|
||||||
param->every = 20;
|
param->every = 20;
|
||||||
|
param->proc_freq = 2.4;
|
||||||
}
|
}
|
||||||
|
|
||||||
double setup(
|
double setup(
|
||||||
Parameter *param,
|
Parameter *param,
|
||||||
Atom *atom,
|
Atom *atom,
|
||||||
Neighbor *neighbor)
|
Neighbor *neighbor,
|
||||||
|
Stats *stats)
|
||||||
{
|
{
|
||||||
double S, E;
|
double S, E;
|
||||||
param->lattice = pow((4.0 / param->rho), (1.0 / 3.0));
|
param->lattice = pow((4.0 / param->rho), (1.0 / 3.0));
|
||||||
@ -84,6 +87,7 @@ double setup(
|
|||||||
initAtom(atom);
|
initAtom(atom);
|
||||||
initNeighbor(neighbor, param);
|
initNeighbor(neighbor, param);
|
||||||
initPbc();
|
initPbc();
|
||||||
|
initStats(stats);
|
||||||
setupNeighbor();
|
setupNeighbor();
|
||||||
createAtom(atom, param);
|
createAtom(atom, param);
|
||||||
setupThermo(param, atom->Natoms);
|
setupThermo(param, atom->Natoms);
|
||||||
@ -160,6 +164,7 @@ int main (int argc, char** argv)
|
|||||||
double timer[NUMTIMER];
|
double timer[NUMTIMER];
|
||||||
Atom atom;
|
Atom atom;
|
||||||
Neighbor neighbor;
|
Neighbor neighbor;
|
||||||
|
Stats stats;
|
||||||
Parameter param;
|
Parameter param;
|
||||||
|
|
||||||
LIKWID_MARKER_INIT;
|
LIKWID_MARKER_INIT;
|
||||||
@ -193,20 +198,26 @@ int main (int argc, char** argv)
|
|||||||
param.nz = atoi(argv[++i]);
|
param.nz = atoi(argv[++i]);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
if((strcmp(argv[i], "-f") == 0))
|
||||||
|
{
|
||||||
|
param.proc_freq = atof(argv[++i]);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
if((strcmp(argv[i], "-h") == 0) || (strcmp(argv[i], "--help") == 0))
|
if((strcmp(argv[i], "-h") == 0) || (strcmp(argv[i], "--help") == 0))
|
||||||
{
|
{
|
||||||
printf("MD Bench: A minimalistic re-implementation of miniMD\n");
|
printf("MD Bench: A minimalistic re-implementation of miniMD\n");
|
||||||
printf(HLINE);
|
printf(HLINE);
|
||||||
printf("-n / --nsteps <int>: set number of timesteps for simulation\n");
|
printf("-n / --nsteps <int>: set number of timesteps for simulation\n");
|
||||||
printf("-nx/-ny/-nz <int>: set linear dimension of systembox in x/y/z direction\n");
|
printf("-nx/-ny/-nz <int>: set linear dimension of systembox in x/y/z direction\n");
|
||||||
|
printf("-f <real>: processor frequency (GHz)\n");
|
||||||
printf(HLINE);
|
printf(HLINE);
|
||||||
exit(EXIT_SUCCESS);
|
exit(EXIT_SUCCESS);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
setup(&param, &atom, &neighbor);
|
setup(&param, &atom, &neighbor, &stats);
|
||||||
computeThermo(0, &param, &atom);
|
computeThermo(0, &param, &atom);
|
||||||
computeForce(&param, &atom, &neighbor, 1, 0);
|
computeForce(&param, &atom, &neighbor, &stats, 1, 0);
|
||||||
|
|
||||||
timer[FORCE] = 0.0;
|
timer[FORCE] = 0.0;
|
||||||
timer[NEIGH] = 0.0;
|
timer[NEIGH] = 0.0;
|
||||||
@ -221,7 +232,7 @@ int main (int argc, char** argv)
|
|||||||
timer[NEIGH] += reneighbour(&param, &atom, &neighbor);
|
timer[NEIGH] += reneighbour(&param, &atom, &neighbor);
|
||||||
}
|
}
|
||||||
|
|
||||||
timer[FORCE] += computeForce(&param, &atom, &neighbor, 0, n + 1);
|
timer[FORCE] += computeForce(&param, &atom, &neighbor, &stats, 0, n + 1);
|
||||||
finalIntegrate(&param, &atom);
|
finalIntegrate(&param, &atom);
|
||||||
|
|
||||||
if(!((n + 1) % param.nstat) && (n+1) < param.ntimes) {
|
if(!((n + 1) % param.nstat) && (n+1) < param.ntimes) {
|
||||||
@ -246,13 +257,21 @@ int main (int argc, char** argv)
|
|||||||
printf(HLINE);
|
printf(HLINE);
|
||||||
printf("Performance: %.2f million atom updates per second\n",
|
printf("Performance: %.2f million atom updates per second\n",
|
||||||
1e-6 * (double) atom.Natoms * param.ntimes / timer[TOTAL]);
|
1e-6 * (double) atom.Natoms * param.ntimes / timer[TOTAL]);
|
||||||
|
|
||||||
|
#ifdef COMPUTE_STATS
|
||||||
double force_useful_volume = 1e-9 * ( (double)(atom.Nlocal * (param.ntimes + 1)) * (sizeof(MD_FLOAT) * 6 + sizeof(int)) +
|
double force_useful_volume = 1e-9 * ( (double)(atom.Nlocal * (param.ntimes + 1)) * (sizeof(MD_FLOAT) * 6 + sizeof(int)) +
|
||||||
(double)(neighbor.totalneighs) * (sizeof(MD_FLOAT) * 3 + sizeof(int)) );
|
(double)(stats.total_force_neighs) * (sizeof(MD_FLOAT) * 3 + sizeof(int)) );
|
||||||
#ifdef EXPLICIT_TYPES
|
#ifdef EXPLICIT_TYPES
|
||||||
force_useful_volume += 1e-9 * (double)((atom.Nlocal * (param.ntimes + 1)) + neighbor.totalneighs) * sizeof(int);
|
force_useful_volume += 1e-9 * (double)((atom.Nlocal * (param.ntimes + 1)) + stats.total_force_neighs) * sizeof(int);
|
||||||
#endif
|
#endif
|
||||||
printf("total_neighs = %lld/%.2f\n", neighbor.totalneighs, (double)(neighbor.totalneighs));
|
printf("Statistics:\n");
|
||||||
printf("Useful read data volume for force computation: %.2fGB\n", force_useful_volume);
|
printf("\tVector width: %d, Processor frequency: %.4f GHz\n", VECTOR_WIDTH, param.proc_freq);
|
||||||
|
printf("\tTotal number of computed pair interactions: %lld\n", stats.total_force_neighs);
|
||||||
|
printf("\tTotal number of most SIMD iterations: %lld\n", stats.total_force_iters);
|
||||||
|
printf("\tUseful read data volume for force computation: %.2fGB\n", force_useful_volume);
|
||||||
|
printf("\tCycles/SIMD iteration: %.4f\n", timer[FORCE] * param.proc_freq * 1e9 / stats.total_force_iters);
|
||||||
|
#endif
|
||||||
|
|
||||||
LIKWID_MARKER_CLOSE;
|
LIKWID_MARKER_CLOSE;
|
||||||
return EXIT_SUCCESS;
|
return EXIT_SUCCESS;
|
||||||
}
|
}
|
||||||
|
@ -69,7 +69,6 @@ void initNeighbor(Neighbor *neighbor, Parameter *param)
|
|||||||
neighbor->maxneighs = 100;
|
neighbor->maxneighs = 100;
|
||||||
neighbor->numneigh = NULL;
|
neighbor->numneigh = NULL;
|
||||||
neighbor->neighbors = NULL;
|
neighbor->neighbors = NULL;
|
||||||
neighbor->totalneighs = 0;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void setupNeighbor()
|
void setupNeighbor()
|
||||||
|
6
src/stats.c
Normal file
6
src/stats.c
Normal file
@ -0,0 +1,6 @@
|
|||||||
|
#include <stats.h>
|
||||||
|
|
||||||
|
void initStats(Stats *s) {
|
||||||
|
s->total_force_neighs = 0;
|
||||||
|
s->total_force_iters = 0;
|
||||||
|
}
|
Loading…
Reference in New Issue
Block a user