Compute statistics, useful data volume and cycles per SIMD iteration
Signed-off-by: Rafael Ravedutti <rafaelravedutti@gmail.com>
This commit is contained in:
		
							
								
								
									
										4
									
								
								Makefile
									
									
									
									
									
								
							
							
						
						
									
										4
									
								
								Makefile
									
									
									
									
									
								
							| @@ -41,6 +41,10 @@ ifeq ($(strip $(INDEX_TRACER)),true) | ||||
|     DEFINES += -DINDEX_TRACER | ||||
| endif | ||||
|  | ||||
| ifeq ($(strip $(COMPUTE_STATS)),true) | ||||
|     DEFINES += -DCOMPUTE_STATS | ||||
| endif | ||||
|  | ||||
| ifneq ($(VECTOR_WIDTH),) | ||||
|     DEFINES += -DVECTOR_WIDTH=$(VECTOR_WIDTH) | ||||
| endif | ||||
|   | ||||
| @@ -1,13 +1,13 @@ | ||||
| # Compiler tag (GCC/CLANG/ICC) | ||||
| TAG ?= ICC | ||||
| # Enable likwid (true or false) | ||||
| ENABLE_LIKWID ?= false | ||||
| ENABLE_LIKWID ?= true | ||||
| # SP or DP | ||||
| DATA_TYPE ?= DP | ||||
| # AOS or SOA | ||||
| DATA_LAYOUT ?= AOS | ||||
| # Assembly syntax to generate (ATT/INTEL) | ||||
| ASM_SYNTAX ?= INTEL | ||||
| ASM_SYNTAX ?= ATT | ||||
|  | ||||
| # Number of times to run the neighbors loop on stubbed variant | ||||
| NEIGHBORS_LOOP_RUNS ?= 1 | ||||
| @@ -19,6 +19,8 @@ MEM_TRACER ?= false | ||||
| INDEX_TRACER ?= false | ||||
| # Vector width (elements) for index and distance tracer and for SIMD iteration statistics | ||||
| VECTOR_WIDTH ?= 8 | ||||
| # Compute statistics (true or false) | ||||
| COMPUTE_STATS ?= true | ||||
|  | ||||
| #Feature options | ||||
| OPTIONS =  -DALIGNMENT=64 | ||||
|   | ||||
							
								
								
									
										14
									
								
								src/force.c
									
									
									
									
									
								
							
							
						
						
									
										14
									
								
								src/force.c
									
									
									
									
									
								
							| @@ -26,12 +26,17 @@ | ||||
| #include <neighbor.h> | ||||
| #include <parameter.h> | ||||
| #include <atom.h> | ||||
| #include <stats.h> | ||||
|  | ||||
| #if defined(MEM_TRACER) || defined(INDEX_TRACER) | ||||
| #include <stdio.h> | ||||
| #include <stdlib.h> | ||||
| #endif | ||||
|  | ||||
| #ifndef VECTOR_WIDTH | ||||
| #   define VECTOR_WIDTH                 8 | ||||
| #endif | ||||
|  | ||||
| #ifndef TRACER_CONDITION | ||||
| #   define TRACER_CONDITION                 (!(timestep % param->every)) | ||||
| #endif | ||||
| @@ -53,10 +58,6 @@ | ||||
| #endif | ||||
|  | ||||
| #ifdef INDEX_TRACER | ||||
| #   ifndef VECTOR_WIDTH | ||||
| #       define VECTOR_WIDTH                 8 | ||||
| #   endif | ||||
|  | ||||
| #   define INDEX_TRACER_INIT                FILE *index_tracer_fp; \ | ||||
|                                             if(TRACER_CONDITION) { \ | ||||
|                                                 char index_tracer_fn[128]; \ | ||||
| @@ -118,7 +119,7 @@ | ||||
| #   define DIST_TRACE(l, e) | ||||
| #endif | ||||
|  | ||||
| double computeForce(Parameter *param, Atom *atom, Neighbor *neighbor, int first_exec, int timestep) { | ||||
| double computeForce(Parameter *param, Atom *atom, Neighbor *neighbor, Stats *stats, int first_exec, int timestep) { | ||||
|     MEM_TRACER_INIT; | ||||
|     INDEX_TRACER_INIT; | ||||
|     int Nlocal = atom->Nlocal; | ||||
| @@ -143,7 +144,6 @@ double computeForce(Parameter *param, Atom *atom, Neighbor *neighbor, int first_ | ||||
|     for(int i = 0; i < Nlocal; i++) { | ||||
|         neighs = &neighbor->neighbors[i * neighbor->maxneighs]; | ||||
|         int numneighs = neighbor->numneigh[i]; | ||||
|         neighbor->totalneighs += numneighs; // Maybe remove this for real time measurements | ||||
|         MD_FLOAT xtmp = atom_x(i); | ||||
|         MD_FLOAT ytmp = atom_y(i); | ||||
|         MD_FLOAT ztmp = atom_z(i); | ||||
| @@ -210,6 +210,8 @@ double computeForce(Parameter *param, Atom *atom, Neighbor *neighbor, int first_ | ||||
|         fy[i] += fiy; | ||||
|         fz[i] += fiz; | ||||
|  | ||||
|         addStat(stats->total_force_neighs, numneighs); | ||||
|         addStat(stats->total_force_iters, (numneighs + VECTOR_WIDTH - 1) / VECTOR_WIDTH); | ||||
|         MEM_TRACE(fx[i], 'R'); | ||||
|         MEM_TRACE(fx[i], 'W'); | ||||
|         MEM_TRACE(fy[i], 'R'); | ||||
|   | ||||
| @@ -31,7 +31,6 @@ typedef struct { | ||||
|     int* neighbors; | ||||
|     int maxneighs; | ||||
|     int* numneigh; | ||||
|     long long int totalneighs; | ||||
| } Neighbor; | ||||
|  | ||||
| extern void initNeighbor(Neighbor*, Parameter*); | ||||
|   | ||||
| @@ -46,5 +46,6 @@ typedef struct { | ||||
|     int nx, ny, nz; | ||||
|     MD_FLOAT lattice; | ||||
|     MD_FLOAT xprd, yprd, zprd; | ||||
|     double proc_freq; | ||||
| } Parameter; | ||||
| #endif | ||||
|   | ||||
							
								
								
									
										45
									
								
								src/includes/stats.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										45
									
								
								src/includes/stats.h
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,45 @@ | ||||
| /* | ||||
|  * ======================================================================================= | ||||
|  * | ||||
|  *   Author:   Jan Eitzinger (je), jan.eitzinger@fau.de | ||||
|  *   Copyright (c) 2020 RRZE, University Erlangen-Nuremberg | ||||
|  * | ||||
|  *   This file is part of MD-Bench. | ||||
|  * | ||||
|  *   MD-Bench is free software: you can redistribute it and/or modify it | ||||
|  *   under the terms of the GNU Lesser General Public License as published | ||||
|  *   by the Free Software Foundation, either version 3 of the License, or | ||||
|  *   (at your option) any later version. | ||||
|  * | ||||
|  *   MD-Bench is distributed in the hope that it will be useful, but WITHOUT ANY | ||||
|  *   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A | ||||
|  *   PARTICULAR PURPOSE.  See the GNU Lesser General Public License for more | ||||
|  *   details. | ||||
|  * | ||||
|  *   You should have received a copy of the GNU Lesser General Public License along | ||||
|  *   with MD-Bench.  If not, see <https://www.gnu.org/licenses/>. | ||||
|  * ======================================================================================= | ||||
|  */ | ||||
| #include <atom.h> | ||||
| #include <parameter.h> | ||||
|  | ||||
| #ifndef __STATS_H_ | ||||
| #define __STATS_H_ | ||||
| /* Counters updated through addStat() from the force loop. */ | ||||
| typedef struct { | ||||
|     long long total_force_neighs;   /* sum of numneighs over all processed atoms */ | ||||
|     long long total_force_iters;    /* sum of ceil(numneighs / VECTOR_WIDTH) per atom */ | ||||
| } Stats; | ||||
|  | ||||
| void initStats(Stats *s); | ||||
|  | ||||
| /* | ||||
|  * Statistics helpers. They expand to nothing unless COMPUTE_STATS is defined. | ||||
|  * | ||||
|  *   addStat(stat, value)  - add value to the lvalue stat | ||||
|  *   beginStatTimer()      - declare a local double Si set to getTimeStamp() | ||||
|  *   endStatTimer(stat)    - add getTimeStamp() - Si to stat; requires the Si | ||||
|  *                           declared by beginStatTimer() to be in scope | ||||
|  * | ||||
|  * Every enabled expansion already ends in ';', so do not use one of these as | ||||
|  * the unbraced body of an if that has an else branch. | ||||
|  * getTimeStamp() is not declared in this header itself; it must be visible | ||||
|  * wherever the timer macros are used. | ||||
|  */ | ||||
| #ifdef COMPUTE_STATS | ||||
| #   define addStat(stat, value)     (stat) += (value); | ||||
| #   define beginStatTimer()         double Si = getTimeStamp(); | ||||
| #   define endStatTimer(stat)       (stat) += getTimeStamp() - Si; | ||||
| #else | ||||
| #   define addStat(stat, value) | ||||
| #   define beginStatTimer() | ||||
| #   define endStatTimer(stat) | ||||
| #endif | ||||
|  | ||||
| #endif | ||||
							
								
								
									
										37
									
								
								src/main.c
									
									
									
									
									
								
							
							
						
						
									
										37
									
								
								src/main.c
									
									
									
									
									
								
							| @@ -35,6 +35,7 @@ | ||||
| #include <neighbor.h> | ||||
| #include <parameter.h> | ||||
| #include <atom.h> | ||||
| #include <stats.h> | ||||
| #include <thermo.h> | ||||
| #include <pbc.h> | ||||
|  | ||||
| @@ -47,7 +48,7 @@ typedef enum { | ||||
|     NUMTIMER | ||||
| } timertype; | ||||
|  | ||||
| extern double computeForce(Parameter*, Atom*, Neighbor*, int, int); | ||||
| extern double computeForce(Parameter*, Atom*, Neighbor*, Stats*, int, int); | ||||
|  | ||||
| void init(Parameter *param) | ||||
| { | ||||
| @@ -67,12 +68,14 @@ void init(Parameter *param) | ||||
|     param->mass = 1.0; | ||||
|     param->dtforce = 0.5 * param->dt; | ||||
|     param->every = 20; | ||||
|     param->proc_freq = 2.4; | ||||
| } | ||||
|  | ||||
| double setup( | ||||
|         Parameter *param, | ||||
|         Atom *atom, | ||||
|         Neighbor *neighbor) | ||||
|         Neighbor *neighbor, | ||||
|         Stats *stats) | ||||
| { | ||||
|     double S, E; | ||||
|     param->lattice = pow((4.0 / param->rho), (1.0 / 3.0)); | ||||
| @@ -84,6 +87,7 @@ double setup( | ||||
|     initAtom(atom); | ||||
|     initNeighbor(neighbor, param); | ||||
|     initPbc(); | ||||
|     initStats(stats); | ||||
|     setupNeighbor(); | ||||
|     createAtom(atom, param); | ||||
|     setupThermo(param, atom->Natoms); | ||||
| @@ -160,6 +164,7 @@ int main (int argc, char** argv) | ||||
|     double timer[NUMTIMER]; | ||||
|     Atom atom; | ||||
|     Neighbor neighbor; | ||||
|     Stats stats; | ||||
|     Parameter param; | ||||
|  | ||||
|     LIKWID_MARKER_INIT; | ||||
| @@ -193,20 +198,26 @@ int main (int argc, char** argv) | ||||
|             param.nz = atoi(argv[++i]); | ||||
|             continue; | ||||
|         } | ||||
|         if((strcmp(argv[i], "-f") == 0)) | ||||
|         { | ||||
|             param.proc_freq = atof(argv[++i]); | ||||
|             continue; | ||||
|         } | ||||
|         if((strcmp(argv[i], "-h") == 0) || (strcmp(argv[i], "--help") == 0)) | ||||
|         { | ||||
|             printf("MD Bench: A minimalistic re-implementation of miniMD\n"); | ||||
|             printf(HLINE); | ||||
|             printf("-n / --nsteps <int>:  set number of timesteps for simulation\n"); | ||||
|             printf("-nx/-ny/-nz <int>:    set linear dimension of systembox in x/y/z direction\n"); | ||||
|             printf("-f <real>:            processor frequency (GHz)\n"); | ||||
|             printf(HLINE); | ||||
|             exit(EXIT_SUCCESS); | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     setup(¶m, &atom, &neighbor); | ||||
|     setup(¶m, &atom, &neighbor, &stats); | ||||
|     computeThermo(0, ¶m, &atom); | ||||
|     computeForce(¶m, &atom, &neighbor, 1, 0); | ||||
|     computeForce(¶m, &atom, &neighbor, &stats, 1, 0); | ||||
|  | ||||
|     timer[FORCE] = 0.0; | ||||
|     timer[NEIGH] = 0.0; | ||||
| @@ -221,7 +232,7 @@ int main (int argc, char** argv) | ||||
|             timer[NEIGH] += reneighbour(¶m, &atom, &neighbor); | ||||
|         } | ||||
|  | ||||
|         timer[FORCE] += computeForce(¶m, &atom, &neighbor, 0, n + 1); | ||||
|         timer[FORCE] += computeForce(¶m, &atom, &neighbor, &stats, 0, n + 1); | ||||
|         finalIntegrate(¶m, &atom); | ||||
|  | ||||
|         if(!((n + 1) % param.nstat) && (n+1) < param.ntimes) { | ||||
| @@ -246,13 +257,21 @@ int main (int argc, char** argv) | ||||
|     printf(HLINE); | ||||
|     printf("Performance: %.2f million atom updates per second\n", | ||||
|             1e-6 * (double) atom.Natoms * param.ntimes / timer[TOTAL]); | ||||
|  | ||||
| #ifdef COMPUTE_STATS | ||||
|     double force_useful_volume = 1e-9 * ( (double)(atom.Nlocal * (param.ntimes + 1)) * (sizeof(MD_FLOAT) * 6 + sizeof(int)) + | ||||
|                                           (double)(neighbor.totalneighs) * (sizeof(MD_FLOAT) * 3 + sizeof(int)) ); | ||||
|                                           (double)(stats.total_force_neighs) * (sizeof(MD_FLOAT) * 3 + sizeof(int)) ); | ||||
| #ifdef EXPLICIT_TYPES | ||||
|     force_useful_volume += 1e-9 * (double)((atom.Nlocal * (param.ntimes + 1)) + neighbor.totalneighs) * sizeof(int); | ||||
|     force_useful_volume += 1e-9 * (double)((atom.Nlocal * (param.ntimes + 1)) + stats.total_force_neighs) * sizeof(int); | ||||
| #endif | ||||
|     printf("total_neighs = %lld/%.2f\n", neighbor.totalneighs, (double)(neighbor.totalneighs)); | ||||
|     printf("Useful read data volume for force computation: %.2fGB\n", force_useful_volume); | ||||
|     printf("Statistics:\n"); | ||||
|     printf("\tVector width: %d, Processor frequency: %.4f GHz\n", VECTOR_WIDTH, param.proc_freq); | ||||
|     printf("\tTotal number of computed pair interactions: %lld\n", stats.total_force_neighs); | ||||
|     printf("\tTotal number of most SIMD iterations: %lld\n", stats.total_force_iters); | ||||
|     printf("\tUseful read data volume for force computation: %.2fGB\n", force_useful_volume); | ||||
|     printf("\tCycles/SIMD iteration: %.4f\n", timer[FORCE] * param.proc_freq * 1e9 / stats.total_force_iters); | ||||
| #endif | ||||
|  | ||||
|     LIKWID_MARKER_CLOSE; | ||||
|     return EXIT_SUCCESS; | ||||
| } | ||||
|   | ||||
| @@ -69,7 +69,6 @@ void initNeighbor(Neighbor *neighbor, Parameter *param) | ||||
|     neighbor->maxneighs = 100; | ||||
|     neighbor->numneigh = NULL; | ||||
|     neighbor->neighbors = NULL; | ||||
|     neighbor->totalneighs = 0; | ||||
| } | ||||
|  | ||||
| void setupNeighbor() | ||||
|   | ||||
							
								
								
									
										6
									
								
								src/stats.c
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										6
									
								
								src/stats.c
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,6 @@ | ||||
| #include <stats.h> | ||||
|  | ||||
| /* Zero both force-loop counters in *s. */ | ||||
| void initStats(Stats *s) { | ||||
|     s->total_force_neighs = s->total_force_iters = 0; | ||||
| } | ||||
		Reference in New Issue
	
	Block a user