Add ATOMS_LOOP_RUNS option and statistics to stub variant
Signed-off-by: Rafael Ravedutti <rafaelravedutti@gmail.com>
This commit is contained in:
		
							
								
								
									
										4
									
								
								Makefile
									
									
									
									
									
								
							
							
						
						
									
										4
									
								
								Makefile
									
									
									
									
									
								
							| @@ -25,6 +25,10 @@ ifneq ($(ASM_SYNTAX), ATT) | ||||
|     ASFLAGS += -masm=intel | ||||
| endif | ||||
|  | ||||
| ifneq ($(ATOMS_LOOP_RUNS),) | ||||
|     DEFINES += -DATOMS_LOOP_RUNS=$(ATOMS_LOOP_RUNS) | ||||
| endif | ||||
|  | ||||
| ifneq ($(NEIGHBORS_LOOP_RUNS),) | ||||
|     DEFINES += -DNEIGHBORS_LOOP_RUNS=$(NEIGHBORS_LOOP_RUNS) | ||||
| endif | ||||
|   | ||||
							
								
								
									
										13
									
								
								src/force.c
									
									
									
									
									
								
							
							
						
						
									
										13
									
								
								src/force.c
									
									
									
									
									
								
							| @@ -140,6 +140,12 @@ double computeForce(Parameter *param, Atom *atom, Neighbor *neighbor, Stats *sta | ||||
|     INDEX_TRACE_NATOMS(Nlocal, atom->Nghost, neighbor->maxneighs); | ||||
|     double S = getTimeStamp(); | ||||
|     LIKWID_MARKER_START("force"); | ||||
|  | ||||
|     #if VARIANT == stub && defined(ATOMS_LOOP_RUNS) && ATOMS_LOOP_RUNS > 1 | ||||
|     #define REPEAT_ATOMS_LOOP | ||||
|     for(int na = 0; na < (first_exec ? 1 : ATOMS_LOOP_RUNS); na++) { | ||||
|     #endif | ||||
|  | ||||
|         #pragma omp parallel for | ||||
|         for(int i = 0; i < Nlocal; i++) { | ||||
|             neighs = &neighbor->neighbors[i * neighbor->maxneighs]; | ||||
| @@ -164,7 +170,7 @@ double computeForce(Parameter *param, Atom *atom, Neighbor *neighbor, Stats *sta | ||||
|             #if VARIANT == stub && defined(NEIGHBORS_LOOP_RUNS) && NEIGHBORS_LOOP_RUNS > 1 | ||||
|             #define REPEAT_NEIGHBORS_LOOP | ||||
|             int nmax = first_exec ? 1 : NEIGHBORS_LOOP_RUNS; | ||||
|         for(int n = 0; n < nmax; n++) { | ||||
|             for(int nn = 0; nn < (first_exec ? 1 : NEIGHBORS_LOOP_RUNS); nn++) { | ||||
|             #endif | ||||
|  | ||||
|                 //DIST_TRACE_SORT(neighs, numneighs); | ||||
| @@ -219,6 +225,11 @@ double computeForce(Parameter *param, Atom *atom, Neighbor *neighbor, Stats *sta | ||||
|             MEM_TRACE(fz[i], 'R'); | ||||
|             MEM_TRACE(fz[i], 'W'); | ||||
|         } | ||||
|  | ||||
|     #ifdef REPEAT_ATOMS_LOOP | ||||
|     } | ||||
|     #endif | ||||
|  | ||||
|     LIKWID_MARKER_STOP("force"); | ||||
|     double E = getTimeStamp(); | ||||
|  | ||||
|   | ||||
| @@ -31,6 +31,7 @@ typedef struct { | ||||
| } Stats; | ||||
|  | ||||
| void initStats(Stats *s); | ||||
| void displayStatistics(Atom *atom, Parameter *param, Stats *stats, double *timer); | ||||
|  | ||||
| #ifdef COMPUTE_STATS | ||||
| #   define addStat(stat, value)     stat += value; | ||||
|   | ||||
							
								
								
									
										11
									
								
								src/includes/timers.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										11
									
								
								src/includes/timers.h
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,11 @@ | ||||
| // Shared timer slot identifiers. | ||||
| // NOTE(review): identifiers containing a double underscore are reserved for the | ||||
| // implementation in C; consider a guard name such as TIMERS_H - confirm against | ||||
| // the guard convention used by the project's other headers. | ||||
| #ifndef __TIMERS_H_ | ||||
| #define __TIMERS_H_ | ||||
|  | ||||
| // Indices into a `double timer[NUMTIMER]` array: | ||||
| //   TOTAL    - slot read when reporting overall atom updates per second | ||||
| //   NEIGH    - presumably the neighbor-list build time (not used in this view - TODO confirm) | ||||
| //   FORCE    - slot holding the accumulated force-computation time | ||||
| //   NUMTIMER - number of slots; used as the array length, not as an index | ||||
| typedef enum { | ||||
|     TOTAL = 0, | ||||
|     NEIGH, | ||||
|     FORCE, | ||||
|     NUMTIMER | ||||
| } timertype; | ||||
|  | ||||
| #endif | ||||
| @@ -8,15 +8,17 @@ | ||||
| #include <neighbor.h> | ||||
| #include <parameter.h> | ||||
| #include <atom.h> | ||||
| #include <stats.h> | ||||
| #include <thermo.h> | ||||
| #include <pbc.h> | ||||
| #include <timers.h> | ||||
|  | ||||
| #define HLINE "----------------------------------------------------------------------------\n" | ||||
|  | ||||
| #define LATTICE_DISTANCE    10.0 | ||||
| #define NEIGH_DISTANCE      1.0 | ||||
|  | ||||
| extern double computeForce(Parameter*, Atom*, Neighbor*, int, int); | ||||
| extern double computeForce(Parameter*, Atom*, Neighbor*, Stats*, int, int); | ||||
|  | ||||
| void init(Parameter *param) { | ||||
|     param->epsilon = 1.0; | ||||
| @@ -37,6 +39,7 @@ void init(Parameter *param) { | ||||
|     param->nstat = 100; | ||||
|     param->temp = 1.44; | ||||
|     param->every = 20; | ||||
|     param->proc_freq = 0.0; | ||||
| } | ||||
|  | ||||
| // Show debug messages | ||||
| @@ -56,10 +59,10 @@ int main(int argc, const char *argv[]) { | ||||
|     Atom atom_data; | ||||
|     Atom *atom = (Atom *)(&atom_data); | ||||
|     Neighbor neighbor; | ||||
|     Stats stats; | ||||
|     Parameter param; | ||||
|     int atoms_per_unit_cell = 8; | ||||
|     int csv = 0; | ||||
|     double freq = 0.0; | ||||
|  | ||||
|     LIKWID_MARKER_INIT; | ||||
|     LIKWID_MARKER_REGISTER("force"); | ||||
| @@ -95,7 +98,7 @@ int main(int argc, const char *argv[]) { | ||||
|         } | ||||
|         if((strcmp(argv[i], "-f") == 0)) | ||||
|         { | ||||
|             freq = atof(argv[++i]) * 1.E9; | ||||
|             param.proc_freq = atof(argv[++i]); | ||||
|             continue; | ||||
|         } | ||||
|         if((strcmp(argv[i], "-csv") == 0)) | ||||
| @@ -123,6 +126,7 @@ int main(int argc, const char *argv[]) { | ||||
|  | ||||
|     DEBUG("Initializing atoms...\n"); | ||||
|     initAtom(atom); | ||||
|     initStats(&stats); | ||||
|  | ||||
|     #ifdef EXPLICIT_TYPES | ||||
|     atom->ntypes = param.ntypes; | ||||
| @@ -191,6 +195,7 @@ int main(int argc, const char *argv[]) { | ||||
|  | ||||
|     if(!csv) { | ||||
|         printf("Number of timesteps: %d\n", param.ntimes); | ||||
|         printf("Number of times to compute the atoms loop: %d\n", ATOMS_LOOP_RUNS); | ||||
|         printf("Number of times to compute the neighbors loop: %d\n", NEIGHBORS_LOOP_RUNS); | ||||
|         printf("System size (unit cells): %dx%dx%d\n", param.nx, param.ny, param.nz); | ||||
|         printf("Atoms per unit cell: %d\n", atoms_per_unit_cell); | ||||
| @@ -207,41 +212,46 @@ int main(int argc, const char *argv[]) { | ||||
|     DEBUG("Building neighbor lists...\n"); | ||||
|     buildNeighbor(atom, &neighbor); | ||||
|     DEBUG("Computing forces...\n"); | ||||
|     computeForce(&param, atom, &neighbor, 1, 0); | ||||
|     computeForce(&param, atom, &neighbor, &stats, 1, 1); | ||||
|  | ||||
|     double S, E; | ||||
|     S = getTimeStamp(); | ||||
|     for(int i = 0; i < param.ntimes; i++) { | ||||
|         computeForce(&param, atom, &neighbor, 0, i + 1); | ||||
|         computeForce(&param, atom, &neighbor, &stats, 0, i + 1); | ||||
|     } | ||||
|     E = getTimeStamp(); | ||||
|     double T_accum = E-S; | ||||
|     const double atoms_updates_per_sec = (double)(atom->Nlocal) / T_accum * (double)(param.ntimes * NEIGHBORS_LOOP_RUNS); | ||||
|     const double cycles_per_atom = T_accum / (double)(atom->Nlocal) / (double)(param.ntimes * NEIGHBORS_LOOP_RUNS) * freq; | ||||
|     double freq_hz = param.proc_freq * 1.e9; | ||||
|     const double repeats = ATOMS_LOOP_RUNS * NEIGHBORS_LOOP_RUNS; | ||||
|     const double atoms_updates_per_sec = (double)(atom->Nlocal) / T_accum * (double)(param.ntimes * repeats); | ||||
|     const double cycles_per_atom = T_accum / (double)(atom->Nlocal) / (double)(param.ntimes * repeats) * freq_hz; | ||||
|     const double cycles_per_neigh = cycles_per_atom / (double)(atoms_per_unit_cell - 1); | ||||
|  | ||||
|     if(!csv) { | ||||
|         printf("Total time: %.4f, Mega atom updates/s: %.4f\n", T_accum, atoms_updates_per_sec / 1.E6); | ||||
|         if(freq > 0.0) { | ||||
|         printf("Total time: %.4f, Mega atom updates/s: %.4f\n", T_accum, atoms_updates_per_sec / 1.e6); | ||||
|         if(param.proc_freq > 0.0) { | ||||
|             printf("Cycles per atom: %.4f, Cycles per neighbor: %.4f\n", cycles_per_atom, cycles_per_neigh); | ||||
|         } | ||||
|     } else { | ||||
|         printf("steps,unit cells,atoms/unit cell,total atoms,total vol.(kB),atoms vol.(kB),neigh vol.(kB),time(s),atom upds/s(M)"); | ||||
|         if(freq > 0.0) { | ||||
|         if(param.proc_freq > 0.0) { | ||||
|             printf(",cy/atom,cy/neigh"); | ||||
|         } | ||||
|         printf("\n"); | ||||
|  | ||||
|         printf("%d,%dx%dx%d,%d,%d,%.4f,%.4f,%.4f,%.4f,%.4f", | ||||
|             param.ntimes, param.nx, param.ny, param.nz, atoms_per_unit_cell, atom->Nlocal, | ||||
|             estim_volume / 1.E3, estim_atom_volume / 1.E3, estim_neighbors_volume / 1.E3, T_accum, atoms_updates_per_sec / 1.E6); | ||||
|             estim_volume / 1.e3, estim_atom_volume / 1.e3, estim_neighbors_volume / 1.e3, T_accum, atoms_updates_per_sec / 1.e6); | ||||
|  | ||||
|         if(freq > 0.0) { | ||||
|         if(param.proc_freq > 0.0) { | ||||
|             printf(",%.4f,%.4f", cycles_per_atom, cycles_per_neigh); | ||||
|         } | ||||
|         printf("\n"); | ||||
|     } | ||||
|  | ||||
|     double timer[NUMTIMER]; | ||||
|     timer[FORCE] = T_accum; | ||||
|     displayStatistics(atom, &param, &stats, timer); | ||||
|     LIKWID_MARKER_CLOSE; | ||||
|     return EXIT_SUCCESS; | ||||
| } | ||||
|   | ||||
							
								
								
									
										24
									
								
								src/main.c
									
									
									
									
									
								
							
							
						
						
									
										24
									
								
								src/main.c
									
									
									
									
									
								
							| @@ -38,16 +38,10 @@ | ||||
| #include <stats.h> | ||||
| #include <thermo.h> | ||||
| #include <pbc.h> | ||||
| #include <timers.h> | ||||
|  | ||||
| #define HLINE "----------------------------------------------------------------------------\n" | ||||
|  | ||||
| typedef enum { | ||||
|     TOTAL = 0, | ||||
|     NEIGH, | ||||
|     FORCE, | ||||
|     NUMTIMER | ||||
| } timertype; | ||||
|  | ||||
| extern double computeForce(Parameter*, Atom*, Neighbor*, Stats*, int, int); | ||||
|  | ||||
| void init(Parameter *param) | ||||
| @@ -257,21 +251,7 @@ int main (int argc, char** argv) | ||||
|     printf(HLINE); | ||||
|     printf("Performance: %.2f million atom updates per second\n", | ||||
|             1e-6 * (double) atom.Natoms * param.ntimes / timer[TOTAL]); | ||||
|  | ||||
| #ifdef COMPUTE_STATS | ||||
|     double force_useful_volume = 1e-9 * ( (double)(atom.Nlocal * (param.ntimes + 1)) * (sizeof(MD_FLOAT) * 6 + sizeof(int)) + | ||||
|                                           (double)(stats.total_force_neighs) * (sizeof(MD_FLOAT) * 3 + sizeof(int)) ); | ||||
| #ifdef EXPLICIT_TYPES | ||||
|     force_useful_volume += 1e-9 * (double)((atom.Nlocal * (param.ntimes + 1)) + stats.total_force_neighs) * sizeof(int); | ||||
| #endif | ||||
|     printf("Statistics:\n"); | ||||
|     printf("\tVector width: %d, Processor frequency: %.4f GHz\n", VECTOR_WIDTH, param.proc_freq); | ||||
|     printf("\tTotal number of computed pair interactions: %lld\n", stats.total_force_neighs); | ||||
|     printf("\tTotal number of most SIMD iterations: %lld\n", stats.total_force_iters); | ||||
|     printf("\tUseful read data volume for force computation: %.2fGB\n", force_useful_volume); | ||||
|     printf("\tCycles/SIMD iteration: %.4f\n", timer[FORCE] * param.proc_freq * 1e9 / stats.total_force_iters); | ||||
| #endif | ||||
|  | ||||
|     displayStatistics(&atom, &param, &stats, timer); | ||||
|     LIKWID_MARKER_CLOSE; | ||||
|     return EXIT_SUCCESS; | ||||
| } | ||||
|   | ||||
							
								
								
									
										21
									
								
								src/stats.c
									
									
									
									
									
								
							
							
						
						
									
										21
									
								
								src/stats.c
									
									
									
									
									
								
							| @@ -1,6 +1,27 @@ | ||||
| #include <stdio.h> | ||||
|  | ||||
| #include <atom.h> | ||||
| #include <parameter.h> | ||||
| #include <stats.h> | ||||
| #include <timers.h> | ||||
|  | ||||
| // Resets the two force counters stored in *s to zero so a run starts from a | ||||
| // clean tally. | ||||
| void initStats(Stats *s) { | ||||
|     s->total_force_iters = 0; | ||||
|     s->total_force_neighs = 0; | ||||
| } | ||||
|  | ||||
| // Prints the force-computation statistics block to stdout: vector width, | ||||
| // processor frequency, pair-interaction and SIMD-iteration totals, the useful | ||||
| // data volume estimate and cycles per SIMD iteration. | ||||
| // The body is compiled only when COMPUTE_STATS is defined; otherwise the | ||||
| // function does nothing. | ||||
| // | ||||
| //   atom  - read for Nlocal | ||||
| //   param - read for ntimes and proc_freq (printed as GHz) | ||||
| //   stats - read for total_force_neighs and total_force_iters | ||||
| //   timer - array indexed by timertype; only timer[FORCE] is read | ||||
| void displayStatistics(Atom *atom, Parameter *param, Stats *stats, double *timer) { | ||||
| #ifdef COMPUTE_STATS | ||||
|     // Widen to double before multiplying so Nlocal * (ntimes + 1) cannot | ||||
|     // overflow int on large runs. | ||||
|     const double local_atom_steps = (double)(atom->Nlocal) * (double)(param->ntimes + 1); | ||||
|     const double force_neighs = (double)(stats->total_force_neighs); | ||||
|  | ||||
|     // Byte counts scaled by 1e-9, reported below as GB. | ||||
|     double force_useful_volume = 1e-9 * ( local_atom_steps * (sizeof(MD_FLOAT) * 6 + sizeof(int)) + | ||||
|                                           force_neighs * (sizeof(MD_FLOAT) * 3 + sizeof(int)) ); | ||||
| #ifdef EXPLICIT_TYPES | ||||
|     // atom, param and stats are pointers here, so members are reached with ->. | ||||
|     // One additional int is counted per atom step and per pair interaction. | ||||
|     force_useful_volume += 1e-9 * (local_atom_steps + force_neighs) * sizeof(int); | ||||
| #endif | ||||
|     printf("Statistics:\n"); | ||||
|     printf("\tVector width: %d, Processor frequency: %.4f GHz\n", VECTOR_WIDTH, param->proc_freq); | ||||
|     printf("\tTotal number of computed pair interactions: %lld\n", stats->total_force_neighs); | ||||
|     printf("\tTotal number of most SIMD iterations: %lld\n", stats->total_force_iters); | ||||
|     printf("\tUseful read data volume for force computation: %.2fGB\n", force_useful_volume); | ||||
|     // proc_freq is in GHz, hence the 1e9 factor to obtain Hz; timer[FORCE] is | ||||
|     // presumably in seconds - TODO confirm against getTimeStamp(). | ||||
|     printf("\tCycles/SIMD iteration: %.4f\n", timer[FORCE] * param->proc_freq * 1e9 / stats->total_force_iters); | ||||
| #endif | ||||
| } | ||||
|   | ||||
		Reference in New Issue
	
	Block a user