Add ATOMS_LOOP_RUNS option and statistics to stub variant
Signed-off-by: Rafael Ravedutti <rafaelravedutti@gmail.com>
This commit is contained in:
		
							
								
								
									
										4
									
								
								Makefile
									
									
									
									
									
								
							
							
						
						
									
										4
									
								
								Makefile
									
									
									
									
									
								
							@@ -25,6 +25,10 @@ ifneq ($(ASM_SYNTAX), ATT)
 | 
				
			|||||||
    ASFLAGS += -masm=intel
 | 
					    ASFLAGS += -masm=intel
 | 
				
			||||||
endif
 | 
					endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					ifneq ($(ATOMS_LOOP_RUNS),)
 | 
				
			||||||
 | 
					    DEFINES += -DATOMS_LOOP_RUNS=$(ATOMS_LOOP_RUNS)
 | 
				
			||||||
 | 
					endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
ifneq ($(NEIGHBORS_LOOP_RUNS),)
 | 
					ifneq ($(NEIGHBORS_LOOP_RUNS),)
 | 
				
			||||||
    DEFINES += -DNEIGHBORS_LOOP_RUNS=$(NEIGHBORS_LOOP_RUNS)
 | 
					    DEFINES += -DNEIGHBORS_LOOP_RUNS=$(NEIGHBORS_LOOP_RUNS)
 | 
				
			||||||
endif
 | 
					endif
 | 
				
			||||||
 
 | 
				
			|||||||
							
								
								
									
										141
									
								
								src/force.c
									
									
									
									
									
								
							
							
						
						
									
										141
									
								
								src/force.c
									
									
									
									
									
								
							@@ -140,85 +140,96 @@ double computeForce(Parameter *param, Atom *atom, Neighbor *neighbor, Stats *sta
 | 
				
			|||||||
    INDEX_TRACE_NATOMS(Nlocal, atom->Nghost, neighbor->maxneighs);
 | 
					    INDEX_TRACE_NATOMS(Nlocal, atom->Nghost, neighbor->maxneighs);
 | 
				
			||||||
    double S = getTimeStamp();
 | 
					    double S = getTimeStamp();
 | 
				
			||||||
    LIKWID_MARKER_START("force");
 | 
					    LIKWID_MARKER_START("force");
 | 
				
			||||||
    #pragma omp parallel for
 | 
					 | 
				
			||||||
    for(int i = 0; i < Nlocal; i++) {
 | 
					 | 
				
			||||||
        neighs = &neighbor->neighbors[i * neighbor->maxneighs];
 | 
					 | 
				
			||||||
        int numneighs = neighbor->numneigh[i];
 | 
					 | 
				
			||||||
        MD_FLOAT xtmp = atom_x(i);
 | 
					 | 
				
			||||||
        MD_FLOAT ytmp = atom_y(i);
 | 
					 | 
				
			||||||
        MD_FLOAT ztmp = atom_z(i);
 | 
					 | 
				
			||||||
        MD_FLOAT fix = 0;
 | 
					 | 
				
			||||||
        MD_FLOAT fiy = 0;
 | 
					 | 
				
			||||||
        MD_FLOAT fiz = 0;
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
        MEM_TRACE(atom_x(i), 'R');
 | 
					    #if VARIANT == stub && defined(ATOMS_LOOP_RUNS) && ATOMS_LOOP_RUNS > 1
 | 
				
			||||||
        MEM_TRACE(atom_y(i), 'R');
 | 
					    #define REPEAT_ATOMS_LOOP
 | 
				
			||||||
        MEM_TRACE(atom_z(i), 'R');
 | 
					    for(int na = 0; na < (first_exec ? 1 : ATOMS_LOOP_RUNS); na++) {
 | 
				
			||||||
        INDEX_TRACE_ATOM(i);
 | 
					    #endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        #ifdef EXPLICIT_TYPES
 | 
					        #pragma omp parallel for
 | 
				
			||||||
        const int type_i = atom->type[i];
 | 
					        for(int i = 0; i < Nlocal; i++) {
 | 
				
			||||||
        MEM_TRACE(atom->type(i), 'R');
 | 
					            neighs = &neighbor->neighbors[i * neighbor->maxneighs];
 | 
				
			||||||
        #endif
 | 
					            int numneighs = neighbor->numneigh[i];
 | 
				
			||||||
 | 
					            MD_FLOAT xtmp = atom_x(i);
 | 
				
			||||||
 | 
					            MD_FLOAT ytmp = atom_y(i);
 | 
				
			||||||
 | 
					            MD_FLOAT ztmp = atom_z(i);
 | 
				
			||||||
 | 
					            MD_FLOAT fix = 0;
 | 
				
			||||||
 | 
					            MD_FLOAT fiy = 0;
 | 
				
			||||||
 | 
					            MD_FLOAT fiz = 0;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        #if VARIANT == stub && defined(NEIGHBORS_LOOP_RUNS) && NEIGHBORS_LOOP_RUNS > 1
 | 
					            MEM_TRACE(atom_x(i), 'R');
 | 
				
			||||||
        #define REPEAT_NEIGHBORS_LOOP
 | 
					            MEM_TRACE(atom_y(i), 'R');
 | 
				
			||||||
        int nmax = first_exec ? 1 : NEIGHBORS_LOOP_RUNS;
 | 
					            MEM_TRACE(atom_z(i), 'R');
 | 
				
			||||||
        for(int n = 0; n < nmax; n++) {
 | 
					            INDEX_TRACE_ATOM(i);
 | 
				
			||||||
        #endif
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
            //DIST_TRACE_SORT(neighs, numneighs);
 | 
					            #ifdef EXPLICIT_TYPES
 | 
				
			||||||
            INDEX_TRACE(neighs, numneighs);
 | 
					            const int type_i = atom->type[i];
 | 
				
			||||||
            //DIST_TRACE(neighs, numneighs);
 | 
					            MEM_TRACE(atom->type(i), 'R');
 | 
				
			||||||
 | 
					            #endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
            for(int k = 0; k < numneighs; k++) {
 | 
					            #if VARIANT == stub && defined(NEIGHBORS_LOOP_RUNS) && NEIGHBORS_LOOP_RUNS > 1
 | 
				
			||||||
                int j = neighs[k];
 | 
					            #define REPEAT_NEIGHBORS_LOOP
 | 
				
			||||||
                MD_FLOAT delx = xtmp - atom_x(j);
 | 
					            int nmax = first_exec ? 1 : NEIGHBORS_LOOP_RUNS;
 | 
				
			||||||
                MD_FLOAT dely = ytmp - atom_y(j);
 | 
					            for(int nn = 0; nn < (first_exec ? 1 : NEIGHBORS_LOOP_RUNS); nn++) {
 | 
				
			||||||
                MD_FLOAT delz = ztmp - atom_z(j);
 | 
					            #endif
 | 
				
			||||||
                MD_FLOAT rsq = delx * delx + dely * dely + delz * delz;
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
                MEM_TRACE(neighs[k], 'R');
 | 
					                //DIST_TRACE_SORT(neighs, numneighs);
 | 
				
			||||||
                MEM_TRACE(atom_x(j), 'R');
 | 
					                INDEX_TRACE(neighs, numneighs);
 | 
				
			||||||
                MEM_TRACE(atom_y(j), 'R');
 | 
					                //DIST_TRACE(neighs, numneighs);
 | 
				
			||||||
                MEM_TRACE(atom_z(j), 'R');
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
                #ifdef EXPLICIT_TYPES
 | 
					                for(int k = 0; k < numneighs; k++) {
 | 
				
			||||||
                const int type_j = atom->type[j];
 | 
					                    int j = neighs[k];
 | 
				
			||||||
                const int type_ij = type_i * atom->ntypes + type_j;
 | 
					                    MD_FLOAT delx = xtmp - atom_x(j);
 | 
				
			||||||
                const MD_FLOAT cutforcesq = atom->cutforcesq[type_ij];
 | 
					                    MD_FLOAT dely = ytmp - atom_y(j);
 | 
				
			||||||
                const MD_FLOAT sigma6 = atom->sigma6[type_ij];
 | 
					                    MD_FLOAT delz = ztmp - atom_z(j);
 | 
				
			||||||
                const MD_FLOAT epsilon = atom->epsilon[type_ij];
 | 
					                    MD_FLOAT rsq = delx * delx + dely * dely + delz * delz;
 | 
				
			||||||
                MEM_TRACE(atom->type(j), 'R');
 | 
					 | 
				
			||||||
                #endif
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
                if(rsq < cutforcesq) {
 | 
					                    MEM_TRACE(neighs[k], 'R');
 | 
				
			||||||
                    MD_FLOAT sr2 = 1.0 / rsq;
 | 
					                    MEM_TRACE(atom_x(j), 'R');
 | 
				
			||||||
                    MD_FLOAT sr6 = sr2 * sr2 * sr2 * sigma6;
 | 
					                    MEM_TRACE(atom_y(j), 'R');
 | 
				
			||||||
                    MD_FLOAT force = 48.0 * sr6 * (sr6 - 0.5) * sr2 * epsilon;
 | 
					                    MEM_TRACE(atom_z(j), 'R');
 | 
				
			||||||
                    fix += delx * force;
 | 
					
 | 
				
			||||||
                    fiy += dely * force;
 | 
					                    #ifdef EXPLICIT_TYPES
 | 
				
			||||||
                    fiz += delz * force;
 | 
					                    const int type_j = atom->type[j];
 | 
				
			||||||
 | 
					                    const int type_ij = type_i * atom->ntypes + type_j;
 | 
				
			||||||
 | 
					                    const MD_FLOAT cutforcesq = atom->cutforcesq[type_ij];
 | 
				
			||||||
 | 
					                    const MD_FLOAT sigma6 = atom->sigma6[type_ij];
 | 
				
			||||||
 | 
					                    const MD_FLOAT epsilon = atom->epsilon[type_ij];
 | 
				
			||||||
 | 
					                    MEM_TRACE(atom->type(j), 'R');
 | 
				
			||||||
 | 
					                    #endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					                    if(rsq < cutforcesq) {
 | 
				
			||||||
 | 
					                        MD_FLOAT sr2 = 1.0 / rsq;
 | 
				
			||||||
 | 
					                        MD_FLOAT sr6 = sr2 * sr2 * sr2 * sigma6;
 | 
				
			||||||
 | 
					                        MD_FLOAT force = 48.0 * sr6 * (sr6 - 0.5) * sr2 * epsilon;
 | 
				
			||||||
 | 
					                        fix += delx * force;
 | 
				
			||||||
 | 
					                        fiy += dely * force;
 | 
				
			||||||
 | 
					                        fiz += delz * force;
 | 
				
			||||||
 | 
					                    }
 | 
				
			||||||
                }
 | 
					                }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					            #ifdef REPEAT_NEIGHBORS_LOOP
 | 
				
			||||||
            }
 | 
					            }
 | 
				
			||||||
 | 
					            #endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        #ifdef REPEAT_NEIGHBORS_LOOP
 | 
					            fx[i] += fix;
 | 
				
			||||||
 | 
					            fy[i] += fiy;
 | 
				
			||||||
 | 
					            fz[i] += fiz;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					            addStat(stats->total_force_neighs, numneighs);
 | 
				
			||||||
 | 
					            addStat(stats->total_force_iters, (numneighs + VECTOR_WIDTH - 1) / VECTOR_WIDTH);
 | 
				
			||||||
 | 
					            MEM_TRACE(fx[i], 'R');
 | 
				
			||||||
 | 
					            MEM_TRACE(fx[i], 'W');
 | 
				
			||||||
 | 
					            MEM_TRACE(fy[i], 'R');
 | 
				
			||||||
 | 
					            MEM_TRACE(fy[i], 'W');
 | 
				
			||||||
 | 
					            MEM_TRACE(fz[i], 'R');
 | 
				
			||||||
 | 
					            MEM_TRACE(fz[i], 'W');
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
        #endif
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
        fx[i] += fix;
 | 
					    #ifdef REPEAT_ATOMS_LOOP
 | 
				
			||||||
        fy[i] += fiy;
 | 
					 | 
				
			||||||
        fz[i] += fiz;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        addStat(stats->total_force_neighs, numneighs);
 | 
					 | 
				
			||||||
        addStat(stats->total_force_iters, (numneighs + VECTOR_WIDTH - 1) / VECTOR_WIDTH);
 | 
					 | 
				
			||||||
        MEM_TRACE(fx[i], 'R');
 | 
					 | 
				
			||||||
        MEM_TRACE(fx[i], 'W');
 | 
					 | 
				
			||||||
        MEM_TRACE(fy[i], 'R');
 | 
					 | 
				
			||||||
        MEM_TRACE(fy[i], 'W');
 | 
					 | 
				
			||||||
        MEM_TRACE(fz[i], 'R');
 | 
					 | 
				
			||||||
        MEM_TRACE(fz[i], 'W');
 | 
					 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					    #endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    LIKWID_MARKER_STOP("force");
 | 
					    LIKWID_MARKER_STOP("force");
 | 
				
			||||||
    double E = getTimeStamp();
 | 
					    double E = getTimeStamp();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -31,6 +31,7 @@ typedef struct {
 | 
				
			|||||||
} Stats;
 | 
					} Stats;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
void initStats(Stats *s);
 | 
					void initStats(Stats *s);
 | 
				
			||||||
 | 
					void displayStatistics(Atom *atom, Parameter *param, Stats *stats, double *timer);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#ifdef COMPUTE_STATS
 | 
					#ifdef COMPUTE_STATS
 | 
				
			||||||
#   define addStat(stat, value)     stat += value;
 | 
					#   define addStat(stat, value)     stat += value;
 | 
				
			||||||
 
 | 
				
			|||||||
							
								
								
									
										11
									
								
								src/includes/timers.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										11
									
								
								src/includes/timers.h
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,11 @@
 | 
				
			|||||||
 | 
					#ifndef __TIMERS_H_
 | 
				
			||||||
 | 
					#define __TIMERS_H_
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					typedef enum {
 | 
				
			||||||
 | 
					    TOTAL = 0,
 | 
				
			||||||
 | 
					    NEIGH,
 | 
				
			||||||
 | 
					    FORCE,
 | 
				
			||||||
 | 
					    NUMTIMER
 | 
				
			||||||
 | 
					} timertype;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
@@ -8,15 +8,17 @@
 | 
				
			|||||||
#include <neighbor.h>
 | 
					#include <neighbor.h>
 | 
				
			||||||
#include <parameter.h>
 | 
					#include <parameter.h>
 | 
				
			||||||
#include <atom.h>
 | 
					#include <atom.h>
 | 
				
			||||||
 | 
					#include <stats.h>
 | 
				
			||||||
#include <thermo.h>
 | 
					#include <thermo.h>
 | 
				
			||||||
#include <pbc.h>
 | 
					#include <pbc.h>
 | 
				
			||||||
 | 
					#include <timers.h>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#define HLINE "----------------------------------------------------------------------------\n"
 | 
					#define HLINE "----------------------------------------------------------------------------\n"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#define LATTICE_DISTANCE    10.0
 | 
					#define LATTICE_DISTANCE    10.0
 | 
				
			||||||
#define NEIGH_DISTANCE      1.0
 | 
					#define NEIGH_DISTANCE      1.0
 | 
				
			||||||
 | 
					
 | 
				
			||||||
extern double computeForce(Parameter*, Atom*, Neighbor*, int, int);
 | 
					extern double computeForce(Parameter*, Atom*, Neighbor*, Stats*, int, int);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
void init(Parameter *param) {
 | 
					void init(Parameter *param) {
 | 
				
			||||||
    param->epsilon = 1.0;
 | 
					    param->epsilon = 1.0;
 | 
				
			||||||
@@ -37,6 +39,7 @@ void init(Parameter *param) {
 | 
				
			|||||||
    param->nstat = 100;
 | 
					    param->nstat = 100;
 | 
				
			||||||
    param->temp = 1.44;
 | 
					    param->temp = 1.44;
 | 
				
			||||||
    param->every = 20;
 | 
					    param->every = 20;
 | 
				
			||||||
 | 
					    param->proc_freq = 0.0;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
// Show debug messages
 | 
					// Show debug messages
 | 
				
			||||||
@@ -56,10 +59,10 @@ int main(int argc, const char *argv[]) {
 | 
				
			|||||||
    Atom atom_data;
 | 
					    Atom atom_data;
 | 
				
			||||||
    Atom *atom = (Atom *)(&atom_data);
 | 
					    Atom *atom = (Atom *)(&atom_data);
 | 
				
			||||||
    Neighbor neighbor;
 | 
					    Neighbor neighbor;
 | 
				
			||||||
 | 
					    Stats stats;
 | 
				
			||||||
    Parameter param;
 | 
					    Parameter param;
 | 
				
			||||||
    int atoms_per_unit_cell = 8;
 | 
					    int atoms_per_unit_cell = 8;
 | 
				
			||||||
    int csv = 0;
 | 
					    int csv = 0;
 | 
				
			||||||
    double freq = 0.0;
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
    LIKWID_MARKER_INIT;
 | 
					    LIKWID_MARKER_INIT;
 | 
				
			||||||
    LIKWID_MARKER_REGISTER("force");
 | 
					    LIKWID_MARKER_REGISTER("force");
 | 
				
			||||||
@@ -95,7 +98,7 @@ int main(int argc, const char *argv[]) {
 | 
				
			|||||||
        }
 | 
					        }
 | 
				
			||||||
        if((strcmp(argv[i], "-f") == 0))
 | 
					        if((strcmp(argv[i], "-f") == 0))
 | 
				
			||||||
        {
 | 
					        {
 | 
				
			||||||
            freq = atof(argv[++i]) * 1.E9;
 | 
					            param.proc_freq = atof(argv[++i]);
 | 
				
			||||||
            continue;
 | 
					            continue;
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
        if((strcmp(argv[i], "-csv") == 0))
 | 
					        if((strcmp(argv[i], "-csv") == 0))
 | 
				
			||||||
@@ -123,6 +126,7 @@ int main(int argc, const char *argv[]) {
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
    DEBUG("Initializing atoms...\n");
 | 
					    DEBUG("Initializing atoms...\n");
 | 
				
			||||||
    initAtom(atom);
 | 
					    initAtom(atom);
 | 
				
			||||||
 | 
					    initStats(&stats);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    #ifdef EXPLICIT_TYPES
 | 
					    #ifdef EXPLICIT_TYPES
 | 
				
			||||||
    atom->ntypes = param.ntypes;
 | 
					    atom->ntypes = param.ntypes;
 | 
				
			||||||
@@ -191,6 +195,7 @@ int main(int argc, const char *argv[]) {
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
    if(!csv) {
 | 
					    if(!csv) {
 | 
				
			||||||
        printf("Number of timesteps: %d\n", param.ntimes);
 | 
					        printf("Number of timesteps: %d\n", param.ntimes);
 | 
				
			||||||
 | 
					        printf("Number of times to compute the atoms loop: %d\n", ATOMS_LOOP_RUNS);
 | 
				
			||||||
        printf("Number of times to compute the neighbors loop: %d\n", NEIGHBORS_LOOP_RUNS);
 | 
					        printf("Number of times to compute the neighbors loop: %d\n", NEIGHBORS_LOOP_RUNS);
 | 
				
			||||||
        printf("System size (unit cells): %dx%dx%d\n", param.nx, param.ny, param.nz);
 | 
					        printf("System size (unit cells): %dx%dx%d\n", param.nx, param.ny, param.nz);
 | 
				
			||||||
        printf("Atoms per unit cell: %d\n", atoms_per_unit_cell);
 | 
					        printf("Atoms per unit cell: %d\n", atoms_per_unit_cell);
 | 
				
			||||||
@@ -207,41 +212,46 @@ int main(int argc, const char *argv[]) {
 | 
				
			|||||||
    DEBUG("Building neighbor lists...\n");
 | 
					    DEBUG("Building neighbor lists...\n");
 | 
				
			||||||
    buildNeighbor(atom, &neighbor);
 | 
					    buildNeighbor(atom, &neighbor);
 | 
				
			||||||
    DEBUG("Computing forces...\n");
 | 
					    DEBUG("Computing forces...\n");
 | 
				
			||||||
    computeForce(&param, atom, &neighbor, 1, 0);
 | 
					    computeForce(&param, atom, &neighbor, &stats, 1, 1);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    double S, E;
 | 
					    double S, E;
 | 
				
			||||||
    S = getTimeStamp();
 | 
					    S = getTimeStamp();
 | 
				
			||||||
    for(int i = 0; i < param.ntimes; i++) {
 | 
					    for(int i = 0; i < param.ntimes; i++) {
 | 
				
			||||||
        computeForce(&param, atom, &neighbor, 0, i + 1);
 | 
					        computeForce(&param, atom, &neighbor, &stats, 0, i + 1);
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
    E = getTimeStamp();
 | 
					    E = getTimeStamp();
 | 
				
			||||||
    double T_accum = E-S;
 | 
					    double T_accum = E-S;
 | 
				
			||||||
    const double atoms_updates_per_sec = (double)(atom->Nlocal) / T_accum * (double)(param.ntimes * NEIGHBORS_LOOP_RUNS);
 | 
					    double freq_hz = param.proc_freq * 1.e9;
 | 
				
			||||||
    const double cycles_per_atom = T_accum / (double)(atom->Nlocal) / (double)(param.ntimes * NEIGHBORS_LOOP_RUNS) * freq;
 | 
					    const double repeats = ATOMS_LOOP_RUNS * NEIGHBORS_LOOP_RUNS;
 | 
				
			||||||
 | 
					    const double atoms_updates_per_sec = (double)(atom->Nlocal) / T_accum * (double)(param.ntimes * repeats);
 | 
				
			||||||
 | 
					    const double cycles_per_atom = T_accum / (double)(atom->Nlocal) / (double)(param.ntimes * repeats) * freq_hz;
 | 
				
			||||||
    const double cycles_per_neigh = cycles_per_atom / (double)(atoms_per_unit_cell - 1);
 | 
					    const double cycles_per_neigh = cycles_per_atom / (double)(atoms_per_unit_cell - 1);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    if(!csv) {
 | 
					    if(!csv) {
 | 
				
			||||||
        printf("Total time: %.4f, Mega atom updates/s: %.4f\n", T_accum, atoms_updates_per_sec / 1.E6);
 | 
					        printf("Total time: %.4f, Mega atom updates/s: %.4f\n", T_accum, atoms_updates_per_sec / 1.e6);
 | 
				
			||||||
        if(freq > 0.0) {
 | 
					        if(param.proc_freq > 0.0) {
 | 
				
			||||||
            printf("Cycles per atom: %.4f, Cycles per neighbor: %.4f\n", cycles_per_atom, cycles_per_neigh);
 | 
					            printf("Cycles per atom: %.4f, Cycles per neighbor: %.4f\n", cycles_per_atom, cycles_per_neigh);
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
    } else {
 | 
					    } else {
 | 
				
			||||||
        printf("steps,unit cells,atoms/unit cell,total atoms,total vol.(kB),atoms vol.(kB),neigh vol.(kB),time(s),atom upds/s(M)");
 | 
					        printf("steps,unit cells,atoms/unit cell,total atoms,total vol.(kB),atoms vol.(kB),neigh vol.(kB),time(s),atom upds/s(M)");
 | 
				
			||||||
        if(freq > 0.0) {
 | 
					        if(param.proc_freq > 0.0) {
 | 
				
			||||||
            printf(",cy/atom,cy/neigh");
 | 
					            printf(",cy/atom,cy/neigh");
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
        printf("\n");
 | 
					        printf("\n");
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        printf("%d,%dx%dx%d,%d,%d,%.4f,%.4f,%.4f,%.4f,%.4f",
 | 
					        printf("%d,%dx%dx%d,%d,%d,%.4f,%.4f,%.4f,%.4f,%.4f",
 | 
				
			||||||
            param.ntimes, param.nx, param.ny, param.nz, atoms_per_unit_cell, atom->Nlocal,
 | 
					            param.ntimes, param.nx, param.ny, param.nz, atoms_per_unit_cell, atom->Nlocal,
 | 
				
			||||||
            estim_volume / 1.E3, estim_atom_volume / 1.E3, estim_neighbors_volume / 1.E3, T_accum, atoms_updates_per_sec / 1.E6);
 | 
					            estim_volume / 1.e3, estim_atom_volume / 1.e3, estim_neighbors_volume / 1.e3, T_accum, atoms_updates_per_sec / 1.e6);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        if(freq > 0.0) {
 | 
					        if(param.proc_freq > 0.0) {
 | 
				
			||||||
            printf(",%.4f,%.4f", cycles_per_atom, cycles_per_neigh);
 | 
					            printf(",%.4f,%.4f", cycles_per_atom, cycles_per_neigh);
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
        printf("\n");
 | 
					        printf("\n");
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    double timer[NUMTIMER];
 | 
				
			||||||
 | 
					    timer[FORCE] = T_accum;
 | 
				
			||||||
 | 
					    displayStatistics(atom, &param, &stats, timer);
 | 
				
			||||||
    LIKWID_MARKER_CLOSE;
 | 
					    LIKWID_MARKER_CLOSE;
 | 
				
			||||||
    return EXIT_SUCCESS;
 | 
					    return EXIT_SUCCESS;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 
 | 
				
			|||||||
							
								
								
									
										24
									
								
								src/main.c
									
									
									
									
									
								
							
							
						
						
									
										24
									
								
								src/main.c
									
									
									
									
									
								
							@@ -38,16 +38,10 @@
 | 
				
			|||||||
#include <stats.h>
 | 
					#include <stats.h>
 | 
				
			||||||
#include <thermo.h>
 | 
					#include <thermo.h>
 | 
				
			||||||
#include <pbc.h>
 | 
					#include <pbc.h>
 | 
				
			||||||
 | 
					#include <timers.h>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#define HLINE "----------------------------------------------------------------------------\n"
 | 
					#define HLINE "----------------------------------------------------------------------------\n"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
typedef enum {
 | 
					 | 
				
			||||||
    TOTAL = 0,
 | 
					 | 
				
			||||||
    NEIGH,
 | 
					 | 
				
			||||||
    FORCE,
 | 
					 | 
				
			||||||
    NUMTIMER
 | 
					 | 
				
			||||||
} timertype;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
extern double computeForce(Parameter*, Atom*, Neighbor*, Stats*, int, int);
 | 
					extern double computeForce(Parameter*, Atom*, Neighbor*, Stats*, int, int);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
void init(Parameter *param)
 | 
					void init(Parameter *param)
 | 
				
			||||||
@@ -257,21 +251,7 @@ int main (int argc, char** argv)
 | 
				
			|||||||
    printf(HLINE);
 | 
					    printf(HLINE);
 | 
				
			||||||
    printf("Performance: %.2f million atom updates per second\n",
 | 
					    printf("Performance: %.2f million atom updates per second\n",
 | 
				
			||||||
            1e-6 * (double) atom.Natoms * param.ntimes / timer[TOTAL]);
 | 
					            1e-6 * (double) atom.Natoms * param.ntimes / timer[TOTAL]);
 | 
				
			||||||
 | 
					    displayStatistics(&atom, &param, &stats, timer);
 | 
				
			||||||
#ifdef COMPUTE_STATS
 | 
					 | 
				
			||||||
    double force_useful_volume = 1e-9 * ( (double)(atom.Nlocal * (param.ntimes + 1)) * (sizeof(MD_FLOAT) * 6 + sizeof(int)) +
 | 
					 | 
				
			||||||
                                          (double)(stats.total_force_neighs) * (sizeof(MD_FLOAT) * 3 + sizeof(int)) );
 | 
					 | 
				
			||||||
#ifdef EXPLICIT_TYPES
 | 
					 | 
				
			||||||
    force_useful_volume += 1e-9 * (double)((atom.Nlocal * (param.ntimes + 1)) + stats.total_force_neighs) * sizeof(int);
 | 
					 | 
				
			||||||
#endif
 | 
					 | 
				
			||||||
    printf("Statistics:\n");
 | 
					 | 
				
			||||||
    printf("\tVector width: %d, Processor frequency: %.4f GHz\n", VECTOR_WIDTH, param.proc_freq);
 | 
					 | 
				
			||||||
    printf("\tTotal number of computed pair interactions: %lld\n", stats.total_force_neighs);
 | 
					 | 
				
			||||||
    printf("\tTotal number of most SIMD iterations: %lld\n", stats.total_force_iters);
 | 
					 | 
				
			||||||
    printf("\tUseful read data volume for force computation: %.2fGB\n", force_useful_volume);
 | 
					 | 
				
			||||||
    printf("\tCycles/SIMD iteration: %.4f\n", timer[FORCE] * param.proc_freq * 1e9 / stats.total_force_iters);
 | 
					 | 
				
			||||||
#endif
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    LIKWID_MARKER_CLOSE;
 | 
					    LIKWID_MARKER_CLOSE;
 | 
				
			||||||
    return EXIT_SUCCESS;
 | 
					    return EXIT_SUCCESS;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 
 | 
				
			|||||||
							
								
								
									
										21
									
								
								src/stats.c
									
									
									
									
									
								
							
							
						
						
									
										21
									
								
								src/stats.c
									
									
									
									
									
								
							@@ -1,6 +1,27 @@
 | 
				
			|||||||
 | 
					#include <stdio.h>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#include <atom.h>
 | 
				
			||||||
 | 
					#include <parameter.h>
 | 
				
			||||||
#include <stats.h>
 | 
					#include <stats.h>
 | 
				
			||||||
 | 
					#include <timers.h>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
void initStats(Stats *s) {
 | 
					void initStats(Stats *s) {
 | 
				
			||||||
    s->total_force_neighs = 0;
 | 
					    s->total_force_neighs = 0;
 | 
				
			||||||
    s->total_force_iters = 0;
 | 
					    s->total_force_iters = 0;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					void displayStatistics(Atom *atom, Parameter *param, Stats *stats, double *timer) {
 | 
				
			||||||
 | 
					#ifdef COMPUTE_STATS
 | 
				
			||||||
 | 
					    double force_useful_volume = 1e-9 * ( (double)(atom->Nlocal * (param->ntimes + 1)) * (sizeof(MD_FLOAT) * 6 + sizeof(int)) +
 | 
				
			||||||
 | 
					                                          (double)(stats->total_force_neighs) * (sizeof(MD_FLOAT) * 3 + sizeof(int)) );
 | 
				
			||||||
 | 
					#ifdef EXPLICIT_TYPES
 | 
				
			||||||
 | 
					    force_useful_volume += 1e-9 * (double)((atom.Nlocal * (param.ntimes + 1)) + stats.total_force_neighs) * sizeof(int);
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					    printf("Statistics:\n");
 | 
				
			||||||
 | 
					    printf("\tVector width: %d, Processor frequency: %.4f GHz\n", VECTOR_WIDTH, param->proc_freq);
 | 
				
			||||||
 | 
					    printf("\tTotal number of computed pair interactions: %lld\n", stats->total_force_neighs);
 | 
				
			||||||
 | 
					    printf("\tTotal number of most SIMD iterations: %lld\n", stats->total_force_iters);
 | 
				
			||||||
 | 
					    printf("\tUseful read data volume for force computation: %.2fGB\n", force_useful_volume);
 | 
				
			||||||
 | 
					    printf("\tCycles/SIMD iteration: %.4f\n", timer[FORCE] * param->proc_freq * 1e9 / stats->total_force_iters);
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user