diff --git a/Makefile b/Makefile index bc0e037..2e05f9a 100644 --- a/Makefile +++ b/Makefile @@ -68,5 +68,5 @@ clean: distclean: clean @echo "===> DIST CLEAN" - @rm -f $(TARGET) + @rm -f $(TARGET)* @rm -f tags diff --git a/config.mk b/config.mk index a67dd4d..04b48dd 100644 --- a/config.mk +++ b/config.mk @@ -1,9 +1,9 @@ # Supported: GCC, CLANG, ICC -TAG ?= GCC +TAG ?= ICC # SP or DP DATA_TYPE ?= DP # AOS or SOA -DATA_LAYOUT ?= SOA +DATA_LAYOUT ?= AOS #Feature options OPTIONS += -DALIGNMENT=64 diff --git a/include_GCC.mk b/include_GCC.mk index d29cca9..954ac5d 100644 --- a/include_GCC.mk +++ b/include_GCC.mk @@ -10,6 +10,6 @@ ANSI_CFLAGS += -Wextra CFLAGS = -O3 -march=znver1 -ffast-math -funroll-loops # -fopenmp ASFLAGS = -masm=intel LFLAGS = -DEFINES = -D_GNU_SOURCE -INCLUDES = -LIBS = -lm +DEFINES = -D_GNU_SOURCE -DLIKWID_PERFMON +INCLUDES = $(LIKWID_INC) +LIBS = -lm $(LIKWID_LIB) -llikwid diff --git a/include_ICC.mk b/include_ICC.mk index ddb61ac..37b017f 100644 --- a/include_ICC.mk +++ b/include_ICC.mk @@ -3,15 +3,15 @@ LINKER = $(CC) OPENMP = #-qopenmp PROFILE = #-profile-functions -g -pg -# OPTS = -fast -xCORE-AVX512 -qopt-zmm-usage=high $(PROFILE) + OPTS = -Ofast -xCORE-AVX512 -qopt-zmm-usage=high $(PROFILE) #OPTS = -fast -xCORE-AVX2 $(PROFILE) #OPTS = -fast -xAVX $(PROFILE) #OPTS = -fast -xSSE4.2 $(PROFILE) #OPTS = -fast -no-vec $(PROFILE) -OPTS = -fast -xHost $(PROFILE) +#OPTS = -fast -xHost $(PROFILE) CFLAGS = $(PROFILE) -restrict $(OPENMP) $(OPTS) -ASFLAGS = -masm=intel +ASFLAGS = #-masm=intel LFLAGS = $(PROFILE) $(OPTS) $(OPENMP) -DEFINES = -D_GNU_SOURCE # -DALIGNMENT=64 -DLIKWID_PERFMON -DPRECISION=1 +DEFINES = -D_GNU_SOURCE #-DLIKWID_PERFMON INCLUDES = #$(LIKWID_INC) LIBS = -lm #$(LIKWID_LIB) -llikwid diff --git a/src/force.c b/src/force.c index 7ce9d8a..24a433f 100644 --- a/src/force.c +++ b/src/force.c @@ -39,9 +39,8 @@ double computeForce( MD_FLOAT sigma6 = param->sigma6; MD_FLOAT epsilon = param->epsilon; MD_FLOAT* fx = atom->fx; MD_FLOAT* fy = 
atom->fy; MD_FLOAT* fz = atom->fz; - MD_FLOAT S, E; + double S, E; - S = getTimeStamp(); for(int i = 0; i < Nlocal; i++) { fx[i] = 0.0; fy[i] = 0.0; @@ -49,7 +48,7 @@ double computeForce( } if(profile) { - LIKWID_MARKER_START("force"); + // LIKWID_MARKER_START("force"); } #pragma omp parallel for @@ -64,6 +63,8 @@ double computeForce( MD_FLOAT fiy = 0; MD_FLOAT fiz = 0; +// printf("%d: %d\n", i, numneighs); + for(int k = 0; k < numneighs; k++) { int j = neighs[k]; MD_FLOAT delx = xtmp - atom_x(j); @@ -87,9 +88,8 @@ double computeForce( } if(profile) { - LIKWID_MARKER_STOP("force"); + // LIKWID_MARKER_STOP("force"); } - E = getTimeStamp(); - return E-S; + return 0.0; } diff --git a/src/main-stub.c b/src/main-stub.c index f2bf8fb..6178584 100644 --- a/src/main-stub.c +++ b/src/main-stub.c @@ -158,11 +158,17 @@ int main(int argc, const char *argv[]) { } } - const double estim_volume = (double)(atom->Nlocal * 6 * sizeof(MD_FLOAT) + (atoms_per_unit_cell - 1 + 2) * sizeof(int)) / 1000.0; + const double estim_volume = (double) + (atom->Nlocal * 6 * sizeof(MD_FLOAT) + + atom->Nlocal * (atoms_per_unit_cell - 1 + 2) * sizeof(int)) / 1000.0; printf("System size (unit cells): %dx%dx%d\n", param.nx, param.ny, param.nz); printf("Atoms per unit cell: %d\n", atoms_per_unit_cell); printf("Total number of atoms: %d\n", atom->Nlocal); - printf("Estimated memory volume (kB): %.4f\n", estim_volume); + printf("Estimated total data volume (kB): %.4f\n", estim_volume ); + printf("Estimated atom data volume (kB): %.4f\n", + (double)(atom->Nlocal * 3 * sizeof(MD_FLOAT) / 1000.0)); + printf("Estimated neighborlist data volume (kB): %.4f\n", + (double)(atom->Nlocal * (atoms_per_unit_cell - 1 + 2) * sizeof(int)) / 1000.0); DEBUG("Initializing neighbor lists...\n"); initNeighbor(&neighbor, &param); @@ -173,12 +179,18 @@ int main(int argc, const char *argv[]) { DEBUG("Computing forces...\n"); computeForce(&param, atom, &neighbor, 0); - double T_accum = 0.0; + double S, E; + S = getTimeStamp(); + 
LIKWID_MARKER_START("force"); for(int i = 0; i < param.ntimes; i++) { - T_accum += computeForce(&param, atom, &neighbor, 1); + computeForce(&param, atom, &neighbor, 1); } + LIKWID_MARKER_STOP("force"); + E = getTimeStamp(); + double T_accum = E-S; - printf("Total time: %.4f, Time/force: %.4f\n", T_accum, T_accum / param.ntimes); + printf("Total time: %.4f, Mega atom updates/s: %.4f\n", + T_accum, atom->Nlocal * param.ntimes/T_accum/1.E6); LIKWID_MARKER_CLOSE; return EXIT_SUCCESS; }