diff --git a/.gitignore b/.gitignore index c6127b3..52c86c7 100644 --- a/.gitignore +++ b/.gitignore @@ -50,3 +50,9 @@ modules.order Module.symvers Mkfile.old dkms.conf + +# Build directories and executables +GCC/ +ICC/ +MDBench-GCC* +MDBench-ICC* diff --git a/Makefile b/Makefile index 2e05f9a..28d203e 100644 --- a/Makefile +++ b/Makefile @@ -2,12 +2,14 @@ TARGET = MDBench-$(TAG) BUILD_DIR = ./$(TAG) SRC_DIR = ./src +ASM_DIR = ./asm MAKE_DIR = ./ Q ?= @ #DO NOT EDIT BELOW include $(MAKE_DIR)/config.mk include $(MAKE_DIR)/include_$(TAG).mk +include $(MAKE_DIR)/include_LIKWID.mk INCLUDES += -I./src/includes ifeq ($(strip $(DATA_LAYOUT)),AOS) @@ -19,11 +21,23 @@ else DEFINES += -DPRECISION=2 endif -VPATH = $(SRC_DIR) +ifneq ($(INTERNAL_LOOP_NTIMES),) + DEFINES += -DINTERNAL_LOOP_NTIMES=$(INTERNAL_LOOP_NTIMES) +endif + +ifneq ($(EXPLICIT_TYPES),) + DEFINES += -DEXPLICIT_TYPES +endif + +VPATH = $(SRC_DIR) $(ASM_DIR) ASM = $(patsubst $(SRC_DIR)/%.c, $(BUILD_DIR)/%.s,$(wildcard $(SRC_DIR)/*.c)) -OBJ = $(filter-out $(BUILD_DIR)/main%,$(patsubst $(SRC_DIR)/%.c, $(BUILD_DIR)/%.o,$(wildcard $(SRC_DIR)/*.c))) +OVERWRITE:= $(patsubst $(ASM_DIR)/%-new.s, $(BUILD_DIR)/%.o,$(wildcard $(ASM_DIR)/*-new.s)) +OBJ = $(filter-out $(BUILD_DIR)/main% $(OVERWRITE),$(patsubst $(SRC_DIR)/%.c, $(BUILD_DIR)/%.o,$(wildcard $(SRC_DIR)/*.c))) +OBJ += $(patsubst $(ASM_DIR)/%.s, $(BUILD_DIR)/%.o,$(wildcard $(ASM_DIR)/*.s)) CPPFLAGS := $(CPPFLAGS) $(DEFINES) $(OPTIONS) $(INCLUDES) +# $(warning $(OBJ)) + ifneq ($(VARIANT),) .DEFAULT_GOAL := ${TARGET}-$(VARIANT) endif @@ -36,37 +50,42 @@ ${TARGET}-%: $(BUILD_DIR) $(OBJ) $(SRC_DIR)/main-%.c @echo "===> LINKING $(TARGET)-$* " $(Q)${LINKER} $(CPPFLAGS) ${LFLAGS} -o $(TARGET)-$* $(SRC_DIR)/main-$*.c $(OBJ) $(LIBS) -asm: $(BUILD_DIR) $(ASM) - $(BUILD_DIR)/%.o: %.c - @echo "===> COMPILE $@" + $(info ===> COMPILE $@) $(Q)$(CC) -c $(CPPFLAGS) $(CFLAGS) $< -o $@ - $(Q)$(CC) $(CPPFLAGS) -MT $(@:.d=.o) -MM $< > $(BUILD_DIR)/$*.d + $(Q)$(CC) $(CPPFLAGS) -MT $@ -MM $< > $(BUILD_DIR)/$*.d $(BUILD_DIR)/%.s: %.c - @echo "===> GENERATE ASM $@" - $(Q)$(CC) -S $(ASFLAGS) $(CPPFLAGS) $(CFLAGS) $< -o $@ + $(info ===> GENERATE ASM $@) + $(Q)$(CC) -S $(CPPFLAGS) $(CFLAGS) $< -o $@ -tags: - @echo "===> GENERATE TAGS" - $(Q)ctags -R +$(BUILD_DIR)/%.o: %.s + $(info ===> ASSEMBLE $@) + $(Q)$(AS) $(ASFLAGS) $< -o $@ - -$(BUILD_DIR): - @mkdir $(BUILD_DIR) - -ifeq ($(findstring $(MAKECMDGOALS),clean),) --include $(OBJ:.o=.d) -endif - -.PHONY: clean distclean +.PHONY: clean distclean tags info asm clean: - @echo "===> CLEAN" + $(info ===> CLEAN) @rm -rf $(BUILD_DIR) @rm -f tags distclean: clean - @echo "===> DIST CLEAN" + $(info ===> DIST CLEAN) @rm -f $(TARGET)* @rm -f tags + +info: + $(info $(CFLAGS)) + $(Q)$(CC) $(VERSION) + +asm: $(BUILD_DIR) $(ASM) + +tags: + $(info ===> GENERATE TAGS) + $(Q)ctags -R + +$(BUILD_DIR): + @mkdir $(BUILD_DIR) + +-include $(OBJ:.o=.d) diff --git a/asm/.gitkeep b/asm/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/config.mk b/config.mk index 04b48dd..c6b8e36 100644 --- a/config.mk +++ b/config.mk @@ -1,9 +1,11 @@ # Supported: GCC, CLANG, ICC TAG ?= ICC +ENABLE_LIKWID ?= false # SP or DP DATA_TYPE ?= DP # AOS or SOA DATA_LAYOUT ?= AOS #Feature options -OPTIONS += -DALIGNMENT=64 +OPTIONS = -DALIGNMENT=64 +#OPTIONS += More options diff --git a/include_LIKWID.mk b/include_LIKWID.mk new file mode 100644 index 0000000..4ca5456 --- /dev/null +++ b/include_LIKWID.mk @@ -0,0 +1,10 @@ +LIKWID_INC ?= -I/usr/local/include +LIKWID_DEFINES ?= -DLIKWID_PERFMON +LIKWID_LIB ?= -L/usr/local/lib + +ifeq ($(strip $(ENABLE_LIKWID)),true) +INCLUDES += ${LIKWID_INC} +DEFINES += ${LIKWID_DEFINES} +LIBS += -llikwid +LFLAGS += ${LIKWID_LIB} +endif diff --git a/scripts/run_stub.sh b/scripts/run_stub.sh new file mode 100644 index 0000000..5ceea52 --- /dev/null +++ b/scripts/run_stub.sh @@ -0,0 +1,54 @@ +#!/bin/bash + +while getopts "a:f:n:o:r:x:y:z:" flag; do + case "${flag}" in + a) atoms_per_unit_cell=${OPTARG};; + f) frequency=${OPTARG};; + n) timesteps=${OPTARG};; + o) output_file=${OPTARG};; + r) nruns=${OPTARG};; + x) nx=${OPTARG};; + y) ny=${OPTARG};; + z) nz=${OPTARG};; + esac +done + +EXEC="../MDBench-ICC-stub" +ATOMS_PER_UNIT_CELL="${atoms_per_unit_cell:-8}" +FREQUENCY="${frequency:-0.0}" +TIMESTEPS="${timesteps:-200}" +OUTPUT_FILE="${output_file:-run_results.txt}" +NRUNS="${nruns:-3}" +NX="${nx:-4}" +NY="${ny:-4}" +NZ="${nz:-2}" + +for timesteps in ${TIMESTEPS}; do + for atoms_per_unit_cell in ${ATOMS_PER_UNIT_CELL}; do + for nx in ${NX}; do + for ny in ${NY}; do + for nz in ${NZ}; do + best_perf= + best_output="invalid" + for nruns in ${NRUNS}; do + output=$( + ./${EXEC} -f ${FREQUENCY} -n ${timesteps} -na ${atoms_per_unit_cell} -nx ${nx} -ny ${ny} -nz ${nz} -csv | + grep -v steps | + grep -iv resize + ) + perf=$(echo $output | cut -d',' -f8) + if [ -z "$best_perf" ]; then + best_perf="$perf" + best_output="$output" + elif (( $(echo "$perf > 0.0 && $perf < $best_perf" | bc -l) )); then + best_perf="$perf" + best_output="$output" + fi + done + + echo "${best_output}" | tee -a "${OUTPUT_FILE}" + done + done + done + done +done diff --git a/src/atom.c b/src/atom.c index 687de55..4e28d7a 100644 --- a/src/atom.c +++ b/src/atom.c @@ -41,6 +41,14 @@ void initAtom(Atom *atom) atom->Nlocal = 0; atom->Nghost = 0; atom->Nmax = 0; + #ifdef EXPLICIT_TYPES + atom->type = NULL; + atom->ntypes = 0; + atom->epsilon = NULL; + atom->sigma6 = NULL; + atom->cutforcesq = NULL; + atom->cutneighsq = NULL; + #endif } void createAtom(Atom *atom, Parameter *param) @@ -50,6 +58,21 @@ void createAtom(Atom *atom, Parameter *param) MD_FLOAT zlo = 0.0; MD_FLOAT zhi = param->zprd; atom->Natoms = 4 * param->nx * param->ny * param->nz; atom->Nlocal = 0; + + #ifdef EXPLICIT_TYPES + atom->ntypes = param->ntypes; + atom->epsilon = allocate(ALIGNMENT, atom->ntypes * atom->ntypes * sizeof(MD_FLOAT)); + atom->sigma6 = allocate(ALIGNMENT, atom->ntypes * atom->ntypes * sizeof(MD_FLOAT)); + atom->cutforcesq = allocate(ALIGNMENT, atom->ntypes * atom->ntypes * sizeof(MD_FLOAT)); + atom->cutneighsq = allocate(ALIGNMENT, atom->ntypes * atom->ntypes * sizeof(MD_FLOAT)); + for(int i = 0; i < atom->ntypes * atom->ntypes; i++) { + atom->epsilon[i] = param->epsilon; + atom->sigma6[i] = param->sigma6; + atom->cutneighsq[i] = param->cutneigh * param->cutneigh; + atom->cutforcesq[i] = param->cutforce * param->cutforce; + } + #endif + MD_FLOAT alat = pow((4.0 / param->rho), (1.0 / 3.0)); int ilo = (int) (xlo / (0.5 * alat) - 1); int ihi = (int) (xhi / (0.5 * alat) + 1); @@ -119,6 +142,9 @@ void createAtom(Atom *atom, Parameter *param) atom->vx[atom->Nlocal] = vxtmp; atom->vy[atom->Nlocal] = vytmp; atom->vz[atom->Nlocal] = vztmp; + #ifdef EXPLICIT_TYPES + atom->type[atom->Nlocal] = rand() % atom->ntypes; + #endif atom->Nlocal++; } } @@ -151,6 +177,9 @@ void growAtom(Atom *atom) atom->fx = (MD_FLOAT*) reallocate(atom->fx, ALIGNMENT, atom->Nmax * sizeof(MD_FLOAT), nold * sizeof(MD_FLOAT)); atom->fy = (MD_FLOAT*) reallocate(atom->fy, ALIGNMENT, atom->Nmax * sizeof(MD_FLOAT), nold * sizeof(MD_FLOAT)); atom->fz = (MD_FLOAT*) reallocate(atom->fz, ALIGNMENT, atom->Nmax * sizeof(MD_FLOAT), nold * sizeof(MD_FLOAT)); + #ifdef EXPLICIT_TYPES + atom->type = (int *) reallocate(atom->type, ALIGNMENT, atom->Nmax * sizeof(int), nold * sizeof(int)); + #endif } diff --git a/src/force.c b/src/force.c index 24a433f..d1dda58 100644 --- a/src/force.c +++ b/src/force.c @@ -27,19 +27,16 @@ #include #include -double computeForce( - Parameter *param, - Atom *atom, - Neighbor *neighbor, - int profile) -{ +double computeForce(Parameter *param, Atom *atom, Neighbor *neighbor, int ntimes) { + double S = getTimeStamp(); int Nlocal = atom->Nlocal; int* neighs; + MD_FLOAT* fx = atom->fx; MD_FLOAT* fy = atom->fy; MD_FLOAT* fz = atom->fz; + #ifndef EXPLICIT_TYPES MD_FLOAT cutforcesq = param->cutforce * param->cutforce; MD_FLOAT sigma6 = param->sigma6; MD_FLOAT epsilon = param->epsilon; - MD_FLOAT* fx = atom->fx; MD_FLOAT* fy = atom->fy; MD_FLOAT* fz = atom->fz; - double S, E; + #endif for(int i = 0; i < Nlocal; i++) { fx[i] = 0.0; @@ -47,38 +44,43 @@ double computeForce( fz[i] = 0.0; } - if(profile) { - // LIKWID_MARKER_START("force"); - } - -#pragma omp parallel for + #pragma omp parallel for for(int i = 0; i < Nlocal; i++) { neighs = &neighbor->neighbors[i * neighbor->maxneighs]; int numneighs = neighbor->numneigh[i]; MD_FLOAT xtmp = atom_x(i); MD_FLOAT ytmp = atom_y(i); MD_FLOAT ztmp = atom_z(i); - MD_FLOAT fix = 0; MD_FLOAT fiy = 0; MD_FLOAT fiz = 0; + #ifdef EXPLICIT_TYPES + const int type_i = atom->type[i]; + #endif -// printf("%d: %d\n", i, numneighs); + for(int n = 0; n < ntimes; n++) { + for(int k = 0; k < numneighs; k++) { + int j = neighs[k]; + MD_FLOAT delx = xtmp - atom_x(j); + MD_FLOAT dely = ytmp - atom_y(j); + MD_FLOAT delz = ztmp - atom_z(j); + MD_FLOAT rsq = delx * delx + dely * dely + delz * delz; + #ifdef EXPLICIT_TYPES + const int type_j = atom->type[j]; + const int type_ij = type_i * atom->ntypes + type_j; + const MD_FLOAT cutforcesq = atom->cutforcesq[type_ij]; + const MD_FLOAT sigma6 = atom->sigma6[type_ij]; + const MD_FLOAT epsilon = atom->epsilon[type_ij]; + #endif - for(int k = 0; k < numneighs; k++) { - int j = neighs[k]; - MD_FLOAT delx = xtmp - atom_x(j); - MD_FLOAT dely = ytmp - atom_y(j); - MD_FLOAT delz = ztmp - atom_z(j); - MD_FLOAT rsq = delx * delx + dely * dely + delz * delz; - - if(rsq < cutforcesq) { - MD_FLOAT sr2 = 1.0 / rsq; - MD_FLOAT sr6 = sr2 * sr2 * sr2 * sigma6; - MD_FLOAT force = 48.0 * sr6 * (sr6 - 0.5) * sr2 * epsilon; - fix += delx * force; - fiy += dely * force; - fiz += delz * force; + if(rsq < cutforcesq) { + MD_FLOAT sr2 = 1.0 / rsq; + MD_FLOAT sr6 = sr2 * sr2 * sr2 * sigma6; + MD_FLOAT force = 48.0 * sr6 * (sr6 - 0.5) * sr2 * epsilon; + fix += delx * force; + fiy += dely * force; + fiz += delz * force; + } } } @@ -87,9 +89,6 @@ double computeForce( fz[i] += fiz; } - if(profile) { - // LIKWID_MARKER_STOP("force"); - } - - return 0.0; + double E = getTimeStamp(); + return E-S; } diff --git a/src/includes/atom.h b/src/includes/atom.h index e090936..ed1a0f9 100644 --- a/src/includes/atom.h +++ b/src/includes/atom.h @@ -30,6 +30,14 @@ typedef struct { MD_FLOAT *x, *y, *z; MD_FLOAT *vx, *vy, *vz; MD_FLOAT *fx, *fy, *fz; + #ifdef EXPLICIT_TYPES + int *type; + int ntypes; + MD_FLOAT *epsilon; + MD_FLOAT *sigma6; + MD_FLOAT *cutforcesq; + MD_FLOAT *cutneighsq; + #endif } Atom; extern void initAtom(Atom*); diff --git a/src/includes/parameter.h b/src/includes/parameter.h index 3bf8b9f..0ddec95 100644 --- a/src/includes/parameter.h +++ b/src/includes/parameter.h @@ -29,12 +29,18 @@ #define MD_FLOAT double #endif +// Number of times to compute the most internal loop +#ifndef INTERNAL_LOOP_NTIMES +#define INTERNAL_LOOP_NTIMES 1 +#endif + typedef struct { MD_FLOAT epsilon; MD_FLOAT sigma6; MD_FLOAT temp; MD_FLOAT rho; MD_FLOAT mass; + int ntypes; int ntimes; int nstat; int every; diff --git a/src/main-stub.c b/src/main-stub.c index 6178584..e7493b8 100644 --- a/src/main-stub.c +++ b/src/main-stub.c @@ -22,6 +22,7 @@ void init(Parameter *param) { param->epsilon = 1.0; param->sigma6 = 1.0; param->rho = 0.8442; + param->ntypes = 4; param->ntimes = 200; param->nx = 4; param->ny = 4; @@ -56,6 +57,9 @@ int main(int argc, const char *argv[]) { Atom *atom = (Atom *)(&atom_data); Neighbor neighbor; Parameter param; + int atoms_per_unit_cell = 8; + int csv = 0; + double freq = 0.0; LIKWID_MARKER_INIT; LIKWID_MARKER_REGISTER("force"); @@ -84,12 +88,30 @@ int main(int argc, const char *argv[]) { param.nz = atoi(argv[++i]); continue; } + if((strcmp(argv[i], "-na") == 0)) + { + atoms_per_unit_cell = atoi(argv[++i]); + continue; + } + if((strcmp(argv[i], "-f") == 0)) + { + freq = atof(argv[++i]) * 1.E9; + continue; + } + if((strcmp(argv[i], "-csv") == 0)) + { + csv = 1; + continue; + } if((strcmp(argv[i], "-h") == 0) || (strcmp(argv[i], "--help") == 0)) { printf("MD Bench: A minimalistic re-implementation of miniMD\n"); printf(HLINE); printf("-n / --nsteps : set number of timesteps for simulation\n"); printf("-nx/-ny/-nz : set linear dimension of systembox in x/y/z direction\n"); + printf("-na : set number of atoms per unit cell\n"); + printf("-f : set CPU frequency (GHz) and display average cycles per atom and neighbors\n"); + printf("-csv: set output as CSV style\n"); printf(HLINE); exit(EXIT_SUCCESS); } @@ -102,12 +124,29 @@ int main(int argc, const char *argv[]) { DEBUG("Initializing atoms...\n"); initAtom(atom); - DEBUG("Creating atoms...\n"); - const int atoms_per_unit_cell = 16; + #ifdef EXPLICIT_TYPES + atom->ntypes = param.ntypes; + atom->epsilon = allocate(ALIGNMENT, atom->ntypes * atom->ntypes * sizeof(MD_FLOAT)); + atom->sigma6 = allocate(ALIGNMENT, atom->ntypes * atom->ntypes * sizeof(MD_FLOAT)); + atom->cutforcesq = allocate(ALIGNMENT, atom->ntypes * atom->ntypes * sizeof(MD_FLOAT)); + atom->cutneighsq = allocate(ALIGNMENT, atom->ntypes * atom->ntypes * sizeof(MD_FLOAT)); + for(int i = 0; i < atom->ntypes * atom->ntypes; i++) { + atom->epsilon[i] = param.epsilon; + atom->sigma6[i] = param.sigma6; + atom->cutneighsq[i] = param.cutneigh * param.cutneigh; + atom->cutforcesq[i] = param.cutforce * param.cutforce; + } + #endif + DEBUG("Creating atoms...\n"); for(int i = 0; i < param.nx; ++i) { for(int j = 0; j < param.ny; ++j) { for(int k = 0; k < param.nz; ++k) { + int added_atoms = 0; + int fac_x = 1; + int fac_y = 1; + int fac_z = 1; + int fmod = 0; MD_FLOAT base_x = i * LATTICE_DISTANCE; MD_FLOAT base_y = j * LATTICE_DISTANCE; MD_FLOAT base_z = k * LATTICE_DISTANCE; @@ -119,56 +158,47 @@ int main(int argc, const char *argv[]) { growAtom(atom); } - if(atoms_per_unit_cell == 4) { - ADD_ATOM(0.0, 0.0, 0.0, vx, vy, vz); - ADD_ATOM(1.0, 0.0, 0.0, vx, vy, vz); - ADD_ATOM(0.0, 1.0, 0.0, vx, vy, vz); - ADD_ATOM(0.0, 0.0, 1.0, vx, vy, vz); - } else if(atoms_per_unit_cell == 8) { - ADD_ATOM(0.0, 0.0, 0.0, vx, vy, vz); - ADD_ATOM(1.0, 0.0, 0.0, vx, vy, vz); - ADD_ATOM(0.0, 1.0, 0.0, vx, vy, vz); - ADD_ATOM(0.0, 0.0, 1.0, vx, vy, vz); - ADD_ATOM(1.0, 1.0, 0.0, vx, vy, vz); - ADD_ATOM(1.0, 0.0, 1.0, vx, vy, vz); - ADD_ATOM(0.0, 1.0, 1.0, vx, vy, vz); - ADD_ATOM(1.0, 1.0, 1.0, vx, vy, vz); - } else if(atoms_per_unit_cell == 16) { - ADD_ATOM(0.0, 0.0, 0.0, vx, vy, vz); - ADD_ATOM(1.0, 0.0, 0.0, vx, vy, vz); - ADD_ATOM(0.0, 1.0, 0.0, vx, vy, vz); - ADD_ATOM(0.0, 0.0, 1.0, vx, vy, vz); - ADD_ATOM(1.0, 1.0, 0.0, vx, vy, vz); - ADD_ATOM(1.0, 0.0, 1.0, vx, vy, vz); - ADD_ATOM(0.0, 1.0, 1.0, vx, vy, vz); - ADD_ATOM(1.0, 1.0, 1.0, vx, vy, vz); - ADD_ATOM(0.5, 0.5, 0.5, vx, vy, vz); - ADD_ATOM(1.5, 0.5, 0.5, vx, vy, vz); - ADD_ATOM(0.5, 1.5, 0.5, vx, vy, vz); - ADD_ATOM(0.5, 0.5, 1.5, vx, vy, vz); - ADD_ATOM(1.5, 1.5, 0.5, vx, vy, vz); - ADD_ATOM(1.5, 0.5, 1.5, vx, vy, vz); - ADD_ATOM(0.5, 1.5, 1.5, vx, vy, vz); - ADD_ATOM(1.5, 1.5, 1.5, vx, vy, vz); - } else { - printf("Invalid number of atoms per unit cell, must be: 4, 8 or 16\n"); - return EXIT_FAILURE; + while(fac_x * fac_y * fac_z < atoms_per_unit_cell) { + if(fmod == 0) { fac_x *= 2; } + if(fmod == 1) { fac_y *= 2; } + if(fmod == 2) { fac_z *= 2; } + fmod = (fmod + 1) % 3; + } + + MD_FLOAT offset_x = (fac_x > 1) ? 1.0 / (fac_x - 1) : (int)fac_x; + MD_FLOAT offset_y = (fac_y > 1) ? 1.0 / (fac_y - 1) : (int)fac_y; + MD_FLOAT offset_z = (fac_z > 1) ? 1.0 / (fac_z - 1) : (int)fac_z; + for(int ii = 0; ii < fac_x; ++ii) { + for(int jj = 0; jj < fac_y; ++jj) { + for(int kk = 0; kk < fac_z; ++kk) { + if(added_atoms < atoms_per_unit_cell) { + #ifdef EXPLICIT_TYPES + atom->type[atom->Nlocal] = rand() % atom->ntypes; + #endif + ADD_ATOM(ii * offset_x, jj * offset_y, kk * offset_z, vx, vy, vz); + added_atoms++; + } + } + } } } } } - const double estim_volume = (double) - (atom->Nlocal * 6 * sizeof(MD_FLOAT) + - atom->Nlocal * (atoms_per_unit_cell - 1 + 2) * sizeof(int)) / 1000.0; - printf("System size (unit cells): %dx%dx%d\n", param.nx, param.ny, param.nz); - printf("Atoms per unit cell: %d\n", atoms_per_unit_cell); - printf("Total number of atoms: %d\n", atom->Nlocal); - printf("Estimated total data volume (kB): %.4f\n", estim_volume ); - printf("Estimated atom data volume (kB): %.4f\n", - (double)(atom->Nlocal * 3 * sizeof(MD_FLOAT) / 1000.0)); - printf("Estimated neighborlist data volume (kB): %.4f\n", - (double)(atom->Nlocal * (atoms_per_unit_cell - 1 + 2) * sizeof(int)) / 1000.0); + const double estim_atom_volume = (double)(atom->Nlocal * 3 * sizeof(MD_FLOAT)); + const double estim_neighbors_volume = (double)(atom->Nlocal * (atoms_per_unit_cell - 1 + 2) * sizeof(int)); + const double estim_volume = (double)(atom->Nlocal * 6 * sizeof(MD_FLOAT) + estim_neighbors_volume); + + if(!csv) { + printf("Number of timesteps: %d\n", param.ntimes); + printf("Number of times to compute the most internal loop: %d\n", INTERNAL_LOOP_NTIMES); + printf("System size (unit cells): %dx%dx%d\n", param.nx, param.ny, param.nz); + printf("Atoms per unit cell: %d\n", atoms_per_unit_cell); + printf("Total number of atoms: %d\n", atom->Nlocal); + printf("Estimated total data volume (kB): %.4f\n", estim_volume / 1000.0); + printf("Estimated atom data volume (kB): %.4f\n", estim_atom_volume / 1000.0); + printf("Estimated neighborlist data volume (kB): %.4f\n", estim_neighbors_volume / 1000.0); + } DEBUG("Initializing neighbor lists...\n"); initNeighbor(&neighbor, ¶m); @@ -177,20 +207,43 @@ int main(int argc, const char *argv[]) { DEBUG("Building neighbor lists...\n"); buildNeighbor(atom, &neighbor); DEBUG("Computing forces...\n"); - computeForce(¶m, atom, &neighbor, 0); + computeForce(¶m, atom, &neighbor, 1); double S, E; S = getTimeStamp(); LIKWID_MARKER_START("force"); for(int i = 0; i < param.ntimes; i++) { - computeForce(¶m, atom, &neighbor, 1); + computeForce(¶m, atom, &neighbor, INTERNAL_LOOP_NTIMES); } LIKWID_MARKER_STOP("force"); E = getTimeStamp(); double T_accum = E-S; + const double atoms_updates_per_sec = (double)(atom->Nlocal) / T_accum * (double)(param.ntimes * INTERNAL_LOOP_NTIMES); + const double cycles_per_atom = T_accum / (double)(atom->Nlocal) / (double)(param.ntimes * INTERNAL_LOOP_NTIMES) * freq; + const double cycles_per_neigh = cycles_per_atom / (double)(atoms_per_unit_cell - 1); + + if(!csv) { + printf("Total time: %.4f, Mega atom updates/s: %.4f\n", T_accum, atoms_updates_per_sec / 1.E6); + if(freq > 0.0) { + printf("Cycles per atom: %.4f, Cycles per neighbor: %.4f\n", cycles_per_atom, cycles_per_neigh); + } + } else { + printf("steps,unit cells,atoms/unit cell,total atoms,total vol.(kB),atoms vol.(kB),neigh vol.(kB),time(s),atom upds/s(M)"); + if(freq > 0.0) { + printf(",cy/atom,cy/neigh"); + } + printf("\n"); + + printf("%d,%dx%dx%d,%d,%d,%.4f,%.4f,%.4f,%.4f,%.4f", + param.ntimes, param.nx, param.ny, param.nz, atoms_per_unit_cell, atom->Nlocal, + estim_volume / 1.E3, estim_atom_volume / 1.E3, estim_neighbors_volume / 1.E3, T_accum, atoms_updates_per_sec / 1.E6); + + if(freq > 0.0) { + printf(",%.4f,%.4f", cycles_per_atom, cycles_per_neigh); + } + printf("\n"); + } - printf("Total time: %.4f, Mega atom updates/s: %.4f\n", - T_accum, atom->Nlocal * param.ntimes/T_accum/1.E6); LIKWID_MARKER_CLOSE; return EXIT_SUCCESS; } diff --git a/src/main.c b/src/main.c index 564708e..061da97 100644 --- a/src/main.c +++ b/src/main.c @@ -54,6 +54,7 @@ void init(Parameter *param) param->epsilon = 1.0; param->sigma6 = 1.0; param->rho = 0.8442; + param->ntypes = 4; param->ntimes = 200; param->dt = 0.005; param->nx = 32; diff --git a/src/neighbor.c b/src/neighbor.c index 72a6ee1..91c7ab8 100644 --- a/src/neighbor.c +++ b/src/neighbor.c @@ -196,7 +196,9 @@ void buildNeighbor(Atom *atom, Neighbor *neighbor) MD_FLOAT ytmp = atom_y(i); MD_FLOAT ztmp = atom_z(i); int ibin = coord2bin(xtmp, ytmp, ztmp); - + #ifdef EXPLICIT_TYPES + int type_i = atom->type[i]; + #endif for(int k = 0; k < nstencil; k++) { int jbin = ibin + stencil[k]; int* loc_bin = &bins[jbin * atoms_per_bin]; @@ -213,7 +215,13 @@ void buildNeighbor(Atom *atom, Neighbor *neighbor) MD_FLOAT delz = ztmp - atom_z(j); MD_FLOAT rsq = delx * delx + dely * dely + delz * delz; - if( rsq <= cutneighsq ) { + #ifdef EXPLICIT_TYPES + int type_j = atom->type[j]; + const MD_FLOAT cutoff = atom->cutneighsq[type_i * atom->ntypes + type_j]; + #else + const MD_FLOAT cutoff = cutneighsq; + #endif + if( rsq <= cutoff ) { neighptr[n++] = j; } }