commit
1a18341d84
6
.gitignore
vendored
6
.gitignore
vendored
@ -50,3 +50,9 @@ modules.order
|
|||||||
Module.symvers
|
Module.symvers
|
||||||
Mkfile.old
|
Mkfile.old
|
||||||
dkms.conf
|
dkms.conf
|
||||||
|
|
||||||
|
# Build directories and executables
|
||||||
|
GCC/
|
||||||
|
ICC/
|
||||||
|
MDBench-GCC*
|
||||||
|
MDBench-ICC*
|
||||||
|
63
Makefile
63
Makefile
@ -2,12 +2,14 @@
|
|||||||
TARGET = MDBench-$(TAG)
|
TARGET = MDBench-$(TAG)
|
||||||
BUILD_DIR = ./$(TAG)
|
BUILD_DIR = ./$(TAG)
|
||||||
SRC_DIR = ./src
|
SRC_DIR = ./src
|
||||||
|
ASM_DIR = ./asm
|
||||||
MAKE_DIR = ./
|
MAKE_DIR = ./
|
||||||
Q ?= @
|
Q ?= @
|
||||||
|
|
||||||
#DO NOT EDIT BELOW
|
#DO NOT EDIT BELOW
|
||||||
include $(MAKE_DIR)/config.mk
|
include $(MAKE_DIR)/config.mk
|
||||||
include $(MAKE_DIR)/include_$(TAG).mk
|
include $(MAKE_DIR)/include_$(TAG).mk
|
||||||
|
include $(MAKE_DIR)/include_LIKWID.mk
|
||||||
INCLUDES += -I./src/includes
|
INCLUDES += -I./src/includes
|
||||||
|
|
||||||
ifeq ($(strip $(DATA_LAYOUT)),AOS)
|
ifeq ($(strip $(DATA_LAYOUT)),AOS)
|
||||||
@ -19,11 +21,23 @@ else
|
|||||||
DEFINES += -DPRECISION=2
|
DEFINES += -DPRECISION=2
|
||||||
endif
|
endif
|
||||||
|
|
||||||
VPATH = $(SRC_DIR)
|
ifneq ($(INTERNAL_LOOP_NTIMES),)
|
||||||
|
DEFINES += -DINTERNAL_LOOP_NTIMES=$(INTERNAL_LOOP_NTIMES)
|
||||||
|
endif
|
||||||
|
|
||||||
|
ifneq ($(EXPLICIT_TYPES),)
|
||||||
|
DEFINES += -DEXPLICIT_TYPES
|
||||||
|
endif
|
||||||
|
|
||||||
|
VPATH = $(SRC_DIR) $(ASM_DIR)
|
||||||
ASM = $(patsubst $(SRC_DIR)/%.c, $(BUILD_DIR)/%.s,$(wildcard $(SRC_DIR)/*.c))
|
ASM = $(patsubst $(SRC_DIR)/%.c, $(BUILD_DIR)/%.s,$(wildcard $(SRC_DIR)/*.c))
|
||||||
OBJ = $(filter-out $(BUILD_DIR)/main%,$(patsubst $(SRC_DIR)/%.c, $(BUILD_DIR)/%.o,$(wildcard $(SRC_DIR)/*.c)))
|
OVERWRITE:= $(patsubst $(ASM_DIR)/%-new.s, $(BUILD_DIR)/%.o,$(wildcard $(ASM_DIR)/*-new.s))
|
||||||
|
OBJ = $(filter-out $(BUILD_DIR)/main% $(OVERWRITE),$(patsubst $(SRC_DIR)/%.c, $(BUILD_DIR)/%.o,$(wildcard $(SRC_DIR)/*.c)))
|
||||||
|
OBJ += $(patsubst $(ASM_DIR)/%.s, $(BUILD_DIR)/%.o,$(wildcard $(ASM_DIR)/*.s))
|
||||||
CPPFLAGS := $(CPPFLAGS) $(DEFINES) $(OPTIONS) $(INCLUDES)
|
CPPFLAGS := $(CPPFLAGS) $(DEFINES) $(OPTIONS) $(INCLUDES)
|
||||||
|
|
||||||
|
# $(warning $(OBJ))
|
||||||
|
|
||||||
ifneq ($(VARIANT),)
|
ifneq ($(VARIANT),)
|
||||||
.DEFAULT_GOAL := ${TARGET}-$(VARIANT)
|
.DEFAULT_GOAL := ${TARGET}-$(VARIANT)
|
||||||
endif
|
endif
|
||||||
@ -36,37 +50,42 @@ ${TARGET}-%: $(BUILD_DIR) $(OBJ) $(SRC_DIR)/main-%.c
|
|||||||
@echo "===> LINKING $(TARGET)-$* "
|
@echo "===> LINKING $(TARGET)-$* "
|
||||||
$(Q)${LINKER} $(CPPFLAGS) ${LFLAGS} -o $(TARGET)-$* $(SRC_DIR)/main-$*.c $(OBJ) $(LIBS)
|
$(Q)${LINKER} $(CPPFLAGS) ${LFLAGS} -o $(TARGET)-$* $(SRC_DIR)/main-$*.c $(OBJ) $(LIBS)
|
||||||
|
|
||||||
asm: $(BUILD_DIR) $(ASM)
|
|
||||||
|
|
||||||
$(BUILD_DIR)/%.o: %.c
|
$(BUILD_DIR)/%.o: %.c
|
||||||
@echo "===> COMPILE $@"
|
$(info ===> COMPILE $@)
|
||||||
$(Q)$(CC) -c $(CPPFLAGS) $(CFLAGS) $< -o $@
|
$(Q)$(CC) -c $(CPPFLAGS) $(CFLAGS) $< -o $@
|
||||||
$(Q)$(CC) $(CPPFLAGS) -MT $(@:.d=.o) -MM $< > $(BUILD_DIR)/$*.d
|
$(Q)$(CC) $(CPPFLAGS) -MT $@ -MM $< > $(BUILD_DIR)/$*.d
|
||||||
|
|
||||||
$(BUILD_DIR)/%.s: %.c
|
$(BUILD_DIR)/%.s: %.c
|
||||||
@echo "===> GENERATE ASM $@"
|
$(info ===> GENERATE ASM $@)
|
||||||
$(Q)$(CC) -S $(ASFLAGS) $(CPPFLAGS) $(CFLAGS) $< -o $@
|
$(Q)$(CC) -S $(CPPFLAGS) $(CFLAGS) $< -o $@
|
||||||
|
|
||||||
tags:
|
$(BUILD_DIR)/%.o: %.s
|
||||||
@echo "===> GENERATE TAGS"
|
$(info ===> ASSEMBLE $@)
|
||||||
$(Q)ctags -R
|
$(Q)$(AS) $(ASFLAGS) $< -o $@
|
||||||
|
|
||||||
|
.PHONY: clean distclean tags info asm
|
||||||
$(BUILD_DIR):
|
|
||||||
@mkdir $(BUILD_DIR)
|
|
||||||
|
|
||||||
ifeq ($(findstring $(MAKECMDGOALS),clean),)
|
|
||||||
-include $(OBJ:.o=.d)
|
|
||||||
endif
|
|
||||||
|
|
||||||
.PHONY: clean distclean
|
|
||||||
|
|
||||||
clean:
|
clean:
|
||||||
@echo "===> CLEAN"
|
$(info ===> CLEAN)
|
||||||
@rm -rf $(BUILD_DIR)
|
@rm -rf $(BUILD_DIR)
|
||||||
@rm -f tags
|
@rm -f tags
|
||||||
|
|
||||||
distclean: clean
|
distclean: clean
|
||||||
@echo "===> DIST CLEAN"
|
$(info ===> DIST CLEAN)
|
||||||
@rm -f $(TARGET)*
|
@rm -f $(TARGET)*
|
||||||
@rm -f tags
|
@rm -f tags
|
||||||
|
|
||||||
|
info:
|
||||||
|
$(info $(CFLAGS))
|
||||||
|
$(Q)$(CC) $(VERSION)
|
||||||
|
|
||||||
|
asm: $(BUILD_DIR) $(ASM)
|
||||||
|
|
||||||
|
tags:
|
||||||
|
$(info ===> GENERATE TAGS)
|
||||||
|
$(Q)ctags -R
|
||||||
|
|
||||||
|
$(BUILD_DIR):
|
||||||
|
@mkdir $(BUILD_DIR)
|
||||||
|
|
||||||
|
-include $(OBJ:.o=.d)
|
||||||
|
0
asm/.gitkeep
Normal file
0
asm/.gitkeep
Normal file
@ -1,9 +1,11 @@
|
|||||||
# Supported: GCC, CLANG, ICC
|
# Supported: GCC, CLANG, ICC
|
||||||
TAG ?= ICC
|
TAG ?= ICC
|
||||||
|
ENABLE_LIKWID ?= false
|
||||||
# SP or DP
|
# SP or DP
|
||||||
DATA_TYPE ?= DP
|
DATA_TYPE ?= DP
|
||||||
# AOS or SOA
|
# AOS or SOA
|
||||||
DATA_LAYOUT ?= AOS
|
DATA_LAYOUT ?= AOS
|
||||||
|
|
||||||
#Feature options
|
#Feature options
|
||||||
OPTIONS += -DALIGNMENT=64
|
OPTIONS = -DALIGNMENT=64
|
||||||
|
#OPTIONS += More options
|
||||||
|
10
include_LIKWID.mk
Normal file
10
include_LIKWID.mk
Normal file
@ -0,0 +1,10 @@
|
|||||||
|
LIKWID_INC ?= -I/usr/local/include
|
||||||
|
LIKWID_DEFINES ?= -DLIKWID_PERFMON
|
||||||
|
LIKWID_LIB ?= -L/usr/local/lib
|
||||||
|
|
||||||
|
ifeq ($(strip $(ENABLE_LIKWID)),true)
|
||||||
|
INCLUDES += ${LIKWID_INC}
|
||||||
|
DEFINES += ${LIKWID_DEFINES}
|
||||||
|
LIBS += -llikwid
|
||||||
|
LFLAGS += ${LIKWID_LIB}
|
||||||
|
endif
|
54
scripts/run_stub.sh
Normal file
54
scripts/run_stub.sh
Normal file
@ -0,0 +1,54 @@
|
|||||||
|
#!/bin/bash
|
||||||
|
|
||||||
|
while getopts "a:f:n:o:r:x:y:z:" flag; do
|
||||||
|
case "${flag}" in
|
||||||
|
a) atoms_per_unit_cell=${OPTARG};;
|
||||||
|
f) frequency=${OPTARG};;
|
||||||
|
n) timesteps=${OPTARG};;
|
||||||
|
o) output_file=${OPTARG};;
|
||||||
|
r) nruns=${OPTARG};;
|
||||||
|
x) nx=${OPTARG};;
|
||||||
|
y) ny=${OPTARG};;
|
||||||
|
z) nz=${OPTARG};;
|
||||||
|
esac
|
||||||
|
done
|
||||||
|
|
||||||
|
EXEC="../MDBench-ICC-stub"
|
||||||
|
ATOMS_PER_UNIT_CELL="${atoms_per_unit_cell:-8}"
|
||||||
|
FREQUENCY="${frequency:-0.0}"
|
||||||
|
TIMESTEPS="${timesteps:-200}"
|
||||||
|
OUTPUT_FILE="${output_file:-run_results.txt}"
|
||||||
|
NRUNS="${nruns:-3}"
|
||||||
|
NX="${nx:-4}"
|
||||||
|
NY="${ny:-4}"
|
||||||
|
NZ="${nz:-2}"
|
||||||
|
|
||||||
|
for timesteps in ${TIMESTEPS}; do
|
||||||
|
for atoms_per_unit_cell in ${ATOMS_PER_UNIT_CELL}; do
|
||||||
|
for nx in ${NX}; do
|
||||||
|
for ny in ${NY}; do
|
||||||
|
for nz in ${NZ}; do
|
||||||
|
best_perf=
|
||||||
|
best_output="invalid"
|
||||||
|
for nruns in ${NRUNS}; do
|
||||||
|
output=$(
|
||||||
|
./${EXEC} -f ${FREQUENCY} -n ${timesteps} -na ${atoms_per_unit_cell} -nx ${nx} -ny ${ny} -nz ${nz} -csv |
|
||||||
|
grep -v steps |
|
||||||
|
grep -iv resize
|
||||||
|
)
|
||||||
|
perf=$(echo $output | cut -d',' -f8)
|
||||||
|
if [ -z "$best_perf" ]; then
|
||||||
|
best_perf="$perf"
|
||||||
|
best_output="$output"
|
||||||
|
elif (( $(echo "$perf > 0.0 && $perf < $best_perf" | bc -l) )); then
|
||||||
|
best_perf="$perf"
|
||||||
|
best_output="$output"
|
||||||
|
fi
|
||||||
|
done
|
||||||
|
|
||||||
|
echo "${best_output}" | tee -a "${OUTPUT_FILE}"
|
||||||
|
done
|
||||||
|
done
|
||||||
|
done
|
||||||
|
done
|
||||||
|
done
|
29
src/atom.c
29
src/atom.c
@ -41,6 +41,14 @@ void initAtom(Atom *atom)
|
|||||||
atom->Nlocal = 0;
|
atom->Nlocal = 0;
|
||||||
atom->Nghost = 0;
|
atom->Nghost = 0;
|
||||||
atom->Nmax = 0;
|
atom->Nmax = 0;
|
||||||
|
#ifdef EXPLICIT_TYPES
|
||||||
|
atom->type = NULL;
|
||||||
|
atom->ntypes = 0;
|
||||||
|
atom->epsilon = NULL;
|
||||||
|
atom->sigma6 = NULL;
|
||||||
|
atom->cutforcesq = NULL;
|
||||||
|
atom->cutneighsq = NULL;
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
void createAtom(Atom *atom, Parameter *param)
|
void createAtom(Atom *atom, Parameter *param)
|
||||||
@ -50,6 +58,21 @@ void createAtom(Atom *atom, Parameter *param)
|
|||||||
MD_FLOAT zlo = 0.0; MD_FLOAT zhi = param->zprd;
|
MD_FLOAT zlo = 0.0; MD_FLOAT zhi = param->zprd;
|
||||||
atom->Natoms = 4 * param->nx * param->ny * param->nz;
|
atom->Natoms = 4 * param->nx * param->ny * param->nz;
|
||||||
atom->Nlocal = 0;
|
atom->Nlocal = 0;
|
||||||
|
|
||||||
|
#ifdef EXPLICIT_TYPES
|
||||||
|
atom->ntypes = param->ntypes;
|
||||||
|
atom->epsilon = allocate(ALIGNMENT, atom->ntypes * atom->ntypes * sizeof(MD_FLOAT));
|
||||||
|
atom->sigma6 = allocate(ALIGNMENT, atom->ntypes * atom->ntypes * sizeof(MD_FLOAT));
|
||||||
|
atom->cutforcesq = allocate(ALIGNMENT, atom->ntypes * atom->ntypes * sizeof(MD_FLOAT));
|
||||||
|
atom->cutneighsq = allocate(ALIGNMENT, atom->ntypes * atom->ntypes * sizeof(MD_FLOAT));
|
||||||
|
for(int i = 0; i < atom->ntypes * atom->ntypes; i++) {
|
||||||
|
atom->epsilon[i] = param->epsilon;
|
||||||
|
atom->sigma6[i] = param->sigma6;
|
||||||
|
atom->cutneighsq[i] = param->cutneigh * param->cutneigh;
|
||||||
|
atom->cutforcesq[i] = param->cutforce * param->cutforce;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
MD_FLOAT alat = pow((4.0 / param->rho), (1.0 / 3.0));
|
MD_FLOAT alat = pow((4.0 / param->rho), (1.0 / 3.0));
|
||||||
int ilo = (int) (xlo / (0.5 * alat) - 1);
|
int ilo = (int) (xlo / (0.5 * alat) - 1);
|
||||||
int ihi = (int) (xhi / (0.5 * alat) + 1);
|
int ihi = (int) (xhi / (0.5 * alat) + 1);
|
||||||
@ -119,6 +142,9 @@ void createAtom(Atom *atom, Parameter *param)
|
|||||||
atom->vx[atom->Nlocal] = vxtmp;
|
atom->vx[atom->Nlocal] = vxtmp;
|
||||||
atom->vy[atom->Nlocal] = vytmp;
|
atom->vy[atom->Nlocal] = vytmp;
|
||||||
atom->vz[atom->Nlocal] = vztmp;
|
atom->vz[atom->Nlocal] = vztmp;
|
||||||
|
#ifdef EXPLICIT_TYPES
|
||||||
|
atom->type[atom->Nlocal] = rand() % atom->ntypes;
|
||||||
|
#endif
|
||||||
atom->Nlocal++;
|
atom->Nlocal++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -151,6 +177,9 @@ void growAtom(Atom *atom)
|
|||||||
atom->fx = (MD_FLOAT*) reallocate(atom->fx, ALIGNMENT, atom->Nmax * sizeof(MD_FLOAT), nold * sizeof(MD_FLOAT));
|
atom->fx = (MD_FLOAT*) reallocate(atom->fx, ALIGNMENT, atom->Nmax * sizeof(MD_FLOAT), nold * sizeof(MD_FLOAT));
|
||||||
atom->fy = (MD_FLOAT*) reallocate(atom->fy, ALIGNMENT, atom->Nmax * sizeof(MD_FLOAT), nold * sizeof(MD_FLOAT));
|
atom->fy = (MD_FLOAT*) reallocate(atom->fy, ALIGNMENT, atom->Nmax * sizeof(MD_FLOAT), nold * sizeof(MD_FLOAT));
|
||||||
atom->fz = (MD_FLOAT*) reallocate(atom->fz, ALIGNMENT, atom->Nmax * sizeof(MD_FLOAT), nold * sizeof(MD_FLOAT));
|
atom->fz = (MD_FLOAT*) reallocate(atom->fz, ALIGNMENT, atom->Nmax * sizeof(MD_FLOAT), nold * sizeof(MD_FLOAT));
|
||||||
|
#ifdef EXPLICIT_TYPES
|
||||||
|
atom->type = (int *) reallocate(atom->type, ALIGNMENT, atom->Nmax * sizeof(int), nold * sizeof(int));
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
41
src/force.c
41
src/force.c
@ -27,19 +27,16 @@
|
|||||||
#include <parameter.h>
|
#include <parameter.h>
|
||||||
#include <atom.h>
|
#include <atom.h>
|
||||||
|
|
||||||
double computeForce(
|
double computeForce(Parameter *param, Atom *atom, Neighbor *neighbor, int ntimes) {
|
||||||
Parameter *param,
|
double S = getTimeStamp();
|
||||||
Atom *atom,
|
|
||||||
Neighbor *neighbor,
|
|
||||||
int profile)
|
|
||||||
{
|
|
||||||
int Nlocal = atom->Nlocal;
|
int Nlocal = atom->Nlocal;
|
||||||
int* neighs;
|
int* neighs;
|
||||||
|
MD_FLOAT* fx = atom->fx; MD_FLOAT* fy = atom->fy; MD_FLOAT* fz = atom->fz;
|
||||||
|
#ifndef EXPLICIT_TYPES
|
||||||
MD_FLOAT cutforcesq = param->cutforce * param->cutforce;
|
MD_FLOAT cutforcesq = param->cutforce * param->cutforce;
|
||||||
MD_FLOAT sigma6 = param->sigma6;
|
MD_FLOAT sigma6 = param->sigma6;
|
||||||
MD_FLOAT epsilon = param->epsilon;
|
MD_FLOAT epsilon = param->epsilon;
|
||||||
MD_FLOAT* fx = atom->fx; MD_FLOAT* fy = atom->fy; MD_FLOAT* fz = atom->fz;
|
#endif
|
||||||
double S, E;
|
|
||||||
|
|
||||||
for(int i = 0; i < Nlocal; i++) {
|
for(int i = 0; i < Nlocal; i++) {
|
||||||
fx[i] = 0.0;
|
fx[i] = 0.0;
|
||||||
@ -47,30 +44,34 @@ double computeForce(
|
|||||||
fz[i] = 0.0;
|
fz[i] = 0.0;
|
||||||
}
|
}
|
||||||
|
|
||||||
if(profile) {
|
#pragma omp parallel for
|
||||||
// LIKWID_MARKER_START("force");
|
|
||||||
}
|
|
||||||
|
|
||||||
#pragma omp parallel for
|
|
||||||
for(int i = 0; i < Nlocal; i++) {
|
for(int i = 0; i < Nlocal; i++) {
|
||||||
neighs = &neighbor->neighbors[i * neighbor->maxneighs];
|
neighs = &neighbor->neighbors[i * neighbor->maxneighs];
|
||||||
int numneighs = neighbor->numneigh[i];
|
int numneighs = neighbor->numneigh[i];
|
||||||
MD_FLOAT xtmp = atom_x(i);
|
MD_FLOAT xtmp = atom_x(i);
|
||||||
MD_FLOAT ytmp = atom_y(i);
|
MD_FLOAT ytmp = atom_y(i);
|
||||||
MD_FLOAT ztmp = atom_z(i);
|
MD_FLOAT ztmp = atom_z(i);
|
||||||
|
|
||||||
MD_FLOAT fix = 0;
|
MD_FLOAT fix = 0;
|
||||||
MD_FLOAT fiy = 0;
|
MD_FLOAT fiy = 0;
|
||||||
MD_FLOAT fiz = 0;
|
MD_FLOAT fiz = 0;
|
||||||
|
#ifdef EXPLICIT_TYPES
|
||||||
|
const int type_i = atom->type[i];
|
||||||
|
#endif
|
||||||
|
|
||||||
// printf("%d: %d\n", i, numneighs);
|
for(int n = 0; n < ntimes; n++) {
|
||||||
|
|
||||||
for(int k = 0; k < numneighs; k++) {
|
for(int k = 0; k < numneighs; k++) {
|
||||||
int j = neighs[k];
|
int j = neighs[k];
|
||||||
MD_FLOAT delx = xtmp - atom_x(j);
|
MD_FLOAT delx = xtmp - atom_x(j);
|
||||||
MD_FLOAT dely = ytmp - atom_y(j);
|
MD_FLOAT dely = ytmp - atom_y(j);
|
||||||
MD_FLOAT delz = ztmp - atom_z(j);
|
MD_FLOAT delz = ztmp - atom_z(j);
|
||||||
MD_FLOAT rsq = delx * delx + dely * dely + delz * delz;
|
MD_FLOAT rsq = delx * delx + dely * dely + delz * delz;
|
||||||
|
#ifdef EXPLICIT_TYPES
|
||||||
|
const int type_j = atom->type[j];
|
||||||
|
const int type_ij = type_i * atom->ntypes + type_j;
|
||||||
|
const MD_FLOAT cutforcesq = atom->cutforcesq[type_ij];
|
||||||
|
const MD_FLOAT sigma6 = atom->sigma6[type_ij];
|
||||||
|
const MD_FLOAT epsilon = atom->epsilon[type_ij];
|
||||||
|
#endif
|
||||||
|
|
||||||
if(rsq < cutforcesq) {
|
if(rsq < cutforcesq) {
|
||||||
MD_FLOAT sr2 = 1.0 / rsq;
|
MD_FLOAT sr2 = 1.0 / rsq;
|
||||||
@ -81,15 +82,13 @@ double computeForce(
|
|||||||
fiz += delz * force;
|
fiz += delz * force;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
fx[i] += fix;
|
fx[i] += fix;
|
||||||
fy[i] += fiy;
|
fy[i] += fiy;
|
||||||
fz[i] += fiz;
|
fz[i] += fiz;
|
||||||
}
|
}
|
||||||
|
|
||||||
if(profile) {
|
double E = getTimeStamp();
|
||||||
// LIKWID_MARKER_STOP("force");
|
return E-S;
|
||||||
}
|
|
||||||
|
|
||||||
return 0.0;
|
|
||||||
}
|
}
|
||||||
|
@ -30,6 +30,14 @@ typedef struct {
|
|||||||
MD_FLOAT *x, *y, *z;
|
MD_FLOAT *x, *y, *z;
|
||||||
MD_FLOAT *vx, *vy, *vz;
|
MD_FLOAT *vx, *vy, *vz;
|
||||||
MD_FLOAT *fx, *fy, *fz;
|
MD_FLOAT *fx, *fy, *fz;
|
||||||
|
#ifdef EXPLICIT_TYPES
|
||||||
|
int *type;
|
||||||
|
int ntypes;
|
||||||
|
MD_FLOAT *epsilon;
|
||||||
|
MD_FLOAT *sigma6;
|
||||||
|
MD_FLOAT *cutforcesq;
|
||||||
|
MD_FLOAT *cutneighsq;
|
||||||
|
#endif
|
||||||
} Atom;
|
} Atom;
|
||||||
|
|
||||||
extern void initAtom(Atom*);
|
extern void initAtom(Atom*);
|
||||||
|
@ -29,12 +29,18 @@
|
|||||||
#define MD_FLOAT double
|
#define MD_FLOAT double
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
// Number of times to compute the most internal loop
|
||||||
|
#ifndef INTERNAL_LOOP_NTIMES
|
||||||
|
#define INTERNAL_LOOP_NTIMES 1
|
||||||
|
#endif
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
MD_FLOAT epsilon;
|
MD_FLOAT epsilon;
|
||||||
MD_FLOAT sigma6;
|
MD_FLOAT sigma6;
|
||||||
MD_FLOAT temp;
|
MD_FLOAT temp;
|
||||||
MD_FLOAT rho;
|
MD_FLOAT rho;
|
||||||
MD_FLOAT mass;
|
MD_FLOAT mass;
|
||||||
|
int ntypes;
|
||||||
int ntimes;
|
int ntimes;
|
||||||
int nstat;
|
int nstat;
|
||||||
int every;
|
int every;
|
||||||
|
149
src/main-stub.c
149
src/main-stub.c
@ -22,6 +22,7 @@ void init(Parameter *param) {
|
|||||||
param->epsilon = 1.0;
|
param->epsilon = 1.0;
|
||||||
param->sigma6 = 1.0;
|
param->sigma6 = 1.0;
|
||||||
param->rho = 0.8442;
|
param->rho = 0.8442;
|
||||||
|
param->ntypes = 4;
|
||||||
param->ntimes = 200;
|
param->ntimes = 200;
|
||||||
param->nx = 4;
|
param->nx = 4;
|
||||||
param->ny = 4;
|
param->ny = 4;
|
||||||
@ -56,6 +57,9 @@ int main(int argc, const char *argv[]) {
|
|||||||
Atom *atom = (Atom *)(&atom_data);
|
Atom *atom = (Atom *)(&atom_data);
|
||||||
Neighbor neighbor;
|
Neighbor neighbor;
|
||||||
Parameter param;
|
Parameter param;
|
||||||
|
int atoms_per_unit_cell = 8;
|
||||||
|
int csv = 0;
|
||||||
|
double freq = 0.0;
|
||||||
|
|
||||||
LIKWID_MARKER_INIT;
|
LIKWID_MARKER_INIT;
|
||||||
LIKWID_MARKER_REGISTER("force");
|
LIKWID_MARKER_REGISTER("force");
|
||||||
@ -84,12 +88,30 @@ int main(int argc, const char *argv[]) {
|
|||||||
param.nz = atoi(argv[++i]);
|
param.nz = atoi(argv[++i]);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
if((strcmp(argv[i], "-na") == 0))
|
||||||
|
{
|
||||||
|
atoms_per_unit_cell = atoi(argv[++i]);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if((strcmp(argv[i], "-f") == 0))
|
||||||
|
{
|
||||||
|
freq = atof(argv[++i]) * 1.E9;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if((strcmp(argv[i], "-csv") == 0))
|
||||||
|
{
|
||||||
|
csv = 1;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
if((strcmp(argv[i], "-h") == 0) || (strcmp(argv[i], "--help") == 0))
|
if((strcmp(argv[i], "-h") == 0) || (strcmp(argv[i], "--help") == 0))
|
||||||
{
|
{
|
||||||
printf("MD Bench: A minimalistic re-implementation of miniMD\n");
|
printf("MD Bench: A minimalistic re-implementation of miniMD\n");
|
||||||
printf(HLINE);
|
printf(HLINE);
|
||||||
printf("-n / --nsteps <int>: set number of timesteps for simulation\n");
|
printf("-n / --nsteps <int>: set number of timesteps for simulation\n");
|
||||||
printf("-nx/-ny/-nz <int>: set linear dimension of systembox in x/y/z direction\n");
|
printf("-nx/-ny/-nz <int>: set linear dimension of systembox in x/y/z direction\n");
|
||||||
|
printf("-na <int>: set number of atoms per unit cell\n");
|
||||||
|
printf("-f <real>: set CPU frequency (GHz) and display average cycles per atom and neighbors\n");
|
||||||
|
printf("-csv: set output as CSV style\n");
|
||||||
printf(HLINE);
|
printf(HLINE);
|
||||||
exit(EXIT_SUCCESS);
|
exit(EXIT_SUCCESS);
|
||||||
}
|
}
|
||||||
@ -102,12 +124,29 @@ int main(int argc, const char *argv[]) {
|
|||||||
DEBUG("Initializing atoms...\n");
|
DEBUG("Initializing atoms...\n");
|
||||||
initAtom(atom);
|
initAtom(atom);
|
||||||
|
|
||||||
DEBUG("Creating atoms...\n");
|
#ifdef EXPLICIT_TYPES
|
||||||
const int atoms_per_unit_cell = 16;
|
atom->ntypes = param.ntypes;
|
||||||
|
atom->epsilon = allocate(ALIGNMENT, atom->ntypes * atom->ntypes * sizeof(MD_FLOAT));
|
||||||
|
atom->sigma6 = allocate(ALIGNMENT, atom->ntypes * atom->ntypes * sizeof(MD_FLOAT));
|
||||||
|
atom->cutforcesq = allocate(ALIGNMENT, atom->ntypes * atom->ntypes * sizeof(MD_FLOAT));
|
||||||
|
atom->cutneighsq = allocate(ALIGNMENT, atom->ntypes * atom->ntypes * sizeof(MD_FLOAT));
|
||||||
|
for(int i = 0; i < atom->ntypes * atom->ntypes; i++) {
|
||||||
|
atom->epsilon[i] = param.epsilon;
|
||||||
|
atom->sigma6[i] = param.sigma6;
|
||||||
|
atom->cutneighsq[i] = param.cutneigh * param.cutneigh;
|
||||||
|
atom->cutforcesq[i] = param.cutforce * param.cutforce;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
DEBUG("Creating atoms...\n");
|
||||||
for(int i = 0; i < param.nx; ++i) {
|
for(int i = 0; i < param.nx; ++i) {
|
||||||
for(int j = 0; j < param.ny; ++j) {
|
for(int j = 0; j < param.ny; ++j) {
|
||||||
for(int k = 0; k < param.nz; ++k) {
|
for(int k = 0; k < param.nz; ++k) {
|
||||||
|
int added_atoms = 0;
|
||||||
|
int fac_x = 1;
|
||||||
|
int fac_y = 1;
|
||||||
|
int fac_z = 1;
|
||||||
|
int fmod = 0;
|
||||||
MD_FLOAT base_x = i * LATTICE_DISTANCE;
|
MD_FLOAT base_x = i * LATTICE_DISTANCE;
|
||||||
MD_FLOAT base_y = j * LATTICE_DISTANCE;
|
MD_FLOAT base_y = j * LATTICE_DISTANCE;
|
||||||
MD_FLOAT base_z = k * LATTICE_DISTANCE;
|
MD_FLOAT base_z = k * LATTICE_DISTANCE;
|
||||||
@ -119,56 +158,47 @@ int main(int argc, const char *argv[]) {
|
|||||||
growAtom(atom);
|
growAtom(atom);
|
||||||
}
|
}
|
||||||
|
|
||||||
if(atoms_per_unit_cell == 4) {
|
while(fac_x * fac_y * fac_z < atoms_per_unit_cell) {
|
||||||
ADD_ATOM(0.0, 0.0, 0.0, vx, vy, vz);
|
if(fmod == 0) { fac_x *= 2; }
|
||||||
ADD_ATOM(1.0, 0.0, 0.0, vx, vy, vz);
|
if(fmod == 1) { fac_y *= 2; }
|
||||||
ADD_ATOM(0.0, 1.0, 0.0, vx, vy, vz);
|
if(fmod == 2) { fac_z *= 2; }
|
||||||
ADD_ATOM(0.0, 0.0, 1.0, vx, vy, vz);
|
fmod = (fmod + 1) % 3;
|
||||||
} else if(atoms_per_unit_cell == 8) {
|
}
|
||||||
ADD_ATOM(0.0, 0.0, 0.0, vx, vy, vz);
|
|
||||||
ADD_ATOM(1.0, 0.0, 0.0, vx, vy, vz);
|
MD_FLOAT offset_x = (fac_x > 1) ? 1.0 / (fac_x - 1) : (int)fac_x;
|
||||||
ADD_ATOM(0.0, 1.0, 0.0, vx, vy, vz);
|
MD_FLOAT offset_y = (fac_y > 1) ? 1.0 / (fac_y - 1) : (int)fac_y;
|
||||||
ADD_ATOM(0.0, 0.0, 1.0, vx, vy, vz);
|
MD_FLOAT offset_z = (fac_z > 1) ? 1.0 / (fac_z - 1) : (int)fac_z;
|
||||||
ADD_ATOM(1.0, 1.0, 0.0, vx, vy, vz);
|
for(int ii = 0; ii < fac_x; ++ii) {
|
||||||
ADD_ATOM(1.0, 0.0, 1.0, vx, vy, vz);
|
for(int jj = 0; jj < fac_y; ++jj) {
|
||||||
ADD_ATOM(0.0, 1.0, 1.0, vx, vy, vz);
|
for(int kk = 0; kk < fac_z; ++kk) {
|
||||||
ADD_ATOM(1.0, 1.0, 1.0, vx, vy, vz);
|
if(added_atoms < atoms_per_unit_cell) {
|
||||||
} else if(atoms_per_unit_cell == 16) {
|
#ifdef EXPLICIT_TYPES
|
||||||
ADD_ATOM(0.0, 0.0, 0.0, vx, vy, vz);
|
atom->type[atom->Nlocal] = rand() % atom->ntypes;
|
||||||
ADD_ATOM(1.0, 0.0, 0.0, vx, vy, vz);
|
#endif
|
||||||
ADD_ATOM(0.0, 1.0, 0.0, vx, vy, vz);
|
ADD_ATOM(ii * offset_x, jj * offset_y, kk * offset_z, vx, vy, vz);
|
||||||
ADD_ATOM(0.0, 0.0, 1.0, vx, vy, vz);
|
added_atoms++;
|
||||||
ADD_ATOM(1.0, 1.0, 0.0, vx, vy, vz);
|
}
|
||||||
ADD_ATOM(1.0, 0.0, 1.0, vx, vy, vz);
|
}
|
||||||
ADD_ATOM(0.0, 1.0, 1.0, vx, vy, vz);
|
}
|
||||||
ADD_ATOM(1.0, 1.0, 1.0, vx, vy, vz);
|
|
||||||
ADD_ATOM(0.5, 0.5, 0.5, vx, vy, vz);
|
|
||||||
ADD_ATOM(1.5, 0.5, 0.5, vx, vy, vz);
|
|
||||||
ADD_ATOM(0.5, 1.5, 0.5, vx, vy, vz);
|
|
||||||
ADD_ATOM(0.5, 0.5, 1.5, vx, vy, vz);
|
|
||||||
ADD_ATOM(1.5, 1.5, 0.5, vx, vy, vz);
|
|
||||||
ADD_ATOM(1.5, 0.5, 1.5, vx, vy, vz);
|
|
||||||
ADD_ATOM(0.5, 1.5, 1.5, vx, vy, vz);
|
|
||||||
ADD_ATOM(1.5, 1.5, 1.5, vx, vy, vz);
|
|
||||||
} else {
|
|
||||||
printf("Invalid number of atoms per unit cell, must be: 4, 8 or 16\n");
|
|
||||||
return EXIT_FAILURE;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
const double estim_volume = (double)
|
const double estim_atom_volume = (double)(atom->Nlocal * 3 * sizeof(MD_FLOAT));
|
||||||
(atom->Nlocal * 6 * sizeof(MD_FLOAT) +
|
const double estim_neighbors_volume = (double)(atom->Nlocal * (atoms_per_unit_cell - 1 + 2) * sizeof(int));
|
||||||
atom->Nlocal * (atoms_per_unit_cell - 1 + 2) * sizeof(int)) / 1000.0;
|
const double estim_volume = (double)(atom->Nlocal * 6 * sizeof(MD_FLOAT) + estim_neighbors_volume);
|
||||||
|
|
||||||
|
if(!csv) {
|
||||||
|
printf("Number of timesteps: %d\n", param.ntimes);
|
||||||
|
printf("Number of times to compute the most internal loop: %d\n", INTERNAL_LOOP_NTIMES);
|
||||||
printf("System size (unit cells): %dx%dx%d\n", param.nx, param.ny, param.nz);
|
printf("System size (unit cells): %dx%dx%d\n", param.nx, param.ny, param.nz);
|
||||||
printf("Atoms per unit cell: %d\n", atoms_per_unit_cell);
|
printf("Atoms per unit cell: %d\n", atoms_per_unit_cell);
|
||||||
printf("Total number of atoms: %d\n", atom->Nlocal);
|
printf("Total number of atoms: %d\n", atom->Nlocal);
|
||||||
printf("Estimated total data volume (kB): %.4f\n", estim_volume );
|
printf("Estimated total data volume (kB): %.4f\n", estim_volume / 1000.0);
|
||||||
printf("Estimated atom data volume (kB): %.4f\n",
|
printf("Estimated atom data volume (kB): %.4f\n", estim_atom_volume / 1000.0);
|
||||||
(double)(atom->Nlocal * 3 * sizeof(MD_FLOAT) / 1000.0));
|
printf("Estimated neighborlist data volume (kB): %.4f\n", estim_neighbors_volume / 1000.0);
|
||||||
printf("Estimated neighborlist data volume (kB): %.4f\n",
|
}
|
||||||
(double)(atom->Nlocal * (atoms_per_unit_cell - 1 + 2) * sizeof(int)) / 1000.0);
|
|
||||||
|
|
||||||
DEBUG("Initializing neighbor lists...\n");
|
DEBUG("Initializing neighbor lists...\n");
|
||||||
initNeighbor(&neighbor, ¶m);
|
initNeighbor(&neighbor, ¶m);
|
||||||
@ -177,20 +207,43 @@ int main(int argc, const char *argv[]) {
|
|||||||
DEBUG("Building neighbor lists...\n");
|
DEBUG("Building neighbor lists...\n");
|
||||||
buildNeighbor(atom, &neighbor);
|
buildNeighbor(atom, &neighbor);
|
||||||
DEBUG("Computing forces...\n");
|
DEBUG("Computing forces...\n");
|
||||||
computeForce(¶m, atom, &neighbor, 0);
|
computeForce(¶m, atom, &neighbor, 1);
|
||||||
|
|
||||||
double S, E;
|
double S, E;
|
||||||
S = getTimeStamp();
|
S = getTimeStamp();
|
||||||
LIKWID_MARKER_START("force");
|
LIKWID_MARKER_START("force");
|
||||||
for(int i = 0; i < param.ntimes; i++) {
|
for(int i = 0; i < param.ntimes; i++) {
|
||||||
computeForce(¶m, atom, &neighbor, 1);
|
computeForce(¶m, atom, &neighbor, INTERNAL_LOOP_NTIMES);
|
||||||
}
|
}
|
||||||
LIKWID_MARKER_STOP("force");
|
LIKWID_MARKER_STOP("force");
|
||||||
E = getTimeStamp();
|
E = getTimeStamp();
|
||||||
double T_accum = E-S;
|
double T_accum = E-S;
|
||||||
|
const double atoms_updates_per_sec = (double)(atom->Nlocal) / T_accum * (double)(param.ntimes * INTERNAL_LOOP_NTIMES);
|
||||||
|
const double cycles_per_atom = T_accum / (double)(atom->Nlocal) / (double)(param.ntimes * INTERNAL_LOOP_NTIMES) * freq;
|
||||||
|
const double cycles_per_neigh = cycles_per_atom / (double)(atoms_per_unit_cell - 1);
|
||||||
|
|
||||||
|
if(!csv) {
|
||||||
|
printf("Total time: %.4f, Mega atom updates/s: %.4f\n", T_accum, atoms_updates_per_sec / 1.E6);
|
||||||
|
if(freq > 0.0) {
|
||||||
|
printf("Cycles per atom: %.4f, Cycles per neighbor: %.4f\n", cycles_per_atom, cycles_per_neigh);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
printf("steps,unit cells,atoms/unit cell,total atoms,total vol.(kB),atoms vol.(kB),neigh vol.(kB),time(s),atom upds/s(M)");
|
||||||
|
if(freq > 0.0) {
|
||||||
|
printf(",cy/atom,cy/neigh");
|
||||||
|
}
|
||||||
|
printf("\n");
|
||||||
|
|
||||||
|
printf("%d,%dx%dx%d,%d,%d,%.4f,%.4f,%.4f,%.4f,%.4f",
|
||||||
|
param.ntimes, param.nx, param.ny, param.nz, atoms_per_unit_cell, atom->Nlocal,
|
||||||
|
estim_volume / 1.E3, estim_atom_volume / 1.E3, estim_neighbors_volume / 1.E3, T_accum, atoms_updates_per_sec / 1.E6);
|
||||||
|
|
||||||
|
if(freq > 0.0) {
|
||||||
|
printf(",%.4f,%.4f", cycles_per_atom, cycles_per_neigh);
|
||||||
|
}
|
||||||
|
printf("\n");
|
||||||
|
}
|
||||||
|
|
||||||
printf("Total time: %.4f, Mega atom updates/s: %.4f\n",
|
|
||||||
T_accum, atom->Nlocal * param.ntimes/T_accum/1.E6);
|
|
||||||
LIKWID_MARKER_CLOSE;
|
LIKWID_MARKER_CLOSE;
|
||||||
return EXIT_SUCCESS;
|
return EXIT_SUCCESS;
|
||||||
}
|
}
|
||||||
|
@ -54,6 +54,7 @@ void init(Parameter *param)
|
|||||||
param->epsilon = 1.0;
|
param->epsilon = 1.0;
|
||||||
param->sigma6 = 1.0;
|
param->sigma6 = 1.0;
|
||||||
param->rho = 0.8442;
|
param->rho = 0.8442;
|
||||||
|
param->ntypes = 4;
|
||||||
param->ntimes = 200;
|
param->ntimes = 200;
|
||||||
param->dt = 0.005;
|
param->dt = 0.005;
|
||||||
param->nx = 32;
|
param->nx = 32;
|
||||||
|
@ -196,7 +196,9 @@ void buildNeighbor(Atom *atom, Neighbor *neighbor)
|
|||||||
MD_FLOAT ytmp = atom_y(i);
|
MD_FLOAT ytmp = atom_y(i);
|
||||||
MD_FLOAT ztmp = atom_z(i);
|
MD_FLOAT ztmp = atom_z(i);
|
||||||
int ibin = coord2bin(xtmp, ytmp, ztmp);
|
int ibin = coord2bin(xtmp, ytmp, ztmp);
|
||||||
|
#ifdef EXPLICIT_TYPES
|
||||||
|
int type_i = atom->type[i];
|
||||||
|
#endif
|
||||||
for(int k = 0; k < nstencil; k++) {
|
for(int k = 0; k < nstencil; k++) {
|
||||||
int jbin = ibin + stencil[k];
|
int jbin = ibin + stencil[k];
|
||||||
int* loc_bin = &bins[jbin * atoms_per_bin];
|
int* loc_bin = &bins[jbin * atoms_per_bin];
|
||||||
@ -213,7 +215,13 @@ void buildNeighbor(Atom *atom, Neighbor *neighbor)
|
|||||||
MD_FLOAT delz = ztmp - atom_z(j);
|
MD_FLOAT delz = ztmp - atom_z(j);
|
||||||
MD_FLOAT rsq = delx * delx + dely * dely + delz * delz;
|
MD_FLOAT rsq = delx * delx + dely * dely + delz * delz;
|
||||||
|
|
||||||
if( rsq <= cutneighsq ) {
|
#ifdef EXPLICIT_TYPES
|
||||||
|
int type_j = atom->type[j];
|
||||||
|
const MD_FLOAT cutoff = atom->cutneighsq[type_i * atom->ntypes + type_j];
|
||||||
|
#else
|
||||||
|
const MD_FLOAT cutoff = cutneighsq;
|
||||||
|
#endif
|
||||||
|
if( rsq <= cutoff ) {
|
||||||
neighptr[n++] = j;
|
neighptr[n++] = j;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user