Merge pull request #3 from RRZE-HPC/stub

Stub force kernel
This commit is contained in:
Jan Eitzinger 2021-06-11 09:50:05 +02:00 committed by GitHub
commit 1a18341d84
13 changed files with 305 additions and 110 deletions

6
.gitignore vendored
View File

@ -50,3 +50,9 @@ modules.order
Module.symvers Module.symvers
Mkfile.old Mkfile.old
dkms.conf dkms.conf
# Build directories and executables
GCC/
ICC/
MDBench-GCC*
MDBench-ICC*

View File

@ -2,12 +2,14 @@
TARGET = MDBench-$(TAG) TARGET = MDBench-$(TAG)
BUILD_DIR = ./$(TAG) BUILD_DIR = ./$(TAG)
SRC_DIR = ./src SRC_DIR = ./src
ASM_DIR = ./asm
MAKE_DIR = ./ MAKE_DIR = ./
Q ?= @ Q ?= @
#DO NOT EDIT BELOW #DO NOT EDIT BELOW
include $(MAKE_DIR)/config.mk include $(MAKE_DIR)/config.mk
include $(MAKE_DIR)/include_$(TAG).mk include $(MAKE_DIR)/include_$(TAG).mk
include $(MAKE_DIR)/include_LIKWID.mk
INCLUDES += -I./src/includes INCLUDES += -I./src/includes
ifeq ($(strip $(DATA_LAYOUT)),AOS) ifeq ($(strip $(DATA_LAYOUT)),AOS)
@ -19,11 +21,23 @@ else
DEFINES += -DPRECISION=2 DEFINES += -DPRECISION=2
endif endif
VPATH = $(SRC_DIR) ifneq ($(INTERNAL_LOOP_NTIMES),)
DEFINES += -DINTERNAL_LOOP_NTIMES=$(INTERNAL_LOOP_NTIMES)
endif
ifneq ($(EXPLICIT_TYPES),)
DEFINES += -DEXPLICIT_TYPES
endif
VPATH = $(SRC_DIR) $(ASM_DIR)
ASM = $(patsubst $(SRC_DIR)/%.c, $(BUILD_DIR)/%.s,$(wildcard $(SRC_DIR)/*.c)) ASM = $(patsubst $(SRC_DIR)/%.c, $(BUILD_DIR)/%.s,$(wildcard $(SRC_DIR)/*.c))
OBJ = $(filter-out $(BUILD_DIR)/main%,$(patsubst $(SRC_DIR)/%.c, $(BUILD_DIR)/%.o,$(wildcard $(SRC_DIR)/*.c))) OVERWRITE:= $(patsubst $(ASM_DIR)/%-new.s, $(BUILD_DIR)/%.o,$(wildcard $(ASM_DIR)/*-new.s))
OBJ = $(filter-out $(BUILD_DIR)/main% $(OVERWRITE),$(patsubst $(SRC_DIR)/%.c, $(BUILD_DIR)/%.o,$(wildcard $(SRC_DIR)/*.c)))
OBJ += $(patsubst $(ASM_DIR)/%.s, $(BUILD_DIR)/%.o,$(wildcard $(ASM_DIR)/*.s))
CPPFLAGS := $(CPPFLAGS) $(DEFINES) $(OPTIONS) $(INCLUDES) CPPFLAGS := $(CPPFLAGS) $(DEFINES) $(OPTIONS) $(INCLUDES)
# $(warning $(OBJ))
ifneq ($(VARIANT),) ifneq ($(VARIANT),)
.DEFAULT_GOAL := ${TARGET}-$(VARIANT) .DEFAULT_GOAL := ${TARGET}-$(VARIANT)
endif endif
@ -36,37 +50,42 @@ ${TARGET}-%: $(BUILD_DIR) $(OBJ) $(SRC_DIR)/main-%.c
@echo "===> LINKING $(TARGET)-$* " @echo "===> LINKING $(TARGET)-$* "
$(Q)${LINKER} $(CPPFLAGS) ${LFLAGS} -o $(TARGET)-$* $(SRC_DIR)/main-$*.c $(OBJ) $(LIBS) $(Q)${LINKER} $(CPPFLAGS) ${LFLAGS} -o $(TARGET)-$* $(SRC_DIR)/main-$*.c $(OBJ) $(LIBS)
asm: $(BUILD_DIR) $(ASM)
$(BUILD_DIR)/%.o: %.c $(BUILD_DIR)/%.o: %.c
@echo "===> COMPILE $@" $(info ===> COMPILE $@)
$(Q)$(CC) -c $(CPPFLAGS) $(CFLAGS) $< -o $@ $(Q)$(CC) -c $(CPPFLAGS) $(CFLAGS) $< -o $@
$(Q)$(CC) $(CPPFLAGS) -MT $(@:.d=.o) -MM $< > $(BUILD_DIR)/$*.d $(Q)$(CC) $(CPPFLAGS) -MT $@ -MM $< > $(BUILD_DIR)/$*.d
$(BUILD_DIR)/%.s: %.c $(BUILD_DIR)/%.s: %.c
@echo "===> GENERATE ASM $@" $(info ===> GENERATE ASM $@)
$(Q)$(CC) -S $(ASFLAGS) $(CPPFLAGS) $(CFLAGS) $< -o $@ $(Q)$(CC) -S $(CPPFLAGS) $(CFLAGS) $< -o $@
tags: $(BUILD_DIR)/%.o: %.s
@echo "===> GENERATE TAGS" $(info ===> ASSEMBLE $@)
$(Q)ctags -R $(Q)$(AS) $(ASFLAGS) $< -o $@
.PHONY: clean distclean tags info asm
$(BUILD_DIR):
@mkdir $(BUILD_DIR)
ifeq ($(findstring $(MAKECMDGOALS),clean),)
-include $(OBJ:.o=.d)
endif
.PHONY: clean distclean
clean: clean:
@echo "===> CLEAN" $(info ===> CLEAN)
@rm -rf $(BUILD_DIR) @rm -rf $(BUILD_DIR)
@rm -f tags @rm -f tags
distclean: clean distclean: clean
@echo "===> DIST CLEAN" $(info ===> DIST CLEAN)
@rm -f $(TARGET)* @rm -f $(TARGET)*
@rm -f tags @rm -f tags
info:
$(info $(CFLAGS))
$(Q)$(CC) $(VERSION)
asm: $(BUILD_DIR) $(ASM)
tags:
$(info ===> GENERATE TAGS)
$(Q)ctags -R
$(BUILD_DIR):
@mkdir $(BUILD_DIR)
-include $(OBJ:.o=.d)

0
asm/.gitkeep Normal file
View File

View File

@ -1,9 +1,11 @@
# Supported: GCC, CLANG, ICC # Supported: GCC, CLANG, ICC
TAG ?= ICC TAG ?= ICC
ENABLE_LIKWID ?= false
# SP or DP # SP or DP
DATA_TYPE ?= DP DATA_TYPE ?= DP
# AOS or SOA # AOS or SOA
DATA_LAYOUT ?= AOS DATA_LAYOUT ?= AOS
#Feature options #Feature options
OPTIONS += -DALIGNMENT=64 OPTIONS = -DALIGNMENT=64
#OPTIONS += More options

10
include_LIKWID.mk Normal file
View File

@ -0,0 +1,10 @@
LIKWID_INC ?= -I/usr/local/include
LIKWID_DEFINES ?= -DLIKWID_PERFMON
LIKWID_LIB ?= -L/usr/local/lib
ifeq ($(strip $(ENABLE_LIKWID)),true)
INCLUDES += ${LIKWID_INC}
DEFINES += ${LIKWID_DEFINES}
LIBS += -llikwid
LFLAGS += ${LIKWID_LIB}
endif

54
scripts/run_stub.sh Normal file
View File

@ -0,0 +1,54 @@
#!/bin/bash
while getopts "a:f:n:o:r:x:y:z:" flag; do
case "${flag}" in
a) atoms_per_unit_cell=${OPTARG};;
f) frequency=${OPTARG};;
n) timesteps=${OPTARG};;
o) output_file=${OPTARG};;
r) nruns=${OPTARG};;
x) nx=${OPTARG};;
y) ny=${OPTARG};;
z) nz=${OPTARG};;
esac
done
EXEC="../MDBench-ICC-stub"
ATOMS_PER_UNIT_CELL="${atoms_per_unit_cell:-8}"
FREQUENCY="${frequency:-0.0}"
TIMESTEPS="${timesteps:-200}"
OUTPUT_FILE="${output_file:-run_results.txt}"
NRUNS="${nruns:-3}"
NX="${nx:-4}"
NY="${ny:-4}"
NZ="${nz:-2}"
for timesteps in ${TIMESTEPS}; do
for atoms_per_unit_cell in ${ATOMS_PER_UNIT_CELL}; do
for nx in ${NX}; do
for ny in ${NY}; do
for nz in ${NZ}; do
best_perf=
best_output="invalid"
for nruns in ${NRUNS}; do
output=$(
./${EXEC} -f ${FREQUENCY} -n ${timesteps} -na ${atoms_per_unit_cell} -nx ${nx} -ny ${ny} -nz ${nz} -csv |
grep -v steps |
grep -iv resize
)
perf=$(echo $output | cut -d',' -f8)
if [ -z "$best_perf" ]; then
best_perf="$perf"
best_output="$output"
elif (( $(echo "$perf > 0.0 && $perf < $best_perf" | bc -l) )); then
best_perf="$perf"
best_output="$output"
fi
done
echo "${best_output}" | tee -a "${OUTPUT_FILE}"
done
done
done
done
done

View File

@ -41,6 +41,14 @@ void initAtom(Atom *atom)
atom->Nlocal = 0; atom->Nlocal = 0;
atom->Nghost = 0; atom->Nghost = 0;
atom->Nmax = 0; atom->Nmax = 0;
#ifdef EXPLICIT_TYPES
atom->type = NULL;
atom->ntypes = 0;
atom->epsilon = NULL;
atom->sigma6 = NULL;
atom->cutforcesq = NULL;
atom->cutneighsq = NULL;
#endif
} }
void createAtom(Atom *atom, Parameter *param) void createAtom(Atom *atom, Parameter *param)
@ -50,6 +58,21 @@ void createAtom(Atom *atom, Parameter *param)
MD_FLOAT zlo = 0.0; MD_FLOAT zhi = param->zprd; MD_FLOAT zlo = 0.0; MD_FLOAT zhi = param->zprd;
atom->Natoms = 4 * param->nx * param->ny * param->nz; atom->Natoms = 4 * param->nx * param->ny * param->nz;
atom->Nlocal = 0; atom->Nlocal = 0;
#ifdef EXPLICIT_TYPES
atom->ntypes = param->ntypes;
atom->epsilon = allocate(ALIGNMENT, atom->ntypes * atom->ntypes * sizeof(MD_FLOAT));
atom->sigma6 = allocate(ALIGNMENT, atom->ntypes * atom->ntypes * sizeof(MD_FLOAT));
atom->cutforcesq = allocate(ALIGNMENT, atom->ntypes * atom->ntypes * sizeof(MD_FLOAT));
atom->cutneighsq = allocate(ALIGNMENT, atom->ntypes * atom->ntypes * sizeof(MD_FLOAT));
for(int i = 0; i < atom->ntypes * atom->ntypes; i++) {
atom->epsilon[i] = param->epsilon;
atom->sigma6[i] = param->sigma6;
atom->cutneighsq[i] = param->cutneigh * param->cutneigh;
atom->cutforcesq[i] = param->cutforce * param->cutforce;
}
#endif
MD_FLOAT alat = pow((4.0 / param->rho), (1.0 / 3.0)); MD_FLOAT alat = pow((4.0 / param->rho), (1.0 / 3.0));
int ilo = (int) (xlo / (0.5 * alat) - 1); int ilo = (int) (xlo / (0.5 * alat) - 1);
int ihi = (int) (xhi / (0.5 * alat) + 1); int ihi = (int) (xhi / (0.5 * alat) + 1);
@ -119,6 +142,9 @@ void createAtom(Atom *atom, Parameter *param)
atom->vx[atom->Nlocal] = vxtmp; atom->vx[atom->Nlocal] = vxtmp;
atom->vy[atom->Nlocal] = vytmp; atom->vy[atom->Nlocal] = vytmp;
atom->vz[atom->Nlocal] = vztmp; atom->vz[atom->Nlocal] = vztmp;
#ifdef EXPLICIT_TYPES
atom->type[atom->Nlocal] = rand() % atom->ntypes;
#endif
atom->Nlocal++; atom->Nlocal++;
} }
} }
@ -151,6 +177,9 @@ void growAtom(Atom *atom)
atom->fx = (MD_FLOAT*) reallocate(atom->fx, ALIGNMENT, atom->Nmax * sizeof(MD_FLOAT), nold * sizeof(MD_FLOAT)); atom->fx = (MD_FLOAT*) reallocate(atom->fx, ALIGNMENT, atom->Nmax * sizeof(MD_FLOAT), nold * sizeof(MD_FLOAT));
atom->fy = (MD_FLOAT*) reallocate(atom->fy, ALIGNMENT, atom->Nmax * sizeof(MD_FLOAT), nold * sizeof(MD_FLOAT)); atom->fy = (MD_FLOAT*) reallocate(atom->fy, ALIGNMENT, atom->Nmax * sizeof(MD_FLOAT), nold * sizeof(MD_FLOAT));
atom->fz = (MD_FLOAT*) reallocate(atom->fz, ALIGNMENT, atom->Nmax * sizeof(MD_FLOAT), nold * sizeof(MD_FLOAT)); atom->fz = (MD_FLOAT*) reallocate(atom->fz, ALIGNMENT, atom->Nmax * sizeof(MD_FLOAT), nold * sizeof(MD_FLOAT));
#ifdef EXPLICIT_TYPES
atom->type = (int *) reallocate(atom->type, ALIGNMENT, atom->Nmax * sizeof(int), nold * sizeof(int));
#endif
} }

View File

@ -27,19 +27,16 @@
#include <parameter.h> #include <parameter.h>
#include <atom.h> #include <atom.h>
double computeForce( double computeForce(Parameter *param, Atom *atom, Neighbor *neighbor, int ntimes) {
Parameter *param, double S = getTimeStamp();
Atom *atom,
Neighbor *neighbor,
int profile)
{
int Nlocal = atom->Nlocal; int Nlocal = atom->Nlocal;
int* neighs; int* neighs;
MD_FLOAT* fx = atom->fx; MD_FLOAT* fy = atom->fy; MD_FLOAT* fz = atom->fz;
#ifndef EXPLICIT_TYPES
MD_FLOAT cutforcesq = param->cutforce * param->cutforce; MD_FLOAT cutforcesq = param->cutforce * param->cutforce;
MD_FLOAT sigma6 = param->sigma6; MD_FLOAT sigma6 = param->sigma6;
MD_FLOAT epsilon = param->epsilon; MD_FLOAT epsilon = param->epsilon;
MD_FLOAT* fx = atom->fx; MD_FLOAT* fy = atom->fy; MD_FLOAT* fz = atom->fz; #endif
double S, E;
for(int i = 0; i < Nlocal; i++) { for(int i = 0; i < Nlocal; i++) {
fx[i] = 0.0; fx[i] = 0.0;
@ -47,30 +44,34 @@ double computeForce(
fz[i] = 0.0; fz[i] = 0.0;
} }
if(profile) { #pragma omp parallel for
// LIKWID_MARKER_START("force");
}
#pragma omp parallel for
for(int i = 0; i < Nlocal; i++) { for(int i = 0; i < Nlocal; i++) {
neighs = &neighbor->neighbors[i * neighbor->maxneighs]; neighs = &neighbor->neighbors[i * neighbor->maxneighs];
int numneighs = neighbor->numneigh[i]; int numneighs = neighbor->numneigh[i];
MD_FLOAT xtmp = atom_x(i); MD_FLOAT xtmp = atom_x(i);
MD_FLOAT ytmp = atom_y(i); MD_FLOAT ytmp = atom_y(i);
MD_FLOAT ztmp = atom_z(i); MD_FLOAT ztmp = atom_z(i);
MD_FLOAT fix = 0; MD_FLOAT fix = 0;
MD_FLOAT fiy = 0; MD_FLOAT fiy = 0;
MD_FLOAT fiz = 0; MD_FLOAT fiz = 0;
#ifdef EXPLICIT_TYPES
const int type_i = atom->type[i];
#endif
// printf("%d: %d\n", i, numneighs); for(int n = 0; n < ntimes; n++) {
for(int k = 0; k < numneighs; k++) { for(int k = 0; k < numneighs; k++) {
int j = neighs[k]; int j = neighs[k];
MD_FLOAT delx = xtmp - atom_x(j); MD_FLOAT delx = xtmp - atom_x(j);
MD_FLOAT dely = ytmp - atom_y(j); MD_FLOAT dely = ytmp - atom_y(j);
MD_FLOAT delz = ztmp - atom_z(j); MD_FLOAT delz = ztmp - atom_z(j);
MD_FLOAT rsq = delx * delx + dely * dely + delz * delz; MD_FLOAT rsq = delx * delx + dely * dely + delz * delz;
#ifdef EXPLICIT_TYPES
const int type_j = atom->type[j];
const int type_ij = type_i * atom->ntypes + type_j;
const MD_FLOAT cutforcesq = atom->cutforcesq[type_ij];
const MD_FLOAT sigma6 = atom->sigma6[type_ij];
const MD_FLOAT epsilon = atom->epsilon[type_ij];
#endif
if(rsq < cutforcesq) { if(rsq < cutforcesq) {
MD_FLOAT sr2 = 1.0 / rsq; MD_FLOAT sr2 = 1.0 / rsq;
@ -81,15 +82,13 @@ double computeForce(
fiz += delz * force; fiz += delz * force;
} }
} }
}
fx[i] += fix; fx[i] += fix;
fy[i] += fiy; fy[i] += fiy;
fz[i] += fiz; fz[i] += fiz;
} }
if(profile) { double E = getTimeStamp();
// LIKWID_MARKER_STOP("force"); return E-S;
}
return 0.0;
} }

View File

@ -30,6 +30,14 @@ typedef struct {
MD_FLOAT *x, *y, *z; MD_FLOAT *x, *y, *z;
MD_FLOAT *vx, *vy, *vz; MD_FLOAT *vx, *vy, *vz;
MD_FLOAT *fx, *fy, *fz; MD_FLOAT *fx, *fy, *fz;
#ifdef EXPLICIT_TYPES
int *type;
int ntypes;
MD_FLOAT *epsilon;
MD_FLOAT *sigma6;
MD_FLOAT *cutforcesq;
MD_FLOAT *cutneighsq;
#endif
} Atom; } Atom;
extern void initAtom(Atom*); extern void initAtom(Atom*);

View File

@ -29,12 +29,18 @@
#define MD_FLOAT double #define MD_FLOAT double
#endif #endif
// Number of times to compute the most internal loop
#ifndef INTERNAL_LOOP_NTIMES
#define INTERNAL_LOOP_NTIMES 1
#endif
typedef struct { typedef struct {
MD_FLOAT epsilon; MD_FLOAT epsilon;
MD_FLOAT sigma6; MD_FLOAT sigma6;
MD_FLOAT temp; MD_FLOAT temp;
MD_FLOAT rho; MD_FLOAT rho;
MD_FLOAT mass; MD_FLOAT mass;
int ntypes;
int ntimes; int ntimes;
int nstat; int nstat;
int every; int every;

View File

@ -22,6 +22,7 @@ void init(Parameter *param) {
param->epsilon = 1.0; param->epsilon = 1.0;
param->sigma6 = 1.0; param->sigma6 = 1.0;
param->rho = 0.8442; param->rho = 0.8442;
param->ntypes = 4;
param->ntimes = 200; param->ntimes = 200;
param->nx = 4; param->nx = 4;
param->ny = 4; param->ny = 4;
@ -56,6 +57,9 @@ int main(int argc, const char *argv[]) {
Atom *atom = (Atom *)(&atom_data); Atom *atom = (Atom *)(&atom_data);
Neighbor neighbor; Neighbor neighbor;
Parameter param; Parameter param;
int atoms_per_unit_cell = 8;
int csv = 0;
double freq = 0.0;
LIKWID_MARKER_INIT; LIKWID_MARKER_INIT;
LIKWID_MARKER_REGISTER("force"); LIKWID_MARKER_REGISTER("force");
@ -84,12 +88,30 @@ int main(int argc, const char *argv[]) {
param.nz = atoi(argv[++i]); param.nz = atoi(argv[++i]);
continue; continue;
} }
if((strcmp(argv[i], "-na") == 0))
{
atoms_per_unit_cell = atoi(argv[++i]);
continue;
}
if((strcmp(argv[i], "-f") == 0))
{
freq = atof(argv[++i]) * 1.E9;
continue;
}
if((strcmp(argv[i], "-csv") == 0))
{
csv = 1;
continue;
}
if((strcmp(argv[i], "-h") == 0) || (strcmp(argv[i], "--help") == 0)) if((strcmp(argv[i], "-h") == 0) || (strcmp(argv[i], "--help") == 0))
{ {
printf("MD Bench: A minimalistic re-implementation of miniMD\n"); printf("MD Bench: A minimalistic re-implementation of miniMD\n");
printf(HLINE); printf(HLINE);
printf("-n / --nsteps <int>: set number of timesteps for simulation\n"); printf("-n / --nsteps <int>: set number of timesteps for simulation\n");
printf("-nx/-ny/-nz <int>: set linear dimension of systembox in x/y/z direction\n"); printf("-nx/-ny/-nz <int>: set linear dimension of systembox in x/y/z direction\n");
printf("-na <int>: set number of atoms per unit cell\n");
printf("-f <real>: set CPU frequency (GHz) and display average cycles per atom and neighbors\n");
printf("-csv: set output as CSV style\n");
printf(HLINE); printf(HLINE);
exit(EXIT_SUCCESS); exit(EXIT_SUCCESS);
} }
@ -102,12 +124,29 @@ int main(int argc, const char *argv[]) {
DEBUG("Initializing atoms...\n"); DEBUG("Initializing atoms...\n");
initAtom(atom); initAtom(atom);
DEBUG("Creating atoms...\n"); #ifdef EXPLICIT_TYPES
const int atoms_per_unit_cell = 16; atom->ntypes = param.ntypes;
atom->epsilon = allocate(ALIGNMENT, atom->ntypes * atom->ntypes * sizeof(MD_FLOAT));
atom->sigma6 = allocate(ALIGNMENT, atom->ntypes * atom->ntypes * sizeof(MD_FLOAT));
atom->cutforcesq = allocate(ALIGNMENT, atom->ntypes * atom->ntypes * sizeof(MD_FLOAT));
atom->cutneighsq = allocate(ALIGNMENT, atom->ntypes * atom->ntypes * sizeof(MD_FLOAT));
for(int i = 0; i < atom->ntypes * atom->ntypes; i++) {
atom->epsilon[i] = param.epsilon;
atom->sigma6[i] = param.sigma6;
atom->cutneighsq[i] = param.cutneigh * param.cutneigh;
atom->cutforcesq[i] = param.cutforce * param.cutforce;
}
#endif
DEBUG("Creating atoms...\n");
for(int i = 0; i < param.nx; ++i) { for(int i = 0; i < param.nx; ++i) {
for(int j = 0; j < param.ny; ++j) { for(int j = 0; j < param.ny; ++j) {
for(int k = 0; k < param.nz; ++k) { for(int k = 0; k < param.nz; ++k) {
int added_atoms = 0;
int fac_x = 1;
int fac_y = 1;
int fac_z = 1;
int fmod = 0;
MD_FLOAT base_x = i * LATTICE_DISTANCE; MD_FLOAT base_x = i * LATTICE_DISTANCE;
MD_FLOAT base_y = j * LATTICE_DISTANCE; MD_FLOAT base_y = j * LATTICE_DISTANCE;
MD_FLOAT base_z = k * LATTICE_DISTANCE; MD_FLOAT base_z = k * LATTICE_DISTANCE;
@ -119,56 +158,47 @@ int main(int argc, const char *argv[]) {
growAtom(atom); growAtom(atom);
} }
if(atoms_per_unit_cell == 4) { while(fac_x * fac_y * fac_z < atoms_per_unit_cell) {
ADD_ATOM(0.0, 0.0, 0.0, vx, vy, vz); if(fmod == 0) { fac_x *= 2; }
ADD_ATOM(1.0, 0.0, 0.0, vx, vy, vz); if(fmod == 1) { fac_y *= 2; }
ADD_ATOM(0.0, 1.0, 0.0, vx, vy, vz); if(fmod == 2) { fac_z *= 2; }
ADD_ATOM(0.0, 0.0, 1.0, vx, vy, vz); fmod = (fmod + 1) % 3;
} else if(atoms_per_unit_cell == 8) { }
ADD_ATOM(0.0, 0.0, 0.0, vx, vy, vz);
ADD_ATOM(1.0, 0.0, 0.0, vx, vy, vz); MD_FLOAT offset_x = (fac_x > 1) ? 1.0 / (fac_x - 1) : (int)fac_x;
ADD_ATOM(0.0, 1.0, 0.0, vx, vy, vz); MD_FLOAT offset_y = (fac_y > 1) ? 1.0 / (fac_y - 1) : (int)fac_y;
ADD_ATOM(0.0, 0.0, 1.0, vx, vy, vz); MD_FLOAT offset_z = (fac_z > 1) ? 1.0 / (fac_z - 1) : (int)fac_z;
ADD_ATOM(1.0, 1.0, 0.0, vx, vy, vz); for(int ii = 0; ii < fac_x; ++ii) {
ADD_ATOM(1.0, 0.0, 1.0, vx, vy, vz); for(int jj = 0; jj < fac_y; ++jj) {
ADD_ATOM(0.0, 1.0, 1.0, vx, vy, vz); for(int kk = 0; kk < fac_z; ++kk) {
ADD_ATOM(1.0, 1.0, 1.0, vx, vy, vz); if(added_atoms < atoms_per_unit_cell) {
} else if(atoms_per_unit_cell == 16) { #ifdef EXPLICIT_TYPES
ADD_ATOM(0.0, 0.0, 0.0, vx, vy, vz); atom->type[atom->Nlocal] = rand() % atom->ntypes;
ADD_ATOM(1.0, 0.0, 0.0, vx, vy, vz); #endif
ADD_ATOM(0.0, 1.0, 0.0, vx, vy, vz); ADD_ATOM(ii * offset_x, jj * offset_y, kk * offset_z, vx, vy, vz);
ADD_ATOM(0.0, 0.0, 1.0, vx, vy, vz); added_atoms++;
ADD_ATOM(1.0, 1.0, 0.0, vx, vy, vz); }
ADD_ATOM(1.0, 0.0, 1.0, vx, vy, vz); }
ADD_ATOM(0.0, 1.0, 1.0, vx, vy, vz); }
ADD_ATOM(1.0, 1.0, 1.0, vx, vy, vz);
ADD_ATOM(0.5, 0.5, 0.5, vx, vy, vz);
ADD_ATOM(1.5, 0.5, 0.5, vx, vy, vz);
ADD_ATOM(0.5, 1.5, 0.5, vx, vy, vz);
ADD_ATOM(0.5, 0.5, 1.5, vx, vy, vz);
ADD_ATOM(1.5, 1.5, 0.5, vx, vy, vz);
ADD_ATOM(1.5, 0.5, 1.5, vx, vy, vz);
ADD_ATOM(0.5, 1.5, 1.5, vx, vy, vz);
ADD_ATOM(1.5, 1.5, 1.5, vx, vy, vz);
} else {
printf("Invalid number of atoms per unit cell, must be: 4, 8 or 16\n");
return EXIT_FAILURE;
} }
} }
} }
} }
const double estim_volume = (double) const double estim_atom_volume = (double)(atom->Nlocal * 3 * sizeof(MD_FLOAT));
(atom->Nlocal * 6 * sizeof(MD_FLOAT) + const double estim_neighbors_volume = (double)(atom->Nlocal * (atoms_per_unit_cell - 1 + 2) * sizeof(int));
atom->Nlocal * (atoms_per_unit_cell - 1 + 2) * sizeof(int)) / 1000.0; const double estim_volume = (double)(atom->Nlocal * 6 * sizeof(MD_FLOAT) + estim_neighbors_volume);
if(!csv) {
printf("Number of timesteps: %d\n", param.ntimes);
printf("Number of times to compute the most internal loop: %d\n", INTERNAL_LOOP_NTIMES);
printf("System size (unit cells): %dx%dx%d\n", param.nx, param.ny, param.nz); printf("System size (unit cells): %dx%dx%d\n", param.nx, param.ny, param.nz);
printf("Atoms per unit cell: %d\n", atoms_per_unit_cell); printf("Atoms per unit cell: %d\n", atoms_per_unit_cell);
printf("Total number of atoms: %d\n", atom->Nlocal); printf("Total number of atoms: %d\n", atom->Nlocal);
printf("Estimated total data volume (kB): %.4f\n", estim_volume ); printf("Estimated total data volume (kB): %.4f\n", estim_volume / 1000.0);
printf("Estimated atom data volume (kB): %.4f\n", printf("Estimated atom data volume (kB): %.4f\n", estim_atom_volume / 1000.0);
(double)(atom->Nlocal * 3 * sizeof(MD_FLOAT) / 1000.0)); printf("Estimated neighborlist data volume (kB): %.4f\n", estim_neighbors_volume / 1000.0);
printf("Estimated neighborlist data volume (kB): %.4f\n", }
(double)(atom->Nlocal * (atoms_per_unit_cell - 1 + 2) * sizeof(int)) / 1000.0);
DEBUG("Initializing neighbor lists...\n"); DEBUG("Initializing neighbor lists...\n");
initNeighbor(&neighbor, &param); initNeighbor(&neighbor, &param);
@ -177,20 +207,43 @@ int main(int argc, const char *argv[]) {
DEBUG("Building neighbor lists...\n"); DEBUG("Building neighbor lists...\n");
buildNeighbor(atom, &neighbor); buildNeighbor(atom, &neighbor);
DEBUG("Computing forces...\n"); DEBUG("Computing forces...\n");
computeForce(&param, atom, &neighbor, 0); computeForce(&param, atom, &neighbor, 1);
double S, E; double S, E;
S = getTimeStamp(); S = getTimeStamp();
LIKWID_MARKER_START("force"); LIKWID_MARKER_START("force");
for(int i = 0; i < param.ntimes; i++) { for(int i = 0; i < param.ntimes; i++) {
computeForce(&param, atom, &neighbor, 1); computeForce(&param, atom, &neighbor, INTERNAL_LOOP_NTIMES);
} }
LIKWID_MARKER_STOP("force"); LIKWID_MARKER_STOP("force");
E = getTimeStamp(); E = getTimeStamp();
double T_accum = E-S; double T_accum = E-S;
const double atoms_updates_per_sec = (double)(atom->Nlocal) / T_accum * (double)(param.ntimes * INTERNAL_LOOP_NTIMES);
const double cycles_per_atom = T_accum / (double)(atom->Nlocal) / (double)(param.ntimes * INTERNAL_LOOP_NTIMES) * freq;
const double cycles_per_neigh = cycles_per_atom / (double)(atoms_per_unit_cell - 1);
if(!csv) {
printf("Total time: %.4f, Mega atom updates/s: %.4f\n", T_accum, atoms_updates_per_sec / 1.E6);
if(freq > 0.0) {
printf("Cycles per atom: %.4f, Cycles per neighbor: %.4f\n", cycles_per_atom, cycles_per_neigh);
}
} else {
printf("steps,unit cells,atoms/unit cell,total atoms,total vol.(kB),atoms vol.(kB),neigh vol.(kB),time(s),atom upds/s(M)");
if(freq > 0.0) {
printf(",cy/atom,cy/neigh");
}
printf("\n");
printf("%d,%dx%dx%d,%d,%d,%.4f,%.4f,%.4f,%.4f,%.4f",
param.ntimes, param.nx, param.ny, param.nz, atoms_per_unit_cell, atom->Nlocal,
estim_volume / 1.E3, estim_atom_volume / 1.E3, estim_neighbors_volume / 1.E3, T_accum, atoms_updates_per_sec / 1.E6);
if(freq > 0.0) {
printf(",%.4f,%.4f", cycles_per_atom, cycles_per_neigh);
}
printf("\n");
}
printf("Total time: %.4f, Mega atom updates/s: %.4f\n",
T_accum, atom->Nlocal * param.ntimes/T_accum/1.E6);
LIKWID_MARKER_CLOSE; LIKWID_MARKER_CLOSE;
return EXIT_SUCCESS; return EXIT_SUCCESS;
} }

View File

@ -54,6 +54,7 @@ void init(Parameter *param)
param->epsilon = 1.0; param->epsilon = 1.0;
param->sigma6 = 1.0; param->sigma6 = 1.0;
param->rho = 0.8442; param->rho = 0.8442;
param->ntypes = 4;
param->ntimes = 200; param->ntimes = 200;
param->dt = 0.005; param->dt = 0.005;
param->nx = 32; param->nx = 32;

View File

@ -196,7 +196,9 @@ void buildNeighbor(Atom *atom, Neighbor *neighbor)
MD_FLOAT ytmp = atom_y(i); MD_FLOAT ytmp = atom_y(i);
MD_FLOAT ztmp = atom_z(i); MD_FLOAT ztmp = atom_z(i);
int ibin = coord2bin(xtmp, ytmp, ztmp); int ibin = coord2bin(xtmp, ytmp, ztmp);
#ifdef EXPLICIT_TYPES
int type_i = atom->type[i];
#endif
for(int k = 0; k < nstencil; k++) { for(int k = 0; k < nstencil; k++) {
int jbin = ibin + stencil[k]; int jbin = ibin + stencil[k];
int* loc_bin = &bins[jbin * atoms_per_bin]; int* loc_bin = &bins[jbin * atoms_per_bin];
@ -213,7 +215,13 @@ void buildNeighbor(Atom *atom, Neighbor *neighbor)
MD_FLOAT delz = ztmp - atom_z(j); MD_FLOAT delz = ztmp - atom_z(j);
MD_FLOAT rsq = delx * delx + dely * dely + delz * delz; MD_FLOAT rsq = delx * delx + dely * dely + delz * delz;
if( rsq <= cutneighsq ) { #ifdef EXPLICIT_TYPES
int type_j = atom->type[j];
const MD_FLOAT cutoff = atom->cutneighsq[type_i * atom->ntypes + type_j];
#else
const MD_FLOAT cutoff = cutneighsq;
#endif
if( rsq <= cutoff ) {
neighptr[n++] = j; neighptr[n++] = j;
} }
} }