Merge branch 'master' of github.com:RRZE-HPC/MD-Bench

This commit is contained in:
Jan Eitzinger 2022-03-10 16:30:40 +01:00
commit ba3a0524f6
36 changed files with 4143 additions and 174 deletions

View File

@ -1,6 +1,6 @@
#CONFIGURE BUILD SYSTEM #CONFIGURE BUILD SYSTEM
TARGET = MDBench-$(TAG)-$(OPT_SCHEME) TARGET = MDBench-$(TAG)-$(OPT_SCHEME)
BUILD_DIR = ./$(TAG) BUILD_DIR = ./$(TAG)-$(OPT_SCHEME)
SRC_DIR = ./$(OPT_SCHEME) SRC_DIR = ./$(OPT_SCHEME)
ASM_DIR = ./asm ASM_DIR = ./asm
MAKE_DIR = ./ MAKE_DIR = ./
@ -10,6 +10,7 @@ Q ?= @
include $(MAKE_DIR)/config.mk include $(MAKE_DIR)/config.mk
include $(MAKE_DIR)/include_$(TAG).mk include $(MAKE_DIR)/include_$(TAG).mk
include $(MAKE_DIR)/include_LIKWID.mk include $(MAKE_DIR)/include_LIKWID.mk
include $(MAKE_DIR)/include_GROMACS.mk
INCLUDES += -I./$(SRC_DIR)/includes INCLUDES += -I./$(SRC_DIR)/includes
ifeq ($(strip $(DATA_LAYOUT)),AOS) ifeq ($(strip $(DATA_LAYOUT)),AOS)
@ -52,6 +53,10 @@ ifeq ($(strip $(COMPUTE_STATS)),true)
DEFINES += -DCOMPUTE_STATS DEFINES += -DCOMPUTE_STATS
endif endif
ifeq ($(strip $(XTC_OUTPUT)),true)
DEFINES += -DXTC_OUTPUT
endif
ifeq ($(strip $(USE_REFERENCE_VERSION)),true) ifeq ($(strip $(USE_REFERENCE_VERSION)),true)
DEFINES += -DUSE_REFERENCE_VERSION DEFINES += -DUSE_REFERENCE_VERSION
endif endif
@ -64,6 +69,10 @@ ifneq ($(VECTOR_WIDTH),)
DEFINES += -DVECTOR_WIDTH=$(VECTOR_WIDTH) DEFINES += -DVECTOR_WIDTH=$(VECTOR_WIDTH)
endif endif
ifeq ($(strip $(NO_AVX2)),true)
DEFINES += -DNO_AVX2
endif
VPATH = $(SRC_DIR) $(ASM_DIR) VPATH = $(SRC_DIR) $(ASM_DIR)
ASM = $(patsubst $(SRC_DIR)/%.c, $(BUILD_DIR)/%.s,$(wildcard $(SRC_DIR)/*.c)) ASM = $(patsubst $(SRC_DIR)/%.c, $(BUILD_DIR)/%.s,$(wildcard $(SRC_DIR)/*.c))
OVERWRITE:= $(patsubst $(ASM_DIR)/%-new.s, $(BUILD_DIR)/%.o,$(wildcard $(ASM_DIR)/*-new.s)) OVERWRITE:= $(patsubst $(ASM_DIR)/%-new.s, $(BUILD_DIR)/%.o,$(wildcard $(ASM_DIR)/*-new.s))

View File

@ -1,9 +1,9 @@
# Compiler tag (GCC/CLANG/ICC) # Compiler tag (GCC/CLANG/ICC)
TAG ?= ICC TAG ?= ICC
# Optimization scheme (lammps/gromacs/clusters_per_bin) # Optimization scheme (lammps/gromacs/clusters_per_bin)
OPT_SCHEME = gromacs OPT_SCHEME ?= gromacs
# Enable likwid (true or false) # Enable likwid (true or false)
ENABLE_LIKWID ?= false ENABLE_LIKWID ?= true
# SP or DP # SP or DP
DATA_TYPE ?= DP DATA_TYPE ?= DP
# AOS or SOA # AOS or SOA
@ -25,6 +25,8 @@ MEM_TRACER ?= false
INDEX_TRACER ?= false INDEX_TRACER ?= false
# Vector width (elements) for index and distance tracer # Vector width (elements) for index and distance tracer
VECTOR_WIDTH ?= 8 VECTOR_WIDTH ?= 8
# When vector width is 4 but AVX2 is not supported (AVX only), set this to true
NO_AVX2 ?= false
# Compute statistics # Compute statistics
COMPUTE_STATS ?= true COMPUTE_STATS ?= true
@ -33,6 +35,8 @@ COMPUTE_STATS ?= true
CLUSTER_LAYOUT ?= SOA CLUSTER_LAYOUT ?= SOA
# Use reference version # Use reference version
USE_REFERENCE_VERSION ?= false USE_REFERENCE_VERSION ?= false
# Enable XTC output
XTC_OUTPUT ?= false
#Feature options #Feature options
OPTIONS = -DALIGNMENT=64 OPTIONS = -DALIGNMENT=64

File diff suppressed because it is too large Load Diff

1003
data/argon_1000/conf.gro Normal file

File diff suppressed because it is too large Load Diff

244
data/argon_1000/grompp.mdp Normal file
View File

@ -0,0 +1,244 @@
;
; Generated by:
; Vitaly V. Chaban
; School of Chemistry
; University of Kharkiv
; Ukraine, Kharkiv-61077, Svoboda sq., 4
; email: chaban@univer.kharkov.ua, vvchaban@gmail.com
; skype: vvchaban
; System: Liquid argon (1000 atoms) at 80 K. Equilibrated for 500ps.
; VARIOUS PREPROCESSING OPTIONS
title = Yo
cpp = /usr/bin/cpp
include =
define =
; RUN CONTROL PARAMETERS
integrator = md
; Start time and timestep in ps
tinit = 0
dt = 0.001
nsteps = 250000
; For exact run continuation or redoing part of a run
init_step = 0
; mode for center of mass motion removal
comm-mode = Linear
; number of steps for center of mass motion removal
nstcomm = 1
; group(s) for center of mass motion removal
comm-grps =
; LANGEVIN DYNAMICS OPTIONS
; Temperature, friction coefficient (amu/ps) and random seed
bd-temp = 300
bd-fric = 0
ld-seed = 1993
; ENERGY MINIMIZATION OPTIONS
; Force tolerance and initial step-size
emtol = 100
emstep = 0.01
; Max number of iterations in relax_shells
niter = 20
; Step size (1/ps^2) for minimization of flexible constraints
fcstep = 0
; Frequency of steepest descents steps when doing CG
nstcgsteep = 1000
nbfgscorr = 10
; OUTPUT CONTROL OPTIONS
; Output frequency for coords (x), velocities (v) and forces (f)
nstxout = 500
nstvout = 5
nstfout = 0
; Checkpointing helps you continue after crashes
nstcheckpoint = 1000
; Output frequency for energies to log file and energy file
nstlog = 50
nstenergy = 50
; Output frequency and precision for xtc file
nstxtcout = 5
xtc-precision = 1000
; This selects the subset of atoms for the xtc file. You can
; select multiple groups. By default all atoms will be written.
xtc-grps =
; Selection of energy groups
energygrps =
; NEIGHBORSEARCHING PARAMETERS
; nblist update frequency
nstlist = 5
; ns algorithm (simple or grid)
ns_type = grid
; Periodic boundary conditions: xyz (default), no (vacuum)
; or full (infinite systems only)
pbc = xyz
; nblist cut-off
rlist = 0.9
domain-decomposition = no
; OPTIONS FOR ELECTROSTATICS AND VDW
; Method for doing electrostatics
coulombtype = Cut-off
rcoulomb-switch = 0
rcoulomb = 0.9
; Dielectric constant (DC) for cut-off or DC of reaction field
epsilon-r = 1
; Method for doing Van der Waals
vdw-type = Cut-off
; cut-off lengths
rvdw-switch = 0
rvdw = 0.9
; Apply long range dispersion corrections for Energy and Pressure
DispCorr = EnerPres
; Extension of the potential lookup tables beyond the cut-off
table-extension = 1
; Spacing for the PME/PPPM FFT grid
fourierspacing = 0.12
; FFT grid size, when a value is 0 fourierspacing will be used
fourier_nx = 0
fourier_ny = 0
fourier_nz = 0
; EWALD/PME/PPPM parameters
pme_order = 4
ewald_rtol = 1e-05
ewald_geometry = 3d
epsilon_surface = 0
optimize_fft = no
; GENERALIZED BORN ELECTROSTATICS
; Algorithm for calculating Born radii
gb_algorithm = Still
; Frequency of calculating the Born radii inside rlist
nstgbradii = 1
; Cutoff for Born radii calculation; the contribution from atoms
; between rlist and rgbradii is updated every nstlist steps
rgbradii = 2
; Salt concentration in M for Generalized Born models
gb_saltconc = 0
; IMPLICIT SOLVENT (for use with Generalized Born electrostatics)
implicit_solvent = No
; OPTIONS FOR WEAK COUPLING ALGORITHMS
; Temperature coupling
Tcoupl = berendsen
; Groups to couple separately
tc-grps = System
; Time constant (ps) and reference temperature (K)
tau_t = 0.1
ref_t = 80
; Pressure coupling
Pcoupl = no
Pcoupltype = isotropic
; Time constant (ps), compressibility (1/bar) and reference P (bar)
tau_p = 1.0
compressibility = 4.5e-5
ref_p = 1.0
; Random seed for Andersen thermostat
andersen_seed = 815131
; SIMULATED ANNEALING
; Type of annealing for each temperature group (no/single/periodic)
annealing = no
; Number of time points to use for specifying annealing in each group
annealing_npoints =
; List of times at the annealing points for each group
annealing_time =
; Temp. at each annealing point, for each group.
annealing_temp =
; GENERATE VELOCITIES FOR STARTUP RUN
gen_vel = yes
gen_temp = 80
gen_seed = 1993
; OPTIONS FOR BONDS
constraints = all-bonds
; Type of constraint algorithm
constraint-algorithm = Lincs
; Do not constrain the start configuration
unconstrained-start = no
; Use successive overrelaxation to reduce the number of shake iterations
Shake-SOR = no
; Relative tolerance of shake
shake-tol = 1e-04
; Highest order in the expansion of the constraint coupling matrix
lincs-order = 4
; Number of iterations in the final step of LINCS. 1 is fine for
; normal simulations, but use 2 to conserve energy in NVE runs.
; For energy minimization with constraints it should be 4 to 8.
lincs-iter = 1
; Lincs will write a warning to the stderr if in one step a bond
; rotates over more degrees than
lincs-warnangle = 30
; Convert harmonic bonds to morse potentials
morse = no
; ENERGY GROUP EXCLUSIONS
; Pairs of energy groups for which all non-bonded interactions are excluded
energygrp_excl =
; NMR refinement stuff
; Distance restraints type: No, Simple or Ensemble
disre = No
; Force weighting of pairs in one distance restraint: Conservative or Equal
disre-weighting = Conservative
; Use sqrt of the time averaged times the instantaneous violation
disre-mixed = no
disre-fc = 1000
disre-tau = 0
; Output frequency for pair distances to energy file
nstdisreout = 100
; Orientation restraints: No or Yes
orire = no
; Orientation restraints force constant and tau for time averaging
orire-fc = 0
orire-tau = 0
orire-fitgrp =
; Output frequency for trace(SD) to energy file
nstorireout = 100
; Dihedral angle restraints: No, Simple or Ensemble
dihre = No
dihre-fc = 1000
dihre-tau = 0
; Output frequency for dihedral values to energy file
nstdihreout = 100
; Free energy control stuff
free-energy = no
init-lambda = 0
delta-lambda = 0
sc-alpha = 0
sc-sigma = 0.3
; Non-equilibrium MD stuff
acc-grps =
accelerate =
freezegrps =
freezedim =
cos-acceleration = 0
; Electric fields
; Format is number of terms (int) and for all terms an amplitude (real)
; and a phase angle (real)
E-x =
E-xt =
E-y =
E-yt =
E-z =
E-zt =
; User defined thingies
user1-grps =
user2-grps =
userint1 = 0
userint2 = 0
userint3 = 0
userint4 = 0
userreal1 = 0
userreal2 = 0
userreal3 = 0
userreal4 = 0

View File

@ -0,0 +1,11 @@
mass 39.94
sigma 0.0062220
epsilon 0.0000096960
ntimes 250000
dt 0.001
temp 80
x_out_freq 500
v_out_freq 5
cutforce 0.9
reneigh_every 100
nstat 125000

1003
data/argon_1000/tprout.gro Normal file

File diff suppressed because it is too large Load Diff

View File

@ -31,18 +31,7 @@
#include <allocate.h> #include <allocate.h>
#include <util.h> #include <util.h>
#define DELTA 20000 void initAtom(Atom *atom) {
#ifndef MAXLINE
#define MAXLINE 4096
#endif
#ifndef MAX
#define MAX(a,b) ((a) > (b) ? (a) : (b))
#endif
void initAtom(Atom *atom)
{
atom->x = NULL; atom->y = NULL; atom->z = NULL; atom->x = NULL; atom->y = NULL; atom->z = NULL;
atom->vx = NULL; atom->vy = NULL; atom->vz = NULL; atom->vx = NULL; atom->vy = NULL; atom->vz = NULL;
atom->cl_x = NULL; atom->cl_x = NULL;
@ -65,8 +54,7 @@ void initAtom(Atom *atom)
atom->clusters = NULL; atom->clusters = NULL;
} }
void createAtom(Atom *atom, Parameter *param) void createAtom(Atom *atom, Parameter *param) {
{
MD_FLOAT xlo = 0.0; MD_FLOAT xhi = param->xprd; MD_FLOAT xlo = 0.0; MD_FLOAT xhi = param->xprd;
MD_FLOAT ylo = 0.0; MD_FLOAT yhi = param->yprd; MD_FLOAT ylo = 0.0; MD_FLOAT yhi = param->yprd;
MD_FLOAT zlo = 0.0; MD_FLOAT zhi = param->zprd; MD_FLOAT zlo = 0.0; MD_FLOAT zhi = param->zprd;
@ -106,47 +94,25 @@ void createAtom(Atom *atom, Parameter *param)
int subboxdim = 8; int subboxdim = 8;
while(oz * subboxdim <= khi) { while(oz * subboxdim <= khi) {
k = oz * subboxdim + sz; k = oz * subboxdim + sz;
j = oy * subboxdim + sy; j = oy * subboxdim + sy;
i = ox * subboxdim + sx; i = ox * subboxdim + sx;
if(((i + j + k) % 2 == 0) && if(((i + j + k) % 2 == 0) && (i >= ilo) && (i <= ihi) && (j >= jlo) && (j <= jhi) && (k >= klo) && (k <= khi)) {
(i >= ilo) && (i <= ihi) &&
(j >= jlo) && (j <= jhi) &&
(k >= klo) && (k <= khi)) {
xtmp = 0.5 * alat * i; xtmp = 0.5 * alat * i;
ytmp = 0.5 * alat * j; ytmp = 0.5 * alat * j;
ztmp = 0.5 * alat * k; ztmp = 0.5 * alat * k;
if( xtmp >= xlo && xtmp < xhi && if(xtmp >= xlo && xtmp < xhi && ytmp >= ylo && ytmp < yhi && ztmp >= zlo && ztmp < zhi ) {
ytmp >= ylo && ytmp < yhi && n = k * (2 * param->ny) * (2 * param->nx) + j * (2 * param->nx) + i + 1;
ztmp >= zlo && ztmp < zhi ) { for(m = 0; m < 5; m++) { myrandom(&n); }
n = k * (2 * param->ny) * (2 * param->nx) +
j * (2 * param->nx) +
i + 1;
for(m = 0; m < 5; m++) {
myrandom(&n);
}
vxtmp = myrandom(&n); vxtmp = myrandom(&n);
for(m = 0; m < 5; m++){ myrandom(&n); }
for(m = 0; m < 5; m++){
myrandom(&n);
}
vytmp = myrandom(&n); vytmp = myrandom(&n);
for(m = 0; m < 5; m++) { myrandom(&n); }
for(m = 0; m < 5; m++) {
myrandom(&n);
}
vztmp = myrandom(&n); vztmp = myrandom(&n);
if(atom->Nlocal == atom->Nmax) { if(atom->Nlocal == atom->Nmax) { growAtom(atom); }
growAtom(atom);
}
atom_x(atom->Nlocal) = xtmp; atom_x(atom->Nlocal) = xtmp;
atom_y(atom->Nlocal) = ytmp; atom_y(atom->Nlocal) = ytmp;
atom_z(atom->Nlocal) = ztmp; atom_z(atom->Nlocal) = ztmp;
@ -159,7 +125,6 @@ void createAtom(Atom *atom, Parameter *param)
} }
sx++; sx++;
if(sx == subboxdim) { sx = 0; sy++; } if(sx == subboxdim) { sx = 0; sy++; }
if(sy == subboxdim) { sy = 0; sz++; } if(sy == subboxdim) { sy = 0; sz++; }
if(sz == subboxdim) { sz = 0; ox++; } if(sz == subboxdim) { sz = 0; ox++; }
@ -168,8 +133,188 @@ void createAtom(Atom *atom, Parameter *param)
} }
} }
int readAtom(Atom* atom, Parameter* param) int type_str2int(const char *type) {
{ if(strncmp(type, "Ar", 2) == 0) { return 0; } // Argon
fprintf(stderr, "Invalid atom type: %s\n", type);
exit(-1);
return -1;
}
/*
 * Reads the initial atom configuration from param->input_file, selecting the
 * reader by the 4-character file name suffix (".pdb", ".gro" or ".dmp").
 *
 * Returns whatever the selected reader returns. Terminates the program with
 * an error message when the name is missing, shorter than a suffix, or has
 * an unsupported suffix.
 */
int readAtom(Atom* atom, Parameter* param) {
    const char *path = param->input_file;
    size_t len = (path != NULL) ? strlen(path) : 0;

    // Only look at the suffix when the name is long enough to contain one,
    // otherwise &path[len - 4] would point in front of the string
    if(len >= 4) {
        const char *ext = &path[len - 4];
        if(strncmp(ext, ".pdb", 4) == 0) { return readAtom_pdb(atom, param); }
        if(strncmp(ext, ".gro", 4) == 0) { return readAtom_gro(atom, param); }
        if(strncmp(ext, ".dmp", 4) == 0) { return readAtom_dmp(atom, param); }
    }

    fprintf(stderr, "Invalid input file extension: %s\nValid choices are: pdb, gro, dmp\n", (path != NULL) ? path : "(null)");
    exit(-1);
    return -1;
}
/*
 * Returns the next whitespace-separated field of the line currently being
 * tokenized with strtok(). Terminates the program with an error message when
 * the line has no further field, so callers never pass NULL to atoi()/atof().
 */
static char *pdbNextField(const char *filename) {
    char *field = strtok(NULL, " \t\r\n");
    if(field == NULL) {
        fprintf(stderr, "Input error: Missing field in %s\n", filename);
        exit(-1);
    }
    return field;
}

/*
 * Reads atoms and box dimensions from the PDB-like file param->input_file.
 *
 * Handled records (first field of each line):
 *   CRYST1  - the next three fields become xhi, yhi, zhi (lower bounds are 0)
 *   ATOM    - fields: id, type, label, component id, x, y, z (velocities are set to 0);
 *             any further fields on the line are ignored
 *   HEADER, REMARK, MODEL, TER, ENDMDL - ignored
 * Empty lines are skipped, any other record terminates the program.
 *
 * After reading, the per-type-pair arrays (epsilon, sigma6, cutforcesq,
 * cutneighsq) are allocated and filled from param.
 *
 * Returns the number of ATOM records read; terminates the program on any error.
 */
int readAtom_pdb(Atom* atom, Parameter* param) {
    FILE *fp = fopen(param->input_file, "r");
    char line[MAXLINE];
    int read_atoms = 0;

    if(!fp) {
        fprintf(stderr, "Could not open input file: %s\n", param->input_file);
        exit(-1);
        return -1;
    }

    // Loop on the fgets() result: testing feof() before the read would process
    // the last line a second time once the read finally fails
    while(fgets(line, MAXLINE, fp) != NULL) {
        char *item = strtok(line, " \t\r\n");
        if(item == NULL) { continue; } // Line without any field

        if(strncmp(item, "CRYST1", 6) == 0) {
            param->xlo = 0.0;
            param->xhi = atof(pdbNextField(param->input_file));
            param->ylo = 0.0;
            param->yhi = atof(pdbNextField(param->input_file));
            param->zlo = 0.0;
            param->zhi = atof(pdbNextField(param->input_file));
            param->xprd = param->xhi - param->xlo;
            param->yprd = param->yhi - param->ylo;
            param->zprd = param->zhi - param->zlo;
            // alpha, beta, gamma, sGroup, z
        } else if(strncmp(item, "ATOM", 4) == 0) {
            // Ids in the file start at 1, array indices at 0
            int atom_id = atoi(pdbNextField(param->input_file)) - 1;
            if(atom_id < 0) {
                fprintf(stderr, "Input error: Invalid atom id in %s\n", param->input_file);
                exit(-1);
                return -1;
            }

            while(atom_id + 1 >= atom->Nmax) {
                growAtom(atom);
            }

            atom->type[atom_id] = type_str2int(pdbNextField(param->input_file));
            pdbNextField(param->input_file); // label (unused)
            pdbNextField(param->input_file); // component id (unused)
            atom_x(atom_id) = atof(pdbNextField(param->input_file));
            atom_y(atom_id) = atof(pdbNextField(param->input_file));
            atom_z(atom_id) = atof(pdbNextField(param->input_file));
            atom->vx[atom_id] = 0.0;
            atom->vy[atom_id] = 0.0;
            atom->vz[atom_id] = 0.0;
            // Occupancy and charge follow on the line but are not used
            atom->ntypes = MAX(atom->type[atom_id] + 1, atom->ntypes);
            atom->Natoms++;
            atom->Nlocal++;
            read_atoms++;
        } else if(strncmp(item, "HEADER", 6) == 0 ||
                  strncmp(item, "REMARK", 6) == 0 ||
                  strncmp(item, "MODEL", 5) == 0 ||
                  strncmp(item, "TER", 3) == 0 ||
                  strncmp(item, "ENDMDL", 6) == 0) {
            // Do nothing
        } else {
            fprintf(stderr, "Invalid item: %s\n", item);
            exit(-1);
            return -1;
        }
    }

    fclose(fp);

    if(!read_atoms) {
        fprintf(stderr, "Input error: No atoms read!\n");
        exit(-1);
        return -1;
    }

    // One entry per pair of atom types, all pairs share the global parameters
    atom->epsilon = allocate(ALIGNMENT, atom->ntypes * atom->ntypes * sizeof(MD_FLOAT));
    atom->sigma6 = allocate(ALIGNMENT, atom->ntypes * atom->ntypes * sizeof(MD_FLOAT));
    atom->cutforcesq = allocate(ALIGNMENT, atom->ntypes * atom->ntypes * sizeof(MD_FLOAT));
    atom->cutneighsq = allocate(ALIGNMENT, atom->ntypes * atom->ntypes * sizeof(MD_FLOAT));
    for(int i = 0; i < atom->ntypes * atom->ntypes; i++) {
        atom->epsilon[i] = param->epsilon;
        atom->sigma6[i] = param->sigma6;
        atom->cutneighsq[i] = param->cutneigh * param->cutneigh;
        atom->cutforcesq[i] = param->cutforce * param->cutforce;
    }

    fprintf(stdout, "Read %d atoms from %s\n", read_atoms, param->input_file);
    return read_atoms;
}
/*
 * Returns the next whitespace-separated field from strtok(). Pass the line
 * buffer as str to start tokenizing a new line, or NULL to continue with the
 * current one. Terminates the program with an error message when no field is
 * left, so callers never pass NULL to atoi()/atof().
 */
static char *groField(char *str, const char *filename) {
    char *field = strtok(str, " \t\r\n");
    if(field == NULL) {
        fprintf(stderr, "Input error: Missing field in %s\n", filename);
        exit(-1);
    }
    return field;
}

/*
 * Reads atoms, velocities and box dimensions from the GRO-like file
 * param->input_file.
 *
 * Expected layout:
 *   line 1      - description of the system
 *   line 2      - number of atoms
 *   atom lines  - fields: label, type, atom number, x, y, z, vx, vy, vz
 *   box line    - xhi, yhi, zhi (lower bounds are 0); optional
 *
 * Atoms are stored in file order, the atom number field is ignored. After
 * reading, the per-type-pair arrays (epsilon, sigma6, cutforcesq, cutneighsq)
 * are allocated and filled from param.
 *
 * Returns the number of atoms read; terminates the program on any error.
 */
int readAtom_gro(Atom* atom, Parameter* param) {
    FILE *fp = fopen(param->input_file, "r");
    char line[MAXLINE];
    char desc[MAXLINE];
    int read_atoms = 0;
    int atoms_to_read = 0;

    if(!fp) {
        fprintf(stderr, "Could not open input file: %s\n", param->input_file);
        exit(-1);
        return -1;
    }

    if(fgets(desc, MAXLINE, fp) == NULL || fgets(line, MAXLINE, fp) == NULL) {
        fprintf(stderr, "Input error: Could not read header from %s\n", param->input_file);
        exit(-1);
        return -1;
    }

    // Cut the description at the line end; strcspn() stops at the terminator
    // when the line has no line break, so the scan cannot leave the buffer
    desc[strcspn(desc, "\r\n")] = '\0';
    atoms_to_read = atoi(groField(line, param->input_file));
    fprintf(stdout, "System: %s with %d atoms\n", desc, atoms_to_read);

    // Loop on the fgets() result: testing feof() before the read would process
    // the last line a second time once the read finally fails
    while(read_atoms < atoms_to_read && fgets(line, MAXLINE, fp) != NULL) {
        int atom_id = read_atoms;
        int type;

        groField(line, param->input_file); // label (unused)
        type = type_str2int(groField(NULL, param->input_file));
        groField(NULL, param->input_file); // atom number (unused, file order is kept)

        while(atom_id + 1 >= atom->Nmax) {
            growAtom(atom);
        }

        atom->type[atom_id] = type;
        atom_x(atom_id) = atof(groField(NULL, param->input_file));
        atom_y(atom_id) = atof(groField(NULL, param->input_file));
        atom_z(atom_id) = atof(groField(NULL, param->input_file));
        atom->vx[atom_id] = atof(groField(NULL, param->input_file));
        atom->vy[atom_id] = atof(groField(NULL, param->input_file));
        atom->vz[atom_id] = atof(groField(NULL, param->input_file));
        atom->ntypes = MAX(atom->type[atom_id] + 1, atom->ntypes);
        atom->Natoms++;
        atom->Nlocal++;
        read_atoms++;
    }

    // The box line follows the atoms; it is only parsed when it was actually read
    if(fgets(line, MAXLINE, fp) != NULL) {
        param->xlo = 0.0;
        param->xhi = atof(groField(line, param->input_file));
        param->ylo = 0.0;
        param->yhi = atof(groField(NULL, param->input_file));
        param->zlo = 0.0;
        param->zhi = atof(groField(NULL, param->input_file));
        param->xprd = param->xhi - param->xlo;
        param->yprd = param->yhi - param->ylo;
        param->zprd = param->zhi - param->zlo;
    }

    fclose(fp);

    if(read_atoms != atoms_to_read) {
        fprintf(stderr, "Input error: Number of atoms read do not match (%d/%d).\n", read_atoms, atoms_to_read);
        exit(-1);
        return -1;
    }

    // One entry per pair of atom types, all pairs share the global parameters
    atom->epsilon = allocate(ALIGNMENT, atom->ntypes * atom->ntypes * sizeof(MD_FLOAT));
    atom->sigma6 = allocate(ALIGNMENT, atom->ntypes * atom->ntypes * sizeof(MD_FLOAT));
    atom->cutforcesq = allocate(ALIGNMENT, atom->ntypes * atom->ntypes * sizeof(MD_FLOAT));
    atom->cutneighsq = allocate(ALIGNMENT, atom->ntypes * atom->ntypes * sizeof(MD_FLOAT));
    for(int i = 0; i < atom->ntypes * atom->ntypes; i++) {
        atom->epsilon[i] = param->epsilon;
        atom->sigma6[i] = param->sigma6;
        atom->cutneighsq[i] = param->cutneigh * param->cutneigh;
        atom->cutforcesq[i] = param->cutforce * param->cutforce;
    }

    fprintf(stdout, "Read %d atoms from %s\n", read_atoms, param->input_file);
    return read_atoms;
}
int readAtom_dmp(Atom* atom, Parameter* param) {
FILE *fp = fopen(param->input_file, "r"); FILE *fp = fopen(param->input_file, "r");
char line[MAXLINE]; char line[MAXLINE];
int natoms = 0; int natoms = 0;
@ -258,11 +403,11 @@ int readAtom(Atom* atom, Parameter* param)
} }
fprintf(stdout, "Read %d atoms from %s\n", natoms, param->input_file); fprintf(stdout, "Read %d atoms from %s\n", natoms, param->input_file);
fclose(fp);
return natoms; return natoms;
} }
void growAtom(Atom *atom) void growAtom(Atom *atom) {
{
int nold = atom->Nmax; int nold = atom->Nmax;
atom->Nmax += DELTA; atom->Nmax += DELTA;
@ -279,8 +424,7 @@ void growAtom(Atom *atom)
atom->type = (int *) reallocate(atom->type, ALIGNMENT, atom->Nmax * sizeof(int), nold * sizeof(int)); atom->type = (int *) reallocate(atom->type, ALIGNMENT, atom->Nmax * sizeof(int), nold * sizeof(int));
} }
void growClusters(Atom *atom) void growClusters(Atom *atom) {
{
int nold = atom->Nclusters_max; int nold = atom->Nclusters_max;
atom->Nclusters_max += DELTA; atom->Nclusters_max += DELTA;
atom->clusters = (Cluster*) reallocate(atom->clusters, ALIGNMENT, atom->Nclusters_max * sizeof(Cluster), nold * sizeof(Cluster)); atom->clusters = (Cluster*) reallocate(atom->clusters, ALIGNMENT, atom->Nclusters_max * sizeof(Cluster), nold * sizeof(Cluster));

View File

@ -61,6 +61,7 @@ double computeForceLJ_ref(Parameter *param, Atom *atom, Neighbor *neighbor, Stat
for(int k = 0; k < numneighs; k++) { for(int k = 0; k < numneighs; k++) {
int cj = neighs[k]; int cj = neighs[k];
int any = 0;
MD_FLOAT *cjptr = cluster_pos_ptr(cj); MD_FLOAT *cjptr = cluster_pos_ptr(cj);
for(int cii = 0; cii < CLUSTER_DIM_M; cii++) { for(int cii = 0; cii < CLUSTER_DIM_M; cii++) {
MD_FLOAT xtmp = cluster_x(ciptr, cii); MD_FLOAT xtmp = cluster_x(ciptr, cii);
@ -83,18 +84,29 @@ double computeForceLJ_ref(Parameter *param, Atom *atom, Neighbor *neighbor, Stat
fix += delx * force; fix += delx * force;
fiy += dely * force; fiy += dely * force;
fiz += delz * force; fiz += delz * force;
any = 1;
addStat(stats->atoms_within_cutoff, 1);
} else {
addStat(stats->atoms_outside_cutoff, 1);
} }
} }
} }
if(any != 0) {
addStat(stats->clusters_within_cutoff, 1);
} else {
addStat(stats->clusters_outside_cutoff, 1);
}
cluster_x(cifptr, cii) += fix; cluster_x(cifptr, cii) += fix;
cluster_y(cifptr, cii) += fiy; cluster_y(cifptr, cii) += fiy;
cluster_z(cifptr, cii) += fiz; cluster_z(cifptr, cii) += fiz;
} }
} }
addStat(stats->calculated_forces, 1);
addStat(stats->num_neighs, numneighs); addStat(stats->num_neighs, numneighs);
addStat(stats->force_iters, (numneighs + VECTOR_WIDTH - 1) / VECTOR_WIDTH); addStat(stats->force_iters, (long long int)((double)numneighs * CLUSTER_DIM_M / CLUSTER_DIM_N));
} }
LIKWID_MARKER_STOP("force"); LIKWID_MARKER_STOP("force");
@ -250,7 +262,7 @@ double computeForceLJ_4xn(Parameter *param, Atom *atom, Neighbor *neighbor, Stat
addStat(stats->calculated_forces, 1); addStat(stats->calculated_forces, 1);
addStat(stats->num_neighs, numneighs); addStat(stats->num_neighs, numneighs);
addStat(stats->force_iters, numneighs / 2); addStat(stats->force_iters, (long long int)((double)numneighs * CLUSTER_DIM_M / CLUSTER_DIM_N));
} }
LIKWID_MARKER_STOP("force"); LIKWID_MARKER_STOP("force");

View File

@ -25,6 +25,8 @@
#ifndef __ATOM_H_ #ifndef __ATOM_H_
#define __ATOM_H_ #define __ATOM_H_
#define DELTA 20000
#define CLUSTER_DIM_M 4 #define CLUSTER_DIM_M 4
#define CLUSTER_DIM_N VECTOR_WIDTH #define CLUSTER_DIM_N VECTOR_WIDTH
@ -59,6 +61,9 @@ typedef struct {
extern void initAtom(Atom*); extern void initAtom(Atom*);
extern void createAtom(Atom*, Parameter*); extern void createAtom(Atom*, Parameter*);
extern int readAtom(Atom*, Parameter*); extern int readAtom(Atom*, Parameter*);
extern int readAtom_pdb(Atom*, Parameter*);
extern int readAtom_gro(Atom*, Parameter*);
extern int readAtom_dmp(Atom*, Parameter*);
extern void growAtom(Atom*); extern void growAtom(Atom*);
extern void growClusters(Atom*); extern void growClusters(Atom*);

View File

@ -37,6 +37,7 @@ extern void initNeighbor(Neighbor*, Parameter*);
extern void setupNeighbor(Parameter*, Atom*); extern void setupNeighbor(Parameter*, Atom*);
extern void binatoms(Atom*); extern void binatoms(Atom*);
extern void buildNeighbor(Atom*, Neighbor*); extern void buildNeighbor(Atom*, Neighbor*);
extern void pruneNeighbor(Parameter*, Atom*, Neighbor*);
extern void sortAtom(Atom*); extern void sortAtom(Atom*);
extern void buildClusters(Atom*); extern void buildClusters(Atom*);
extern void binClusters(Atom*); extern void binClusters(Atom*);

View File

@ -31,9 +31,12 @@
typedef struct { typedef struct {
int force_field; int force_field;
char* param_file;
char* input_file; char* input_file;
char* vtk_file; char* vtk_file;
char *xtc_file;
MD_FLOAT epsilon; MD_FLOAT epsilon;
MD_FLOAT sigma;
MD_FLOAT sigma6; MD_FLOAT sigma6;
MD_FLOAT temp; MD_FLOAT temp;
MD_FLOAT rho; MD_FLOAT rho;
@ -41,10 +44,14 @@ typedef struct {
int ntypes; int ntypes;
int ntimes; int ntimes;
int nstat; int nstat;
int every; int reneigh_every;
int prune_every;
int x_out_every;
int v_out_every;
MD_FLOAT dt; MD_FLOAT dt;
MD_FLOAT dtforce; MD_FLOAT dtforce;
MD_FLOAT cutforce; MD_FLOAT cutforce;
MD_FLOAT skin;
MD_FLOAT cutneigh; MD_FLOAT cutneigh;
int nx, ny, nz; int nx, ny, nz;
MD_FLOAT lattice; MD_FLOAT lattice;
@ -53,4 +60,9 @@ typedef struct {
double proc_freq; double proc_freq;
char* eam_file; char* eam_file;
} Parameter; } Parameter;
void initParameter(Parameter*);
void readParameter(Parameter*, const char*);
void printParameter(Parameter*);
#endif #endif

View File

@ -41,10 +41,10 @@ static inline MD_SIMD_FLOAT simd_mul(MD_SIMD_FLOAT a, MD_SIMD_FLOAT b) { return
static inline MD_SIMD_FLOAT simd_fma(MD_SIMD_FLOAT a, MD_SIMD_FLOAT b, MD_SIMD_FLOAT c) { return _mm512_fmadd_pd(a, b, c); } static inline MD_SIMD_FLOAT simd_fma(MD_SIMD_FLOAT a, MD_SIMD_FLOAT b, MD_SIMD_FLOAT c) { return _mm512_fmadd_pd(a, b, c); }
static inline MD_SIMD_FLOAT simd_reciprocal(MD_SIMD_FLOAT a) { return _mm512_rcp14_pd(a); } static inline MD_SIMD_FLOAT simd_reciprocal(MD_SIMD_FLOAT a) { return _mm512_rcp14_pd(a); }
static inline MD_SIMD_FLOAT simd_masked_add(MD_SIMD_FLOAT a, MD_SIMD_FLOAT b, MD_SIMD_MASK m) { return _mm512_mask_add_pd(a, m, a, b); } static inline MD_SIMD_FLOAT simd_masked_add(MD_SIMD_FLOAT a, MD_SIMD_FLOAT b, MD_SIMD_MASK m) { return _mm512_mask_add_pd(a, m, a, b); }
static inline MD_SIMD_MASK simd_mask_from_u32(unsigned int a) { return _cvtu32_mask8(a); }
static inline MD_SIMD_MASK simd_mask_to_u32(unsigned int a) { return _cvtmask8_u32(a); }
static inline MD_SIMD_MASK simd_mask_and(MD_SIMD_MASK a, MD_SIMD_MASK b) { return _kand_mask8(a, b); } static inline MD_SIMD_MASK simd_mask_and(MD_SIMD_MASK a, MD_SIMD_MASK b) { return _kand_mask8(a, b); }
static inline MD_SIMD_MASK simd_mask_cond_lt(MD_SIMD_FLOAT a, MD_SIMD_FLOAT b) { return _mm512_cmp_pd_mask(a, b, _CMP_LT_OQ); } static inline MD_SIMD_MASK simd_mask_cond_lt(MD_SIMD_FLOAT a, MD_SIMD_FLOAT b) { return _mm512_cmp_pd_mask(a, b, _CMP_LT_OQ); }
static inline MD_SIMD_MASK simd_mask_from_u32(unsigned int a) { return _cvtu32_mask8(a); }
static inline unsigned int simd_mask_to_u32(MD_SIMD_MASK a) { return _cvtmask8_u32(a); }
static MD_SIMD_FLOAT simd_load2(MD_FLOAT *c0, MD_FLOAT *c1, int d) { static MD_SIMD_FLOAT simd_load2(MD_FLOAT *c0, MD_FLOAT *c1, int d) {
MD_SIMD_FLOAT x; MD_SIMD_FLOAT x;
@ -64,39 +64,55 @@ static inline MD_FLOAT simd_horizontal_sum(MD_SIMD_FLOAT a) {
MD_SIMD_FLOAT x = _mm512_add_pd(a, _mm512_shuffle_f64x2(a, a, 0xee)); MD_SIMD_FLOAT x = _mm512_add_pd(a, _mm512_shuffle_f64x2(a, a, 0xee));
x = _mm512_add_pd(x, _mm512_shuffle_f64x2(x, x, 0x11)); x = _mm512_add_pd(x, _mm512_shuffle_f64x2(x, x, 0x11));
x = _mm512_add_pd(x, _mm512_permute_pd(x, 0x01)); x = _mm512_add_pd(x, _mm512_permute_pd(x, 0x01));
return *((double *) &x); return *((MD_FLOAT *) &x);
} }
#else // AVX2 #else // AVX or AVX2
#define MD_SIMD_FLOAT __m256d #define MD_SIMD_FLOAT __m256d
#ifdef NO_AVX2
#define MD_SIMD_MASK __m256d
#else
#define MD_SIMD_MASK __mmask8 #define MD_SIMD_MASK __mmask8
#endif
static inline MD_SIMD_FLOAT simd_broadcast(double scalar) { return _mm256_set1_pd(scalar); } static inline MD_SIMD_FLOAT simd_broadcast(double scalar) { return _mm256_set1_pd(scalar); }
static inline MD_SIMD_FLOAT simd_zero() { return _mm256_set1_pd(0.0); } static inline MD_SIMD_FLOAT simd_zero() { return _mm256_set1_pd(0.0); }
static inline MD_SIMD_FLOAT simd_add(MD_SIMD_FLOAT a, MD_SIMD_FLOAT b) { return _mm256_add_pd(a, b); } static inline MD_SIMD_FLOAT simd_add(MD_SIMD_FLOAT a, MD_SIMD_FLOAT b) { return _mm256_add_pd(a, b); }
static inline MD_SIMD_FLOAT simd_sub(MD_SIMD_FLOAT a, MD_SIMD_FLOAT b) { return _mm256_sub_pd(a, b); } static inline MD_SIMD_FLOAT simd_sub(MD_SIMD_FLOAT a, MD_SIMD_FLOAT b) { return _mm256_sub_pd(a, b); }
static inline MD_SIMD_FLOAT simd_mul(MD_SIMD_FLOAT a, MD_SIMD_FLOAT b) { return _mm256_mul_pd(a, b); } static inline MD_SIMD_FLOAT simd_mul(MD_SIMD_FLOAT a, MD_SIMD_FLOAT b) { return _mm256_mul_pd(a, b); }
static inline MD_SIMD_FLOAT simd_fma(MD_SIMD_FLOAT a, MD_SIMD_FLOAT b, MD_SIMD_FLOAT c) { return _mm256_fmadd_pd(a, b, c); }
static inline MD_SIMD_FLOAT simd_reciprocal(MD_SIMD_FLOAT a) { return _mm256_rcp14_pd(a); }
static inline MD_SIMD_FLOAT simd_masked_add(MD_SIMD_FLOAT a, MD_SIMD_FLOAT b, MD_SIMD_MASK m) { return _mm256_mask_add_pd(a, m, a, b); }
static inline MD_SIMD_MASK simd_mask_from_u32(unsigned int a) { return _cvtu32_mask8(a); }
static inline MD_SIMD_MASK simd_mask_to_u32(unsigned int a) { return _cvtmask8_u32(a); }
static inline MD_SIMD_MASK simd_mask_and(MD_SIMD_MASK a, MD_SIMD_MASK b) { return _kand_mask8(a, b); }
static inline MD_SIMD_MASK simd_mask_cond_lt(MD_SIMD_FLOAT a, MD_SIMD_FLOAT b) { return _mm256_cmp_pd_mask(a, b, _CMP_LT_OQ); }
static MD_SIMD_FLOAT simd_load(MD_FLOAT *c0, int d) { #ifdef NO_AVX2
MD_SIMD_FLOAT x; static inline MD_SIMD_FLOAT simd_reciprocal(MD_SIMD_FLOAT a) { return _mm256_cvtps_pd(_mm_rcp_ps(_mm256_cvtpd_ps(a))); }
#ifdef CLUSTER_AOS static inline MD_SIMD_FLOAT simd_fma(MD_SIMD_FLOAT a, MD_SIMD_FLOAT b, MD_SIMD_FLOAT c) { return simd_add(simd_mul(a, b), c); }
__m128i aos_gather_vindex = _mm128_set_epi32(9, 6, 3, 0); static inline MD_SIMD_FLOAT simd_masked_add(MD_SIMD_FLOAT a, MD_SIMD_FLOAT b, MD_SIMD_MASK m) { return simd_add(a, _mm256_and_pd(b, m)); }
__m128i vindex = _mm128_add_epi32(aos_gather_vindex, _mm128_set1_epi32(d)); static inline MD_SIMD_MASK simd_mask_cond_lt(MD_SIMD_FLOAT a, MD_SIMD_FLOAT b) { return _mm256_cmp_pd(a, b, _CMP_LT_OQ); }
x = _mm256_i32gather_pd(c0, vindex, sizeof(double)); static inline MD_SIMD_MASK simd_mask_and(MD_SIMD_MASK a, MD_SIMD_MASK b) { return _mm256_and_pd(a, b); }
#else // TODO: Initialize all diagonal cases and just select the proper one (all bits set or diagonal) based on cond0
x = _mm256_load_pd(&c0[d * CLUSTER_DIM_M]); static inline MD_SIMD_MASK simd_mask_from_u32(unsigned int a) {
#endif const unsigned long long int all = 0xFFFFFFFFFFFFFFFF;
return x; const unsigned long long int none = 0x0;
return _mm256_castsi256_pd(_mm256_set_epi64x((a & 0x8) ? all : none, (a & 0x4) ? all : none, (a & 0x2) ? all : none, (a & 0x1) ? all : none));
} }
// Packs the mask into the low 4 bits of an integer: bit i is the sign bit of lane i,
// which makes this the inverse of simd_mask_from_u32. Only required for debugging.
static inline int simd_mask_to_u32(MD_SIMD_MASK a) { return _mm256_movemask_pd(a); }
// Adds up the four lanes of a and returns the total as a scalar
static inline MD_FLOAT simd_horizontal_sum(MD_SIMD_FLOAT a) {
    // Swap the two lanes inside each 128-bit half and add, so the lowest lane
    // of each half holds the sum of that half
    MD_SIMD_FLOAT pair_sums = _mm256_add_pd(a, _mm256_permute_pd(a, 0x5));
    __m128d lower = _mm256_castpd256_pd128(pair_sums);
    __m128d upper = _mm256_extractf128_pd(pair_sums, 0x1);
    // Combine the two half sums in the lowest lane
    __m128d total = _mm_add_sd(lower, upper);
    return *((MD_FLOAT *) &total);
}
#else
// 256-bit wrappers used when NO_AVX2 is not defined; masks are __mmask8 values here.
// NOTE(review): Intel documents _mm256_rcp14_pd, _mm256_mask_add_pd and _mm256_cmp_pd_mask as
// AVX-512F/VL and _kand_mask8, _cvtu32_mask8, _cvtmask8_u32 as AVX-512DQ intrinsics;
// confirm the build target provides them.

// Per-lane reciprocal of a (presumably an approximation, see the documentation of _mm256_rcp14_pd)
static inline MD_SIMD_FLOAT simd_reciprocal(MD_SIMD_FLOAT a) { return _mm256_rcp14_pd(a); }
// Per-lane fused multiply-add: a * b + c
static inline MD_SIMD_FLOAT simd_fma(MD_SIMD_FLOAT a, MD_SIMD_FLOAT b, MD_SIMD_FLOAT c) { return _mm256_fmadd_pd(a, b, c); }
// Per-lane a + b where the mask bit is set, a unchanged elsewhere
static inline MD_SIMD_FLOAT simd_masked_add(MD_SIMD_FLOAT a, MD_SIMD_FLOAT b, MD_SIMD_MASK m) { return _mm256_mask_add_pd(a, m, a, b); }
// Mask with a bit set for every lane where a < b (ordered, non-signaling comparison)
static inline MD_SIMD_MASK simd_mask_cond_lt(MD_SIMD_FLOAT a, MD_SIMD_FLOAT b) { return _mm256_cmp_pd_mask(a, b, _CMP_LT_OQ); }
// Bitwise AND of two masks
static inline MD_SIMD_MASK simd_mask_and(MD_SIMD_MASK a, MD_SIMD_MASK b) { return _kand_mask8(a, b); }
// Converts an integer bit pattern into a mask
static inline MD_SIMD_MASK simd_mask_from_u32(unsigned int a) { return _cvtu32_mask8(a); }
// Converts a mask back into an integer bit pattern
static inline unsigned int simd_mask_to_u32(MD_SIMD_MASK a) { return _cvtmask8_u32(a); }
static inline MD_FLOAT simd_horizontal_sum(MD_SIMD_FLOAT a) { static inline MD_FLOAT simd_horizontal_sum(MD_SIMD_FLOAT a) {
__m128d a0, a1; __m128d a0, a1;
// test with shuffle & add as an alternative to hadd later // test with shuffle & add as an alternative to hadd later
@ -104,7 +120,23 @@ static inline MD_FLOAT simd_horizontal_sum(MD_SIMD_FLOAT a) {
a0 = _mm256_castpd256_pd128(a); a0 = _mm256_castpd256_pd128(a);
a1 = _mm256_extractf128_pd(a, 0x1); a1 = _mm256_extractf128_pd(a, 0x1);
a0 = _mm_add_sd(a0, a1); a0 = _mm_add_sd(a0, a1);
return *((double *) &a0); return *((MD_FLOAT *) &a0);
}
#endif
// Loads four values belonging to component d of the cluster data starting at c0.
//   CLUSTER_AOS: gathers c0[d], c0[d + 3], c0[d + 6] and c0[d + 9]
//   otherwise:   loads the four consecutive values starting at c0[d * CLUSTER_DIM_M];
//                _mm256_load_pd is the aligned load, so that address presumably has to be
//                32-byte aligned (confirm against the cluster allocation)
static MD_SIMD_FLOAT simd_load(MD_FLOAT *c0, int d) {
    MD_SIMD_FLOAT x;
#ifdef CLUSTER_AOS
#ifdef NO_AVX2
#error "Not possible to use AoS cluster layout without AVX2 support!"
#endif
    // The 128-bit integer intrinsics carry the plain _mm_ prefix, there is no _mm128_ family
    __m128i aos_gather_vindex = _mm_set_epi32(9, 6, 3, 0);
    __m128i vindex = _mm_add_epi32(aos_gather_vindex, _mm_set1_epi32(d));
    x = _mm256_i32gather_pd(c0, vindex, sizeof(double));
#else
    x = _mm256_load_pd(&c0[d * CLUSTER_DIM_M]);
#endif
    return x;
}
#endif #endif

View File

@ -29,6 +29,10 @@ typedef struct {
long long int calculated_forces; long long int calculated_forces;
long long int num_neighs; long long int num_neighs;
long long int force_iters; long long int force_iters;
long long int atoms_within_cutoff;
long long int atoms_outside_cutoff;
long long int clusters_within_cutoff;
long long int clusters_outside_cutoff;
} Stats; } Stats;
void initStats(Stats *s); void initStats(Stats *s);

View File

@ -38,6 +38,14 @@
#define DEBUG_MESSAGE #define DEBUG_MESSAGE
#endif #endif
#ifndef MAXLINE
#define MAXLINE 4096
#endif
#ifndef MAX
#define MAX(a,b) ((a) > (b) ? (a) : (b))
#endif
#define FF_LJ 0 #define FF_LJ 0
#define FF_EAM 1 #define FF_EAM 1

37
gromacs/includes/xtc.h Normal file
View File

@ -0,0 +1,37 @@
/*
* =======================================================================================
*
* Author: Jan Eitzinger (je), jan.eitzinger@fau.de
* Copyright (c) 2020 RRZE, University Erlangen-Nuremberg
*
* This file is part of MD-Bench.
*
* MD-Bench is free software: you can redistribute it and/or modify it
* under the terms of the GNU Lesser General Public License as published
* by the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* MD-Bench is distributed in the hope that it will be useful, but WITHOUT ANY
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
* PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
* details.
*
* You should have received a copy of the GNU Lesser General Public License along
* with MD-Bench. If not, see <https://www.gnu.org/licenses/>.
* =======================================================================================
*/
#include <atom.h>
#ifndef __XTC_H_
#define __XTC_H_
#ifdef XTC_OUTPUT
void xtc_init(const char *, Atom*, int);
void xtc_write(Atom*, int, int, int);
void xtc_end();
#else
#define xtc_init(a,b,c)
#define xtc_write(a,b,c,d)
#define xtc_end()
#endif
#endif

View File

@ -20,16 +20,11 @@
* with MD-Bench. If not, see <https://www.gnu.org/licenses/>. * with MD-Bench. If not, see <https://www.gnu.org/licenses/>.
* ======================================================================================= * =======================================================================================
*/ */
#include <stdlib.h>
#include <stdio.h> #include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <limits.h>
#include <math.h> #include <math.h>
#include <float.h> //--
#include <likwid-marker.h> #include <likwid-marker.h>
//--
#include <timing.h> #include <timing.h>
#include <allocate.h> #include <allocate.h>
#include <neighbor.h> #include <neighbor.h>
@ -41,6 +36,7 @@
#include <timers.h> #include <timers.h>
#include <eam.h> #include <eam.h>
#include <vtk.h> #include <vtk.h>
#include <xtc.h>
#include <util.h> #include <util.h>
#define HLINE "----------------------------------------------------------------------------\n" #define HLINE "----------------------------------------------------------------------------\n"
@ -55,29 +51,6 @@ extern double computeForceEam(Eam*, Parameter*, Atom*, Neighbor*, Stats*);
# define computeForceLJ computeForceLJ_4xn # define computeForceLJ computeForceLJ_4xn
#endif #endif
void init(Parameter *param) {
param->input_file = NULL;
param->vtk_file = NULL;
param->force_field = FF_LJ;
param->epsilon = 1.0;
param->sigma6 = 1.0;
param->rho = 0.8442;
param->ntypes = 4;
param->ntimes = 200;
param->dt = 0.005;
param->nx = 32;
param->ny = 32;
param->nz = 32;
param->cutforce = 2.5;
param->cutneigh = param->cutforce + 0.30;
param->temp = 1.44;
param->nstat = 100;
param->mass = 1.0;
param->dtforce = 0.5 * param->dt;
param->every = 20;
param->proc_freq = 2.4;
}
double setup(Parameter *param, Eam *eam, Atom *atom, Neighbor *neighbor, Stats *stats) { double setup(Parameter *param, Eam *eam, Atom *atom, Neighbor *neighbor, Stats *stats) {
if(param->force_field == FF_EAM) { initEam(eam, param); } if(param->force_field == FF_EAM) { initEam(eam, param); }
double S, E; double S, E;
@ -188,75 +161,97 @@ int main(int argc, char** argv) {
//LIKWID_MARKER_REGISTER("reneighbour"); //LIKWID_MARKER_REGISTER("reneighbour");
//LIKWID_MARKER_REGISTER("pbc"); //LIKWID_MARKER_REGISTER("pbc");
} }
init(&param);
for(int i = 0; i < argc; i++) initParameter(&param);
{ for(int i = 0; i < argc; i++) {
if((strcmp(argv[i], "-f") == 0)) if((strcmp(argv[i], "-p") == 0)) {
{ readParameter(&param, argv[++i]);
continue;
}
if((strcmp(argv[i], "-f") == 0)) {
if((param.force_field = str2ff(argv[++i])) < 0) { if((param.force_field = str2ff(argv[++i])) < 0) {
fprintf(stderr, "Invalid force field!\n"); fprintf(stderr, "Invalid force field!\n");
exit(-1); exit(-1);
} }
continue; continue;
} }
if((strcmp(argv[i], "-i") == 0)) if((strcmp(argv[i], "-i") == 0)) {
{
param.input_file = strdup(argv[++i]); param.input_file = strdup(argv[++i]);
continue; continue;
} }
if((strcmp(argv[i], "-e") == 0)) if((strcmp(argv[i], "-e") == 0)) {
{
param.eam_file = strdup(argv[++i]); param.eam_file = strdup(argv[++i]);
continue; continue;
} }
if((strcmp(argv[i], "-n") == 0) || (strcmp(argv[i], "--nsteps") == 0)) if((strcmp(argv[i], "-n") == 0) || (strcmp(argv[i], "--nsteps") == 0)) {
{
param.ntimes = atoi(argv[++i]); param.ntimes = atoi(argv[++i]);
continue; continue;
} }
if((strcmp(argv[i], "-nx") == 0)) if((strcmp(argv[i], "-nx") == 0)) {
{
param.nx = atoi(argv[++i]); param.nx = atoi(argv[++i]);
continue; continue;
} }
if((strcmp(argv[i], "-ny") == 0)) if((strcmp(argv[i], "-ny") == 0)) {
{
param.ny = atoi(argv[++i]); param.ny = atoi(argv[++i]);
continue; continue;
} }
if((strcmp(argv[i], "-nz") == 0)) if((strcmp(argv[i], "-nz") == 0)) {
{
param.nz = atoi(argv[++i]); param.nz = atoi(argv[++i]);
continue; continue;
} }
if((strcmp(argv[i], "--freq") == 0)) if((strcmp(argv[i], "-m") == 0) || (strcmp(argv[i], "--mass") == 0)) {
{ param.mass = atof(argv[++i]);
continue;
}
if((strcmp(argv[i], "-r") == 0) || (strcmp(argv[i], "--radius") == 0)) {
param.cutforce = atof(argv[++i]);
continue;
}
if((strcmp(argv[i], "-s") == 0) || (strcmp(argv[i], "--skin") == 0)) {
param.skin = atof(argv[++i]);
continue;
}
if((strcmp(argv[i], "--freq") == 0)) {
param.proc_freq = atof(argv[++i]); param.proc_freq = atof(argv[++i]);
continue; continue;
} }
if((strcmp(argv[i], "--vtk") == 0)) if((strcmp(argv[i], "--vtk") == 0)) {
{
param.vtk_file = strdup(argv[++i]); param.vtk_file = strdup(argv[++i]);
continue; continue;
} }
if((strcmp(argv[i], "-h") == 0) || (strcmp(argv[i], "--help") == 0)) if((strcmp(argv[i], "--xtc") == 0)) {
{ #ifndef XTC_OUTPUT
fprintf(stderr, "XTC not available, set XTC_OUTPUT option in config.mk file and recompile MD-Bench!");
exit(-1);
#else
param.xtc_file = strdup(argv[++i]);
#endif
continue;
}
if((strcmp(argv[i], "-h") == 0) || (strcmp(argv[i], "--help") == 0)) {
printf("MD Bench: A minimalistic re-implementation of miniMD\n"); printf("MD Bench: A minimalistic re-implementation of miniMD\n");
printf(HLINE); printf(HLINE);
printf("-p <string>: file to read parameters from (can be specified more than once)\n");
printf("-f <string>: force field (lj or eam), default lj\n"); printf("-f <string>: force field (lj or eam), default lj\n");
printf("-i <string>: input file with atom positions (dump)\n"); printf("-i <string>: input file with atom positions (dump)\n");
printf("-e <string>: input file for EAM\n"); printf("-e <string>: input file for EAM\n");
printf("-n / --nsteps <int>: set number of timesteps for simulation\n"); printf("-n / --nsteps <int>: set number of timesteps for simulation\n");
printf("-nx/-ny/-nz <int>: set linear dimension of systembox in x/y/z direction\n"); printf("-nx/-ny/-nz <int>: set linear dimension of systembox in x/y/z direction\n");
printf("-r / --radius <real>: set cutoff radius\n");
printf("-s / --skin <real>: set skin (verlet buffer)\n");
printf("--freq <real>: processor frequency (GHz)\n"); printf("--freq <real>: processor frequency (GHz)\n");
printf("--vtk <string>: VTK file for visualization\n"); printf("--vtk <string>: VTK file for visualization\n");
printf("--xtc <string>: XTC file for visualization\n");
printf(HLINE); printf(HLINE);
exit(EXIT_SUCCESS); exit(EXIT_SUCCESS);
} }
} }
param.cutneigh = param.cutforce + param.skin;
setup(&param, &eam, &atom, &neighbor, &stats); setup(&param, &eam, &atom, &neighbor, &stats);
printParameter(&param);
printf("step\ttemp\t\tpressure\n");
computeThermo(0, &param, &atom); computeThermo(0, &param, &atom);
#if defined(MEM_TRACER) || defined(INDEX_TRACER) #if defined(MEM_TRACER) || defined(INDEX_TRACER)
traceAddresses(&param, &atom, &neighbor, n + 1); traceAddresses(&param, &atom, &neighbor, n + 1);
@ -274,10 +269,18 @@ int main(int argc, char** argv) {
write_data_to_vtk_file(param.vtk_file, &atom, 0); write_data_to_vtk_file(param.vtk_file, &atom, 0);
} }
if(param.xtc_file != NULL) {
xtc_init(param.xtc_file, &atom, 0);
}
for(int n = 0; n < param.ntimes; n++) { for(int n = 0; n < param.ntimes; n++) {
initialIntegrate(&param, &atom); initialIntegrate(&param, &atom);
if((n + 1) % param.every) { if((n + 1) % param.reneigh_every) {
if(!((n + 1) % param.prune_every)) {
pruneNeighbor(&param, &atom, &neighbor);
}
updatePbc(&atom, &param, 0); updatePbc(&atom, &param, 0);
} else { } else {
timer[NEIGH] += reneighbour(&param, &atom, &neighbor); timer[NEIGH] += reneighbour(&param, &atom, &neighbor);
@ -299,16 +302,27 @@ int main(int argc, char** argv) {
computeThermo(n + 1, &param, &atom); computeThermo(n + 1, &param, &atom);
} }
if(param.vtk_file != NULL) { int write_pos = !((n + 1) % param.x_out_every);
write_data_to_vtk_file(param.vtk_file, &atom, n + 1); int write_vel = !((n + 1) % param.v_out_every);
if(write_pos || write_vel) {
if(param.vtk_file != NULL) {
write_data_to_vtk_file(param.vtk_file, &atom, n + 1);
}
if(param.xtc_file != NULL) {
xtc_write(&atom, n + 1, write_pos, write_vel);
}
} }
} }
timer[TOTAL] = getTimeStamp() - timer[TOTAL]; timer[TOTAL] = getTimeStamp() - timer[TOTAL];
computeThermo(-1, &param, &atom); computeThermo(-1, &param, &atom);
if(param.xtc_file != NULL) {
xtc_end();
}
printf(HLINE); printf(HLINE);
printf("Force field: %s\n", ff2str(param.force_field));
printf("Data layout for positions: %s\n", POS_DATA_LAYOUT); printf("Data layout for positions: %s\n", POS_DATA_LAYOUT);
#if PRECISION == 1 #if PRECISION == 1
printf("Using single precision floating point.\n"); printf("Using single precision floating point.\n");

View File

@ -92,16 +92,15 @@ void setupNeighbor(Parameter *param, Atom *atom) {
MD_FLOAT atom_density = ((MD_FLOAT)(atom->Nlocal)) / ((xhi - xlo) * (yhi - ylo) * (zhi - zlo)); MD_FLOAT atom_density = ((MD_FLOAT)(atom->Nlocal)) / ((xhi - xlo) * (yhi - ylo) * (zhi - zlo));
MD_FLOAT atoms_in_cell = MAX(CLUSTER_DIM_M, CLUSTER_DIM_N); MD_FLOAT atoms_in_cell = MAX(CLUSTER_DIM_M, CLUSTER_DIM_N);
//MD_FLOAT atoms_in_cell = CLUSTER_DIM_M; MD_FLOAT targetsizex = cbrt(atoms_in_cell / atom_density);
binsizex = cbrt(atoms_in_cell / atom_density); MD_FLOAT targetsizey = cbrt(atoms_in_cell / atom_density);
binsizey = cbrt(atoms_in_cell / atom_density); nbinx = MAX(1, (int)ceil((xhi - xlo) / targetsizex));
cutneighsq = cutneigh * cutneigh; nbiny = MAX(1, (int)ceil((yhi - ylo) / targetsizey));
nbinx = (int)((xhi - xlo) / binsizex); binsizex = (xhi - xlo) / nbinx;
nbiny = (int)((yhi - ylo) / binsizey); binsizey = (yhi - ylo) / nbiny;
if(nbinx == 0) { nbinx = 1; }
if(nbiny == 0) { nbiny = 1; }
bininvx = 1.0 / binsizex; bininvx = 1.0 / binsizex;
bininvy = 1.0 / binsizey; bininvy = 1.0 / binsizey;
cutneighsq = cutneigh * cutneigh;
coord = xlo - cutneigh - SMALL * xprd; coord = xlo - cutneigh - SMALL * xprd;
mbinxlo = (int) (coord * bininvx); mbinxlo = (int) (coord * bininvx);
@ -161,6 +160,14 @@ void setupNeighbor(Parameter *param, Atom *atom) {
if (cluster_bins) { free(cluster_bins); } if (cluster_bins) { free(cluster_bins); }
cluster_bins = (int*) malloc(mbins * clusters_per_bin * sizeof(int)); cluster_bins = (int*) malloc(mbins * clusters_per_bin * sizeof(int));
/*
DEBUG_MESSAGE("lo, hi = (%e, %e, %e), (%e, %e, %e)\n", xlo, ylo, zlo, xhi, yhi, zhi);
DEBUG_MESSAGE("binsize = %e, %e\n", binsizex, binsizey);
DEBUG_MESSAGE("mbin lo, hi = (%d, %d), (%d, %d)\n", mbinxlo, mbinylo, mbinxhi, mbinyhi);
DEBUG_MESSAGE("mbins = %d (%d x %d)\n", mbins, mbinx, mbiny);
DEBUG_MESSAGE("nextx = %d, nexty = %d\n", nextx, nexty);
*/
} }
MD_FLOAT getBoundingBoxDistanceSq(Atom *atom, int ci, int cj) { MD_FLOAT getBoundingBoxDistanceSq(Atom *atom, int ci, int cj) {
@ -374,6 +381,47 @@ void buildNeighbor(Atom *atom, Neighbor *neighbor) {
DEBUG_MESSAGE("buildNeighbor end\n"); DEBUG_MESSAGE("buildNeighbor end\n");
} }
/*
 * Remove from every local cluster's neighbor list those clusters for which
 * atomDistanceInRange() reports no atom pair within the squared cutoff.
 *
 * param:    simulation parameters (currently unused; the cutoff is taken from
 *           the file-level cutneighsq rather than from param->cutforce).
 * atom:     supplies the local and ghost cluster counts.
 * neighbor: neighbor lists and per-cluster counts, updated in place.
 *
 * When CLUSTER_DIM_N > CLUSTER_DIM_M, trailing dummy entries (index nall - 1)
 * are stripped before pruning and re-appended afterwards so that the list
 * length stays a multiple of CLUSTER_DIM_N / CLUSTER_DIM_M.
 */
void pruneNeighbor(Parameter *param, Atom *atom, Neighbor *neighbor) {
    DEBUG_MESSAGE("pruneNeighbor start\n");
    int nall = atom->Nclusters_local + atom->Nclusters_ghost;
    MD_FLOAT cutsq = cutneighsq;

    for(int ci = 0; ci < atom->Nclusters_local; ci++) {
        int *neighs = &neighbor->neighbors[ci * neighbor->maxneighs];
        int numneighs = neighbor->numneigh[ci];
        int k = 0;

        // Remove dummy clusters if necessary. The numneighs > 0 guard keeps
        // the loop from reading neighs[-1] when the list is (or becomes) empty.
        if(CLUSTER_DIM_N > CLUSTER_DIM_M) {
            while(numneighs > 0 && neighs[numneighs - 1] == nall - 1) {
                numneighs--;
            }
        }

        // Unordered removal: an out-of-range entry is overwritten by the last
        // entry, which is then examined in the next iteration (k is not advanced).
        while(k < numneighs) {
            int cj = neighs[k];
            if(atomDistanceInRange(atom, ci, cj, cutsq)) {
                k++;
            } else {
                numneighs--;
                neighs[k] = neighs[numneighs];
            }
        }

        // Re-add dummy clusters if necessary
        if(CLUSTER_DIM_N > CLUSTER_DIM_M) {
            while(numneighs % (CLUSTER_DIM_N / CLUSTER_DIM_M)) {
                neighs[numneighs++] = nall - 1; // Last cluster is always a dummy cluster
            }
        }

        neighbor->numneigh[ci] = numneighs;
    }

    DEBUG_MESSAGE("pruneNeighbor end\n");
}
/* internal subroutines */ /* internal subroutines */
MD_FLOAT bindist(int i, int j) { MD_FLOAT bindist(int i, int j) {
MD_FLOAT delx, dely, delz; MD_FLOAT delx, dely, delz;

156
gromacs/parameter.c Normal file
View File

@ -0,0 +1,156 @@
/*
* =======================================================================================
*
* Author: Jan Eitzinger (je), jan.eitzinger@fau.de
* Copyright (c) 2020 RRZE, University Erlangen-Nuremberg
*
* This file is part of MD-Bench.
*
* MD-Bench is free software: you can redistribute it and/or modify it
* under the terms of the GNU Lesser General Public License as published
* by the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* MD-Bench is distributed in the hope that it will be useful, but WITHOUT ANY
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
* PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
* details.
*
* You should have received a copy of the GNU Lesser General Public License along
* with MD-Bench. If not, see <https://www.gnu.org/licenses/>.
* =======================================================================================
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
//---
#include <parameter.h>
#include <util.h>
/*
 * Fill *param with the built-in defaults. Called before command-line and
 * parameter-file values are applied on top.
 */
void initParameter(Parameter *param) {
    // File names: nothing selected by default
    param->eam_file = NULL;
    param->xtc_file = NULL;
    param->vtk_file = NULL;
    param->input_file = NULL;

    // Force field and its constants
    param->force_field = FF_LJ;
    param->epsilon = 1.0;
    param->sigma = 1.0;
    param->sigma6 = 1.0;
    param->mass = 1.0;
    param->ntypes = 4;

    // System size and thermodynamic state
    param->nx = param->ny = param->nz = 32;
    param->rho = 0.8442;
    param->temp = 1.44;

    // Cutoffs: the neighbor cutoff is the force cutoff plus the skin
    param->cutforce = 2.5;
    param->skin = 0.3;
    param->cutneigh = param->cutforce + param->skin;

    // Time integration
    param->ntimes = 200;
    param->dt = 0.005;
    param->dtforce = 0.5 * param->dt;

    // Intervals, in timesteps
    param->nstat = 100;
    param->reneigh_every = 20;
    param->prune_every = 1000;
    param->x_out_every = 20;
    param->v_out_every = 5;

    // Processor frequency in GHz (unit as printed by printParameter)
    param->proc_freq = 2.4;
}
/*
 * Read "key value" pairs from a parameter file and store them in *param.
 *
 * Everything from '#' to the end of a line is ignored. Key and value are
 * separated by blanks or tabs; lines without both a key and a value are
 * skipped. A key is recognized when it starts with a known parameter name
 * (prefix match). After the file has been read, sigma6 is recomputed from sigma.
 *
 * Exits the program if the file cannot be opened.
 */
void readParameter(Parameter *param, const char *filename) {
    // Include line terminators so values (notably file names) carry no
    // trailing "\n" or "\r".
    static const char delims[] = " \t\r\n";
    FILE *fp = fopen(filename, "r");
    char line[MAXLINE];

    if(!fp) {
        fprintf(stderr, "Could not open parameter file: %s\n", filename);
        exit(-1);
    }

    #define PARSE_PARAM(p,f)    if(strncmp(tok, #p, sizeof(#p) - 1) == 0) { param->p = f(val); }
    #define PARSE_STRING(p)     PARSE_PARAM(p, strdup)
    #define PARSE_INT(p)        PARSE_PARAM(p, atoi)
    #define PARSE_REAL(p)       PARSE_PARAM(p, atof)

    // Loop on the fgets() result: feof() only turns true after a read has
    // already failed, which would process a stale buffer once more.
    while(fgets(line, MAXLINE, fp) != NULL) {
        char *comment = strchr(line, '#');
        if(comment != NULL) { *comment = '\0'; }

        char *tok = strtok(line, delims);
        char *val = strtok(NULL, delims);

        if(tok != NULL && val != NULL) {
            PARSE_PARAM(force_field, str2ff);
            PARSE_STRING(input_file);
            PARSE_STRING(eam_file);
            PARSE_STRING(vtk_file);
            PARSE_STRING(xtc_file);
            PARSE_REAL(epsilon);
            PARSE_REAL(sigma);
            PARSE_REAL(rho);
            PARSE_REAL(dt);
            PARSE_REAL(cutforce);
            PARSE_REAL(skin);
            PARSE_REAL(temp);
            PARSE_REAL(mass);
            PARSE_REAL(proc_freq);
            PARSE_INT(ntypes);
            PARSE_INT(ntimes);
            PARSE_INT(nx);
            PARSE_INT(ny);
            PARSE_INT(nz);
            PARSE_INT(nstat);
            PARSE_INT(reneigh_every);
            PARSE_INT(prune_every);
            PARSE_INT(x_out_every);
            PARSE_INT(v_out_every);
        }
    }

    #undef PARSE_REAL
    #undef PARSE_INT
    #undef PARSE_STRING
    #undef PARSE_PARAM

    // Update sigma6 parameter
    MD_FLOAT s2 = param->sigma * param->sigma;
    param->sigma6 = s2 * s2 * s2;
    fclose(fp);
}
/*
 * Print the active parameter set to stdout. File names are listed only when
 * they are set; every other parameter is always printed.
 */
void printParameter(Parameter *param) {
    const Parameter *p = param;

    printf("Parameters:\n");

    // Optional file names
    if(p->input_file != NULL) { printf("Input file: %s\n", p->input_file); }
    if(p->vtk_file != NULL)   { printf("VTK file: %s\n", p->vtk_file); }
    if(p->xtc_file != NULL)   { printf("XTC file: %s\n", p->xtc_file); }
    if(p->eam_file != NULL)   { printf("EAM file: %s\n", p->eam_file); }

    // Force field and domain
    printf("\tForce field: %s\n", ff2str(p->force_field));
    printf("\tUnit cells (nx, ny, nz): %d, %d, %d\n", p->nx, p->ny, p->nz);
    printf("\tDomain box sizes (x, y, z): %e, %e, %e\n", p->xprd, p->yprd, p->zprd);
    printf("\tLattice size: %e\n", p->lattice);

    // Physical constants
    printf("\tEpsilon: %e\n", p->epsilon);
    printf("\tSigma: %e\n", p->sigma);
    printf("\tTemperature: %e\n", p->temp);
    printf("\tRHO: %e\n", p->rho);
    printf("\tMass: %e\n", p->mass);

    // Counts and intervals
    printf("\tNumber of types: %d\n", p->ntypes);
    printf("\tNumber of timesteps: %d\n", p->ntimes);
    printf("\tReport stats every (timesteps): %d\n", p->nstat);
    printf("\tReneighbor every (timesteps): %d\n", p->reneigh_every);
    printf("\tPrune every (timesteps): %d\n", p->prune_every);
    printf("\tOutput positions every (timesteps): %d\n", p->x_out_every);
    printf("\tOutput velocities every (timesteps): %d\n", p->v_out_every);

    // Integration and cutoffs
    printf("\tDelta time (dt): %e\n", p->dt);
    printf("\tCutoff radius: %e\n", p->cutforce);
    printf("\tSkin: %e\n", p->skin);
    printf("\tProcessor frequency (GHz): %.4f\n\n", p->proc_freq);
}

View File

@ -9,10 +9,15 @@ void initStats(Stats *s) {
s->calculated_forces = 0; s->calculated_forces = 0;
s->num_neighs = 0; s->num_neighs = 0;
s->force_iters = 0; s->force_iters = 0;
s->atoms_within_cutoff = 0;
s->atoms_outside_cutoff = 0;
s->clusters_within_cutoff = 0;
s->clusters_outside_cutoff = 0;
} }
void displayStatistics(Atom *atom, Parameter *param, Stats *stats, double *timer) { void displayStatistics(Atom *atom, Parameter *param, Stats *stats, double *timer) {
#ifdef COMPUTE_STATS #ifdef COMPUTE_STATS
const int MxN = CLUSTER_DIM_M * CLUSTER_DIM_N; const int MxN = CLUSTER_DIM_M * CLUSTER_DIM_N;
double avg_atoms_cluster = (double)(atom->Nlocal) / (double)(atom->Nclusters_local); double avg_atoms_cluster = (double)(atom->Nlocal) / (double)(atom->Nclusters_local);
double force_useful_volume = 1e-9 * ( (double)(atom->Nlocal * (param->ntimes + 1)) * (sizeof(MD_FLOAT) * 6 + sizeof(int)) + double force_useful_volume = 1e-9 * ( (double)(atom->Nlocal * (param->ntimes + 1)) * (sizeof(MD_FLOAT) * 6 + sizeof(int)) +
@ -20,9 +25,11 @@ void displayStatistics(Atom *atom, Parameter *param, Stats *stats, double *timer
double avg_neigh_atom = (stats->num_neighs * CLUSTER_DIM_N) / (double)(atom->Nlocal * (param->ntimes + 1)); double avg_neigh_atom = (stats->num_neighs * CLUSTER_DIM_N) / (double)(atom->Nlocal * (param->ntimes + 1));
double avg_neigh_cluster = (double)(stats->num_neighs) / (double)(stats->calculated_forces); double avg_neigh_cluster = (double)(stats->num_neighs) / (double)(stats->calculated_forces);
double avg_simd = stats->force_iters / (double)(atom->Nlocal * (param->ntimes + 1)); double avg_simd = stats->force_iters / (double)(atom->Nlocal * (param->ntimes + 1));
#ifdef EXPLICIT_TYPES
#ifdef EXPLICIT_TYPES
force_useful_volume += 1e-9 * (double)((atom->Nlocal * (param->ntimes + 1)) + stats->num_neighs) * sizeof(int); force_useful_volume += 1e-9 * (double)((atom->Nlocal * (param->ntimes + 1)) + stats->num_neighs) * sizeof(int);
#endif #endif
printf("Statistics:\n"); printf("Statistics:\n");
printf("\tVector width: %d, Processor frequency: %.4f GHz\n", VECTOR_WIDTH, param->proc_freq); printf("\tVector width: %d, Processor frequency: %.4f GHz\n", VECTOR_WIDTH, param->proc_freq);
printf("\tAverage atoms per cluster: %.4f\n", avg_atoms_cluster); printf("\tAverage atoms per cluster: %.4f\n", avg_atoms_cluster);
@ -33,5 +40,13 @@ void displayStatistics(Atom *atom, Parameter *param, Stats *stats, double *timer
printf("\tTotal number of SIMD iterations: %lld\n", stats->force_iters); printf("\tTotal number of SIMD iterations: %lld\n", stats->force_iters);
printf("\tUseful read data volume for force computation: %.2fGB\n", force_useful_volume); printf("\tUseful read data volume for force computation: %.2fGB\n", force_useful_volume);
printf("\tCycles/SIMD iteration: %.4f\n", timer[FORCE] * param->proc_freq * 1e9 / stats->force_iters); printf("\tCycles/SIMD iteration: %.4f\n", timer[FORCE] * param->proc_freq * 1e9 / stats->force_iters);
#ifdef USE_REFERENCE_VERSION
const double atoms_eff = (double)stats->atoms_within_cutoff / (double)(stats->atoms_within_cutoff + stats->atoms_outside_cutoff) * 100.0;
printf("\tAtoms within/outside cutoff radius: %lld/%lld (%.2f%%)\n", stats->atoms_within_cutoff, stats->atoms_outside_cutoff, atoms_eff);
const double clusters_eff = (double)stats->clusters_within_cutoff / (double)(stats->clusters_within_cutoff + stats->clusters_outside_cutoff) * 100.0;
printf("\tClusters within/outside cutoff radius: %lld/%lld (%.2f%%)\n", stats->clusters_within_cutoff, stats->clusters_outside_cutoff, clusters_eff);
#endif
#endif #endif
} }

View File

@ -65,8 +65,6 @@ void setupThermo(Parameter *param, int natoms)
e_scale = 524287.985533;//16.0; e_scale = 524287.985533;//16.0;
param->dtforce /= mvv2e; param->dtforce /= mvv2e;
} }
printf("step\ttemp\t\tpressure\n");
} }
void computeThermo(int iflag, Parameter *param, Atom *atom) void computeThermo(int iflag, Parameter *param, Atom *atom)

71
gromacs/xtc.c Normal file
View File

@ -0,0 +1,71 @@
/*
* =======================================================================================
*
* Author: Jan Eitzinger (je), jan.eitzinger@fau.de
* Copyright (c) 2020 RRZE, University Erlangen-Nuremberg
*
* This file is part of MD-Bench.
*
* MD-Bench is free software: you can redistribute it and/or modify it
* under the terms of the GNU Lesser General Public License as published
* by the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* MD-Bench is distributed in the hope that it will be useful, but WITHOUT ANY
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
* PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
* details.
*
* You should have received a copy of the GNU Lesser General Public License along
* with MD-Bench. If not, see <https://www.gnu.org/licenses/>.
* =======================================================================================
*/
#include <stdlib.h>
//---
#include <atom.h>
#include <allocate.h>
#include <xtc.h>
#ifdef XTC_OUTPUT
#include <gromacs/fileio/xtcio.h>
static struct t_fileio *xtc_file = NULL;
static rvec *x_buf = NULL;
static rvec basis[3];
/*
 * Open the XTC output file, allocate the position staging buffer and write
 * the first frame for the given timestep.
 *
 * filename: path handed to open_xtc() in write mode.
 * atom:     atom container; Nlocal determines the staging buffer size.
 * timestep: step number recorded with the first frame.
 */
void xtc_init(const char *filename, Atom *atom, int timestep) {
    // Box basis: ones on the diagonal ...
    basis[0][XX] = 1.0;
    basis[1][YY] = 1.0;
    basis[2][ZZ] = 1.0;
    // ... and zeros everywhere else.
    basis[0][YY] = 0.0;
    basis[0][ZZ] = 0.0;
    basis[1][XX] = 0.0;
    basis[1][ZZ] = 0.0;
    basis[2][XX] = 0.0;
    basis[2][YY] = 0.0;

    xtc_file = open_xtc(filename, "w");

    // One rvec per local atom, plus one spare entry.
    x_buf = (rvec *) allocate(ALIGNMENT, sizeof(rvec) * (atom->Nlocal + 1));

    xtc_write(atom, timestep, 1, 1);
}
/*
 * Copy the positions of all atoms in local clusters into the staging buffer
 * and append them to the XTC file as one frame.
 *
 * atom:      atom container holding the clusters.
 * timestep:  step number stored with the frame.
 * write_pos, write_vel: accepted for interface symmetry but not evaluated
 *            here; only positions are written.
 */
void xtc_write(Atom *atom, int timestep, int write_pos, int write_vel) {
    int out = 0; // next free slot in x_buf

    for(int c = 0; c < atom->Nclusters_local; ++c) {
        MD_FLOAT *pos = cluster_pos_ptr(c);
        const int natoms = atom->clusters[c].natoms;

        for(int a = 0; a < natoms; ++a, ++out) {
            x_buf[out][XX] = cluster_x(pos, a);
            x_buf[out][YY] = cluster_y(pos, a);
            x_buf[out][ZZ] = cluster_z(pos, a);
        }
    }

    // Final argument is presumably the XTC precision -- confirm against the GROMACS API.
    write_xtc(xtc_file, atom->Nlocal, timestep, 0.0, (const rvec *) basis, (const rvec *) x_buf, 1000);
}
/*
 * Release the staging buffer and close the XTC file. The file-level handles
 * are reset afterwards so that a stale pointer is never freed or used again.
 */
void xtc_end() {
    free(x_buf);
    x_buf = NULL;

    if(xtc_file != NULL) {
        close_xtc(xtc_file);
        xtc_file = NULL;
    }
}
#endif

11
include_GROMACS.mk Normal file
View File

@ -0,0 +1,11 @@
GROMACS_PATH=/apps/Gromacs/2018.1-mkl
GROMACS_INC ?= -I${GROMACS_PATH}/include
GROMACS_DEFINES ?=
GROMACS_LIB ?= -L${GROMACS_PATH}/lib64
ifeq ($(strip $(XTC_OUTPUT)),true)
INCLUDES += ${GROMACS_INC}
DEFINES += ${GROMACS_DEFINES}
LIBS += -lgromacs
LFLAGS += ${GROMACS_LIB}
endif

View File

@ -4,7 +4,7 @@ LINKER = $(CC)
OPENMP = #-qopenmp OPENMP = #-qopenmp
PROFILE = #-profile-functions -g -pg PROFILE = #-profile-functions -g -pg
OPTS = -Ofast -xCORE-AVX512 -qopt-zmm-usage=high $(PROFILE) OPTS = -Ofast -xCORE-AVX512 -qopt-zmm-usage=high $(PROFILE)
#OPTS = -fast -xCORE-AVX2 $(PROFILE) #OPTS = -Ofast -xCORE-AVX2 $(PROFILE)
#OPTS = -fast -xAVX $(PROFILE) #OPTS = -fast -xAVX $(PROFILE)
#OPTS = -fast -xSSE4.2 $(PROFILE) #OPTS = -fast -xSSE4.2 $(PROFILE)
#OPTS = -fast -no-vec $(PROFILE) #OPTS = -fast -no-vec $(PROFILE)
@ -12,6 +12,6 @@ OPTS = -Ofast -xCORE-AVX512 -qopt-zmm-usage=high $(PROFILE)
CFLAGS = $(PROFILE) -restrict $(OPENMP) $(OPTS) CFLAGS = $(PROFILE) -restrict $(OPENMP) $(OPTS)
ASFLAGS = #-masm=intel ASFLAGS = #-masm=intel
LFLAGS = $(PROFILE) $(OPTS) $(OPENMP) LFLAGS = $(PROFILE) $(OPTS) $(OPENMP)
DEFINES = -D_GNU_SOURCE #-DLIKWID_PERFMON DEFINES = -std=c11 -pedantic-errors -D_GNU_SOURCE #-DLIKWID_PERFMON
INCLUDES = #$(LIKWID_INC) INCLUDES = #$(LIKWID_INC)
LIBS = -lm #$(LIKWID_LIB) -llikwid LIBS = -lm #$(LIKWID_LIB) -llikwid

View File

@ -161,7 +161,108 @@ void createAtom(Atom *atom, Parameter *param)
} }
} }
/*
 * Map an atom type name to its integer type id.
 *
 * Only names starting with "Ar" (Argon) are known and map to 0. Any other
 * name, or a missing (NULL) name, prints an error and terminates the program.
 */
int type_str2int(const char *type) {
    // A NULL token (e.g. from a truncated input line) must not reach strncmp().
    if(type == NULL) {
        fprintf(stderr, "Invalid atom type: (missing)\n");
        exit(-1);
    }

    if(strncmp(type, "Ar", 2) == 0) { return 0; } // Argon

    fprintf(stderr, "Invalid atom type: %s\n", type);
    exit(-1);
    return -1;
}
/*
 * Read atoms from param->input_file, choosing the reader by the last four
 * characters of the file name (".pdb" or ".dmp").
 *
 * Returns the value of the selected reader. Terminates the program if the
 * file name is missing, shorter than an extension, or has another extension.
 */
int readAtom(Atom* atom, Parameter* param) {
    const char *path = param->input_file;
    size_t len = (path != NULL) ? strlen(path) : 0;

    // Only look at the tail when the name is long enough to hold an extension;
    // otherwise &path[len - 4] would point before the start of the string.
    if(len >= 4) {
        const char *ext = &path[len - 4];
        if(strncmp(ext, ".pdb", 4) == 0) { return readAtom_pdb(atom, param); }
        if(strncmp(ext, ".dmp", 4) == 0) { return readAtom_dmp(atom, param); }
    }

    fprintf(stderr, "Invalid input file extension: %s\nValid choices are: pdb, dmp\n",
            (path != NULL) ? path : "(null)");
    exit(-1);
    return -1;
}
/* Return the next whitespace-separated field of the line currently being
 * tokenized with strtok(); terminate with an error if the line has no more. */
static char *pdb_next_field(const char *field, const char *filename) {
    char *tok = strtok(NULL, " \t\r\n");
    if(tok == NULL) {
        fprintf(stderr, "Input error: missing %s field in %s\n", field, filename);
        exit(-1);
    }
    return tok;
}

/*
 * Read atoms from the PDB-style file named by param->input_file.
 *
 * Handled records:
 *   CRYST1  - sets the domain bounds (lower bounds 0, upper bounds from the
 *             first three fields) and the derived box lengths in *param.
 *   ATOM    - fields: id, type name, label, component id, x, y, z. The id is
 *             1-based in the file; velocities are initialized to zero.
 *   HEADER, REMARK, MODEL, TER, ENDMDL - ignored.
 * Blank lines are skipped; any other record terminates the program.
 *
 * After reading, the per-type-pair tables (epsilon, sigma6, cutforcesq,
 * cutneighsq) are allocated and filled from *param.
 *
 * Returns the number of atoms read. Terminates the program if the file cannot
 * be opened, a record is malformed, or no atom was read.
 */
int readAtom_pdb(Atom* atom, Parameter* param) {
    const char *filename = param->input_file;
    FILE *fp = fopen(filename, "r");
    char line[MAXLINE];
    int read_atoms = 0;

    if(!fp) {
        fprintf(stderr, "Could not open input file: %s\n", filename);
        exit(-1);
        return -1;
    }

    // Loop on the fgets() result: testing feof() first would process the last
    // line a second time after the final read has failed.
    while(fgets(line, MAXLINE, fp) != NULL) {
        char *item = strtok(line, " \t\r\n");
        if(item == NULL) { continue; } // blank line

        if(strncmp(item, "CRYST1", 6) == 0) {
            param->xlo = 0.0;
            param->xhi = atof(pdb_next_field("box x", filename));
            param->ylo = 0.0;
            param->yhi = atof(pdb_next_field("box y", filename));
            param->zlo = 0.0;
            param->zhi = atof(pdb_next_field("box z", filename));
            param->xprd = param->xhi - param->xlo;
            param->yprd = param->yhi - param->ylo;
            param->zprd = param->zhi - param->zlo;
            // alpha, beta, gamma, sGroup, z are not used
        } else if(strncmp(item, "ATOM", 4) == 0) {
            int atom_id = atoi(pdb_next_field("atom id", filename)) - 1;
            if(atom_id < 0) {
                fprintf(stderr, "Input error: invalid atom id in %s\n", filename);
                exit(-1);
            }

            while(atom_id + 1 >= atom->Nmax) {
                growAtom(atom);
            }

            atom->type[atom_id] = type_str2int(pdb_next_field("atom type", filename));
            (void) pdb_next_field("label", filename);        // not used
            (void) pdb_next_field("component id", filename); // not used
            atom_x(atom_id) = atof(pdb_next_field("x", filename));
            atom_y(atom_id) = atof(pdb_next_field("y", filename));
            atom_z(atom_id) = atof(pdb_next_field("z", filename));
            atom->vx[atom_id] = 0.0;
            atom->vy[atom_id] = 0.0;
            atom->vz[atom_id] = 0.0;
            // Remaining fields (occupancy, charge) are not used and not required.
            atom->ntypes = MAX(atom->type[atom_id] + 1, atom->ntypes);
            atom->Natoms++;
            atom->Nlocal++;
            read_atoms++;
        } else if(strncmp(item, "HEADER", 6) == 0 ||
                  strncmp(item, "REMARK", 6) == 0 ||
                  strncmp(item, "MODEL", 5) == 0 ||
                  strncmp(item, "TER", 3) == 0 ||
                  strncmp(item, "ENDMDL", 6) == 0) {
            // Do nothing
        } else {
            fprintf(stderr, "Invalid item: %s\n", item);
            exit(-1);
            return -1;
        }
    }

    if(!read_atoms) {
        fprintf(stderr, "Input error: No atoms read!\n");
        exit(-1);
        return -1;
    }

    atom->epsilon = allocate(ALIGNMENT, atom->ntypes * atom->ntypes * sizeof(MD_FLOAT));
    atom->sigma6 = allocate(ALIGNMENT, atom->ntypes * atom->ntypes * sizeof(MD_FLOAT));
    atom->cutforcesq = allocate(ALIGNMENT, atom->ntypes * atom->ntypes * sizeof(MD_FLOAT));
    atom->cutneighsq = allocate(ALIGNMENT, atom->ntypes * atom->ntypes * sizeof(MD_FLOAT));
    for(int i = 0; i < atom->ntypes * atom->ntypes; i++) {
        atom->epsilon[i] = param->epsilon;
        atom->sigma6[i] = param->sigma6;
        atom->cutneighsq[i] = param->cutneigh * param->cutneigh;
        atom->cutforcesq[i] = param->cutforce * param->cutforce;
    }

    fprintf(stdout, "Read %d atoms from %s\n", read_atoms, filename);
    fclose(fp);
    return read_atoms;
}
int readAtom_dmp(Atom* atom, Parameter* param)
{ {
FILE *fp = fopen(param->input_file, "r"); FILE *fp = fopen(param->input_file, "r");
char line[MAXLINE]; char line[MAXLINE];

View File

@ -87,6 +87,11 @@ double computeForceLJFullNeigh(Parameter *param, Atom *atom, Neighbor *neighbor,
fix += delx * force; fix += delx * force;
fiy += dely * force; fiy += dely * force;
fiz += delz * force; fiz += delz * force;
#ifdef USE_REFERENCE_VERSION
addStat(stats->atoms_within_cutoff, 1);
} else {
addStat(stats->atoms_outside_cutoff, 1);
#endif
} }
} }

View File

@ -42,6 +42,8 @@ typedef struct {
extern void initAtom(Atom*); extern void initAtom(Atom*);
extern void createAtom(Atom*, Parameter*); extern void createAtom(Atom*, Parameter*);
extern int readAtom(Atom*, Parameter*); extern int readAtom(Atom*, Parameter*);
extern int readAtom_pdb(Atom*, Parameter*);
extern int readAtom_dmp(Atom*, Parameter*);
extern void growAtom(Atom*); extern void growAtom(Atom*);
#ifdef AOS #ifdef AOS

View File

@ -45,6 +45,7 @@ typedef struct {
int halfneigh; int halfneigh;
MD_FLOAT dt; MD_FLOAT dt;
MD_FLOAT dtforce; MD_FLOAT dtforce;
MD_FLOAT skin;
MD_FLOAT cutforce; MD_FLOAT cutforce;
MD_FLOAT cutneigh; MD_FLOAT cutneigh;
int nx, ny, nz; int nx, ny, nz;

View File

@ -28,6 +28,8 @@
typedef struct { typedef struct {
long long int total_force_neighs; long long int total_force_neighs;
long long int total_force_iters; long long int total_force_iters;
long long int atoms_within_cutoff;
long long int atoms_outside_cutoff;
} Stats; } Stats;
void initStats(Stats *s); void initStats(Stats *s);

View File

@ -64,7 +64,8 @@ void init(Parameter *param)
param->ny = 32; param->ny = 32;
param->nz = 32; param->nz = 32;
param->cutforce = 2.5; param->cutforce = 2.5;
param->cutneigh = param->cutforce + 0.30; param->skin = 0.3;
param->cutneigh = param->cutforce + param->skin;
param->temp = 1.44; param->temp = 1.44;
param->nstat = 100; param->nstat = 100;
param->mass = 1.0; param->mass = 1.0;
@ -188,56 +189,54 @@ int main(int argc, char** argv)
for(int i = 0; i < argc; i++) for(int i = 0; i < argc; i++)
{ {
if((strcmp(argv[i], "-f") == 0)) if((strcmp(argv[i], "-f") == 0)) {
{
if((param.force_field = str2ff(argv[++i])) < 0) { if((param.force_field = str2ff(argv[++i])) < 0) {
fprintf(stderr, "Invalid force field!\n"); fprintf(stderr, "Invalid force field!\n");
exit(-1); exit(-1);
} }
continue; continue;
} }
if((strcmp(argv[i], "-i") == 0)) if((strcmp(argv[i], "-i") == 0)) {
{
param.input_file = strdup(argv[++i]); param.input_file = strdup(argv[++i]);
continue; continue;
} }
if((strcmp(argv[i], "-e") == 0)) if((strcmp(argv[i], "-e") == 0)) {
{
param.eam_file = strdup(argv[++i]); param.eam_file = strdup(argv[++i]);
continue; continue;
} }
if((strcmp(argv[i], "-n") == 0) || (strcmp(argv[i], "--nsteps") == 0)) if((strcmp(argv[i], "-n") == 0) || (strcmp(argv[i], "--nsteps") == 0)) {
{
param.ntimes = atoi(argv[++i]); param.ntimes = atoi(argv[++i]);
continue; continue;
} }
if((strcmp(argv[i], "-nx") == 0)) if((strcmp(argv[i], "-nx") == 0)) {
{
param.nx = atoi(argv[++i]); param.nx = atoi(argv[++i]);
continue; continue;
} }
if((strcmp(argv[i], "-ny") == 0)) if((strcmp(argv[i], "-ny") == 0)) {
{
param.ny = atoi(argv[++i]); param.ny = atoi(argv[++i]);
continue; continue;
} }
if((strcmp(argv[i], "-nz") == 0)) if((strcmp(argv[i], "-nz") == 0)) {
{
param.nz = atoi(argv[++i]); param.nz = atoi(argv[++i]);
continue; continue;
} }
if((strcmp(argv[i], "--freq") == 0)) if((strcmp(argv[i], "-r") == 0) || (strcmp(argv[i], "--radius") == 0)) {
{ param.cutforce = atof(argv[++i]);
continue;
}
if((strcmp(argv[i], "-s") == 0) || (strcmp(argv[i], "--skin") == 0)) {
param.skin = atof(argv[++i]);
continue;
}
if((strcmp(argv[i], "--freq") == 0)) {
param.proc_freq = atof(argv[++i]); param.proc_freq = atof(argv[++i]);
continue; continue;
} }
if((strcmp(argv[i], "--vtk") == 0)) if((strcmp(argv[i], "--vtk") == 0)) {
{
param.vtk_file = strdup(argv[++i]); param.vtk_file = strdup(argv[++i]);
continue; continue;
} }
if((strcmp(argv[i], "-h") == 0) || (strcmp(argv[i], "--help") == 0)) if((strcmp(argv[i], "-h") == 0) || (strcmp(argv[i], "--help") == 0)) {
{
printf("MD Bench: A minimalistic re-implementation of miniMD\n"); printf("MD Bench: A minimalistic re-implementation of miniMD\n");
printf(HLINE); printf(HLINE);
printf("-f <string>: force field (lj or eam), default lj\n"); printf("-f <string>: force field (lj or eam), default lj\n");
@ -245,6 +244,8 @@ int main(int argc, char** argv)
printf("-e <string>: input file for EAM\n"); printf("-e <string>: input file for EAM\n");
printf("-n / --nsteps <int>: set number of timesteps for simulation\n"); printf("-n / --nsteps <int>: set number of timesteps for simulation\n");
printf("-nx/-ny/-nz <int>: set linear dimension of systembox in x/y/z direction\n"); printf("-nx/-ny/-nz <int>: set linear dimension of systembox in x/y/z direction\n");
printf("-r / --radius <real>: set cutoff radius\n");
printf("-s / --skin <real>: set skin (verlet buffer)\n");
printf("--freq <real>: processor frequency (GHz)\n"); printf("--freq <real>: processor frequency (GHz)\n");
printf("--vtk <string>: VTK file for visualization\n"); printf("--vtk <string>: VTK file for visualization\n");
printf(HLINE); printf(HLINE);
@ -252,6 +253,7 @@ int main(int argc, char** argv)
} }
} }
param.cutneigh = param.cutforce + param.skin;
setup(&param, &eam, &atom, &neighbor, &stats); setup(&param, &eam, &atom, &neighbor, &stats);
computeThermo(0, &param, &atom); computeThermo(0, &param, &atom);
#if defined(MEM_TRACER) || defined(INDEX_TRACER) #if defined(MEM_TRACER) || defined(INDEX_TRACER)

View File

@ -8,17 +8,22 @@
void initStats(Stats *s) { void initStats(Stats *s) {
s->total_force_neighs = 0; s->total_force_neighs = 0;
s->total_force_iters = 0; s->total_force_iters = 0;
s->atoms_within_cutoff = 0;
s->atoms_outside_cutoff = 0;
} }
void displayStatistics(Atom *atom, Parameter *param, Stats *stats, double *timer) { void displayStatistics(Atom *atom, Parameter *param, Stats *stats, double *timer) {
#ifdef COMPUTE_STATS #ifdef COMPUTE_STATS
double force_useful_volume = 1e-9 * ( (double)(atom->Nlocal * (param->ntimes + 1)) * (sizeof(MD_FLOAT) * 6 + sizeof(int)) + double force_useful_volume = 1e-9 * ( (double)(atom->Nlocal * (param->ntimes + 1)) * (sizeof(MD_FLOAT) * 6 + sizeof(int)) +
(double)(stats->total_force_neighs) * (sizeof(MD_FLOAT) * 3 + sizeof(int)) ); (double)(stats->total_force_neighs) * (sizeof(MD_FLOAT) * 3 + sizeof(int)) );
double avg_neigh = stats->total_force_neighs / (double)(atom->Nlocal * (param->ntimes + 1)); double avg_neigh = stats->total_force_neighs / (double)(atom->Nlocal * (param->ntimes + 1));
double avg_simd = stats->total_force_iters / (double)(atom->Nlocal * (param->ntimes + 1)); double avg_simd = stats->total_force_iters / (double)(atom->Nlocal * (param->ntimes + 1));
#ifdef EXPLICIT_TYPES
#ifdef EXPLICIT_TYPES
force_useful_volume += 1e-9 * (double)((atom->Nlocal * (param->ntimes + 1)) + stats->total_force_neighs) * sizeof(int); force_useful_volume += 1e-9 * (double)((atom->Nlocal * (param->ntimes + 1)) + stats->total_force_neighs) * sizeof(int);
#endif #endif
printf("Statistics:\n"); printf("Statistics:\n");
printf("\tVector width: %d, Processor frequency: %.4f GHz\n", VECTOR_WIDTH, param->proc_freq); printf("\tVector width: %d, Processor frequency: %.4f GHz\n", VECTOR_WIDTH, param->proc_freq);
printf("\tAverage neighbors per atom: %.4f\n", avg_neigh); printf("\tAverage neighbors per atom: %.4f\n", avg_neigh);
@ -27,5 +32,11 @@ void displayStatistics(Atom *atom, Parameter *param, Stats *stats, double *timer
printf("\tTotal number of SIMD iterations: %lld\n", stats->total_force_iters); printf("\tTotal number of SIMD iterations: %lld\n", stats->total_force_iters);
printf("\tUseful read data volume for force computation: %.2fGB\n", force_useful_volume); printf("\tUseful read data volume for force computation: %.2fGB\n", force_useful_volume);
printf("\tCycles/SIMD iteration: %.4f\n", timer[FORCE] * param->proc_freq * 1e9 / stats->total_force_iters); printf("\tCycles/SIMD iteration: %.4f\n", timer[FORCE] * param->proc_freq * 1e9 / stats->total_force_iters);
#ifdef USE_REFERENCE_VERSION
const double eff_pct = (double)stats->atoms_within_cutoff / (double)(stats->atoms_within_cutoff + stats->atoms_outside_cutoff) * 100.0;
printf("\tAtoms within/outside cutoff radius: %lld/%lld (%.2f%%)\n", stats->atoms_within_cutoff, stats->atoms_outside_cutoff, eff_pct);
#endif
#endif #endif
} }