Merge branch 'master' of github.com:RRZE-HPC/MD-Bench
This commit is contained in:
commit
ba3a0524f6
11
Makefile
11
Makefile
@ -1,6 +1,6 @@
|
|||||||
#CONFIGURE BUILD SYSTEM
|
#CONFIGURE BUILD SYSTEM
|
||||||
TARGET = MDBench-$(TAG)-$(OPT_SCHEME)
|
TARGET = MDBench-$(TAG)-$(OPT_SCHEME)
|
||||||
BUILD_DIR = ./$(TAG)
|
BUILD_DIR = ./$(TAG)-$(OPT_SCHEME)
|
||||||
SRC_DIR = ./$(OPT_SCHEME)
|
SRC_DIR = ./$(OPT_SCHEME)
|
||||||
ASM_DIR = ./asm
|
ASM_DIR = ./asm
|
||||||
MAKE_DIR = ./
|
MAKE_DIR = ./
|
||||||
@ -10,6 +10,7 @@ Q ?= @
|
|||||||
include $(MAKE_DIR)/config.mk
|
include $(MAKE_DIR)/config.mk
|
||||||
include $(MAKE_DIR)/include_$(TAG).mk
|
include $(MAKE_DIR)/include_$(TAG).mk
|
||||||
include $(MAKE_DIR)/include_LIKWID.mk
|
include $(MAKE_DIR)/include_LIKWID.mk
|
||||||
|
include $(MAKE_DIR)/include_GROMACS.mk
|
||||||
INCLUDES += -I./$(SRC_DIR)/includes
|
INCLUDES += -I./$(SRC_DIR)/includes
|
||||||
|
|
||||||
ifeq ($(strip $(DATA_LAYOUT)),AOS)
|
ifeq ($(strip $(DATA_LAYOUT)),AOS)
|
||||||
@ -52,6 +53,10 @@ ifeq ($(strip $(COMPUTE_STATS)),true)
|
|||||||
DEFINES += -DCOMPUTE_STATS
|
DEFINES += -DCOMPUTE_STATS
|
||||||
endif
|
endif
|
||||||
|
|
||||||
|
ifeq ($(strip $(XTC_OUTPUT)),true)
|
||||||
|
DEFINES += -DXTC_OUTPUT
|
||||||
|
endif
|
||||||
|
|
||||||
ifeq ($(strip $(USE_REFERENCE_VERSION)),true)
|
ifeq ($(strip $(USE_REFERENCE_VERSION)),true)
|
||||||
DEFINES += -DUSE_REFERENCE_VERSION
|
DEFINES += -DUSE_REFERENCE_VERSION
|
||||||
endif
|
endif
|
||||||
@ -64,6 +69,10 @@ ifneq ($(VECTOR_WIDTH),)
|
|||||||
DEFINES += -DVECTOR_WIDTH=$(VECTOR_WIDTH)
|
DEFINES += -DVECTOR_WIDTH=$(VECTOR_WIDTH)
|
||||||
endif
|
endif
|
||||||
|
|
||||||
|
ifeq ($(strip $(NO_AVX2)),true)
|
||||||
|
DEFINES += -DNO_AVX2
|
||||||
|
endif
|
||||||
|
|
||||||
VPATH = $(SRC_DIR) $(ASM_DIR)
|
VPATH = $(SRC_DIR) $(ASM_DIR)
|
||||||
ASM = $(patsubst $(SRC_DIR)/%.c, $(BUILD_DIR)/%.s,$(wildcard $(SRC_DIR)/*.c))
|
ASM = $(patsubst $(SRC_DIR)/%.c, $(BUILD_DIR)/%.s,$(wildcard $(SRC_DIR)/*.c))
|
||||||
OVERWRITE:= $(patsubst $(ASM_DIR)/%-new.s, $(BUILD_DIR)/%.o,$(wildcard $(ASM_DIR)/*-new.s))
|
OVERWRITE:= $(patsubst $(ASM_DIR)/%-new.s, $(BUILD_DIR)/%.o,$(wildcard $(ASM_DIR)/*-new.s))
|
||||||
|
@ -1,9 +1,9 @@
|
|||||||
# Compiler tag (GCC/CLANG/ICC)
|
# Compiler tag (GCC/CLANG/ICC)
|
||||||
TAG ?= ICC
|
TAG ?= ICC
|
||||||
# Optimization scheme (lammps/gromacs/clusters_per_bin)
|
# Optimization scheme (lammps/gromacs/clusters_per_bin)
|
||||||
OPT_SCHEME = gromacs
|
OPT_SCHEME ?= gromacs
|
||||||
# Enable likwid (true or false)
|
# Enable likwid (true or false)
|
||||||
ENABLE_LIKWID ?= false
|
ENABLE_LIKWID ?= true
|
||||||
# SP or DP
|
# SP or DP
|
||||||
DATA_TYPE ?= DP
|
DATA_TYPE ?= DP
|
||||||
# AOS or SOA
|
# AOS or SOA
|
||||||
@ -25,6 +25,8 @@ MEM_TRACER ?= false
|
|||||||
INDEX_TRACER ?= false
|
INDEX_TRACER ?= false
|
||||||
# Vector width (elements) for index and distance tracer
|
# Vector width (elements) for index and distance tracer
|
||||||
VECTOR_WIDTH ?= 8
|
VECTOR_WIDTH ?= 8
|
||||||
|
# When vector width is 4 but AVX2 is not supported (AVX only), set this to true
|
||||||
|
NO_AVX2 ?= false
|
||||||
# Compute statistics
|
# Compute statistics
|
||||||
COMPUTE_STATS ?= true
|
COMPUTE_STATS ?= true
|
||||||
|
|
||||||
@ -33,6 +35,8 @@ COMPUTE_STATS ?= true
|
|||||||
CLUSTER_LAYOUT ?= SOA
|
CLUSTER_LAYOUT ?= SOA
|
||||||
# Use reference version
|
# Use reference version
|
||||||
USE_REFERENCE_VERSION ?= false
|
USE_REFERENCE_VERSION ?= false
|
||||||
|
# Enable XTC output
|
||||||
|
XTC_OUTPUT ?= false
|
||||||
|
|
||||||
#Feature options
|
#Feature options
|
||||||
OPTIONS = -DALIGNMENT=64
|
OPTIONS = -DALIGNMENT=64
|
||||||
|
1003
data/argon_1000/argon_1ns.gro
Normal file
1003
data/argon_1000/argon_1ns.gro
Normal file
File diff suppressed because it is too large
Load Diff
1003
data/argon_1000/conf.gro
Normal file
1003
data/argon_1000/conf.gro
Normal file
File diff suppressed because it is too large
Load Diff
244
data/argon_1000/grompp.mdp
Normal file
244
data/argon_1000/grompp.mdp
Normal file
@ -0,0 +1,244 @@
|
|||||||
|
;
|
||||||
|
; Generated by:
|
||||||
|
; Vitaly V. Chaban
|
||||||
|
; School of Chemistry
|
||||||
|
; University of Kharkiv
|
||||||
|
; Ukraine, Kharkiv-61077, Svoboda sq., 4
|
||||||
|
; email: chaban@univer.kharkov.ua, vvchaban@gmail.com
|
||||||
|
; skype: vvchaban
|
||||||
|
|
||||||
|
; System: Liquid argon (1000 atoms) at 80 K. Equilibrated for 500ps.
|
||||||
|
|
||||||
|
; VARIOUS PREPROCESSING OPTIONS
|
||||||
|
title = Yo
|
||||||
|
cpp = /usr/bin/cpp
|
||||||
|
include =
|
||||||
|
define =
|
||||||
|
|
||||||
|
; RUN CONTROL PARAMETERS
|
||||||
|
integrator = md
|
||||||
|
; Start time and timestep in ps
|
||||||
|
tinit = 0
|
||||||
|
dt = 0.001
|
||||||
|
nsteps = 250000
|
||||||
|
; For exact run continuation or redoing part of a run
|
||||||
|
init_step = 0
|
||||||
|
; mode for center of mass motion removal
|
||||||
|
comm-mode = Linear
|
||||||
|
; number of steps for center of mass motion removal
|
||||||
|
nstcomm = 1
|
||||||
|
; group(s) for center of mass motion removal
|
||||||
|
comm-grps =
|
||||||
|
|
||||||
|
; LANGEVIN DYNAMICS OPTIONS
|
||||||
|
; Temperature, friction coefficient (amu/ps) and random seed
|
||||||
|
bd-temp = 300
|
||||||
|
bd-fric = 0
|
||||||
|
ld-seed = 1993
|
||||||
|
|
||||||
|
; ENERGY MINIMIZATION OPTIONS
|
||||||
|
; Force tolerance and initial step-size
|
||||||
|
emtol = 100
|
||||||
|
emstep = 0.01
|
||||||
|
; Max number of iterations in relax_shells
|
||||||
|
niter = 20
|
||||||
|
; Step size (1/ps^2) for minimization of flexible constraints
|
||||||
|
fcstep = 0
|
||||||
|
; Frequency of steepest descents steps when doing CG
|
||||||
|
nstcgsteep = 1000
|
||||||
|
nbfgscorr = 10
|
||||||
|
|
||||||
|
; OUTPUT CONTROL OPTIONS
|
||||||
|
; Output frequency for coords (x), velocities (v) and forces (f)
|
||||||
|
nstxout = 500
|
||||||
|
nstvout = 5
|
||||||
|
nstfout = 0
|
||||||
|
; Checkpointing helps you continue after crashes
|
||||||
|
nstcheckpoint = 1000
|
||||||
|
; Output frequency for energies to log file and energy file
|
||||||
|
nstlog = 50
|
||||||
|
nstenergy = 50
|
||||||
|
; Output frequency and precision for xtc file
|
||||||
|
nstxtcout = 5
|
||||||
|
xtc-precision = 1000
|
||||||
|
; This selects the subset of atoms for the xtc file. You can
|
||||||
|
; select multiple groups. By default all atoms will be written.
|
||||||
|
xtc-grps =
|
||||||
|
; Selection of energy groups
|
||||||
|
energygrps =
|
||||||
|
|
||||||
|
; NEIGHBORSEARCHING PARAMETERS
|
||||||
|
; nblist update frequency
|
||||||
|
nstlist = 5
|
||||||
|
; ns algorithm (simple or grid)
|
||||||
|
ns_type = grid
|
||||||
|
; Periodic boundary conditions: xyz (default), no (vacuum)
|
||||||
|
; or full (infinite systems only)
|
||||||
|
pbc = xyz
|
||||||
|
; nblist cut-off
|
||||||
|
rlist = 0.9
|
||||||
|
domain-decomposition = no
|
||||||
|
|
||||||
|
; OPTIONS FOR ELECTROSTATICS AND VDW
|
||||||
|
; Method for doing electrostatics
|
||||||
|
coulombtype = Cut-off
|
||||||
|
rcoulomb-switch = 0
|
||||||
|
rcoulomb = 0.9
|
||||||
|
; Dielectric constant (DC) for cut-off or DC of reaction field
|
||||||
|
epsilon-r = 1
|
||||||
|
; Method for doing Van der Waals
|
||||||
|
vdw-type = Cut-off
|
||||||
|
; cut-off lengths
|
||||||
|
rvdw-switch = 0
|
||||||
|
rvdw = 0.9
|
||||||
|
; Apply long range dispersion corrections for Energy and Pressure
|
||||||
|
DispCorr = EnerPres
|
||||||
|
; Extension of the potential lookup tables beyond the cut-off
|
||||||
|
table-extension = 1
|
||||||
|
; Spacing for the PME/PPPM FFT grid
|
||||||
|
fourierspacing = 0.12
|
||||||
|
; FFT grid size, when a value is 0 fourierspacing will be used
|
||||||
|
fourier_nx = 0
|
||||||
|
fourier_ny = 0
|
||||||
|
fourier_nz = 0
|
||||||
|
; EWALD/PME/PPPM parameters
|
||||||
|
pme_order = 4
|
||||||
|
ewald_rtol = 1e-05
|
||||||
|
ewald_geometry = 3d
|
||||||
|
epsilon_surface = 0
|
||||||
|
optimize_fft = no
|
||||||
|
|
||||||
|
; GENERALIZED BORN ELECTROSTATICS
|
||||||
|
; Algorithm for calculating Born radii
|
||||||
|
gb_algorithm = Still
|
||||||
|
; Frequency of calculating the Born radii inside rlist
|
||||||
|
nstgbradii = 1
|
||||||
|
; Cutoff for Born radii calculation; the contribution from atoms
|
||||||
|
; between rlist and rgbradii is updated every nstlist steps
|
||||||
|
rgbradii = 2
|
||||||
|
; Salt concentration in M for Generalized Born models
|
||||||
|
gb_saltconc = 0
|
||||||
|
|
||||||
|
; IMPLICIT SOLVENT (for use with Generalized Born electrostatics)
|
||||||
|
implicit_solvent = No
|
||||||
|
|
||||||
|
; OPTIONS FOR WEAK COUPLING ALGORITHMS
|
||||||
|
; Temperature coupling
|
||||||
|
Tcoupl = berendsen
|
||||||
|
; Groups to couple separately
|
||||||
|
tc-grps = System
|
||||||
|
; Time constant (ps) and reference temperature (K)
|
||||||
|
tau_t = 0.1
|
||||||
|
ref_t = 80
|
||||||
|
; Pressure coupling
|
||||||
|
Pcoupl = no
|
||||||
|
Pcoupltype = isotropic
|
||||||
|
; Time constant (ps), compressibility (1/bar) and reference P (bar)
|
||||||
|
tau_p = 1.0
|
||||||
|
compressibility = 4.5e-5
|
||||||
|
ref_p = 1.0
|
||||||
|
; Random seed for Andersen thermostat
|
||||||
|
andersen_seed = 815131
|
||||||
|
|
||||||
|
; SIMULATED ANNEALING
|
||||||
|
; Type of annealing for each temperature group (no/single/periodic)
|
||||||
|
annealing = no
|
||||||
|
; Number of time points to use for specifying annealing in each group
|
||||||
|
annealing_npoints =
|
||||||
|
; List of times at the annealing points for each group
|
||||||
|
annealing_time =
|
||||||
|
; Temp. at each annealing point, for each group.
|
||||||
|
annealing_temp =
|
||||||
|
|
||||||
|
; GENERATE VELOCITIES FOR STARTUP RUN
|
||||||
|
gen_vel = yes
|
||||||
|
gen_temp = 80
|
||||||
|
gen_seed = 1993
|
||||||
|
|
||||||
|
; OPTIONS FOR BONDS
|
||||||
|
constraints = all-bonds
|
||||||
|
; Type of constraint algorithm
|
||||||
|
constraint-algorithm = Lincs
|
||||||
|
; Do not constrain the start configuration
|
||||||
|
unconstrained-start = no
|
||||||
|
; Use successive overrelaxation to reduce the number of shake iterations
|
||||||
|
Shake-SOR = no
|
||||||
|
; Relative tolerance of shake
|
||||||
|
shake-tol = 1e-04
|
||||||
|
; Highest order in the expansion of the constraint coupling matrix
|
||||||
|
lincs-order = 4
|
||||||
|
; Number of iterations in the final step of LINCS. 1 is fine for
|
||||||
|
; normal simulations, but use 2 to conserve energy in NVE runs.
|
||||||
|
; For energy minimization with constraints it should be 4 to 8.
|
||||||
|
lincs-iter = 1
|
||||||
|
; Lincs will write a warning to the stderr if in one step a bond
|
||||||
|
; rotates over more degrees than
|
||||||
|
lincs-warnangle = 30
|
||||||
|
; Convert harmonic bonds to morse potentials
|
||||||
|
morse = no
|
||||||
|
|
||||||
|
; ENERGY GROUP EXCLUSIONS
|
||||||
|
; Pairs of energy groups for which all non-bonded interactions are excluded
|
||||||
|
energygrp_excl =
|
||||||
|
|
||||||
|
; NMR refinement stuff
|
||||||
|
; Distance restraints type: No, Simple or Ensemble
|
||||||
|
disre = No
|
||||||
|
; Force weighting of pairs in one distance restraint: Conservative or Equal
|
||||||
|
disre-weighting = Conservative
|
||||||
|
; Use sqrt of the time averaged times the instantaneous violation
|
||||||
|
disre-mixed = no
|
||||||
|
disre-fc = 1000
|
||||||
|
disre-tau = 0
|
||||||
|
; Output frequency for pair distances to energy file
|
||||||
|
nstdisreout = 100
|
||||||
|
; Orientation restraints: No or Yes
|
||||||
|
orire = no
|
||||||
|
; Orientation restraints force constant and tau for time averaging
|
||||||
|
orire-fc = 0
|
||||||
|
orire-tau = 0
|
||||||
|
orire-fitgrp =
|
||||||
|
; Output frequency for trace(SD) to energy file
|
||||||
|
nstorireout = 100
|
||||||
|
; Dihedral angle restraints: No, Simple or Ensemble
|
||||||
|
dihre = No
|
||||||
|
dihre-fc = 1000
|
||||||
|
dihre-tau = 0
|
||||||
|
; Output frequency for dihedral values to energy file
|
||||||
|
nstdihreout = 100
|
||||||
|
|
||||||
|
; Free energy control stuff
|
||||||
|
free-energy = no
|
||||||
|
init-lambda = 0
|
||||||
|
delta-lambda = 0
|
||||||
|
sc-alpha = 0
|
||||||
|
sc-sigma = 0.3
|
||||||
|
|
||||||
|
; Non-equilibrium MD stuff
|
||||||
|
acc-grps =
|
||||||
|
accelerate =
|
||||||
|
freezegrps =
|
||||||
|
freezedim =
|
||||||
|
cos-acceleration = 0
|
||||||
|
|
||||||
|
; Electric fields
|
||||||
|
; Format is number of terms (int) and for all terms an amplitude (real)
|
||||||
|
; and a phase angle (real)
|
||||||
|
E-x =
|
||||||
|
E-xt =
|
||||||
|
E-y =
|
||||||
|
E-yt =
|
||||||
|
E-z =
|
||||||
|
E-zt =
|
||||||
|
|
||||||
|
; User defined thingies
|
||||||
|
user1-grps =
|
||||||
|
user2-grps =
|
||||||
|
userint1 = 0
|
||||||
|
userint2 = 0
|
||||||
|
userint3 = 0
|
||||||
|
userint4 = 0
|
||||||
|
userreal1 = 0
|
||||||
|
userreal2 = 0
|
||||||
|
userreal3 = 0
|
||||||
|
userreal4 = 0
|
11
data/argon_1000/mdbench_params.conf
Normal file
11
data/argon_1000/mdbench_params.conf
Normal file
@ -0,0 +1,11 @@
|
|||||||
|
mass 39.94
|
||||||
|
sigma 0.0062220
|
||||||
|
epsilon 0.0000096960
|
||||||
|
ntimes 250000
|
||||||
|
dt 0.001
|
||||||
|
temp 80
|
||||||
|
x_out_freq 500
|
||||||
|
v_out_freq 5
|
||||||
|
cutforce 0.9
|
||||||
|
reneigh_every 100
|
||||||
|
nstat 125000
|
1003
data/argon_1000/tprout.gro
Normal file
1003
data/argon_1000/tprout.gro
Normal file
File diff suppressed because it is too large
Load Diff
244
gromacs/atom.c
244
gromacs/atom.c
@ -31,18 +31,7 @@
|
|||||||
#include <allocate.h>
|
#include <allocate.h>
|
||||||
#include <util.h>
|
#include <util.h>
|
||||||
|
|
||||||
#define DELTA 20000
|
void initAtom(Atom *atom) {
|
||||||
|
|
||||||
#ifndef MAXLINE
|
|
||||||
#define MAXLINE 4096
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#ifndef MAX
|
|
||||||
#define MAX(a,b) ((a) > (b) ? (a) : (b))
|
|
||||||
#endif
|
|
||||||
|
|
||||||
void initAtom(Atom *atom)
|
|
||||||
{
|
|
||||||
atom->x = NULL; atom->y = NULL; atom->z = NULL;
|
atom->x = NULL; atom->y = NULL; atom->z = NULL;
|
||||||
atom->vx = NULL; atom->vy = NULL; atom->vz = NULL;
|
atom->vx = NULL; atom->vy = NULL; atom->vz = NULL;
|
||||||
atom->cl_x = NULL;
|
atom->cl_x = NULL;
|
||||||
@ -65,8 +54,7 @@ void initAtom(Atom *atom)
|
|||||||
atom->clusters = NULL;
|
atom->clusters = NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
void createAtom(Atom *atom, Parameter *param)
|
void createAtom(Atom *atom, Parameter *param) {
|
||||||
{
|
|
||||||
MD_FLOAT xlo = 0.0; MD_FLOAT xhi = param->xprd;
|
MD_FLOAT xlo = 0.0; MD_FLOAT xhi = param->xprd;
|
||||||
MD_FLOAT ylo = 0.0; MD_FLOAT yhi = param->yprd;
|
MD_FLOAT ylo = 0.0; MD_FLOAT yhi = param->yprd;
|
||||||
MD_FLOAT zlo = 0.0; MD_FLOAT zhi = param->zprd;
|
MD_FLOAT zlo = 0.0; MD_FLOAT zhi = param->zprd;
|
||||||
@ -106,47 +94,25 @@ void createAtom(Atom *atom, Parameter *param)
|
|||||||
int subboxdim = 8;
|
int subboxdim = 8;
|
||||||
|
|
||||||
while(oz * subboxdim <= khi) {
|
while(oz * subboxdim <= khi) {
|
||||||
|
|
||||||
k = oz * subboxdim + sz;
|
k = oz * subboxdim + sz;
|
||||||
j = oy * subboxdim + sy;
|
j = oy * subboxdim + sy;
|
||||||
i = ox * subboxdim + sx;
|
i = ox * subboxdim + sx;
|
||||||
|
|
||||||
if(((i + j + k) % 2 == 0) &&
|
if(((i + j + k) % 2 == 0) && (i >= ilo) && (i <= ihi) && (j >= jlo) && (j <= jhi) && (k >= klo) && (k <= khi)) {
|
||||||
(i >= ilo) && (i <= ihi) &&
|
|
||||||
(j >= jlo) && (j <= jhi) &&
|
|
||||||
(k >= klo) && (k <= khi)) {
|
|
||||||
|
|
||||||
xtmp = 0.5 * alat * i;
|
xtmp = 0.5 * alat * i;
|
||||||
ytmp = 0.5 * alat * j;
|
ytmp = 0.5 * alat * j;
|
||||||
ztmp = 0.5 * alat * k;
|
ztmp = 0.5 * alat * k;
|
||||||
|
|
||||||
if( xtmp >= xlo && xtmp < xhi &&
|
if(xtmp >= xlo && xtmp < xhi && ytmp >= ylo && ytmp < yhi && ztmp >= zlo && ztmp < zhi ) {
|
||||||
ytmp >= ylo && ytmp < yhi &&
|
n = k * (2 * param->ny) * (2 * param->nx) + j * (2 * param->nx) + i + 1;
|
||||||
ztmp >= zlo && ztmp < zhi ) {
|
for(m = 0; m < 5; m++) { myrandom(&n); }
|
||||||
|
|
||||||
n = k * (2 * param->ny) * (2 * param->nx) +
|
|
||||||
j * (2 * param->nx) +
|
|
||||||
i + 1;
|
|
||||||
|
|
||||||
for(m = 0; m < 5; m++) {
|
|
||||||
myrandom(&n);
|
|
||||||
}
|
|
||||||
vxtmp = myrandom(&n);
|
vxtmp = myrandom(&n);
|
||||||
|
for(m = 0; m < 5; m++){ myrandom(&n); }
|
||||||
for(m = 0; m < 5; m++){
|
|
||||||
myrandom(&n);
|
|
||||||
}
|
|
||||||
vytmp = myrandom(&n);
|
vytmp = myrandom(&n);
|
||||||
|
for(m = 0; m < 5; m++) { myrandom(&n); }
|
||||||
for(m = 0; m < 5; m++) {
|
|
||||||
myrandom(&n);
|
|
||||||
}
|
|
||||||
vztmp = myrandom(&n);
|
vztmp = myrandom(&n);
|
||||||
|
|
||||||
if(atom->Nlocal == atom->Nmax) {
|
if(atom->Nlocal == atom->Nmax) { growAtom(atom); }
|
||||||
growAtom(atom);
|
|
||||||
}
|
|
||||||
|
|
||||||
atom_x(atom->Nlocal) = xtmp;
|
atom_x(atom->Nlocal) = xtmp;
|
||||||
atom_y(atom->Nlocal) = ytmp;
|
atom_y(atom->Nlocal) = ytmp;
|
||||||
atom_z(atom->Nlocal) = ztmp;
|
atom_z(atom->Nlocal) = ztmp;
|
||||||
@ -159,7 +125,6 @@ void createAtom(Atom *atom, Parameter *param)
|
|||||||
}
|
}
|
||||||
|
|
||||||
sx++;
|
sx++;
|
||||||
|
|
||||||
if(sx == subboxdim) { sx = 0; sy++; }
|
if(sx == subboxdim) { sx = 0; sy++; }
|
||||||
if(sy == subboxdim) { sy = 0; sz++; }
|
if(sy == subboxdim) { sy = 0; sz++; }
|
||||||
if(sz == subboxdim) { sz = 0; ox++; }
|
if(sz == subboxdim) { sz = 0; ox++; }
|
||||||
@ -168,8 +133,188 @@ void createAtom(Atom *atom, Parameter *param)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
int readAtom(Atom* atom, Parameter* param)
|
// Map an atom type label read from an input file to its numeric type index.
//
// Only argon is known: any label whose first two characters are "Ar" yields 0.
// A NULL label (e.g. the result of strtok() on a line with a missing field)
// or an unknown label is reported on stderr and terminates the program with
// exit(-1); the trailing return only exists to satisfy the function signature.
int type_str2int(const char *type) {
    // strtok() hands back NULL for a missing field; never pass that on to
    // strncmp() or to a "%s" conversion.
    if(type == NULL) {
        fprintf(stderr, "Invalid atom type: (missing)\n");
        exit(-1);
        return -1;
    }

    if(strncmp(type, "Ar", 2) == 0) { return 0; } // Argon

    fprintf(stderr, "Invalid atom type: %s\n", type);
    exit(-1);
    return -1;
}
|
||||||
|
|
||||||
|
// Read the atoms described by param->input_file, choosing the reader from the
// last four characters of the file name:
//   ".pdb" -> readAtom_pdb, ".gro" -> readAtom_gro, ".dmp" -> readAtom_dmp
//
// Returns whatever the selected reader returns. A missing file name, a name
// shorter than an extension, or an unknown extension is reported on stderr and
// terminates the program with exit(-1).
int readAtom(Atom* atom, Parameter* param) {
    const char *file = param->input_file;
    size_t len = (file != NULL) ? strlen(file) : 0;

    // Only look at the extension when the name is long enough to have one;
    // otherwise (len - 4) would point before the start of the string.
    if(len >= 4) {
        const char *ext = file + (len - 4);
        if(strncmp(ext, ".pdb", 4) == 0) { return readAtom_pdb(atom, param); }
        if(strncmp(ext, ".gro", 4) == 0) { return readAtom_gro(atom, param); }
        if(strncmp(ext, ".dmp", 4) == 0) { return readAtom_dmp(atom, param); }
    }

    fprintf(stderr, "Invalid input file extension: %s\nValid choices are: pdb, gro, dmp\n", (file != NULL) ? file : "(null)");
    exit(-1);
    return -1;
}
|
||||||
|
|
||||||
|
// Return the next whitespace-separated field of the PDB record currently being
// tokenized with strtok(). `what` names the field for the diagnostic; a missing
// field terminates the program instead of handing NULL to atoi()/atof().
static char *pdbNextField(const char *what) {
    char *field = strtok(NULL, " \t\r\n");
    if(field == NULL) {
        fprintf(stderr, "Input error: Missing %s in PDB record!\n", what);
        exit(-1);
    }
    return field;
}

// Read atoms and the simulation box from the PDB file param->input_file.
//
// Recognized records:
//   CRYST1  - first three fields set xhi/yhi/zhi (lower bounds are 0) and the
//             derived xprd/yprd/zprd in param; remaining fields are ignored.
//   ATOM    - serial number (1-based, used as storage index), atom type,
//             two skipped fields, then x, y, z. Velocities are set to zero.
//   HEADER, REMARK, MODEL, TER, ENDMDL - ignored.
// Blank lines are skipped; any other record is an error.
//
// After reading, the per-type-pair tables (epsilon, sigma6, cutforcesq,
// cutneighsq) are allocated and filled from param.
//
// Returns the number of ATOM records read. Every error (unopenable file,
// malformed record, no atoms) is reported on stderr and ends in exit(-1).
int readAtom_pdb(Atom* atom, Parameter* param) {
    FILE *fp = fopen(param->input_file, "r");
    char line[MAXLINE];
    int read_atoms = 0;

    if(!fp) {
        fprintf(stderr, "Could not open input file: %s\n", param->input_file);
        exit(-1);
        return -1;
    }

    // Let fgets() drive the loop: feof() only turns true after a read has
    // failed, so testing it up front would process the last line twice.
    while(fgets(line, MAXLINE, fp) != NULL) {
        char *item = strtok(line, " \t\r\n");

        if(item == NULL) { continue; } // blank line

        if(strncmp(item, "CRYST1", 6) == 0) {
            param->xlo = 0.0;
            param->xhi = atof(pdbNextField("box length x"));
            param->ylo = 0.0;
            param->yhi = atof(pdbNextField("box length y"));
            param->zlo = 0.0;
            param->zhi = atof(pdbNextField("box length z"));
            param->xprd = param->xhi - param->xlo;
            param->yprd = param->yhi - param->ylo;
            param->zprd = param->zhi - param->zlo;
            // alpha, beta, gamma, sGroup, z are not used
        } else if(strncmp(item, "ATOM", 4) == 0) {
            int atom_id = atoi(pdbNextField("atom serial number")) - 1;

            // Serial numbers are 1-based; anything else would index before
            // the start of the atom arrays.
            if(atom_id < 0) {
                fprintf(stderr, "Input error: Invalid atom serial number!\n");
                exit(-1);
                return -1;
            }

            while(atom_id + 1 >= atom->Nmax) {
                growAtom(atom);
            }

            atom->type[atom_id] = type_str2int(pdbNextField("atom type"));
            pdbNextField("atom label");       // present in the record, not used
            pdbNextField("component number"); // present in the record, not used
            atom_x(atom_id) = atof(pdbNextField("x coordinate"));
            atom_y(atom_id) = atof(pdbNextField("y coordinate"));
            atom_z(atom_id) = atof(pdbNextField("z coordinate"));
            atom->vx[atom_id] = 0.0;
            atom->vy[atom_id] = 0.0;
            atom->vz[atom_id] = 0.0;
            // Trailing occupancy/charge columns are not needed and therefore
            // not required to be present.
            atom->ntypes = MAX(atom->type[atom_id] + 1, atom->ntypes);
            atom->Natoms++;
            atom->Nlocal++;
            read_atoms++;
        } else if(strncmp(item, "HEADER", 6) == 0 ||
                  strncmp(item, "REMARK", 6) == 0 ||
                  strncmp(item, "MODEL", 5) == 0 ||
                  strncmp(item, "TER", 3) == 0 ||
                  strncmp(item, "ENDMDL", 6) == 0) {
            // Do nothing
        } else {
            fprintf(stderr, "Invalid item: %s\n", item);
            exit(-1);
            return -1;
        }
    }

    if(!read_atoms) {
        fprintf(stderr, "Input error: No atoms read!\n");
        exit(-1);
        return -1;
    }

    // One entry per (type, type) pair; all pairs share the global parameters.
    atom->epsilon = allocate(ALIGNMENT, atom->ntypes * atom->ntypes * sizeof(MD_FLOAT));
    atom->sigma6 = allocate(ALIGNMENT, atom->ntypes * atom->ntypes * sizeof(MD_FLOAT));
    atom->cutforcesq = allocate(ALIGNMENT, atom->ntypes * atom->ntypes * sizeof(MD_FLOAT));
    atom->cutneighsq = allocate(ALIGNMENT, atom->ntypes * atom->ntypes * sizeof(MD_FLOAT));
    for(int i = 0; i < atom->ntypes * atom->ntypes; i++) {
        atom->epsilon[i] = param->epsilon;
        atom->sigma6[i] = param->sigma6;
        atom->cutneighsq[i] = param->cutneigh * param->cutneigh;
        atom->cutforcesq[i] = param->cutforce * param->cutforce;
    }

    fprintf(stdout, "Read %d atoms from %s\n", read_atoms, param->input_file);
    fclose(fp);
    return read_atoms;
}
|
||||||
|
|
||||||
|
// Return the next whitespace-separated field of the .gro line currently being
// tokenized with strtok(). `what` names the field for the diagnostic; a missing
// field terminates the program instead of handing NULL to atoi()/atof().
static char *groNextField(const char *what) {
    char *field = strtok(NULL, " \t\r\n");
    if(field == NULL) {
        fprintf(stderr, "Input error: Missing %s in gro file!\n", what);
        exit(-1);
    }
    return field;
}

// Read atoms and the simulation box from the .gro file param->input_file.
//
// Layout expected, all fields whitespace separated:
//   line 1      - free-form system description
//   line 2      - number of atoms
//   atom lines  - label, atom type, atom number, x, y, z [, vx, vy, vz]
//   last line   - box lengths x, y, z (lower bounds are taken as 0)
//
// Atoms are stored in file order; the atom number column is read but not used
// as index. Missing velocity columns are stored as 0.0.
// NOTE(review): the zero default relies on velocities being optional in the
// .gro format - confirm this is the desired behavior for MD-Bench inputs.
// If the file ends right after the atom lines, the box in param is left
// untouched, as before.
//
// After reading, the per-type-pair tables (epsilon, sigma6, cutforcesq,
// cutneighsq) are allocated and filled from param.
//
// Returns the number of atoms read. Every error (unopenable file, truncated
// header, malformed line, atom count mismatch) is reported on stderr and ends
// in exit(-1).
int readAtom_gro(Atom* atom, Parameter* param) {
    FILE *fp = fopen(param->input_file, "r");
    char line[MAXLINE];
    char desc[MAXLINE];
    char *field;
    int read_atoms = 0;
    int atoms_to_read = 0;

    if(!fp) {
        fprintf(stderr, "Could not open input file: %s\n", param->input_file);
        exit(-1);
        return -1;
    }

    // Line 1: description. strcspn() stops at the terminator when the line
    // carries no newline, so the cut can never run past the buffer.
    if(fgets(desc, MAXLINE, fp) == NULL) {
        fprintf(stderr, "Input error: Could not read system description from %s\n", param->input_file);
        exit(-1);
        return -1;
    }
    desc[strcspn(desc, "\r\n")] = '\0';

    // Line 2: number of atoms that follow.
    if(fgets(line, MAXLINE, fp) == NULL || (field = strtok(line, " \t\r\n")) == NULL) {
        fprintf(stderr, "Input error: Could not read number of atoms from %s\n", param->input_file);
        exit(-1);
        return -1;
    }
    atoms_to_read = atoi(field);
    fprintf(stdout, "System: %s with %d atoms\n", desc, atoms_to_read);

    // fgets() drives the loop so that a failed read is never parsed as if it
    // were a fresh line (feof() only turns true after a read has failed).
    while(read_atoms < atoms_to_read && fgets(line, MAXLINE, fp) != NULL) {
        int atom_id = read_atoms; // storage index follows file order
        int type;

        if(strtok(line, " \t\r\n") == NULL) { // label column, not used
            fprintf(stderr, "Input error: Missing atom label in gro file!\n");
            exit(-1);
            return -1;
        }

        type = type_str2int(groNextField("atom type"));
        groNextField("atom number"); // present in the line, not used

        while(atom_id + 1 >= atom->Nmax) {
            growAtom(atom);
        }

        atom->type[atom_id] = type;
        atom_x(atom_id) = atof(groNextField("x coordinate"));
        atom_y(atom_id) = atof(groNextField("y coordinate"));
        atom_z(atom_id) = atof(groNextField("z coordinate"));

        // Velocity columns may be absent; fall back to zero for each one.
        field = strtok(NULL, " \t\r\n");
        atom->vx[atom_id] = (field != NULL) ? atof(field) : 0.0;
        field = (field != NULL) ? strtok(NULL, " \t\r\n") : NULL;
        atom->vy[atom_id] = (field != NULL) ? atof(field) : 0.0;
        field = (field != NULL) ? strtok(NULL, " \t\r\n") : NULL;
        atom->vz[atom_id] = (field != NULL) ? atof(field) : 0.0;

        atom->ntypes = MAX(atom->type[atom_id] + 1, atom->ntypes);
        atom->Natoms++;
        atom->Nlocal++;
        read_atoms++;
    }

    // Box line. Only parsed when a line was actually read.
    if(fgets(line, MAXLINE, fp) != NULL) {
        field = strtok(line, " \t\r\n");
        if(field == NULL) {
            fprintf(stderr, "Input error: Missing box length x in gro file!\n");
            exit(-1);
            return -1;
        }

        param->xlo = 0.0;
        param->xhi = atof(field);
        param->ylo = 0.0;
        param->yhi = atof(groNextField("box length y"));
        param->zlo = 0.0;
        param->zhi = atof(groNextField("box length z"));
        param->xprd = param->xhi - param->xlo;
        param->yprd = param->yhi - param->ylo;
        param->zprd = param->zhi - param->zlo;
    }

    if(read_atoms != atoms_to_read) {
        fprintf(stderr, "Input error: Number of atoms read do not match (%d/%d).\n", read_atoms, atoms_to_read);
        exit(-1);
        return -1;
    }

    // One entry per (type, type) pair; all pairs share the global parameters.
    atom->epsilon = allocate(ALIGNMENT, atom->ntypes * atom->ntypes * sizeof(MD_FLOAT));
    atom->sigma6 = allocate(ALIGNMENT, atom->ntypes * atom->ntypes * sizeof(MD_FLOAT));
    atom->cutforcesq = allocate(ALIGNMENT, atom->ntypes * atom->ntypes * sizeof(MD_FLOAT));
    atom->cutneighsq = allocate(ALIGNMENT, atom->ntypes * atom->ntypes * sizeof(MD_FLOAT));
    for(int t = 0; t < atom->ntypes * atom->ntypes; t++) {
        atom->epsilon[t] = param->epsilon;
        atom->sigma6[t] = param->sigma6;
        atom->cutneighsq[t] = param->cutneigh * param->cutneigh;
        atom->cutforcesq[t] = param->cutforce * param->cutforce;
    }

    fprintf(stdout, "Read %d atoms from %s\n", read_atoms, param->input_file);
    fclose(fp);
    return read_atoms;
}
|
||||||
|
|
||||||
|
int readAtom_dmp(Atom* atom, Parameter* param) {
|
||||||
FILE *fp = fopen(param->input_file, "r");
|
FILE *fp = fopen(param->input_file, "r");
|
||||||
char line[MAXLINE];
|
char line[MAXLINE];
|
||||||
int natoms = 0;
|
int natoms = 0;
|
||||||
@ -258,11 +403,11 @@ int readAtom(Atom* atom, Parameter* param)
|
|||||||
}
|
}
|
||||||
|
|
||||||
fprintf(stdout, "Read %d atoms from %s\n", natoms, param->input_file);
|
fprintf(stdout, "Read %d atoms from %s\n", natoms, param->input_file);
|
||||||
|
fclose(fp);
|
||||||
return natoms;
|
return natoms;
|
||||||
}
|
}
|
||||||
|
|
||||||
void growAtom(Atom *atom)
|
void growAtom(Atom *atom) {
|
||||||
{
|
|
||||||
int nold = atom->Nmax;
|
int nold = atom->Nmax;
|
||||||
atom->Nmax += DELTA;
|
atom->Nmax += DELTA;
|
||||||
|
|
||||||
@ -279,8 +424,7 @@ void growAtom(Atom *atom)
|
|||||||
atom->type = (int *) reallocate(atom->type, ALIGNMENT, atom->Nmax * sizeof(int), nold * sizeof(int));
|
atom->type = (int *) reallocate(atom->type, ALIGNMENT, atom->Nmax * sizeof(int), nold * sizeof(int));
|
||||||
}
|
}
|
||||||
|
|
||||||
void growClusters(Atom *atom)
|
void growClusters(Atom *atom) {
|
||||||
{
|
|
||||||
int nold = atom->Nclusters_max;
|
int nold = atom->Nclusters_max;
|
||||||
atom->Nclusters_max += DELTA;
|
atom->Nclusters_max += DELTA;
|
||||||
atom->clusters = (Cluster*) reallocate(atom->clusters, ALIGNMENT, atom->Nclusters_max * sizeof(Cluster), nold * sizeof(Cluster));
|
atom->clusters = (Cluster*) reallocate(atom->clusters, ALIGNMENT, atom->Nclusters_max * sizeof(Cluster), nold * sizeof(Cluster));
|
||||||
|
@ -61,6 +61,7 @@ double computeForceLJ_ref(Parameter *param, Atom *atom, Neighbor *neighbor, Stat
|
|||||||
|
|
||||||
for(int k = 0; k < numneighs; k++) {
|
for(int k = 0; k < numneighs; k++) {
|
||||||
int cj = neighs[k];
|
int cj = neighs[k];
|
||||||
|
int any = 0;
|
||||||
MD_FLOAT *cjptr = cluster_pos_ptr(cj);
|
MD_FLOAT *cjptr = cluster_pos_ptr(cj);
|
||||||
for(int cii = 0; cii < CLUSTER_DIM_M; cii++) {
|
for(int cii = 0; cii < CLUSTER_DIM_M; cii++) {
|
||||||
MD_FLOAT xtmp = cluster_x(ciptr, cii);
|
MD_FLOAT xtmp = cluster_x(ciptr, cii);
|
||||||
@ -83,18 +84,29 @@ double computeForceLJ_ref(Parameter *param, Atom *atom, Neighbor *neighbor, Stat
|
|||||||
fix += delx * force;
|
fix += delx * force;
|
||||||
fiy += dely * force;
|
fiy += dely * force;
|
||||||
fiz += delz * force;
|
fiz += delz * force;
|
||||||
|
any = 1;
|
||||||
|
addStat(stats->atoms_within_cutoff, 1);
|
||||||
|
} else {
|
||||||
|
addStat(stats->atoms_outside_cutoff, 1);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if(any != 0) {
|
||||||
|
addStat(stats->clusters_within_cutoff, 1);
|
||||||
|
} else {
|
||||||
|
addStat(stats->clusters_outside_cutoff, 1);
|
||||||
|
}
|
||||||
|
|
||||||
cluster_x(cifptr, cii) += fix;
|
cluster_x(cifptr, cii) += fix;
|
||||||
cluster_y(cifptr, cii) += fiy;
|
cluster_y(cifptr, cii) += fiy;
|
||||||
cluster_z(cifptr, cii) += fiz;
|
cluster_z(cifptr, cii) += fiz;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
addStat(stats->calculated_forces, 1);
|
||||||
addStat(stats->num_neighs, numneighs);
|
addStat(stats->num_neighs, numneighs);
|
||||||
addStat(stats->force_iters, (numneighs + VECTOR_WIDTH - 1) / VECTOR_WIDTH);
|
addStat(stats->force_iters, (long long int)((double)numneighs * CLUSTER_DIM_M / CLUSTER_DIM_N));
|
||||||
}
|
}
|
||||||
|
|
||||||
LIKWID_MARKER_STOP("force");
|
LIKWID_MARKER_STOP("force");
|
||||||
@ -250,7 +262,7 @@ double computeForceLJ_4xn(Parameter *param, Atom *atom, Neighbor *neighbor, Stat
|
|||||||
|
|
||||||
addStat(stats->calculated_forces, 1);
|
addStat(stats->calculated_forces, 1);
|
||||||
addStat(stats->num_neighs, numneighs);
|
addStat(stats->num_neighs, numneighs);
|
||||||
addStat(stats->force_iters, numneighs / 2);
|
addStat(stats->force_iters, (long long int)((double)numneighs * CLUSTER_DIM_M / CLUSTER_DIM_N));
|
||||||
}
|
}
|
||||||
|
|
||||||
LIKWID_MARKER_STOP("force");
|
LIKWID_MARKER_STOP("force");
|
||||||
|
@ -25,6 +25,8 @@
|
|||||||
#ifndef __ATOM_H_
|
#ifndef __ATOM_H_
|
||||||
#define __ATOM_H_
|
#define __ATOM_H_
|
||||||
|
|
||||||
|
#define DELTA 20000
|
||||||
|
|
||||||
#define CLUSTER_DIM_M 4
|
#define CLUSTER_DIM_M 4
|
||||||
#define CLUSTER_DIM_N VECTOR_WIDTH
|
#define CLUSTER_DIM_N VECTOR_WIDTH
|
||||||
|
|
||||||
@ -59,6 +61,9 @@ typedef struct {
|
|||||||
extern void initAtom(Atom*);
|
extern void initAtom(Atom*);
|
||||||
extern void createAtom(Atom*, Parameter*);
|
extern void createAtom(Atom*, Parameter*);
|
||||||
extern int readAtom(Atom*, Parameter*);
|
extern int readAtom(Atom*, Parameter*);
|
||||||
|
extern int readAtom_pdb(Atom*, Parameter*);
|
||||||
|
extern int readAtom_gro(Atom*, Parameter*);
|
||||||
|
extern int readAtom_dmp(Atom*, Parameter*);
|
||||||
extern void growAtom(Atom*);
|
extern void growAtom(Atom*);
|
||||||
extern void growClusters(Atom*);
|
extern void growClusters(Atom*);
|
||||||
|
|
||||||
|
@ -37,6 +37,7 @@ extern void initNeighbor(Neighbor*, Parameter*);
|
|||||||
extern void setupNeighbor(Parameter*, Atom*);
|
extern void setupNeighbor(Parameter*, Atom*);
|
||||||
extern void binatoms(Atom*);
|
extern void binatoms(Atom*);
|
||||||
extern void buildNeighbor(Atom*, Neighbor*);
|
extern void buildNeighbor(Atom*, Neighbor*);
|
||||||
|
extern void pruneNeighbor(Parameter*, Atom*, Neighbor*);
|
||||||
extern void sortAtom(Atom*);
|
extern void sortAtom(Atom*);
|
||||||
extern void buildClusters(Atom*);
|
extern void buildClusters(Atom*);
|
||||||
extern void binClusters(Atom*);
|
extern void binClusters(Atom*);
|
||||||
|
@ -31,9 +31,12 @@
|
|||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
int force_field;
|
int force_field;
|
||||||
|
char* param_file;
|
||||||
char* input_file;
|
char* input_file;
|
||||||
char* vtk_file;
|
char* vtk_file;
|
||||||
|
char *xtc_file;
|
||||||
MD_FLOAT epsilon;
|
MD_FLOAT epsilon;
|
||||||
|
MD_FLOAT sigma;
|
||||||
MD_FLOAT sigma6;
|
MD_FLOAT sigma6;
|
||||||
MD_FLOAT temp;
|
MD_FLOAT temp;
|
||||||
MD_FLOAT rho;
|
MD_FLOAT rho;
|
||||||
@ -41,10 +44,14 @@ typedef struct {
|
|||||||
int ntypes;
|
int ntypes;
|
||||||
int ntimes;
|
int ntimes;
|
||||||
int nstat;
|
int nstat;
|
||||||
int every;
|
int reneigh_every;
|
||||||
|
int prune_every;
|
||||||
|
int x_out_every;
|
||||||
|
int v_out_every;
|
||||||
MD_FLOAT dt;
|
MD_FLOAT dt;
|
||||||
MD_FLOAT dtforce;
|
MD_FLOAT dtforce;
|
||||||
MD_FLOAT cutforce;
|
MD_FLOAT cutforce;
|
||||||
|
MD_FLOAT skin;
|
||||||
MD_FLOAT cutneigh;
|
MD_FLOAT cutneigh;
|
||||||
int nx, ny, nz;
|
int nx, ny, nz;
|
||||||
MD_FLOAT lattice;
|
MD_FLOAT lattice;
|
||||||
@ -53,4 +60,9 @@ typedef struct {
|
|||||||
double proc_freq;
|
double proc_freq;
|
||||||
char* eam_file;
|
char* eam_file;
|
||||||
} Parameter;
|
} Parameter;
|
||||||
|
|
||||||
|
void initParameter(Parameter*);
|
||||||
|
void readParameter(Parameter*, const char*);
|
||||||
|
void printParameter(Parameter*);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@ -41,10 +41,10 @@ static inline MD_SIMD_FLOAT simd_mul(MD_SIMD_FLOAT a, MD_SIMD_FLOAT b) { return
|
|||||||
static inline MD_SIMD_FLOAT simd_fma(MD_SIMD_FLOAT a, MD_SIMD_FLOAT b, MD_SIMD_FLOAT c) { return _mm512_fmadd_pd(a, b, c); }
|
static inline MD_SIMD_FLOAT simd_fma(MD_SIMD_FLOAT a, MD_SIMD_FLOAT b, MD_SIMD_FLOAT c) { return _mm512_fmadd_pd(a, b, c); }
|
||||||
static inline MD_SIMD_FLOAT simd_reciprocal(MD_SIMD_FLOAT a) { return _mm512_rcp14_pd(a); }
|
static inline MD_SIMD_FLOAT simd_reciprocal(MD_SIMD_FLOAT a) { return _mm512_rcp14_pd(a); }
|
||||||
static inline MD_SIMD_FLOAT simd_masked_add(MD_SIMD_FLOAT a, MD_SIMD_FLOAT b, MD_SIMD_MASK m) { return _mm512_mask_add_pd(a, m, a, b); }
|
static inline MD_SIMD_FLOAT simd_masked_add(MD_SIMD_FLOAT a, MD_SIMD_FLOAT b, MD_SIMD_MASK m) { return _mm512_mask_add_pd(a, m, a, b); }
|
||||||
static inline MD_SIMD_MASK simd_mask_from_u32(unsigned int a) { return _cvtu32_mask8(a); }
|
|
||||||
static inline MD_SIMD_MASK simd_mask_to_u32(unsigned int a) { return _cvtmask8_u32(a); }
|
|
||||||
static inline MD_SIMD_MASK simd_mask_and(MD_SIMD_MASK a, MD_SIMD_MASK b) { return _kand_mask8(a, b); }
|
static inline MD_SIMD_MASK simd_mask_and(MD_SIMD_MASK a, MD_SIMD_MASK b) { return _kand_mask8(a, b); }
|
||||||
static inline MD_SIMD_MASK simd_mask_cond_lt(MD_SIMD_FLOAT a, MD_SIMD_FLOAT b) { return _mm512_cmp_pd_mask(a, b, _CMP_LT_OQ); }
|
static inline MD_SIMD_MASK simd_mask_cond_lt(MD_SIMD_FLOAT a, MD_SIMD_FLOAT b) { return _mm512_cmp_pd_mask(a, b, _CMP_LT_OQ); }
|
||||||
|
static inline MD_SIMD_MASK simd_mask_from_u32(unsigned int a) { return _cvtu32_mask8(a); }
|
||||||
|
static inline unsigned int simd_mask_to_u32(MD_SIMD_MASK a) { return _cvtmask8_u32(a); }
|
||||||
|
|
||||||
static MD_SIMD_FLOAT simd_load2(MD_FLOAT *c0, MD_FLOAT *c1, int d) {
|
static MD_SIMD_FLOAT simd_load2(MD_FLOAT *c0, MD_FLOAT *c1, int d) {
|
||||||
MD_SIMD_FLOAT x;
|
MD_SIMD_FLOAT x;
|
||||||
@ -64,39 +64,55 @@ static inline MD_FLOAT simd_horizontal_sum(MD_SIMD_FLOAT a) {
|
|||||||
MD_SIMD_FLOAT x = _mm512_add_pd(a, _mm512_shuffle_f64x2(a, a, 0xee));
|
MD_SIMD_FLOAT x = _mm512_add_pd(a, _mm512_shuffle_f64x2(a, a, 0xee));
|
||||||
x = _mm512_add_pd(x, _mm512_shuffle_f64x2(x, x, 0x11));
|
x = _mm512_add_pd(x, _mm512_shuffle_f64x2(x, x, 0x11));
|
||||||
x = _mm512_add_pd(x, _mm512_permute_pd(x, 0x01));
|
x = _mm512_add_pd(x, _mm512_permute_pd(x, 0x01));
|
||||||
return *((double *) &x);
|
return *((MD_FLOAT *) &x);
|
||||||
}
|
}
|
||||||
|
|
||||||
#else // AVX2
|
#else // AVX or AVX2
|
||||||
|
|
||||||
#define MD_SIMD_FLOAT __m256d
|
#define MD_SIMD_FLOAT __m256d
|
||||||
|
|
||||||
|
#ifdef NO_AVX2
|
||||||
|
#define MD_SIMD_MASK __m256d
|
||||||
|
#else
|
||||||
#define MD_SIMD_MASK __mmask8
|
#define MD_SIMD_MASK __mmask8
|
||||||
|
#endif
|
||||||
|
|
||||||
static inline MD_SIMD_FLOAT simd_broadcast(double scalar) { return _mm256_set1_pd(scalar); }
|
static inline MD_SIMD_FLOAT simd_broadcast(double scalar) { return _mm256_set1_pd(scalar); }
|
||||||
static inline MD_SIMD_FLOAT simd_zero() { return _mm256_set1_pd(0.0); }
|
static inline MD_SIMD_FLOAT simd_zero() { return _mm256_set1_pd(0.0); }
|
||||||
static inline MD_SIMD_FLOAT simd_add(MD_SIMD_FLOAT a, MD_SIMD_FLOAT b) { return _mm256_add_pd(a, b); }
|
static inline MD_SIMD_FLOAT simd_add(MD_SIMD_FLOAT a, MD_SIMD_FLOAT b) { return _mm256_add_pd(a, b); }
|
||||||
static inline MD_SIMD_FLOAT simd_sub(MD_SIMD_FLOAT a, MD_SIMD_FLOAT b) { return _mm256_sub_pd(a, b); }
|
static inline MD_SIMD_FLOAT simd_sub(MD_SIMD_FLOAT a, MD_SIMD_FLOAT b) { return _mm256_sub_pd(a, b); }
|
||||||
static inline MD_SIMD_FLOAT simd_mul(MD_SIMD_FLOAT a, MD_SIMD_FLOAT b) { return _mm256_mul_pd(a, b); }
|
static inline MD_SIMD_FLOAT simd_mul(MD_SIMD_FLOAT a, MD_SIMD_FLOAT b) { return _mm256_mul_pd(a, b); }
|
||||||
static inline MD_SIMD_FLOAT simd_fma(MD_SIMD_FLOAT a, MD_SIMD_FLOAT b, MD_SIMD_FLOAT c) { return _mm256_fmadd_pd(a, b, c); }
|
|
||||||
static inline MD_SIMD_FLOAT simd_reciprocal(MD_SIMD_FLOAT a) { return _mm256_rcp14_pd(a); }
|
|
||||||
static inline MD_SIMD_FLOAT simd_masked_add(MD_SIMD_FLOAT a, MD_SIMD_FLOAT b, MD_SIMD_MASK m) { return _mm256_mask_add_pd(a, m, a, b); }
|
|
||||||
static inline MD_SIMD_MASK simd_mask_from_u32(unsigned int a) { return _cvtu32_mask8(a); }
|
|
||||||
static inline MD_SIMD_MASK simd_mask_to_u32(unsigned int a) { return _cvtmask8_u32(a); }
|
|
||||||
static inline MD_SIMD_MASK simd_mask_and(MD_SIMD_MASK a, MD_SIMD_MASK b) { return _kand_mask8(a, b); }
|
|
||||||
static inline MD_SIMD_MASK simd_mask_cond_lt(MD_SIMD_FLOAT a, MD_SIMD_FLOAT b) { return _mm256_cmp_pd_mask(a, b, _CMP_LT_OQ); }
|
|
||||||
|
|
||||||
static MD_SIMD_FLOAT simd_load(MD_FLOAT *c0, int d) {
|
#ifdef NO_AVX2
|
||||||
MD_SIMD_FLOAT x;
|
static inline MD_SIMD_FLOAT simd_reciprocal(MD_SIMD_FLOAT a) { return _mm256_cvtps_pd(_mm_rcp_ps(_mm256_cvtpd_ps(a))); }
|
||||||
#ifdef CLUSTER_AOS
|
static inline MD_SIMD_FLOAT simd_fma(MD_SIMD_FLOAT a, MD_SIMD_FLOAT b, MD_SIMD_FLOAT c) { return simd_add(simd_mul(a, b), c); }
|
||||||
__m128i aos_gather_vindex = _mm128_set_epi32(9, 6, 3, 0);
|
static inline MD_SIMD_FLOAT simd_masked_add(MD_SIMD_FLOAT a, MD_SIMD_FLOAT b, MD_SIMD_MASK m) { return simd_add(a, _mm256_and_pd(b, m)); }
|
||||||
__m128i vindex = _mm128_add_epi32(aos_gather_vindex, _mm128_set1_epi32(d));
|
static inline MD_SIMD_MASK simd_mask_cond_lt(MD_SIMD_FLOAT a, MD_SIMD_FLOAT b) { return _mm256_cmp_pd(a, b, _CMP_LT_OQ); }
|
||||||
x = _mm256_i32gather_pd(c0, vindex, sizeof(double));
|
static inline MD_SIMD_MASK simd_mask_and(MD_SIMD_MASK a, MD_SIMD_MASK b) { return _mm256_and_pd(a, b); }
|
||||||
#else
|
// TODO: Initialize all diagonal cases and just select the proper one (all bits set or diagonal) based on cond0
|
||||||
x = _mm256_load_pd(&c0[d * CLUSTER_DIM_M]);
|
static inline MD_SIMD_MASK simd_mask_from_u32(unsigned int a) {
|
||||||
#endif
|
const unsigned long long int all = 0xFFFFFFFFFFFFFFFF;
|
||||||
return x;
|
const unsigned long long int none = 0x0;
|
||||||
|
return _mm256_castsi256_pd(_mm256_set_epi64x((a & 0x8) ? all : none, (a & 0x4) ? all : none, (a & 0x2) ? all : none, (a & 0x1) ? all : none));
|
||||||
}
|
}
|
||||||
|
// TODO: Implement this, although it is only required for debugging
|
||||||
|
static inline int simd_mask_to_u32(MD_SIMD_MASK a) { return 0; }
|
||||||
|
static inline MD_FLOAT simd_horizontal_sum(MD_SIMD_FLOAT a) {
|
||||||
|
__m128d a0, a1;
|
||||||
|
a = _mm256_add_pd(a, _mm256_permute_pd(a, 0b0101));
|
||||||
|
a0 = _mm256_castpd256_pd128(a);
|
||||||
|
a1 = _mm256_extractf128_pd(a, 0x1);
|
||||||
|
a0 = _mm_add_sd(a0, a1);
|
||||||
|
return *((MD_FLOAT *) &a0);
|
||||||
|
}
|
||||||
|
#else
|
||||||
|
static inline MD_SIMD_FLOAT simd_reciprocal(MD_SIMD_FLOAT a) { return _mm256_rcp14_pd(a); }
|
||||||
|
static inline MD_SIMD_FLOAT simd_fma(MD_SIMD_FLOAT a, MD_SIMD_FLOAT b, MD_SIMD_FLOAT c) { return _mm256_fmadd_pd(a, b, c); }
|
||||||
|
static inline MD_SIMD_FLOAT simd_masked_add(MD_SIMD_FLOAT a, MD_SIMD_FLOAT b, MD_SIMD_MASK m) { return _mm256_mask_add_pd(a, m, a, b); }
|
||||||
|
static inline MD_SIMD_MASK simd_mask_cond_lt(MD_SIMD_FLOAT a, MD_SIMD_FLOAT b) { return _mm256_cmp_pd_mask(a, b, _CMP_LT_OQ); }
|
||||||
|
static inline MD_SIMD_MASK simd_mask_and(MD_SIMD_MASK a, MD_SIMD_MASK b) { return _kand_mask8(a, b); }
|
||||||
|
static inline MD_SIMD_MASK simd_mask_from_u32(unsigned int a) { return _cvtu32_mask8(a); }
|
||||||
|
static inline unsigned int simd_mask_to_u32(MD_SIMD_MASK a) { return _cvtmask8_u32(a); }
|
||||||
static inline MD_FLOAT simd_horizontal_sum(MD_SIMD_FLOAT a) {
|
static inline MD_FLOAT simd_horizontal_sum(MD_SIMD_FLOAT a) {
|
||||||
__m128d a0, a1;
|
__m128d a0, a1;
|
||||||
// test with shuffle & add as an alternative to hadd later
|
// test with shuffle & add as an alternative to hadd later
|
||||||
@ -104,7 +120,23 @@ static inline MD_FLOAT simd_horizontal_sum(MD_SIMD_FLOAT a) {
|
|||||||
a0 = _mm256_castpd256_pd128(a);
|
a0 = _mm256_castpd256_pd128(a);
|
||||||
a1 = _mm256_extractf128_pd(a, 0x1);
|
a1 = _mm256_extractf128_pd(a, 0x1);
|
||||||
a0 = _mm_add_sd(a0, a1);
|
a0 = _mm_add_sd(a0, a1);
|
||||||
return *((double *) &a0);
|
return *((MD_FLOAT *) &a0);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
static MD_SIMD_FLOAT simd_load(MD_FLOAT *c0, int d) {
|
||||||
|
MD_SIMD_FLOAT x;
|
||||||
|
#ifdef CLUSTER_AOS
|
||||||
|
#ifdef NO_AVX2
|
||||||
|
#error "Not possible to use AoS cluster layout without AVX2 support!"
|
||||||
|
#endif
|
||||||
|
__m128i aos_gather_vindex = _mm128_set_epi32(9, 6, 3, 0);
|
||||||
|
__m128i vindex = _mm128_add_epi32(aos_gather_vindex, _mm128_set1_epi32(d));
|
||||||
|
x = _mm256_i32gather_pd(c0, vindex, sizeof(double));
|
||||||
|
#else
|
||||||
|
x = _mm256_load_pd(&c0[d * CLUSTER_DIM_M]);
|
||||||
|
#endif
|
||||||
|
return x;
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@ -29,6 +29,10 @@ typedef struct {
|
|||||||
long long int calculated_forces;
|
long long int calculated_forces;
|
||||||
long long int num_neighs;
|
long long int num_neighs;
|
||||||
long long int force_iters;
|
long long int force_iters;
|
||||||
|
long long int atoms_within_cutoff;
|
||||||
|
long long int atoms_outside_cutoff;
|
||||||
|
long long int clusters_within_cutoff;
|
||||||
|
long long int clusters_outside_cutoff;
|
||||||
} Stats;
|
} Stats;
|
||||||
|
|
||||||
void initStats(Stats *s);
|
void initStats(Stats *s);
|
||||||
|
@ -38,6 +38,14 @@
|
|||||||
#define DEBUG_MESSAGE
|
#define DEBUG_MESSAGE
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#ifndef MAXLINE
|
||||||
|
#define MAXLINE 4096
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifndef MAX
|
||||||
|
#define MAX(a,b) ((a) > (b) ? (a) : (b))
|
||||||
|
#endif
|
||||||
|
|
||||||
#define FF_LJ 0
|
#define FF_LJ 0
|
||||||
#define FF_EAM 1
|
#define FF_EAM 1
|
||||||
|
|
||||||
|
37
gromacs/includes/xtc.h
Normal file
37
gromacs/includes/xtc.h
Normal file
@ -0,0 +1,37 @@
|
|||||||
|
/*
|
||||||
|
* =======================================================================================
|
||||||
|
*
|
||||||
|
* Author: Jan Eitzinger (je), jan.eitzinger@fau.de
|
||||||
|
* Copyright (c) 2020 RRZE, University Erlangen-Nuremberg
|
||||||
|
*
|
||||||
|
* This file is part of MD-Bench.
|
||||||
|
*
|
||||||
|
* MD-Bench is free software: you can redistribute it and/or modify it
|
||||||
|
* under the terms of the GNU Lesser General Public License as published
|
||||||
|
* by the Free Software Foundation, either version 3 of the License, or
|
||||||
|
* (at your option) any later version.
|
||||||
|
*
|
||||||
|
* MD-Bench is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||||
|
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
|
||||||
|
* PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
|
||||||
|
* details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU Lesser General Public License along
|
||||||
|
* with MD-Bench. If not, see <https://www.gnu.org/licenses/>.
|
||||||
|
* =======================================================================================
|
||||||
|
*/
|
||||||
|
#ifndef __XTC_H_
#define __XTC_H_

/* Atom is used in the prototypes below; include it inside the guard so the
 * header is only processed once per translation unit. */
#include <atom.h>

/*
 * XTC trajectory output interface.
 *
 * When XTC_OUTPUT is defined the three functions below are declared as real
 * functions. Otherwise they are replaced by function-like macros that expand
 * to nothing, so call sites compile unchanged and their arguments are
 * discarded by the preprocessor (they are never evaluated).
 */
#ifdef XTC_OUTPUT
void xtc_init(const char *, Atom*, int);
void xtc_write(Atom*, int, int, int);
/* Explicit (void) makes this a real prototype in C instead of a declaration
 * with an unspecified parameter list. */
void xtc_end(void);
#else
#define xtc_init(a,b,c)
#define xtc_write(a,b,c,d)
#define xtc_end()
#endif

#endif
|
124
gromacs/main.c
124
gromacs/main.c
@ -20,16 +20,11 @@
|
|||||||
* with MD-Bench. If not, see <https://www.gnu.org/licenses/>.
|
* with MD-Bench. If not, see <https://www.gnu.org/licenses/>.
|
||||||
* =======================================================================================
|
* =======================================================================================
|
||||||
*/
|
*/
|
||||||
#include <stdlib.h>
|
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <string.h>
|
|
||||||
#include <unistd.h>
|
|
||||||
#include <limits.h>
|
|
||||||
#include <math.h>
|
#include <math.h>
|
||||||
#include <float.h>
|
//--
|
||||||
|
|
||||||
#include <likwid-marker.h>
|
#include <likwid-marker.h>
|
||||||
|
//--
|
||||||
#include <timing.h>
|
#include <timing.h>
|
||||||
#include <allocate.h>
|
#include <allocate.h>
|
||||||
#include <neighbor.h>
|
#include <neighbor.h>
|
||||||
@ -41,6 +36,7 @@
|
|||||||
#include <timers.h>
|
#include <timers.h>
|
||||||
#include <eam.h>
|
#include <eam.h>
|
||||||
#include <vtk.h>
|
#include <vtk.h>
|
||||||
|
#include <xtc.h>
|
||||||
#include <util.h>
|
#include <util.h>
|
||||||
|
|
||||||
#define HLINE "----------------------------------------------------------------------------\n"
|
#define HLINE "----------------------------------------------------------------------------\n"
|
||||||
@ -55,29 +51,6 @@ extern double computeForceEam(Eam*, Parameter*, Atom*, Neighbor*, Stats*);
|
|||||||
# define computeForceLJ computeForceLJ_4xn
|
# define computeForceLJ computeForceLJ_4xn
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
void init(Parameter *param) {
|
|
||||||
param->input_file = NULL;
|
|
||||||
param->vtk_file = NULL;
|
|
||||||
param->force_field = FF_LJ;
|
|
||||||
param->epsilon = 1.0;
|
|
||||||
param->sigma6 = 1.0;
|
|
||||||
param->rho = 0.8442;
|
|
||||||
param->ntypes = 4;
|
|
||||||
param->ntimes = 200;
|
|
||||||
param->dt = 0.005;
|
|
||||||
param->nx = 32;
|
|
||||||
param->ny = 32;
|
|
||||||
param->nz = 32;
|
|
||||||
param->cutforce = 2.5;
|
|
||||||
param->cutneigh = param->cutforce + 0.30;
|
|
||||||
param->temp = 1.44;
|
|
||||||
param->nstat = 100;
|
|
||||||
param->mass = 1.0;
|
|
||||||
param->dtforce = 0.5 * param->dt;
|
|
||||||
param->every = 20;
|
|
||||||
param->proc_freq = 2.4;
|
|
||||||
}
|
|
||||||
|
|
||||||
double setup(Parameter *param, Eam *eam, Atom *atom, Neighbor *neighbor, Stats *stats) {
|
double setup(Parameter *param, Eam *eam, Atom *atom, Neighbor *neighbor, Stats *stats) {
|
||||||
if(param->force_field == FF_EAM) { initEam(eam, param); }
|
if(param->force_field == FF_EAM) { initEam(eam, param); }
|
||||||
double S, E;
|
double S, E;
|
||||||
@ -188,75 +161,97 @@ int main(int argc, char** argv) {
|
|||||||
//LIKWID_MARKER_REGISTER("reneighbour");
|
//LIKWID_MARKER_REGISTER("reneighbour");
|
||||||
//LIKWID_MARKER_REGISTER("pbc");
|
//LIKWID_MARKER_REGISTER("pbc");
|
||||||
}
|
}
|
||||||
init(¶m);
|
|
||||||
|
|
||||||
for(int i = 0; i < argc; i++)
|
initParameter(¶m);
|
||||||
{
|
for(int i = 0; i < argc; i++) {
|
||||||
if((strcmp(argv[i], "-f") == 0))
|
if((strcmp(argv[i], "-p") == 0)) {
|
||||||
{
|
readParameter(¶m, argv[++i]);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if((strcmp(argv[i], "-f") == 0)) {
|
||||||
if((param.force_field = str2ff(argv[++i])) < 0) {
|
if((param.force_field = str2ff(argv[++i])) < 0) {
|
||||||
fprintf(stderr, "Invalid force field!\n");
|
fprintf(stderr, "Invalid force field!\n");
|
||||||
exit(-1);
|
exit(-1);
|
||||||
}
|
}
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
if((strcmp(argv[i], "-i") == 0))
|
if((strcmp(argv[i], "-i") == 0)) {
|
||||||
{
|
|
||||||
param.input_file = strdup(argv[++i]);
|
param.input_file = strdup(argv[++i]);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
if((strcmp(argv[i], "-e") == 0))
|
if((strcmp(argv[i], "-e") == 0)) {
|
||||||
{
|
|
||||||
param.eam_file = strdup(argv[++i]);
|
param.eam_file = strdup(argv[++i]);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
if((strcmp(argv[i], "-n") == 0) || (strcmp(argv[i], "--nsteps") == 0))
|
if((strcmp(argv[i], "-n") == 0) || (strcmp(argv[i], "--nsteps") == 0)) {
|
||||||
{
|
|
||||||
param.ntimes = atoi(argv[++i]);
|
param.ntimes = atoi(argv[++i]);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
if((strcmp(argv[i], "-nx") == 0))
|
if((strcmp(argv[i], "-nx") == 0)) {
|
||||||
{
|
|
||||||
param.nx = atoi(argv[++i]);
|
param.nx = atoi(argv[++i]);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
if((strcmp(argv[i], "-ny") == 0))
|
if((strcmp(argv[i], "-ny") == 0)) {
|
||||||
{
|
|
||||||
param.ny = atoi(argv[++i]);
|
param.ny = atoi(argv[++i]);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
if((strcmp(argv[i], "-nz") == 0))
|
if((strcmp(argv[i], "-nz") == 0)) {
|
||||||
{
|
|
||||||
param.nz = atoi(argv[++i]);
|
param.nz = atoi(argv[++i]);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
if((strcmp(argv[i], "--freq") == 0))
|
if((strcmp(argv[i], "-m") == 0) || (strcmp(argv[i], "--mass") == 0)) {
|
||||||
{
|
param.mass = atof(argv[++i]);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if((strcmp(argv[i], "-r") == 0) || (strcmp(argv[i], "--radius") == 0)) {
|
||||||
|
param.cutforce = atof(argv[++i]);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if((strcmp(argv[i], "-s") == 0) || (strcmp(argv[i], "--skin") == 0)) {
|
||||||
|
param.skin = atof(argv[++i]);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if((strcmp(argv[i], "--freq") == 0)) {
|
||||||
param.proc_freq = atof(argv[++i]);
|
param.proc_freq = atof(argv[++i]);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
if((strcmp(argv[i], "--vtk") == 0))
|
if((strcmp(argv[i], "--vtk") == 0)) {
|
||||||
{
|
|
||||||
param.vtk_file = strdup(argv[++i]);
|
param.vtk_file = strdup(argv[++i]);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
if((strcmp(argv[i], "-h") == 0) || (strcmp(argv[i], "--help") == 0))
|
if((strcmp(argv[i], "--xtc") == 0)) {
|
||||||
{
|
#ifndef XTC_OUTPUT
|
||||||
|
fprintf(stderr, "XTC not available, set XTC_OUTPUT option in config.mk file and recompile MD-Bench!");
|
||||||
|
exit(-1);
|
||||||
|
#else
|
||||||
|
param.xtc_file = strdup(argv[++i]);
|
||||||
|
#endif
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if((strcmp(argv[i], "-h") == 0) || (strcmp(argv[i], "--help") == 0)) {
|
||||||
printf("MD Bench: A minimalistic re-implementation of miniMD\n");
|
printf("MD Bench: A minimalistic re-implementation of miniMD\n");
|
||||||
printf(HLINE);
|
printf(HLINE);
|
||||||
|
printf("-p <string>: file to read parameters from (can be specified more than once)\n");
|
||||||
printf("-f <string>: force field (lj or eam), default lj\n");
|
printf("-f <string>: force field (lj or eam), default lj\n");
|
||||||
printf("-i <string>: input file with atom positions (dump)\n");
|
printf("-i <string>: input file with atom positions (dump)\n");
|
||||||
printf("-e <string>: input file for EAM\n");
|
printf("-e <string>: input file for EAM\n");
|
||||||
printf("-n / --nsteps <int>: set number of timesteps for simulation\n");
|
printf("-n / --nsteps <int>: set number of timesteps for simulation\n");
|
||||||
printf("-nx/-ny/-nz <int>: set linear dimension of systembox in x/y/z direction\n");
|
printf("-nx/-ny/-nz <int>: set linear dimension of systembox in x/y/z direction\n");
|
||||||
|
printf("-r / --radius <real>: set cutoff radius\n");
|
||||||
|
printf("-s / --skin <real>: set skin (verlet buffer)\n");
|
||||||
printf("--freq <real>: processor frequency (GHz)\n");
|
printf("--freq <real>: processor frequency (GHz)\n");
|
||||||
printf("--vtk <string>: VTK file for visualization\n");
|
printf("--vtk <string>: VTK file for visualization\n");
|
||||||
|
printf("--xtc <string>: XTC file for visualization\n");
|
||||||
printf(HLINE);
|
printf(HLINE);
|
||||||
exit(EXIT_SUCCESS);
|
exit(EXIT_SUCCESS);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
param.cutneigh = param.cutforce + param.skin;
|
||||||
setup(¶m, &eam, &atom, &neighbor, &stats);
|
setup(¶m, &eam, &atom, &neighbor, &stats);
|
||||||
|
printParameter(¶m);
|
||||||
|
|
||||||
|
printf("step\ttemp\t\tpressure\n");
|
||||||
computeThermo(0, ¶m, &atom);
|
computeThermo(0, ¶m, &atom);
|
||||||
#if defined(MEM_TRACER) || defined(INDEX_TRACER)
|
#if defined(MEM_TRACER) || defined(INDEX_TRACER)
|
||||||
traceAddresses(¶m, &atom, &neighbor, n + 1);
|
traceAddresses(¶m, &atom, &neighbor, n + 1);
|
||||||
@ -274,10 +269,18 @@ int main(int argc, char** argv) {
|
|||||||
write_data_to_vtk_file(param.vtk_file, &atom, 0);
|
write_data_to_vtk_file(param.vtk_file, &atom, 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if(param.xtc_file != NULL) {
|
||||||
|
xtc_init(param.xtc_file, &atom, 0);
|
||||||
|
}
|
||||||
|
|
||||||
for(int n = 0; n < param.ntimes; n++) {
|
for(int n = 0; n < param.ntimes; n++) {
|
||||||
initialIntegrate(¶m, &atom);
|
initialIntegrate(¶m, &atom);
|
||||||
|
|
||||||
if((n + 1) % param.every) {
|
if((n + 1) % param.reneigh_every) {
|
||||||
|
if(!((n + 1) % param.prune_every)) {
|
||||||
|
pruneNeighbor(¶m, &atom, &neighbor);
|
||||||
|
}
|
||||||
|
|
||||||
updatePbc(&atom, ¶m, 0);
|
updatePbc(&atom, ¶m, 0);
|
||||||
} else {
|
} else {
|
||||||
timer[NEIGH] += reneighbour(¶m, &atom, &neighbor);
|
timer[NEIGH] += reneighbour(¶m, &atom, &neighbor);
|
||||||
@ -299,16 +302,27 @@ int main(int argc, char** argv) {
|
|||||||
computeThermo(n + 1, ¶m, &atom);
|
computeThermo(n + 1, ¶m, &atom);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
int write_pos = !((n + 1) % param.x_out_every);
|
||||||
|
int write_vel = !((n + 1) % param.v_out_every);
|
||||||
|
if(write_pos || write_vel) {
|
||||||
if(param.vtk_file != NULL) {
|
if(param.vtk_file != NULL) {
|
||||||
write_data_to_vtk_file(param.vtk_file, &atom, n + 1);
|
write_data_to_vtk_file(param.vtk_file, &atom, n + 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if(param.xtc_file != NULL) {
|
||||||
|
xtc_write(&atom, n + 1, write_pos, write_vel);
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
timer[TOTAL] = getTimeStamp() - timer[TOTAL];
|
timer[TOTAL] = getTimeStamp() - timer[TOTAL];
|
||||||
computeThermo(-1, ¶m, &atom);
|
computeThermo(-1, ¶m, &atom);
|
||||||
|
|
||||||
|
if(param.xtc_file != NULL) {
|
||||||
|
xtc_end();
|
||||||
|
}
|
||||||
|
|
||||||
printf(HLINE);
|
printf(HLINE);
|
||||||
printf("Force field: %s\n", ff2str(param.force_field));
|
|
||||||
printf("Data layout for positions: %s\n", POS_DATA_LAYOUT);
|
printf("Data layout for positions: %s\n", POS_DATA_LAYOUT);
|
||||||
#if PRECISION == 1
|
#if PRECISION == 1
|
||||||
printf("Using single precision floating point.\n");
|
printf("Using single precision floating point.\n");
|
||||||
|
@ -92,16 +92,15 @@ void setupNeighbor(Parameter *param, Atom *atom) {
|
|||||||
|
|
||||||
MD_FLOAT atom_density = ((MD_FLOAT)(atom->Nlocal)) / ((xhi - xlo) * (yhi - ylo) * (zhi - zlo));
|
MD_FLOAT atom_density = ((MD_FLOAT)(atom->Nlocal)) / ((xhi - xlo) * (yhi - ylo) * (zhi - zlo));
|
||||||
MD_FLOAT atoms_in_cell = MAX(CLUSTER_DIM_M, CLUSTER_DIM_N);
|
MD_FLOAT atoms_in_cell = MAX(CLUSTER_DIM_M, CLUSTER_DIM_N);
|
||||||
//MD_FLOAT atoms_in_cell = CLUSTER_DIM_M;
|
MD_FLOAT targetsizex = cbrt(atoms_in_cell / atom_density);
|
||||||
binsizex = cbrt(atoms_in_cell / atom_density);
|
MD_FLOAT targetsizey = cbrt(atoms_in_cell / atom_density);
|
||||||
binsizey = cbrt(atoms_in_cell / atom_density);
|
nbinx = MAX(1, (int)ceil((xhi - xlo) / targetsizex));
|
||||||
cutneighsq = cutneigh * cutneigh;
|
nbiny = MAX(1, (int)ceil((yhi - ylo) / targetsizey));
|
||||||
nbinx = (int)((xhi - xlo) / binsizex);
|
binsizex = (xhi - xlo) / nbinx;
|
||||||
nbiny = (int)((yhi - ylo) / binsizey);
|
binsizey = (yhi - ylo) / nbiny;
|
||||||
if(nbinx == 0) { nbinx = 1; }
|
|
||||||
if(nbiny == 0) { nbiny = 1; }
|
|
||||||
bininvx = 1.0 / binsizex;
|
bininvx = 1.0 / binsizex;
|
||||||
bininvy = 1.0 / binsizey;
|
bininvy = 1.0 / binsizey;
|
||||||
|
cutneighsq = cutneigh * cutneigh;
|
||||||
|
|
||||||
coord = xlo - cutneigh - SMALL * xprd;
|
coord = xlo - cutneigh - SMALL * xprd;
|
||||||
mbinxlo = (int) (coord * bininvx);
|
mbinxlo = (int) (coord * bininvx);
|
||||||
@ -161,6 +160,14 @@ void setupNeighbor(Parameter *param, Atom *atom) {
|
|||||||
|
|
||||||
if (cluster_bins) { free(cluster_bins); }
|
if (cluster_bins) { free(cluster_bins); }
|
||||||
cluster_bins = (int*) malloc(mbins * clusters_per_bin * sizeof(int));
|
cluster_bins = (int*) malloc(mbins * clusters_per_bin * sizeof(int));
|
||||||
|
|
||||||
|
/*
|
||||||
|
DEBUG_MESSAGE("lo, hi = (%e, %e, %e), (%e, %e, %e)\n", xlo, ylo, zlo, xhi, yhi, zhi);
|
||||||
|
DEBUG_MESSAGE("binsize = %e, %e\n", binsizex, binsizey);
|
||||||
|
DEBUG_MESSAGE("mbin lo, hi = (%d, %d), (%d, %d)\n", mbinxlo, mbinylo, mbinxhi, mbinyhi);
|
||||||
|
DEBUG_MESSAGE("mbins = %d (%d x %d)\n", mbins, mbinx, mbiny);
|
||||||
|
DEBUG_MESSAGE("nextx = %d, nexty = %d\n", nextx, nexty);
|
||||||
|
*/
|
||||||
}
|
}
|
||||||
|
|
||||||
MD_FLOAT getBoundingBoxDistanceSq(Atom *atom, int ci, int cj) {
|
MD_FLOAT getBoundingBoxDistanceSq(Atom *atom, int ci, int cj) {
|
||||||
@ -374,6 +381,47 @@ void buildNeighbor(Atom *atom, Neighbor *neighbor) {
|
|||||||
DEBUG_MESSAGE("buildNeighbor end\n");
|
DEBUG_MESSAGE("buildNeighbor end\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
 * Prune the neighbor list of every local cluster in place.
 *
 * For each local cluster ci, every neighbor cj for which
 * atomDistanceInRange(atom, ci, cj, cutsq) is false is removed by overwriting
 * it with the last entry of the list (the order of neighbors is therefore not
 * preserved). The threshold currently used is the file-level cutneighsq, not
 * the force cutoff from param.
 *
 * When CLUSTER_DIM_N > CLUSTER_DIM_M, trailing padding entries (index
 * nall - 1) are stripped before pruning and re-added afterwards so that the
 * list length stays a multiple of CLUSTER_DIM_N / CLUSTER_DIM_M.
 *
 * param:    simulation parameters (currently unused, kept for the interface)
 * atom:     atom/cluster data; Nclusters_local and Nclusters_ghost are read
 * neighbor: neighbor lists; neighbors[] and numneigh[] are updated in place
 */
void pruneNeighbor(Parameter *param, Atom *atom, Neighbor *neighbor) {
    DEBUG_MESSAGE("pruneNeighbor start\n");
    (void) param; // Only needed if the force cutoff is used instead (see below)
    int nall = atom->Nclusters_local + atom->Nclusters_ghost;
    // Alternative threshold: param->cutforce * param->cutforce
    MD_FLOAT cutsq = cutneighsq;

    for(int ci = 0; ci < atom->Nclusters_local; ci++) {
        int *neighs = &neighbor->neighbors[ci * neighbor->maxneighs];
        int numneighs = neighbor->numneigh[ci];
        int k = 0;

        // Remove dummy clusters if necessary
        if(CLUSTER_DIM_N > CLUSTER_DIM_M) {
            // numneighs > 0 prevents reading neighs[-1] when the list is
            // empty or consists only of dummy entries
            while(numneighs > 0 && neighs[numneighs - 1] == nall - 1) {
                numneighs--;
            }
        }

        // Swap-remove every neighbor that is out of range; k only advances
        // when the current entry is kept, so the swapped-in entry is checked too
        while(k < numneighs) {
            int cj = neighs[k];
            if(atomDistanceInRange(atom, ci, cj, cutsq)) {
                k++;
            } else {
                numneighs--;
                neighs[k] = neighs[numneighs];
            }
        }

        // Re-add dummy clusters if necessary
        if(CLUSTER_DIM_N > CLUSTER_DIM_M) {
            while(numneighs % (CLUSTER_DIM_N / CLUSTER_DIM_M)) {
                neighs[numneighs++] = nall - 1; // Last cluster is always a dummy cluster
            }
        }

        neighbor->numneigh[ci] = numneighs;
    }

    DEBUG_MESSAGE("pruneNeighbor end\n");
}
|
||||||
|
|
||||||
/* internal subroutines */
|
/* internal subroutines */
|
||||||
MD_FLOAT bindist(int i, int j) {
|
MD_FLOAT bindist(int i, int j) {
|
||||||
MD_FLOAT delx, dely, delz;
|
MD_FLOAT delx, dely, delz;
|
||||||
|
156
gromacs/parameter.c
Normal file
156
gromacs/parameter.c
Normal file
@ -0,0 +1,156 @@
|
|||||||
|
/*
|
||||||
|
* =======================================================================================
|
||||||
|
*
|
||||||
|
* Author: Jan Eitzinger (je), jan.eitzinger@fau.de
|
||||||
|
* Copyright (c) 2020 RRZE, University Erlangen-Nuremberg
|
||||||
|
*
|
||||||
|
* This file is part of MD-Bench.
|
||||||
|
*
|
||||||
|
* MD-Bench is free software: you can redistribute it and/or modify it
|
||||||
|
* under the terms of the GNU Lesser General Public License as published
|
||||||
|
* by the Free Software Foundation, either version 3 of the License, or
|
||||||
|
* (at your option) any later version.
|
||||||
|
*
|
||||||
|
* MD-Bench is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||||
|
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
|
||||||
|
* PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
|
||||||
|
* details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU Lesser General Public License along
|
||||||
|
* with MD-Bench. If not, see <https://www.gnu.org/licenses/>.
|
||||||
|
* =======================================================================================
|
||||||
|
*/
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <string.h>
|
||||||
|
//---
|
||||||
|
#include <parameter.h>
|
||||||
|
#include <util.h>
|
||||||
|
|
||||||
|
/*
 * Fill a Parameter structure with the built-in default values.
 *
 * All file-name pointers are set to NULL so that later code can test them
 * against NULL to find out whether a file was requested. Derived values
 * (cutneigh, dtforce) are computed from the defaults assigned just before
 * them, so the order of those assignments matters.
 *
 * param: structure to initialise; every field assigned below is overwritten
 */
void initParameter(Parameter *param) {
    // File names: NULL means "not specified"
    param->param_file = NULL;
    param->input_file = NULL;
    param->vtk_file = NULL;
    param->xtc_file = NULL;
    param->eam_file = NULL;

    // Force field and potential parameters
    param->force_field = FF_LJ;
    param->epsilon = 1.0;
    param->sigma = 1.0;
    param->sigma6 = 1.0;
    param->rho = 0.8442;
    param->ntypes = 4;

    // Time stepping and system size
    param->ntimes = 200;
    param->dt = 0.005;
    param->nx = 32;
    param->ny = 32;
    param->nz = 32;

    // Cutoffs: cutneigh is derived from cutforce and skin
    param->cutforce = 2.5;
    param->skin = 0.3;
    param->cutneigh = param->cutforce + param->skin;

    param->temp = 1.44;
    param->nstat = 100;
    param->mass = 1.0;
    param->dtforce = 0.5 * param->dt; // derived from dt set above

    // Intervals, in timesteps
    param->reneigh_every = 20;
    param->prune_every = 1000;
    param->x_out_every = 20;
    param->v_out_every = 5;

    param->proc_freq = 2.4;
}
|
||||||
|
|
||||||
|
/*
 * Read simulation parameters from a text file into param.
 *
 * Each line has the form "<key> <value>"; everything from a '#' to the end of
 * the line is ignored. Lines without both a key and a value are skipped. A key
 * is recognised when it starts with the name of a Parameter field (prefix
 * match, as implemented by PARSE_PARAM). After the whole file has been read,
 * sigma6 is recomputed as sigma^6.
 *
 * Key and value are separated by any run of blanks, tabs or line terminators,
 * so the newline that fgets keeps at the end of a line never becomes part of
 * a value (important for file names, which are duplicated verbatim).
 *
 * param:    structure to update; fields not mentioned in the file are kept
 * filename: path of the parameter file
 *
 * Exits the program with status -1 if the file cannot be opened.
 */
void readParameter(Parameter *param, const char *filename) {
    static const char *delims = " \t\r\n";
    FILE *fp = fopen(filename, "r");
    char line[MAXLINE];

    if(!fp) {
        fprintf(stderr, "Could not open parameter file: %s\n", filename);
        exit(-1);
    }

    #define PARSE_PARAM(p,f)   if(strncmp(tok, #p, sizeof(#p) / sizeof(#p[0]) - 1) == 0) { param->p = f(val); }
    #define PARSE_STRING(p)    PARSE_PARAM(p, strdup)
    #define PARSE_INT(p)       PARSE_PARAM(p, atoi)
    #define PARSE_REAL(p)      PARSE_PARAM(p, atof)

    // fgets returns NULL at end of file or on a read error, which ends the loop
    while(fgets(line, MAXLINE, fp) != NULL) {
        // Cut off a trailing comment, if any
        char *comment = strchr(line, '#');
        if(comment != NULL) { *comment = '\0'; }

        char *tok = strtok(line, delims);
        char *val = strtok(NULL, delims);

        if(tok != NULL && val != NULL) {
            PARSE_PARAM(force_field, str2ff);
            PARSE_STRING(input_file);
            PARSE_STRING(eam_file);
            PARSE_STRING(vtk_file);
            PARSE_STRING(xtc_file);
            PARSE_REAL(epsilon);
            PARSE_REAL(sigma);
            PARSE_REAL(rho);
            PARSE_REAL(dt);
            PARSE_REAL(cutforce);
            PARSE_REAL(skin);
            PARSE_REAL(temp);
            PARSE_REAL(mass);
            PARSE_REAL(proc_freq);
            PARSE_INT(ntypes);
            PARSE_INT(ntimes);
            PARSE_INT(nx);
            PARSE_INT(ny);
            PARSE_INT(nz);
            PARSE_INT(nstat);
            PARSE_INT(reneigh_every);
            PARSE_INT(prune_every);
            PARSE_INT(x_out_every);
            PARSE_INT(v_out_every);
        }
    }

    // Update sigma6 parameter
    MD_FLOAT s2 = param->sigma * param->sigma;
    param->sigma6 = s2 * s2 * s2;
    fclose(fp);
}
|
||||||
|
|
||||||
|
/*
 * Print the current parameter set to standard output.
 *
 * File names are listed first and only when they are set (non-NULL); the
 * remaining values are always printed, one per line, prefixed by a tab.
 *
 * param: parameters to print; the structure is only read
 */
void printParameter(Parameter *param) {
    const Parameter *p = param; // read-only alias to keep the lines short

    printf("Parameters:\n");

    // Optional file names
    if(p->input_file) { printf("Input file: %s\n", p->input_file); }
    if(p->vtk_file)   { printf("VTK file: %s\n", p->vtk_file); }
    if(p->xtc_file)   { printf("XTC file: %s\n", p->xtc_file); }
    if(p->eam_file)   { printf("EAM file: %s\n", p->eam_file); }

    // Force field and geometry
    printf("\tForce field: %s\n", ff2str(p->force_field));
    printf("\tUnit cells (nx, ny, nz): %d, %d, %d\n", p->nx, p->ny, p->nz);
    printf("\tDomain box sizes (x, y, z): %e, %e, %e\n", p->xprd, p->yprd, p->zprd);
    printf("\tLattice size: %e\n", p->lattice);

    // Physical parameters
    printf("\tEpsilon: %e\n", p->epsilon);
    printf("\tSigma: %e\n", p->sigma);
    printf("\tTemperature: %e\n", p->temp);
    printf("\tRHO: %e\n", p->rho);
    printf("\tMass: %e\n", p->mass);
    printf("\tNumber of types: %d\n", p->ntypes);

    // Run control and intervals
    printf("\tNumber of timesteps: %d\n", p->ntimes);
    printf("\tReport stats every (timesteps): %d\n", p->nstat);
    printf("\tReneighbor every (timesteps): %d\n", p->reneigh_every);
    printf("\tPrune every (timesteps): %d\n", p->prune_every);
    printf("\tOutput positions every (timesteps): %d\n", p->x_out_every);
    printf("\tOutput velocities every (timesteps): %d\n", p->v_out_every);

    // Integration and cutoffs
    printf("\tDelta time (dt): %e\n", p->dt);
    printf("\tCutoff radius: %e\n", p->cutforce);
    printf("\tSkin: %e\n", p->skin);
    printf("\tProcessor frequency (GHz): %.4f\n\n", p->proc_freq);
}
|
@ -9,10 +9,15 @@ void initStats(Stats *s) {
|
|||||||
s->calculated_forces = 0;
|
s->calculated_forces = 0;
|
||||||
s->num_neighs = 0;
|
s->num_neighs = 0;
|
||||||
s->force_iters = 0;
|
s->force_iters = 0;
|
||||||
|
s->atoms_within_cutoff = 0;
|
||||||
|
s->atoms_outside_cutoff = 0;
|
||||||
|
s->clusters_within_cutoff = 0;
|
||||||
|
s->clusters_outside_cutoff = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
void displayStatistics(Atom *atom, Parameter *param, Stats *stats, double *timer) {
|
void displayStatistics(Atom *atom, Parameter *param, Stats *stats, double *timer) {
|
||||||
#ifdef COMPUTE_STATS
|
#ifdef COMPUTE_STATS
|
||||||
|
|
||||||
const int MxN = CLUSTER_DIM_M * CLUSTER_DIM_N;
|
const int MxN = CLUSTER_DIM_M * CLUSTER_DIM_N;
|
||||||
double avg_atoms_cluster = (double)(atom->Nlocal) / (double)(atom->Nclusters_local);
|
double avg_atoms_cluster = (double)(atom->Nlocal) / (double)(atom->Nclusters_local);
|
||||||
double force_useful_volume = 1e-9 * ( (double)(atom->Nlocal * (param->ntimes + 1)) * (sizeof(MD_FLOAT) * 6 + sizeof(int)) +
|
double force_useful_volume = 1e-9 * ( (double)(atom->Nlocal * (param->ntimes + 1)) * (sizeof(MD_FLOAT) * 6 + sizeof(int)) +
|
||||||
@ -20,9 +25,11 @@ void displayStatistics(Atom *atom, Parameter *param, Stats *stats, double *timer
|
|||||||
double avg_neigh_atom = (stats->num_neighs * CLUSTER_DIM_N) / (double)(atom->Nlocal * (param->ntimes + 1));
|
double avg_neigh_atom = (stats->num_neighs * CLUSTER_DIM_N) / (double)(atom->Nlocal * (param->ntimes + 1));
|
||||||
double avg_neigh_cluster = (double)(stats->num_neighs) / (double)(stats->calculated_forces);
|
double avg_neigh_cluster = (double)(stats->num_neighs) / (double)(stats->calculated_forces);
|
||||||
double avg_simd = stats->force_iters / (double)(atom->Nlocal * (param->ntimes + 1));
|
double avg_simd = stats->force_iters / (double)(atom->Nlocal * (param->ntimes + 1));
|
||||||
|
|
||||||
#ifdef EXPLICIT_TYPES
|
#ifdef EXPLICIT_TYPES
|
||||||
force_useful_volume += 1e-9 * (double)((atom->Nlocal * (param->ntimes + 1)) + stats->num_neighs) * sizeof(int);
|
force_useful_volume += 1e-9 * (double)((atom->Nlocal * (param->ntimes + 1)) + stats->num_neighs) * sizeof(int);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
printf("Statistics:\n");
|
printf("Statistics:\n");
|
||||||
printf("\tVector width: %d, Processor frequency: %.4f GHz\n", VECTOR_WIDTH, param->proc_freq);
|
printf("\tVector width: %d, Processor frequency: %.4f GHz\n", VECTOR_WIDTH, param->proc_freq);
|
||||||
printf("\tAverage atoms per cluster: %.4f\n", avg_atoms_cluster);
|
printf("\tAverage atoms per cluster: %.4f\n", avg_atoms_cluster);
|
||||||
@ -33,5 +40,13 @@ void displayStatistics(Atom *atom, Parameter *param, Stats *stats, double *timer
|
|||||||
printf("\tTotal number of SIMD iterations: %lld\n", stats->force_iters);
|
printf("\tTotal number of SIMD iterations: %lld\n", stats->force_iters);
|
||||||
printf("\tUseful read data volume for force computation: %.2fGB\n", force_useful_volume);
|
printf("\tUseful read data volume for force computation: %.2fGB\n", force_useful_volume);
|
||||||
printf("\tCycles/SIMD iteration: %.4f\n", timer[FORCE] * param->proc_freq * 1e9 / stats->force_iters);
|
printf("\tCycles/SIMD iteration: %.4f\n", timer[FORCE] * param->proc_freq * 1e9 / stats->force_iters);
|
||||||
|
|
||||||
|
#ifdef USE_REFERENCE_VERSION
|
||||||
|
const double atoms_eff = (double)stats->atoms_within_cutoff / (double)(stats->atoms_within_cutoff + stats->atoms_outside_cutoff) * 100.0;
|
||||||
|
printf("\tAtoms within/outside cutoff radius: %lld/%lld (%.2f%%)\n", stats->atoms_within_cutoff, stats->atoms_outside_cutoff, atoms_eff);
|
||||||
|
const double clusters_eff = (double)stats->clusters_within_cutoff / (double)(stats->clusters_within_cutoff + stats->clusters_outside_cutoff) * 100.0;
|
||||||
|
printf("\tClusters within/outside cutoff radius: %lld/%lld (%.2f%%)\n", stats->clusters_within_cutoff, stats->clusters_outside_cutoff, clusters_eff);
|
||||||
|
#endif
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
@ -65,8 +65,6 @@ void setupThermo(Parameter *param, int natoms)
|
|||||||
e_scale = 524287.985533;//16.0;
|
e_scale = 524287.985533;//16.0;
|
||||||
param->dtforce /= mvv2e;
|
param->dtforce /= mvv2e;
|
||||||
}
|
}
|
||||||
|
|
||||||
printf("step\ttemp\t\tpressure\n");
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void computeThermo(int iflag, Parameter *param, Atom *atom)
|
void computeThermo(int iflag, Parameter *param, Atom *atom)
|
||||||
|
71
gromacs/xtc.c
Normal file
71
gromacs/xtc.c
Normal file
@ -0,0 +1,71 @@
|
|||||||
|
/*
|
||||||
|
* =======================================================================================
|
||||||
|
*
|
||||||
|
* Author: Jan Eitzinger (je), jan.eitzinger@fau.de
|
||||||
|
* Copyright (c) 2020 RRZE, University Erlangen-Nuremberg
|
||||||
|
*
|
||||||
|
* This file is part of MD-Bench.
|
||||||
|
*
|
||||||
|
* MD-Bench is free software: you can redistribute it and/or modify it
|
||||||
|
* under the terms of the GNU Lesser General Public License as published
|
||||||
|
* by the Free Software Foundation, either version 3 of the License, or
|
||||||
|
* (at your option) any later version.
|
||||||
|
*
|
||||||
|
* MD-Bench is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||||
|
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
|
||||||
|
* PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
|
||||||
|
* details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU Lesser General Public License along
|
||||||
|
* with MD-Bench. If not, see <https://www.gnu.org/licenses/>.
|
||||||
|
* =======================================================================================
|
||||||
|
*/
|
||||||
|
#include <stdlib.h>
|
||||||
|
//---
|
||||||
|
#include <atom.h>
|
||||||
|
#include <allocate.h>
|
||||||
|
#include <xtc.h>
|
||||||
|
|
||||||
|
#ifdef XTC_OUTPUT
|
||||||
|
#include <gromacs/fileio/xtcio.h>
|
||||||
|
|
||||||
|
static struct t_fileio *xtc_file = NULL;
|
||||||
|
static rvec *x_buf = NULL;
|
||||||
|
static rvec basis[3];
|
||||||
|
|
||||||
|
void xtc_init(const char *filename, Atom *atom, int timestep) {
|
||||||
|
basis[0][XX] = 1.0;
|
||||||
|
basis[0][YY] = 0.0;
|
||||||
|
basis[0][ZZ] = 0.0;
|
||||||
|
basis[1][XX] = 0.0;
|
||||||
|
basis[1][YY] = 1.0;
|
||||||
|
basis[1][ZZ] = 0.0;
|
||||||
|
basis[2][XX] = 0.0;
|
||||||
|
basis[2][YY] = 0.0;
|
||||||
|
basis[2][ZZ] = 1.0;
|
||||||
|
|
||||||
|
xtc_file = open_xtc(filename, "w");
|
||||||
|
x_buf = (rvec *) allocate(ALIGNMENT, sizeof(rvec) * (atom->Nlocal + 1));
|
||||||
|
xtc_write(atom, timestep, 1, 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
void xtc_write(Atom *atom, int timestep, int write_pos, int write_vel) {
|
||||||
|
int i = 0;
|
||||||
|
for(int ci = 0; ci < atom->Nclusters_local; ++ci) {
|
||||||
|
MD_FLOAT *cptr = cluster_pos_ptr(ci);
|
||||||
|
for(int cii = 0; cii < atom->clusters[ci].natoms; ++cii) {
|
||||||
|
x_buf[i][XX] = cluster_x(cptr, cii);
|
||||||
|
x_buf[i][YY] = cluster_y(cptr, cii);
|
||||||
|
x_buf[i][ZZ] = cluster_z(cptr, cii);
|
||||||
|
i++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
write_xtc(xtc_file, atom->Nlocal, timestep, 0.0, (const rvec *) basis, (const rvec *) x_buf, 1000);
|
||||||
|
}
|
||||||
|
|
||||||
|
void xtc_end() {
|
||||||
|
free(x_buf);
|
||||||
|
close_xtc(xtc_file);
|
||||||
|
}
|
||||||
|
#endif
|
11
include_GROMACS.mk
Normal file
11
include_GROMACS.mk
Normal file
@ -0,0 +1,11 @@
|
|||||||
|
GROMACS_PATH=/apps/Gromacs/2018.1-mkl
|
||||||
|
GROMACS_INC ?= -I${GROMACS_PATH}/include
|
||||||
|
GROMACS_DEFINES ?=
|
||||||
|
GROMACS_LIB ?= -L${GROMACS_PATH}/lib64
|
||||||
|
|
||||||
|
ifeq ($(strip $(XTC_OUTPUT)),true)
|
||||||
|
INCLUDES += ${GROMACS_INC}
|
||||||
|
DEFINES += ${GROMACS_DEFINES}
|
||||||
|
LIBS += -lgromacs
|
||||||
|
LFLAGS += ${GROMACS_LIB}
|
||||||
|
endif
|
@ -4,7 +4,7 @@ LINKER = $(CC)
|
|||||||
OPENMP = #-qopenmp
|
OPENMP = #-qopenmp
|
||||||
PROFILE = #-profile-functions -g -pg
|
PROFILE = #-profile-functions -g -pg
|
||||||
OPTS = -Ofast -xCORE-AVX512 -qopt-zmm-usage=high $(PROFILE)
|
OPTS = -Ofast -xCORE-AVX512 -qopt-zmm-usage=high $(PROFILE)
|
||||||
#OPTS = -fast -xCORE-AVX2 $(PROFILE)
|
#OPTS = -Ofast -xCORE-AVX2 $(PROFILE)
|
||||||
#OPTS = -fast -xAVX $(PROFILE)
|
#OPTS = -fast -xAVX $(PROFILE)
|
||||||
#OPTS = -fast -xSSE4.2 $(PROFILE)
|
#OPTS = -fast -xSSE4.2 $(PROFILE)
|
||||||
#OPTS = -fast -no-vec $(PROFILE)
|
#OPTS = -fast -no-vec $(PROFILE)
|
||||||
@ -12,6 +12,6 @@ OPTS = -Ofast -xCORE-AVX512 -qopt-zmm-usage=high $(PROFILE)
|
|||||||
CFLAGS = $(PROFILE) -restrict $(OPENMP) $(OPTS)
|
CFLAGS = $(PROFILE) -restrict $(OPENMP) $(OPTS)
|
||||||
ASFLAGS = #-masm=intel
|
ASFLAGS = #-masm=intel
|
||||||
LFLAGS = $(PROFILE) $(OPTS) $(OPENMP)
|
LFLAGS = $(PROFILE) $(OPTS) $(OPENMP)
|
||||||
DEFINES = -D_GNU_SOURCE #-DLIKWID_PERFMON
|
DEFINES = -std=c11 -pedantic-errors -D_GNU_SOURCE #-DLIKWID_PERFMON
|
||||||
INCLUDES = #$(LIKWID_INC)
|
INCLUDES = #$(LIKWID_INC)
|
||||||
LIBS = -lm #$(LIKWID_LIB) -llikwid
|
LIBS = -lm #$(LIKWID_LIB) -llikwid
|
||||||
|
103
lammps/atom.c
103
lammps/atom.c
@ -161,7 +161,108 @@ void createAtom(Atom *atom, Parameter *param)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
int readAtom(Atom* atom, Parameter* param)
|
int type_str2int(const char *type) {
|
||||||
|
if(strncmp(type, "Ar", 2) == 0) { return 0; } // Argon
|
||||||
|
fprintf(stderr, "Invalid atom type: %s\n", type);
|
||||||
|
exit(-1);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
int readAtom(Atom* atom, Parameter* param) {
|
||||||
|
int len = strlen(param->input_file);
|
||||||
|
if(strncmp(&param->input_file[len - 4], ".pdb", 4) == 0) { return readAtom_pdb(atom, param); }
|
||||||
|
if(strncmp(&param->input_file[len - 4], ".dmp", 4) == 0) { return readAtom_dmp(atom, param); }
|
||||||
|
fprintf(stderr, "Invalid input file extension: %s\nValid choices are: pdb, dmp\n", param->input_file);
|
||||||
|
exit(-1);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
int readAtom_pdb(Atom* atom, Parameter* param) {
|
||||||
|
FILE *fp = fopen(param->input_file, "r");
|
||||||
|
char line[MAXLINE];
|
||||||
|
int read_atoms = 0;
|
||||||
|
|
||||||
|
if(!fp) {
|
||||||
|
fprintf(stderr, "Could not open input file: %s\n", param->input_file);
|
||||||
|
exit(-1);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
while(!feof(fp)) {
|
||||||
|
fgets(line, MAXLINE, fp);
|
||||||
|
char *item = strtok(line, " ");
|
||||||
|
if(strncmp(item, "CRYST1", 6) == 0) {
|
||||||
|
param->xlo = 0.0;
|
||||||
|
param->xhi = atof(strtok(NULL, " "));
|
||||||
|
param->ylo = 0.0;
|
||||||
|
param->yhi = atof(strtok(NULL, " "));
|
||||||
|
param->zlo = 0.0;
|
||||||
|
param->zhi = atof(strtok(NULL, " "));
|
||||||
|
param->xprd = param->xhi - param->xlo;
|
||||||
|
param->yprd = param->yhi - param->ylo;
|
||||||
|
param->zprd = param->zhi - param->zlo;
|
||||||
|
// alpha, beta, gamma, sGroup, z
|
||||||
|
} else if(strncmp(item, "ATOM", 4) == 0) {
|
||||||
|
char *label;
|
||||||
|
int atom_id, comp_id;
|
||||||
|
MD_FLOAT occupancy, charge;
|
||||||
|
atom_id = atoi(strtok(NULL, " ")) - 1;
|
||||||
|
|
||||||
|
while(atom_id + 1 >= atom->Nmax) {
|
||||||
|
growAtom(atom);
|
||||||
|
}
|
||||||
|
|
||||||
|
atom->type[atom_id] = type_str2int(strtok(NULL, " "));
|
||||||
|
label = strtok(NULL, " ");
|
||||||
|
comp_id = atoi(strtok(NULL, " "));
|
||||||
|
atom_x(atom_id) = atof(strtok(NULL, " "));
|
||||||
|
atom_y(atom_id) = atof(strtok(NULL, " "));
|
||||||
|
atom_z(atom_id) = atof(strtok(NULL, " "));
|
||||||
|
atom->vx[atom_id] = 0.0;
|
||||||
|
atom->vy[atom_id] = 0.0;
|
||||||
|
atom->vz[atom_id] = 0.0;
|
||||||
|
occupancy = atof(strtok(NULL, " "));
|
||||||
|
charge = atof(strtok(NULL, " "));
|
||||||
|
atom->ntypes = MAX(atom->type[atom_id] + 1, atom->ntypes);
|
||||||
|
atom->Natoms++;
|
||||||
|
atom->Nlocal++;
|
||||||
|
read_atoms++;
|
||||||
|
} else if(strncmp(item, "HEADER", 6) == 0 ||
|
||||||
|
strncmp(item, "REMARK", 6) == 0 ||
|
||||||
|
strncmp(item, "MODEL", 5) == 0 ||
|
||||||
|
strncmp(item, "TER", 3) == 0 ||
|
||||||
|
strncmp(item, "ENDMDL", 6) == 0) {
|
||||||
|
// Do nothing
|
||||||
|
} else {
|
||||||
|
fprintf(stderr, "Invalid item: %s\n", item);
|
||||||
|
exit(-1);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if(!read_atoms) {
|
||||||
|
fprintf(stderr, "Input error: No atoms read!\n");
|
||||||
|
exit(-1);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
atom->epsilon = allocate(ALIGNMENT, atom->ntypes * atom->ntypes * sizeof(MD_FLOAT));
|
||||||
|
atom->sigma6 = allocate(ALIGNMENT, atom->ntypes * atom->ntypes * sizeof(MD_FLOAT));
|
||||||
|
atom->cutforcesq = allocate(ALIGNMENT, atom->ntypes * atom->ntypes * sizeof(MD_FLOAT));
|
||||||
|
atom->cutneighsq = allocate(ALIGNMENT, atom->ntypes * atom->ntypes * sizeof(MD_FLOAT));
|
||||||
|
for(int i = 0; i < atom->ntypes * atom->ntypes; i++) {
|
||||||
|
atom->epsilon[i] = param->epsilon;
|
||||||
|
atom->sigma6[i] = param->sigma6;
|
||||||
|
atom->cutneighsq[i] = param->cutneigh * param->cutneigh;
|
||||||
|
atom->cutforcesq[i] = param->cutforce * param->cutforce;
|
||||||
|
}
|
||||||
|
|
||||||
|
fprintf(stdout, "Read %d atoms from %s\n", read_atoms, param->input_file);
|
||||||
|
fclose(fp);
|
||||||
|
return read_atoms;
|
||||||
|
}
|
||||||
|
|
||||||
|
int readAtom_dmp(Atom* atom, Parameter* param)
|
||||||
{
|
{
|
||||||
FILE *fp = fopen(param->input_file, "r");
|
FILE *fp = fopen(param->input_file, "r");
|
||||||
char line[MAXLINE];
|
char line[MAXLINE];
|
||||||
|
@ -87,6 +87,11 @@ double computeForceLJFullNeigh(Parameter *param, Atom *atom, Neighbor *neighbor,
|
|||||||
fix += delx * force;
|
fix += delx * force;
|
||||||
fiy += dely * force;
|
fiy += dely * force;
|
||||||
fiz += delz * force;
|
fiz += delz * force;
|
||||||
|
#ifdef USE_REFERENCE_VERSION
|
||||||
|
addStat(stats->atoms_within_cutoff, 1);
|
||||||
|
} else {
|
||||||
|
addStat(stats->atoms_outside_cutoff, 1);
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -42,6 +42,8 @@ typedef struct {
|
|||||||
extern void initAtom(Atom*);
|
extern void initAtom(Atom*);
|
||||||
extern void createAtom(Atom*, Parameter*);
|
extern void createAtom(Atom*, Parameter*);
|
||||||
extern int readAtom(Atom*, Parameter*);
|
extern int readAtom(Atom*, Parameter*);
|
||||||
|
extern int readAtom_pdb(Atom*, Parameter*);
|
||||||
|
extern int readAtom_dmp(Atom*, Parameter*);
|
||||||
extern void growAtom(Atom*);
|
extern void growAtom(Atom*);
|
||||||
|
|
||||||
#ifdef AOS
|
#ifdef AOS
|
||||||
|
@ -45,6 +45,7 @@ typedef struct {
|
|||||||
int halfneigh;
|
int halfneigh;
|
||||||
MD_FLOAT dt;
|
MD_FLOAT dt;
|
||||||
MD_FLOAT dtforce;
|
MD_FLOAT dtforce;
|
||||||
|
MD_FLOAT skin;
|
||||||
MD_FLOAT cutforce;
|
MD_FLOAT cutforce;
|
||||||
MD_FLOAT cutneigh;
|
MD_FLOAT cutneigh;
|
||||||
int nx, ny, nz;
|
int nx, ny, nz;
|
||||||
|
@ -28,6 +28,8 @@
|
|||||||
typedef struct {
|
typedef struct {
|
||||||
long long int total_force_neighs;
|
long long int total_force_neighs;
|
||||||
long long int total_force_iters;
|
long long int total_force_iters;
|
||||||
|
long long int atoms_within_cutoff;
|
||||||
|
long long int atoms_outside_cutoff;
|
||||||
} Stats;
|
} Stats;
|
||||||
|
|
||||||
void initStats(Stats *s);
|
void initStats(Stats *s);
|
||||||
|
@ -64,7 +64,8 @@ void init(Parameter *param)
|
|||||||
param->ny = 32;
|
param->ny = 32;
|
||||||
param->nz = 32;
|
param->nz = 32;
|
||||||
param->cutforce = 2.5;
|
param->cutforce = 2.5;
|
||||||
param->cutneigh = param->cutforce + 0.30;
|
param->skin = 0.3;
|
||||||
|
param->cutneigh = param->cutforce + param->skin;
|
||||||
param->temp = 1.44;
|
param->temp = 1.44;
|
||||||
param->nstat = 100;
|
param->nstat = 100;
|
||||||
param->mass = 1.0;
|
param->mass = 1.0;
|
||||||
@ -188,56 +189,54 @@ int main(int argc, char** argv)
|
|||||||
|
|
||||||
for(int i = 0; i < argc; i++)
|
for(int i = 0; i < argc; i++)
|
||||||
{
|
{
|
||||||
if((strcmp(argv[i], "-f") == 0))
|
if((strcmp(argv[i], "-f") == 0)) {
|
||||||
{
|
|
||||||
if((param.force_field = str2ff(argv[++i])) < 0) {
|
if((param.force_field = str2ff(argv[++i])) < 0) {
|
||||||
fprintf(stderr, "Invalid force field!\n");
|
fprintf(stderr, "Invalid force field!\n");
|
||||||
exit(-1);
|
exit(-1);
|
||||||
}
|
}
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
if((strcmp(argv[i], "-i") == 0))
|
if((strcmp(argv[i], "-i") == 0)) {
|
||||||
{
|
|
||||||
param.input_file = strdup(argv[++i]);
|
param.input_file = strdup(argv[++i]);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
if((strcmp(argv[i], "-e") == 0))
|
if((strcmp(argv[i], "-e") == 0)) {
|
||||||
{
|
|
||||||
param.eam_file = strdup(argv[++i]);
|
param.eam_file = strdup(argv[++i]);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
if((strcmp(argv[i], "-n") == 0) || (strcmp(argv[i], "--nsteps") == 0))
|
if((strcmp(argv[i], "-n") == 0) || (strcmp(argv[i], "--nsteps") == 0)) {
|
||||||
{
|
|
||||||
param.ntimes = atoi(argv[++i]);
|
param.ntimes = atoi(argv[++i]);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
if((strcmp(argv[i], "-nx") == 0))
|
if((strcmp(argv[i], "-nx") == 0)) {
|
||||||
{
|
|
||||||
param.nx = atoi(argv[++i]);
|
param.nx = atoi(argv[++i]);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
if((strcmp(argv[i], "-ny") == 0))
|
if((strcmp(argv[i], "-ny") == 0)) {
|
||||||
{
|
|
||||||
param.ny = atoi(argv[++i]);
|
param.ny = atoi(argv[++i]);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
if((strcmp(argv[i], "-nz") == 0))
|
if((strcmp(argv[i], "-nz") == 0)) {
|
||||||
{
|
|
||||||
param.nz = atoi(argv[++i]);
|
param.nz = atoi(argv[++i]);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
if((strcmp(argv[i], "--freq") == 0))
|
if((strcmp(argv[i], "-r") == 0) || (strcmp(argv[i], "--radius") == 0)) {
|
||||||
{
|
param.cutforce = atof(argv[++i]);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if((strcmp(argv[i], "-s") == 0) || (strcmp(argv[i], "--skin") == 0)) {
|
||||||
|
param.skin = atof(argv[++i]);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if((strcmp(argv[i], "--freq") == 0)) {
|
||||||
param.proc_freq = atof(argv[++i]);
|
param.proc_freq = atof(argv[++i]);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
if((strcmp(argv[i], "--vtk") == 0))
|
if((strcmp(argv[i], "--vtk") == 0)) {
|
||||||
{
|
|
||||||
param.vtk_file = strdup(argv[++i]);
|
param.vtk_file = strdup(argv[++i]);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
if((strcmp(argv[i], "-h") == 0) || (strcmp(argv[i], "--help") == 0))
|
if((strcmp(argv[i], "-h") == 0) || (strcmp(argv[i], "--help") == 0)) {
|
||||||
{
|
|
||||||
printf("MD Bench: A minimalistic re-implementation of miniMD\n");
|
printf("MD Bench: A minimalistic re-implementation of miniMD\n");
|
||||||
printf(HLINE);
|
printf(HLINE);
|
||||||
printf("-f <string>: force field (lj or eam), default lj\n");
|
printf("-f <string>: force field (lj or eam), default lj\n");
|
||||||
@ -245,6 +244,8 @@ int main(int argc, char** argv)
|
|||||||
printf("-e <string>: input file for EAM\n");
|
printf("-e <string>: input file for EAM\n");
|
||||||
printf("-n / --nsteps <int>: set number of timesteps for simulation\n");
|
printf("-n / --nsteps <int>: set number of timesteps for simulation\n");
|
||||||
printf("-nx/-ny/-nz <int>: set linear dimension of systembox in x/y/z direction\n");
|
printf("-nx/-ny/-nz <int>: set linear dimension of systembox in x/y/z direction\n");
|
||||||
|
printf("-r / --radius <real>: set cutoff radius\n");
|
||||||
|
printf("-s / --skin <real>: set skin (verlet buffer)\n");
|
||||||
printf("--freq <real>: processor frequency (GHz)\n");
|
printf("--freq <real>: processor frequency (GHz)\n");
|
||||||
printf("--vtk <string>: VTK file for visualization\n");
|
printf("--vtk <string>: VTK file for visualization\n");
|
||||||
printf(HLINE);
|
printf(HLINE);
|
||||||
@ -252,6 +253,7 @@ int main(int argc, char** argv)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
param.cutneigh = param.cutforce + param.skin;
|
||||||
setup(&param, &eam, &atom, &neighbor, &stats);
|
setup(&param, &eam, &atom, &neighbor, &stats);
|
||||||
computeThermo(0, &param, &atom);
|
computeThermo(0, &param, &atom);
|
||||||
#if defined(MEM_TRACER) || defined(INDEX_TRACER)
|
#if defined(MEM_TRACER) || defined(INDEX_TRACER)
|
||||||
|
@ -8,17 +8,22 @@
|
|||||||
void initStats(Stats *s) {
|
void initStats(Stats *s) {
|
||||||
s->total_force_neighs = 0;
|
s->total_force_neighs = 0;
|
||||||
s->total_force_iters = 0;
|
s->total_force_iters = 0;
|
||||||
|
s->atoms_within_cutoff = 0;
|
||||||
|
s->atoms_outside_cutoff = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
void displayStatistics(Atom *atom, Parameter *param, Stats *stats, double *timer) {
|
void displayStatistics(Atom *atom, Parameter *param, Stats *stats, double *timer) {
|
||||||
#ifdef COMPUTE_STATS
|
#ifdef COMPUTE_STATS
|
||||||
|
|
||||||
double force_useful_volume = 1e-9 * ( (double)(atom->Nlocal * (param->ntimes + 1)) * (sizeof(MD_FLOAT) * 6 + sizeof(int)) +
|
double force_useful_volume = 1e-9 * ( (double)(atom->Nlocal * (param->ntimes + 1)) * (sizeof(MD_FLOAT) * 6 + sizeof(int)) +
|
||||||
(double)(stats->total_force_neighs) * (sizeof(MD_FLOAT) * 3 + sizeof(int)) );
|
(double)(stats->total_force_neighs) * (sizeof(MD_FLOAT) * 3 + sizeof(int)) );
|
||||||
double avg_neigh = stats->total_force_neighs / (double)(atom->Nlocal * (param->ntimes + 1));
|
double avg_neigh = stats->total_force_neighs / (double)(atom->Nlocal * (param->ntimes + 1));
|
||||||
double avg_simd = stats->total_force_iters / (double)(atom->Nlocal * (param->ntimes + 1));
|
double avg_simd = stats->total_force_iters / (double)(atom->Nlocal * (param->ntimes + 1));
|
||||||
|
|
||||||
#ifdef EXPLICIT_TYPES
|
#ifdef EXPLICIT_TYPES
|
||||||
force_useful_volume += 1e-9 * (double)((atom->Nlocal * (param->ntimes + 1)) + stats->total_force_neighs) * sizeof(int);
|
force_useful_volume += 1e-9 * (double)((atom->Nlocal * (param->ntimes + 1)) + stats->total_force_neighs) * sizeof(int);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
printf("Statistics:\n");
|
printf("Statistics:\n");
|
||||||
printf("\tVector width: %d, Processor frequency: %.4f GHz\n", VECTOR_WIDTH, param->proc_freq);
|
printf("\tVector width: %d, Processor frequency: %.4f GHz\n", VECTOR_WIDTH, param->proc_freq);
|
||||||
printf("\tAverage neighbors per atom: %.4f\n", avg_neigh);
|
printf("\tAverage neighbors per atom: %.4f\n", avg_neigh);
|
||||||
@ -27,5 +32,11 @@ void displayStatistics(Atom *atom, Parameter *param, Stats *stats, double *timer
|
|||||||
printf("\tTotal number of SIMD iterations: %lld\n", stats->total_force_iters);
|
printf("\tTotal number of SIMD iterations: %lld\n", stats->total_force_iters);
|
||||||
printf("\tUseful read data volume for force computation: %.2fGB\n", force_useful_volume);
|
printf("\tUseful read data volume for force computation: %.2fGB\n", force_useful_volume);
|
||||||
printf("\tCycles/SIMD iteration: %.4f\n", timer[FORCE] * param->proc_freq * 1e9 / stats->total_force_iters);
|
printf("\tCycles/SIMD iteration: %.4f\n", timer[FORCE] * param->proc_freq * 1e9 / stats->total_force_iters);
|
||||||
|
|
||||||
|
#ifdef USE_REFERENCE_VERSION
|
||||||
|
const double eff_pct = (double)stats->atoms_within_cutoff / (double)(stats->atoms_within_cutoff + stats->atoms_outside_cutoff) * 100.0;
|
||||||
|
printf("\tAtoms within/outside cutoff radius: %lld/%lld (%.2f%%)\n", stats->atoms_within_cutoff, stats->atoms_outside_cutoff, eff_pct);
|
||||||
|
#endif
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user