Merge branch 'master' of github.com:RRZE-HPC/MD-Bench
This commit is contained in:
commit
98583cdade
1541
asm/unused/force_lj_lammps_avx512_dp_no_newton_raphson.s
Normal file
1541
asm/unused/force_lj_lammps_avx512_dp_no_newton_raphson.s
Normal file
File diff suppressed because it is too large
Load Diff
1466
asm/unused/force_lj_lammps_avx512_sp_no_newton_raphson.s
Normal file
1466
asm/unused/force_lj_lammps_avx512_sp_no_newton_raphson.s
Normal file
File diff suppressed because it is too large
Load Diff
@ -35,9 +35,12 @@ double computeForceLJ_ref(Parameter *param, Atom *atom, Neighbor *neighbor, Stat
|
|||||||
}
|
}
|
||||||
|
|
||||||
double S = getTimeStamp();
|
double S = getTimeStamp();
|
||||||
|
|
||||||
|
#pragma omp parallel
|
||||||
|
{
|
||||||
LIKWID_MARKER_START("force");
|
LIKWID_MARKER_START("force");
|
||||||
|
|
||||||
#pragma omp parallel for
|
#pragma omp for
|
||||||
for(int ci = 0; ci < atom->Nclusters_local; ci++) {
|
for(int ci = 0; ci < atom->Nclusters_local; ci++) {
|
||||||
int ci_cj0 = CJ0_FROM_CI(ci);
|
int ci_cj0 = CJ0_FROM_CI(ci);
|
||||||
int ci_cj1 = CJ1_FROM_CI(ci);
|
int ci_cj1 = CJ1_FROM_CI(ci);
|
||||||
@ -119,6 +122,8 @@ double computeForceLJ_ref(Parameter *param, Atom *atom, Neighbor *neighbor, Stat
|
|||||||
}
|
}
|
||||||
|
|
||||||
LIKWID_MARKER_STOP("force");
|
LIKWID_MARKER_STOP("force");
|
||||||
|
}
|
||||||
|
|
||||||
double E = getTimeStamp();
|
double E = getTimeStamp();
|
||||||
DEBUG_MESSAGE("computeForceLJ end\n");
|
DEBUG_MESSAGE("computeForceLJ end\n");
|
||||||
return E-S;
|
return E-S;
|
||||||
@ -149,9 +154,12 @@ double computeForceLJ_2xnn_half(Parameter *param, Atom *atom, Neighbor *neighbor
|
|||||||
}
|
}
|
||||||
|
|
||||||
double S = getTimeStamp();
|
double S = getTimeStamp();
|
||||||
|
|
||||||
|
#pragma omp parallel
|
||||||
|
{
|
||||||
LIKWID_MARKER_START("force");
|
LIKWID_MARKER_START("force");
|
||||||
|
|
||||||
#pragma omp parallel for
|
#pragma omp for
|
||||||
for(int ci = 0; ci < atom->Nclusters_local; ci++) {
|
for(int ci = 0; ci < atom->Nclusters_local; ci++) {
|
||||||
int ci_cj0 = CJ0_FROM_CI(ci);
|
int ci_cj0 = CJ0_FROM_CI(ci);
|
||||||
#if CLUSTER_M > CLUSTER_N
|
#if CLUSTER_M > CLUSTER_N
|
||||||
@ -266,6 +274,8 @@ double computeForceLJ_2xnn_half(Parameter *param, Atom *atom, Neighbor *neighbor
|
|||||||
}
|
}
|
||||||
|
|
||||||
LIKWID_MARKER_STOP("force");
|
LIKWID_MARKER_STOP("force");
|
||||||
|
}
|
||||||
|
|
||||||
double E = getTimeStamp();
|
double E = getTimeStamp();
|
||||||
DEBUG_MESSAGE("computeForceLJ_2xnn end\n");
|
DEBUG_MESSAGE("computeForceLJ_2xnn end\n");
|
||||||
return E-S;
|
return E-S;
|
||||||
@ -296,9 +306,12 @@ double computeForceLJ_2xnn_full(Parameter *param, Atom *atom, Neighbor *neighbor
|
|||||||
}
|
}
|
||||||
|
|
||||||
double S = getTimeStamp();
|
double S = getTimeStamp();
|
||||||
|
|
||||||
|
#pragma omp parallel
|
||||||
|
{
|
||||||
LIKWID_MARKER_START("force");
|
LIKWID_MARKER_START("force");
|
||||||
|
|
||||||
#pragma omp parallel for
|
#pragma omp for
|
||||||
for(int ci = 0; ci < atom->Nclusters_local; ci++) {
|
for(int ci = 0; ci < atom->Nclusters_local; ci++) {
|
||||||
int ci_cj0 = CJ0_FROM_CI(ci);
|
int ci_cj0 = CJ0_FROM_CI(ci);
|
||||||
#if CLUSTER_M > CLUSTER_N
|
#if CLUSTER_M > CLUSTER_N
|
||||||
@ -398,6 +411,8 @@ double computeForceLJ_2xnn_full(Parameter *param, Atom *atom, Neighbor *neighbor
|
|||||||
}
|
}
|
||||||
|
|
||||||
LIKWID_MARKER_STOP("force");
|
LIKWID_MARKER_STOP("force");
|
||||||
|
}
|
||||||
|
|
||||||
double E = getTimeStamp();
|
double E = getTimeStamp();
|
||||||
DEBUG_MESSAGE("computeForceLJ_2xnn end\n");
|
DEBUG_MESSAGE("computeForceLJ_2xnn end\n");
|
||||||
return E-S;
|
return E-S;
|
||||||
@ -435,9 +450,12 @@ double computeForceLJ_4xn_half(Parameter *param, Atom *atom, Neighbor *neighbor,
|
|||||||
}
|
}
|
||||||
|
|
||||||
double S = getTimeStamp();
|
double S = getTimeStamp();
|
||||||
|
|
||||||
|
#pragma omp parallel
|
||||||
|
{
|
||||||
LIKWID_MARKER_START("force");
|
LIKWID_MARKER_START("force");
|
||||||
|
|
||||||
#pragma omp parallel for
|
#pragma omp for
|
||||||
for(int ci = 0; ci < atom->Nclusters_local; ci++) {
|
for(int ci = 0; ci < atom->Nclusters_local; ci++) {
|
||||||
int ci_cj0 = CJ0_FROM_CI(ci);
|
int ci_cj0 = CJ0_FROM_CI(ci);
|
||||||
#if CLUSTER_M > CLUSTER_N
|
#if CLUSTER_M > CLUSTER_N
|
||||||
@ -591,6 +609,8 @@ double computeForceLJ_4xn_half(Parameter *param, Atom *atom, Neighbor *neighbor,
|
|||||||
}
|
}
|
||||||
|
|
||||||
LIKWID_MARKER_STOP("force");
|
LIKWID_MARKER_STOP("force");
|
||||||
|
}
|
||||||
|
|
||||||
double E = getTimeStamp();
|
double E = getTimeStamp();
|
||||||
DEBUG_MESSAGE("computeForceLJ_4xn end\n");
|
DEBUG_MESSAGE("computeForceLJ_4xn end\n");
|
||||||
return E-S;
|
return E-S;
|
||||||
@ -620,9 +640,12 @@ double computeForceLJ_4xn_full(Parameter *param, Atom *atom, Neighbor *neighbor,
|
|||||||
}
|
}
|
||||||
|
|
||||||
double S = getTimeStamp();
|
double S = getTimeStamp();
|
||||||
|
|
||||||
|
#pragma omp parallel
|
||||||
|
{
|
||||||
LIKWID_MARKER_START("force");
|
LIKWID_MARKER_START("force");
|
||||||
|
|
||||||
#pragma omp parallel for
|
#pragma omp for
|
||||||
for(int ci = 0; ci < atom->Nclusters_local; ci++) {
|
for(int ci = 0; ci < atom->Nclusters_local; ci++) {
|
||||||
int ci_cj0 = CJ0_FROM_CI(ci);
|
int ci_cj0 = CJ0_FROM_CI(ci);
|
||||||
#if CLUSTER_M > CLUSTER_N
|
#if CLUSTER_M > CLUSTER_N
|
||||||
@ -751,6 +774,8 @@ double computeForceLJ_4xn_full(Parameter *param, Atom *atom, Neighbor *neighbor,
|
|||||||
}
|
}
|
||||||
|
|
||||||
LIKWID_MARKER_STOP("force");
|
LIKWID_MARKER_STOP("force");
|
||||||
|
}
|
||||||
|
|
||||||
double E = getTimeStamp();
|
double E = getTimeStamp();
|
||||||
DEBUG_MESSAGE("computeForceLJ_4xn end\n");
|
DEBUG_MESSAGE("computeForceLJ_4xn end\n");
|
||||||
return E-S;
|
return E-S;
|
||||||
|
@ -11,7 +11,7 @@ endif
|
|||||||
|
|
||||||
ifeq ($(ISA),AVX2)
|
ifeq ($(ISA),AVX2)
|
||||||
OPTS = -Ofast -xCORE-AVX2 $(PROFILE)
|
OPTS = -Ofast -xCORE-AVX2 $(PROFILE)
|
||||||
#OPTS = -Ofast -xAVX2 $(PROFILE)
|
#OPTS = -Ofast -xHost $(PROFILE)
|
||||||
#OPTS = -Ofast -march=core-avx2 $(PROFILE)
|
#OPTS = -Ofast -march=core-avx2 $(PROFILE)
|
||||||
endif
|
endif
|
||||||
|
|
||||||
|
@ -31,8 +31,12 @@ double computeForceEam(Eam* eam, Parameter* param, Atom *atom, Neighbor *neighbo
|
|||||||
int nrho = eam->nrho; int nrho_tot = eam->nrho_tot;
|
int nrho = eam->nrho; int nrho_tot = eam->nrho_tot;
|
||||||
double S = getTimeStamp();
|
double S = getTimeStamp();
|
||||||
|
|
||||||
|
|
||||||
|
#pragma omp parallel
|
||||||
|
{
|
||||||
LIKWID_MARKER_START("force_eam_fp");
|
LIKWID_MARKER_START("force_eam_fp");
|
||||||
#pragma omp parallel for
|
|
||||||
|
#pragma omp for
|
||||||
for(int i = 0; i < Nlocal; i++) {
|
for(int i = 0; i < Nlocal; i++) {
|
||||||
neighs = &neighbor->neighbors[i * neighbor->maxneighs];
|
neighs = &neighbor->neighbors[i * neighbor->maxneighs];
|
||||||
int numneighs = neighbor->numneigh[i];
|
int numneighs = neighbor->numneigh[i];
|
||||||
@ -95,13 +99,19 @@ double computeForceEam(Eam* eam, Parameter* param, Atom *atom, Neighbor *neighbo
|
|||||||
}
|
}
|
||||||
|
|
||||||
LIKWID_MARKER_STOP("force_eam_fp");
|
LIKWID_MARKER_STOP("force_eam_fp");
|
||||||
|
}
|
||||||
|
|
||||||
// We still need to update fp for PBC atoms
|
// We still need to update fp for PBC atoms
|
||||||
for(int i = 0; i < atom->Nghost; i++) {
|
for(int i = 0; i < atom->Nghost; i++) {
|
||||||
fp[Nlocal + i] = fp[atom->border_map[i]];
|
fp[Nlocal + i] = fp[atom->border_map[i]];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
#pragma omp parallel
|
||||||
|
{
|
||||||
LIKWID_MARKER_START("force_eam");
|
LIKWID_MARKER_START("force_eam");
|
||||||
|
|
||||||
|
#pragma omp for
|
||||||
for(int i = 0; i < Nlocal; i++) {
|
for(int i = 0; i < Nlocal; i++) {
|
||||||
neighs = &neighbor->neighbors[i * neighbor->maxneighs];
|
neighs = &neighbor->neighbors[i * neighbor->maxneighs];
|
||||||
int numneighs = neighbor->numneigh[i];
|
int numneighs = neighbor->numneigh[i];
|
||||||
@ -192,6 +202,8 @@ double computeForceEam(Eam* eam, Parameter* param, Atom *atom, Neighbor *neighbo
|
|||||||
}
|
}
|
||||||
|
|
||||||
LIKWID_MARKER_STOP("force_eam");
|
LIKWID_MARKER_STOP("force_eam");
|
||||||
|
}
|
||||||
|
|
||||||
double E = getTimeStamp();
|
double E = getTimeStamp();
|
||||||
return E-S;
|
return E-S;
|
||||||
}
|
}
|
||||||
|
@ -36,9 +36,12 @@ double computeForceLJFullNeigh_plain_c(Parameter *param, Atom *atom, Neighbor *n
|
|||||||
atom_fz(i) = 0.0;
|
atom_fz(i) = 0.0;
|
||||||
}
|
}
|
||||||
double S = getTimeStamp();
|
double S = getTimeStamp();
|
||||||
|
|
||||||
|
#pragma omp parallel
|
||||||
|
{
|
||||||
LIKWID_MARKER_START("force");
|
LIKWID_MARKER_START("force");
|
||||||
|
|
||||||
#pragma omp parallel for
|
#pragma omp for
|
||||||
for(int i = 0; i < Nlocal; i++) {
|
for(int i = 0; i < Nlocal; i++) {
|
||||||
neighs = &neighbor->neighbors[i * neighbor->maxneighs];
|
neighs = &neighbor->neighbors[i * neighbor->maxneighs];
|
||||||
int numneighs = neighbor->numneigh[i];
|
int numneighs = neighbor->numneigh[i];
|
||||||
@ -92,6 +95,8 @@ double computeForceLJFullNeigh_plain_c(Parameter *param, Atom *atom, Neighbor *n
|
|||||||
}
|
}
|
||||||
|
|
||||||
LIKWID_MARKER_STOP("force");
|
LIKWID_MARKER_STOP("force");
|
||||||
|
}
|
||||||
|
|
||||||
double E = getTimeStamp();
|
double E = getTimeStamp();
|
||||||
return E-S;
|
return E-S;
|
||||||
}
|
}
|
||||||
@ -112,8 +117,12 @@ double computeForceLJHalfNeigh(Parameter *param, Atom *atom, Neighbor *neighbor,
|
|||||||
}
|
}
|
||||||
|
|
||||||
double S = getTimeStamp();
|
double S = getTimeStamp();
|
||||||
|
|
||||||
|
#pragma omp parallel
|
||||||
|
{
|
||||||
LIKWID_MARKER_START("forceLJ-halfneigh");
|
LIKWID_MARKER_START("forceLJ-halfneigh");
|
||||||
|
|
||||||
|
#pragma omp for
|
||||||
for(int i = 0; i < Nlocal; i++) {
|
for(int i = 0; i < Nlocal; i++) {
|
||||||
neighs = &neighbor->neighbors[i * neighbor->maxneighs];
|
neighs = &neighbor->neighbors[i * neighbor->maxneighs];
|
||||||
int numneighs = neighbor->numneigh[i];
|
int numneighs = neighbor->numneigh[i];
|
||||||
@ -173,6 +182,8 @@ double computeForceLJHalfNeigh(Parameter *param, Atom *atom, Neighbor *neighbor,
|
|||||||
}
|
}
|
||||||
|
|
||||||
LIKWID_MARKER_STOP("forceLJ-halfneigh");
|
LIKWID_MARKER_STOP("forceLJ-halfneigh");
|
||||||
|
}
|
||||||
|
|
||||||
double E = getTimeStamp();
|
double E = getTimeStamp();
|
||||||
return E-S;
|
return E-S;
|
||||||
}
|
}
|
||||||
@ -191,7 +202,6 @@ double computeForceLJFullNeigh_simd(Parameter *param, Atom *atom, Neighbor *neig
|
|||||||
}
|
}
|
||||||
|
|
||||||
double S = getTimeStamp();
|
double S = getTimeStamp();
|
||||||
LIKWID_MARKER_START("force");
|
|
||||||
|
|
||||||
#ifndef __SIMD_KERNEL__
|
#ifndef __SIMD_KERNEL__
|
||||||
fprintf(stderr, "Error: SIMD kernel not implemented for specified instruction set!");
|
fprintf(stderr, "Error: SIMD kernel not implemented for specified instruction set!");
|
||||||
@ -203,7 +213,12 @@ double computeForceLJFullNeigh_simd(Parameter *param, Atom *atom, Neighbor *neig
|
|||||||
MD_SIMD_FLOAT c48_vec = simd_broadcast(48.0);
|
MD_SIMD_FLOAT c48_vec = simd_broadcast(48.0);
|
||||||
MD_SIMD_FLOAT c05_vec = simd_broadcast(0.5);
|
MD_SIMD_FLOAT c05_vec = simd_broadcast(0.5);
|
||||||
|
|
||||||
#pragma omp parallel for
|
|
||||||
|
#pragma omp parallel
|
||||||
|
{
|
||||||
|
LIKWID_MARKER_START("force");
|
||||||
|
|
||||||
|
#pragma omp for
|
||||||
for(int i = 0; i < Nlocal; i++) {
|
for(int i = 0; i < Nlocal; i++) {
|
||||||
neighs = &neighbor->neighbors[i * neighbor->maxneighs];
|
neighs = &neighbor->neighbors[i * neighbor->maxneighs];
|
||||||
int numneighs = neighbor->numneigh[i];
|
int numneighs = neighbor->numneigh[i];
|
||||||
@ -244,9 +259,11 @@ double computeForceLJFullNeigh_simd(Parameter *param, Atom *atom, Neighbor *neig
|
|||||||
atom_fy(i) += simd_h_reduce_sum(fiy);
|
atom_fy(i) += simd_h_reduce_sum(fiy);
|
||||||
atom_fz(i) += simd_h_reduce_sum(fiz);
|
atom_fz(i) += simd_h_reduce_sum(fiz);
|
||||||
}
|
}
|
||||||
#endif
|
|
||||||
|
|
||||||
LIKWID_MARKER_STOP("force");
|
LIKWID_MARKER_STOP("force");
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
double E = getTimeStamp();
|
double E = getTimeStamp();
|
||||||
return E-S;
|
return E-S;
|
||||||
}
|
}
|
||||||
|
@ -1,16 +1,36 @@
|
|||||||
#!/bin/bash
|
#!/bin/bash
|
||||||
|
|
||||||
# Adjustable variables
|
[[ -z "$1" ]] && echo "Use: $0 <binary> [-c <core>] [-f <freq>] [-n <nruns>] [-l <log>] [-s]" && exit
|
||||||
TAG="${TAG:-ICC}"
|
[[ ! -f "$1" ]] && echo "Binary file not found, make sure to use 'make'" && exit
|
||||||
OPT_SCHEME="${OPT_SCHEME:-lammps}"
|
[[ ! -f "$1-stub" ]] && echo "Binary file for stubbed case not found, make sure to use 'make VARIANT=stub'" && exit
|
||||||
|
|
||||||
|
MDBENCH_BIN=$1
|
||||||
|
BIN_INFO="${MDBENCH_BIN#*-}" # $OPT_SCHEME-$TAG-$ISA-$PREC
|
||||||
|
OPT_SCHEME="${BIN_INFO%%-*}"
|
||||||
|
PREC="${BIN_INFO##*-}"
|
||||||
|
BIN_INFO="${BIN_INFO#*-}" # $TAG-$ISA-$PREC
|
||||||
|
BIN_INFO="${BIN_INFO%-*}" # $TAG-$ISA
|
||||||
|
TAG="${BIN_INFO%%-*}"
|
||||||
|
ISA="${BIN_INFO##*-}"
|
||||||
CORE="${CORE:-0}"
|
CORE="${CORE:-0}"
|
||||||
FREQ="${FREQ:-2.4}"
|
FREQ="${FREQ:-2.4}"
|
||||||
NRUNS="${NRUNS:-3}"
|
NRUNS="${NRUNS:-3}"
|
||||||
LOG="${LOG:-latencies_and_cfds.log}"
|
LOG="${LOG:-latencies_and_cfds.log}"
|
||||||
STUB_ONLY="${STUB_ONLY:-false}"
|
STUB_ONLY="${STUB_ONLY:-false}"
|
||||||
|
|
||||||
|
OPTIND=2
|
||||||
|
while getopts "c:f:n:l:s" flag; do
|
||||||
|
case "${flag}" in
|
||||||
|
c) CORE=${OPTARG};;
|
||||||
|
f) FREQ=${OPTARG};;
|
||||||
|
n) NRUNS=${OPTARG};;
|
||||||
|
l) LOG=${OPTARG};;
|
||||||
|
s) STUB_ONLY=true;;
|
||||||
|
esac
|
||||||
|
done
|
||||||
|
|
||||||
# Other useful variables
|
# Other useful variables
|
||||||
MDBENCH_BIN=./MDBench-$TAG-$OPT_SCHEME
|
MDBENCH_BIN=./MDBench-$OPT_SCHEME-$TAG-$ISA-$PREC
|
||||||
FIXED_PARAMS="--freq $FREQ"
|
FIXED_PARAMS="--freq $FREQ"
|
||||||
CPU_VENDOR=$(lscpu | grep "Vendor ID" | tr -s ' ' | cut -d ' ' -f3)
|
CPU_VENDOR=$(lscpu | grep "Vendor ID" | tr -s ' ' | cut -d ' ' -f3)
|
||||||
|
|
||||||
@ -46,6 +66,8 @@ function run_benchmark() {
|
|||||||
|
|
||||||
echo "Tag: $TAG" | tee -a $LOG
|
echo "Tag: $TAG" | tee -a $LOG
|
||||||
echo "Optimization scheme: $OPT_SCHEME" | tee -a $LOG
|
echo "Optimization scheme: $OPT_SCHEME" | tee -a $LOG
|
||||||
|
echo "Instruction set: $ISA" | tee -a $LOG
|
||||||
|
echo "Precision: $PREC" | tee -a $LOG
|
||||||
echo "Binary: $MDBENCH_BIN(-stub)" | tee -a $LOG
|
echo "Binary: $MDBENCH_BIN(-stub)" | tee -a $LOG
|
||||||
echo "Frequency: $FREQ" | tee -a $LOG
|
echo "Frequency: $FREQ" | tee -a $LOG
|
||||||
echo "Number of runs: $NRUNS" | tee -a $LOG
|
echo "Number of runs: $NRUNS" | tee -a $LOG
|
||||||
|
Loading…
Reference in New Issue
Block a user