From e5c233e072accd611b83c83f3e048777acbfafb1 Mon Sep 17 00:00:00 2001 From: Rafael Ravedutti Date: Wed, 21 Dec 2022 18:04:18 +0100 Subject: [PATCH] Update script Signed-off-by: Rafael Ravedutti --- common/includes/simd/avx512_float.h | 11 ++++++---- include_ICX.mk | 5 +++-- util/evaluate_latency_and_cfd.sh | 32 ++++++++++++++++++----------- 3 files changed, 30 insertions(+), 18 deletions(-) diff --git a/common/includes/simd/avx512_float.h b/common/includes/simd/avx512_float.h index 954e54f..1fe5803 100644 --- a/common/includes/simd/avx512_float.h +++ b/common/includes/simd/avx512_float.h @@ -7,10 +7,13 @@ #include #include #include -#include +#ifndef NO_ZMM_INTRIN +# include +#endif -#define MD_SIMD_FLOAT __m512 -#define MD_SIMD_MASK __mmask16 +#define MD_SIMD_FLOAT __m512 +#define MD_SIMD_MASK __mmask16 +#define MD_SIMD_INT __m256i static inline MD_SIMD_FLOAT simd_broadcast(float scalar) { return _mm512_set1_ps(scalar); } static inline MD_SIMD_FLOAT simd_zero() { return _mm512_set1_ps(0.0f); } @@ -69,7 +72,7 @@ static inline MD_FLOAT simd_h_dual_incr_reduced_sum(float* m, MD_SIMD_FLOAT v0, return _mm_cvtss_f32(t3); } -inline void simd_h_decr(MD_FLOAT *m, MD_SIMD_FLOAT a) { +static inline void simd_h_decr(MD_FLOAT *m, MD_SIMD_FLOAT a) { __m256 t; a = _mm512_add_ps(a, _mm512_shuffle_f32x4(a, a, 0xee)); t = _mm256_load_ps(m); diff --git a/include_ICX.mk b/include_ICX.mk index 0f21d33..ca45ee7 100644 --- a/include_ICX.mk +++ b/include_ICX.mk @@ -3,13 +3,14 @@ LINKER = $(CC) OPENMP = #-qopenmp PROFILE = #-profile-functions -g -pg -#OPTS = -Ofast -xCORE-AVX512 -qopt-zmm-usage=high $(PROFILE) +OPTS = -Ofast -xCORE-AVX512 -qopt-zmm-usage=high $(PROFILE) +#OPTS = -Ofast -march=cascadelake -xCORE-AVX512 -qopt-zmm-usage=high $(PROFILE) #OPTS = -Ofast -xCORE-AVX2 $(PROFILE) #OPTS = -Ofast -xAVX $(PROFILE) #OPTS = -Ofast -xAVX2 $(PROFILE) #OPTS = -Ofast -xSSE4.2 $(PROFILE) #OPTS = -Ofast -no-vec $(PROFILE) -OPTS = -Ofast -xHost $(PROFILE) +#OPTS = -Ofast -xHost $(PROFILE) CFLAGS = $(PROFILE) $(OPENMP) $(OPTS) ASFLAGS = #-masm=intel LFLAGS = $(PROFILE) $(OPTS) $(OPENMP) diff --git a/util/evaluate_latency_and_cfd.sh b/util/evaluate_latency_and_cfd.sh index 140fe38..5d02093 100644 --- a/util/evaluate_latency_and_cfd.sh +++ b/util/evaluate_latency_and_cfd.sh @@ -1,14 +1,18 @@ #!/bin/bash -TAG=ICC -OPT_SCHEME=lammps +# Adjustable variables +TAG="${TAG:-ICX}" +OPT_SCHEME="${OPT_SCHEME:-gromacs}" +CORE="${CORE:-0}" +FREQ="${FREQ:-2.4}" +NRUNS="${NRUNS:-3}" +LOG="${LOG:-latencies_and_cfds.log}" +STUB_ONLY="${STUB_ONLY:-false}" + +# Other useful variables MDBENCH_BIN=./MDBench-$TAG-$OPT_SCHEME -CORE=0 -FREQ=2.4 -NRUNS=3 FIXED_PARAMS="--freq $FREQ" CPU_VENDOR=$(lscpu | grep "Vendor ID" | tr -s ' ' | cut -d ' ' -f3) -LOG=latencies_and_cfds.log if [ "$CPU_VENDOR" == "GenuineIntel" ]; then ALL_PREFETCHERS="HW_PREFETCHER,CL_PREFETCHER,DCU_PREFETCHER,IP_PREFETCHER" @@ -45,6 +49,7 @@ echo "Optimization scheme: $OPT_SCHEME" | tee -a $LOG echo "Binary: $MDBENCH_BIN(-stub)" | tee -a $LOG echo "Frequency: $FREQ" | tee -a $LOG echo "Number of runs: $NRUNS" | tee -a $LOG +echo "Run only stubbed cases: $STUB_ONLY" | tee -a $LOG echo "Fixing frequencies..." likwid-setFrequencies -f $FREQ -t 0 @@ -65,12 +70,15 @@ for p in $PREFETCHERS; do fi MSG="$p: " - run_benchmark $MDBENCH_BIN - MSG+="standard=$BEST, " - run_benchmark $MDBENCH_BIN -i data/copper_melting/input_lj_cu_one_atomtype_20x20x20.dmp - MSG+="melt=$BEST, " - run_benchmark $MDBENCH_BIN -p data/argon_1000/mdbench_params.conf -i data/argon_1000/tprout.gro - MSG+="argon=$BEST, " + if [ "$STUB_ONLY" == "false" ]; then + run_benchmark $MDBENCH_BIN + MSG+="standard=$BEST, " + run_benchmark $MDBENCH_BIN -i data/copper_melting/input_lj_cu_one_atomtype_20x20x20.dmp + MSG+="melt=$BEST, " + run_benchmark $MDBENCH_BIN -p data/argon_1000/mdbench_params.conf -i data/argon_1000/tprout.gro + MSG+="argon=$BEST, " + fi + run_benchmark $MDBENCH_BIN-stub $STUB1_PARAMS MSG+="$STUB1_NAME=$BEST, " run_benchmark $MDBENCH_BIN-stub $STUB2_PARAMS