Add AVX_FMA ISA

Signed-off-by: Rafael Ravedutti <rafaelravedutti@gmail.com>
This commit is contained in:
Rafael Ravedutti 2022-11-15 01:24:30 +01:00
parent cd1fbfb3c8
commit efa462d0af
4 changed files with 13 additions and 1 deletions

View File

@ -77,6 +77,10 @@ ifeq ($(strip $(__ISA_AVX__)),true)
DEFINES += -D__ISA_AVX__
endif
# Forward the AVX+FMA selection to the compiler as a preprocessor define.
ifeq "$(strip $(__ISA_AVX_FMA__))" "true"
    DEFINES += -D__ISA_AVX_FMA__
endif
# Forward the AVX2 selection to the compiler as a preprocessor define.
ifeq "$(strip $(__ISA_AVX2__))" "true"
    DEFINES += -D__ISA_AVX2__
endif

View File

@ -61,7 +61,11 @@ static inline MD_FLOAT simd_incr_reduced_sum(MD_FLOAT *m, MD_SIMD_FLOAT v0, MD_S
/* Bitwise-ANDs `a` with mask `m`: bits of `a` survive only where `m` has bits set. */
static inline MD_SIMD_FLOAT select_by_mask(MD_SIMD_FLOAT a, MD_SIMD_MASK m)
{
    MD_SIMD_FLOAT selected = _mm256_and_pd(a, m);
    return selected;
}
/*
 * Per-lane reciprocal of `a`.
 *
 * The four doubles are narrowed to single precision, passed through the
 * single-precision reciprocal intrinsic (_mm_rcp_ps), and widened back to
 * double, so the result carries at most single-precision accuracy.
 */
static inline MD_SIMD_FLOAT simd_reciprocal(MD_SIMD_FLOAT a)
{
    __m128 narrowed = _mm256_cvtpd_ps(a);
    __m128 inverse = _mm_rcp_ps(narrowed);
    return _mm256_cvtps_pd(inverse);
}
/*
 * Per-lane multiply-add: returns a * b + c.
 *
 * When __ISA_AVX_FMA__ is defined the hardware FMA intrinsic is used;
 * otherwise the result is composed from simd_mul followed by simd_add.
 */
static inline MD_SIMD_FLOAT simd_fma(MD_SIMD_FLOAT a, MD_SIMD_FLOAT b, MD_SIMD_FLOAT c)
{
#ifdef __ISA_AVX_FMA__
    return _mm256_fmadd_pd(a, b, c);
#else
    MD_SIMD_FLOAT product = simd_mul(a, b);
    return simd_add(product, c);
#endif
}
/* Adds `b` to `a` after ANDing `b` with mask `m`, so masked-out bits of `b` contribute zero. */
static inline MD_SIMD_FLOAT simd_masked_add(MD_SIMD_FLOAT a, MD_SIMD_FLOAT b, MD_SIMD_MASK m)
{
    MD_SIMD_FLOAT masked_b = _mm256_and_pd(b, m);
    return simd_add(a, masked_b);
}
/* Per-lane mask of a < b using the ordered, non-signaling less-than predicate (_CMP_LT_OQ). */
static inline MD_SIMD_MASK simd_mask_cond_lt(MD_SIMD_FLOAT a, MD_SIMD_FLOAT b)
{
    MD_SIMD_MASK less_than = _mm256_cmp_pd(a, b, _CMP_LT_OQ);
    return less_than;
}
/*
 * Per-lane signed 32-bit comparison a < b, returned as a 64-bit-lane mask.
 *
 * _mm_cmplt_epi32 yields 0xFFFFFFFF / 0 per 32-bit lane. Each result is
 * duplicated into both halves of the matching 64-bit lane, giving an
 * all-ones / all-zeros pattern, the same form _mm256_cmp_pd produces and
 * that bitwise consumers such as _mm256_and_pd require.
 *
 * A numeric conversion (_mm256_cvtepi32_pd) must not be used here: it turns
 * a true lane into the value -1.0, which has the sign bit set but is not an
 * all-ones bit pattern and corrupts values ANDed with it.
 *
 * Only SSE2 and AVX instructions are used (no AVX2 integer ops).
 */
static inline MD_SIMD_MASK simd_mask_int_cond_lt(MD_SIMD_INT a, MD_SIMD_INT b)
{
    const __m128i lt32 = _mm_cmplt_epi32(a, b);
    /* (m0,m0,m1,m1) -> 64-bit lanes 0 and 1 */
    const __m128i lanes01 = _mm_unpacklo_epi32(lt32, lt32);
    /* (m2,m2,m3,m3) -> 64-bit lanes 2 and 3 */
    const __m128i lanes23 = _mm_unpackhi_epi32(lt32, lt32);
    return _mm256_castsi256_pd(
        _mm256_insertf128_si256(_mm256_castsi128_si256(lanes01), lanes23, 1));
}

View File

@ -1,6 +1,6 @@
# Build configuration defaults. Each variable uses `?=`, so a value already
# set on the make command line or in the environment takes precedence.
# Compiler tag (GCC/CLANG/ICC/ONEAPI/NVCC)
TAG ?= ICC
# Instruction set (SSE/AVX/AVX_FMA/AVX2/AVX512)
ISA ?= AVX512
# Optimization scheme (lammps/gromacs/clusters_per_bin)
OPT_SCHEME ?= lammps

View File

@ -4,6 +4,10 @@ ifeq ($(strip $(ISA)), SSE)
else ifeq ($(strip $(ISA)), AVX)
__ISA_AVX__=true
__SIMD_WIDTH_DBL__=4
else ifeq ($(strip $(ISA)), AVX_FMA)
__ISA_AVX__=true
__ISA_AVX_FMA__=true
__SIMD_WIDTH_DBL__=4
else ifeq ($(strip $(ISA)), AVX2)
__ISA_AVX2__=true
#__SIMD_KERNEL__=true