Add AVX_FMA ISA
Signed-off-by: Rafael Ravedutti <rafaelravedutti@gmail.com>
This commit is contained in:
parent
cd1fbfb3c8
commit
efa462d0af
4
Makefile
4
Makefile
@@ -77,6 +77,10 @@ ifeq ($(strip $(__ISA_AVX__)),true)
|
|||||||
DEFINES += -D__ISA_AVX__
|
DEFINES += -D__ISA_AVX__
|
||||||
endif
|
endif
|
||||||
|
|
||||||
|
ifeq ($(strip $(__ISA_AVX_FMA__)),true)
|
||||||
|
DEFINES += -D__ISA_AVX_FMA__
|
||||||
|
endif
|
||||||
|
|
||||||
ifeq ($(strip $(__ISA_AVX2__)),true)
|
ifeq ($(strip $(__ISA_AVX2__)),true)
|
||||||
DEFINES += -D__ISA_AVX2__
|
DEFINES += -D__ISA_AVX2__
|
||||||
endif
|
endif
|
||||||
|
@@ -61,7 +61,11 @@ static inline MD_FLOAT simd_incr_reduced_sum(MD_FLOAT *m, MD_SIMD_FLOAT v0, MD_S
|
|||||||
|
|
||||||
static inline MD_SIMD_FLOAT select_by_mask(MD_SIMD_FLOAT a, MD_SIMD_MASK m) { return _mm256_and_pd(a, m); }
|
static inline MD_SIMD_FLOAT select_by_mask(MD_SIMD_FLOAT a, MD_SIMD_MASK m) { return _mm256_and_pd(a, m); }
|
||||||
static inline MD_SIMD_FLOAT simd_reciprocal(MD_SIMD_FLOAT a) { return _mm256_cvtps_pd(_mm_rcp_ps(_mm256_cvtpd_ps(a))); }
|
static inline MD_SIMD_FLOAT simd_reciprocal(MD_SIMD_FLOAT a) { return _mm256_cvtps_pd(_mm_rcp_ps(_mm256_cvtpd_ps(a))); }
|
||||||
|
#ifdef __ISA_AVX_FMA__
|
||||||
|
static inline MD_SIMD_FLOAT simd_fma(MD_SIMD_FLOAT a, MD_SIMD_FLOAT b, MD_SIMD_FLOAT c) { return _mm256_fmadd_pd(a, b, c); }
|
||||||
|
#else
|
||||||
static inline MD_SIMD_FLOAT simd_fma(MD_SIMD_FLOAT a, MD_SIMD_FLOAT b, MD_SIMD_FLOAT c) { return simd_add(simd_mul(a, b), c); }
|
static inline MD_SIMD_FLOAT simd_fma(MD_SIMD_FLOAT a, MD_SIMD_FLOAT b, MD_SIMD_FLOAT c) { return simd_add(simd_mul(a, b), c); }
|
||||||
|
#endif
|
||||||
static inline MD_SIMD_FLOAT simd_masked_add(MD_SIMD_FLOAT a, MD_SIMD_FLOAT b, MD_SIMD_MASK m) { return simd_add(a, _mm256_and_pd(b, m)); }
|
static inline MD_SIMD_FLOAT simd_masked_add(MD_SIMD_FLOAT a, MD_SIMD_FLOAT b, MD_SIMD_MASK m) { return simd_add(a, _mm256_and_pd(b, m)); }
|
||||||
static inline MD_SIMD_MASK simd_mask_cond_lt(MD_SIMD_FLOAT a, MD_SIMD_FLOAT b) { return _mm256_cmp_pd(a, b, _CMP_LT_OQ); }
|
static inline MD_SIMD_MASK simd_mask_cond_lt(MD_SIMD_FLOAT a, MD_SIMD_FLOAT b) { return _mm256_cmp_pd(a, b, _CMP_LT_OQ); }
|
||||||
static inline MD_SIMD_MASK simd_mask_int_cond_lt(MD_SIMD_INT a, MD_SIMD_INT b) { return _mm256_cvtepi32_pd(_mm_cmplt_epi32(a, b)); }
|
static inline MD_SIMD_MASK simd_mask_int_cond_lt(MD_SIMD_INT a, MD_SIMD_INT b) { return _mm256_cvtepi32_pd(_mm_cmplt_epi32(a, b)); }
|
||||||
|
@@ -1,6 +1,6 @@
|
|||||||
# Compiler tag (GCC/CLANG/ICC/ONEAPI/NVCC)
|
# Compiler tag (GCC/CLANG/ICC/ONEAPI/NVCC)
|
||||||
TAG ?= ICC
|
TAG ?= ICC
|
||||||
# Instruction set (SSE/AVX/AVX2/AVX512)
|
# Instruction set (SSE/AVX/AVX_FMA/AVX2/AVX512)
|
||||||
ISA ?= AVX512
|
ISA ?= AVX512
|
||||||
# Optimization scheme (lammps/gromacs/clusters_per_bin)
|
# Optimization scheme (lammps/gromacs/clusters_per_bin)
|
||||||
OPT_SCHEME ?= lammps
|
OPT_SCHEME ?= lammps
|
||||||
|
@@ -4,6 +4,10 @@ ifeq ($(strip $(ISA)), SSE)
|
|||||||
else ifeq ($(strip $(ISA)), AVX)
|
else ifeq ($(strip $(ISA)), AVX)
|
||||||
__ISA_AVX__=true
|
__ISA_AVX__=true
|
||||||
__SIMD_WIDTH_DBL__=4
|
__SIMD_WIDTH_DBL__=4
|
||||||
|
else ifeq ($(strip $(ISA)), AVX_FMA)
|
||||||
|
__ISA_AVX__=true
|
||||||
|
__ISA_AVX_FMA__=true
|
||||||
|
__SIMD_WIDTH_DBL__=4
|
||||||
else ifeq ($(strip $(ISA)), AVX2)
|
else ifeq ($(strip $(ISA)), AVX2)
|
||||||
__ISA_AVX2__=true
|
__ISA_AVX2__=true
|
||||||
#__SIMD_KERNEL__=true
|
#__SIMD_KERNEL__=true
|
||||||
|
Loading…
Reference in New Issue
Block a user