diff --git a/README.md b/README.md
index e2b7940..71059f4 100644
--- a/README.md
+++ b/README.md
@@ -31,7 +31,7 @@ Apart from that, many tools to study and evaluate the in-depth performance of su
 Properly configure your building by changing `config.mk` file. The following options are available:
 
 - **TAG:** Compiler tag (available options: GCC, CLANG, ICC, ONEAPI, NVCC).
-- **ISA:** Instruction set (available options: SSE, AVX, AVX2, AVX512).
+- **ISA:** Instruction set (available options: SSE, AVX, AVX\_FMA, AVX2, AVX512).
 - **MASK\_REGISTERS:** Use AVX512 mask registers (always true when ISA is set to AVX512).
 - **OPT\_SCHEME:** Optimization algorithm (available options: lammps, gromacs).
 - **ENABLE\_LIKWID:** Enable likwid to make use of HPM counters.
diff --git a/common/includes/simd/avx2_double.h b/common/includes/simd/avx2_double.h
index beebc16..c48fe6c 100644
--- a/common/includes/simd/avx2_double.h
+++ b/common/includes/simd/avx2_double.h
@@ -60,7 +60,8 @@ static inline MD_FLOAT simd_incr_reduced_sum(MD_FLOAT *m, MD_SIMD_FLOAT v0, MD_S
 }
 
 static inline MD_SIMD_FLOAT select_by_mask(MD_SIMD_FLOAT a, MD_SIMD_MASK m) { return _mm256_and_pd(a, m); }
-static inline MD_SIMD_FLOAT simd_reciprocal(MD_SIMD_FLOAT a) { return _mm256_rcp14_pd(a); }
+static inline MD_SIMD_FLOAT simd_reciprocal(MD_SIMD_FLOAT a) { return _mm256_cvtps_pd(_mm_rcp_ps(_mm256_cvtpd_ps(a))); }
+//static inline MD_SIMD_FLOAT simd_reciprocal(MD_SIMD_FLOAT a) { return _mm256_rcp14_pd(a); }
 static inline MD_SIMD_FLOAT simd_fma(MD_SIMD_FLOAT a, MD_SIMD_FLOAT b, MD_SIMD_FLOAT c) { return _mm256_fmadd_pd(a, b, c); }
 static inline MD_SIMD_FLOAT simd_masked_add(MD_SIMD_FLOAT a, MD_SIMD_FLOAT b, MD_SIMD_MASK m) { return simd_add(a, _mm256_and_pd(b, m)); }
 static inline MD_SIMD_MASK simd_mask_cond_lt(MD_SIMD_FLOAT a, MD_SIMD_FLOAT b) { return _mm256_cmp_pd(a, b, _CMP_LT_OQ); }