Make code compilable
Signed-off-by: Rafael Ravedutti <rafaelravedutti@gmail.com>
This commit is contained in:
@@ -45,20 +45,7 @@ static inline MD_SIMD_MASK simd_mask_and(MD_SIMD_MASK a, MD_SIMD_MASK b) { retur
|
||||
/*
 * Per-lane "a < b" comparison of two 512-bit double vectors.
 * Uses the ordered, non-signaling predicate (_CMP_LT_OQ), so a lane with a
 * NaN operand produces a cleared mask bit.
 */
static inline MD_SIMD_MASK simd_mask_cond_lt(MD_SIMD_FLOAT a, MD_SIMD_FLOAT b) {
    MD_SIMD_MASK less_than = _mm512_cmp_pd_mask(a, b, _CMP_LT_OQ);
    return less_than;
}
|
||||
/*
 * Build a lane mask from an unsigned integer via _cvtu32_mask8
 * (the 8-bit mask is taken from the low bits of `a`).
 */
static inline MD_SIMD_MASK simd_mask_from_u32(unsigned int a) {
    MD_SIMD_MASK mask = _cvtu32_mask8(a);
    return mask;
}
|
||||
/*
 * Convert a lane mask back to an unsigned integer via _cvtmask8_u32.
 */
static inline unsigned int simd_mask_to_u32(MD_SIMD_MASK a) {
    unsigned int bits = _cvtmask8_u32(a);
    return bits;
}
|
||||
|
||||
/*
 * Build one 512-bit vector of doubles whose lanes 0-3 come from c0 and whose
 * lanes 4-7 come from c1, for component index d.
 *
 * CLUSTER_AOS: gathers the doubles at element offsets d, d+3, d+6, d+9 from
 *   c0 into lanes 0-3 (mask 0x0f) and from c1 into lanes 4-7 (mask 0xf0).
 * otherwise:   loads four contiguous doubles starting at c0[d * CLUSTER_M]
 *   into the low half and four starting at c1[d * CLUSTER_M] into the high
 *   half. Both addresses must be 32-byte aligned (_mm256_load_pd).
 */
static MD_SIMD_FLOAT simd_load2(MD_FLOAT *c0, MD_FLOAT *c1, int d) {
    MD_SIMD_FLOAT x;
#ifdef CLUSTER_AOS
    __m256i aos_gather_vindex = _mm256_set_epi32(9, 6, 3, 0, 9, 6, 3, 0);
    __m256i vindex = _mm256_add_epi32(aos_gather_vindex, _mm256_set1_epi32(d));
    x = _mm512_mask_i32gather_pd(simd_zero(), simd_mask_from_u32(0x0f), vindex, c0, sizeof(double));
    x = _mm512_mask_i32gather_pd(x, simd_mask_from_u32(0xf0), vindex, c1, sizeof(double));
#else
    /*
     * Only the low 256 bits taken from c0 survive the insert below, so load
     * just those: a full _mm512_load_pd would read 64 bytes and demand
     * 64-byte alignment for data that is immediately overwritten.
     * The cast leaves the upper half undefined until the insert fills it.
     */
    x = _mm512_castpd256_pd512(_mm256_load_pd(&c0[d * CLUSTER_M]));
    x = _mm512_insertf64x4(x, _mm256_load_pd(&c1[d * CLUSTER_M]), 1);
#endif
    return x;
}
|
||||
/*
 * Aligned load of one 512-bit vector of doubles starting at p.
 * _mm512_load_pd requires p to be 64-byte aligned.
 */
static inline MD_SIMD_FLOAT simd_load(MD_FLOAT *p) {
    MD_SIMD_FLOAT loaded = _mm512_load_pd(p);
    return loaded;
}
|
||||
|
||||
static inline MD_FLOAT simd_horizontal_sum(MD_SIMD_FLOAT a) {
|
||||
MD_SIMD_FLOAT x = _mm512_add_pd(a, _mm512_shuffle_f64x2(a, a, 0xee));
|
||||
@@ -82,6 +69,7 @@ static inline MD_SIMD_FLOAT simd_zero() { return _mm256_set1_pd(0.0); }
|
||||
/* Lane-wise sum a + b of two 256-bit double vectors. */
static inline MD_SIMD_FLOAT simd_add(MD_SIMD_FLOAT a, MD_SIMD_FLOAT b) {
    MD_SIMD_FLOAT sum = _mm256_add_pd(a, b);
    return sum;
}
|
||||
/* Lane-wise difference a - b of two 256-bit double vectors. */
static inline MD_SIMD_FLOAT simd_sub(MD_SIMD_FLOAT a, MD_SIMD_FLOAT b) {
    MD_SIMD_FLOAT difference = _mm256_sub_pd(a, b);
    return difference;
}
|
||||
/* Lane-wise product a * b of two 256-bit double vectors. */
static inline MD_SIMD_FLOAT simd_mul(MD_SIMD_FLOAT a, MD_SIMD_FLOAT b) {
    MD_SIMD_FLOAT product = _mm256_mul_pd(a, b);
    return product;
}
|
||||
/*
 * Aligned load of one 256-bit vector of doubles starting at p.
 * _mm256_load_pd requires p to be 32-byte aligned.
 */
static inline MD_SIMD_FLOAT simd_load(MD_FLOAT *p) {
    MD_SIMD_FLOAT loaded = _mm256_load_pd(p);
    return loaded;
}
|
||||
|
||||
#ifdef NO_AVX2
|
||||
/*
 * Approximate lane-wise reciprocal 1/a.
 * The doubles are narrowed to single precision, passed through the
 * approximate reciprocal instruction (_mm_rcp_ps), and widened back, so the
 * result carries only the accuracy of that single-precision estimate.
 */
static inline MD_SIMD_FLOAT simd_reciprocal(MD_SIMD_FLOAT a) {
    __m128 narrowed = _mm256_cvtpd_ps(a);
    __m128 estimate = _mm_rcp_ps(narrowed);
    return _mm256_cvtps_pd(estimate);
}
|
||||
@@ -124,20 +112,6 @@ static inline MD_FLOAT simd_horizontal_sum(MD_SIMD_FLOAT a) {
|
||||
}
|
||||
#endif
|
||||
|
||||
/*
 * Load one 256-bit vector of doubles for component index d from c0.
 *
 * CLUSTER_AOS: gathers the doubles at element offsets d, d+3, d+6, d+9
 *   (requires AVX2; building with NO_AVX2 is rejected at compile time).
 * otherwise:   aligned load of four contiguous doubles starting at
 *   c0[d * CLUSTER_M]; that address must be 32-byte aligned.
 *
 * NOTE(review): this file also defines simd_load(MD_FLOAT *p). C has no
 * overloading, so if both definitions are active in the same preprocessor
 * branch the translation unit will not compile - confirm which one is
 * meant to survive.
 */
static MD_SIMD_FLOAT simd_load(MD_FLOAT *c0, int d) {
#ifdef CLUSTER_AOS
#ifdef NO_AVX2
#error "Not possible to use AoS cluster layout without AVX2 support!"
#endif
    /* 128-bit integer intrinsics use the plain _mm_ prefix; no _mm128_ family exists. */
    __m128i aos_gather_vindex = _mm_set_epi32(9, 6, 3, 0);
    __m128i vindex = _mm_add_epi32(aos_gather_vindex, _mm_set1_epi32(d));
    return _mm256_i32gather_pd(c0, vindex, sizeof(double));
#else
    return _mm256_load_pd(&c0[d * CLUSTER_M]);
#endif
}
|
||||
|
||||
#endif
|
||||
|
||||
|
Reference in New Issue
Block a user