Fix code for AVX and remove warnings
Signed-off-by: Rafael Ravedutti <rafaelravedutti@gmail.com>
This commit is contained in:
@@ -71,6 +71,7 @@ static inline MD_SIMD_FLOAT simd_reciprocal(MD_SIMD_FLOAT a) { return _mm256_cvt
|
||||
static inline MD_SIMD_FLOAT simd_fma(MD_SIMD_FLOAT a, MD_SIMD_FLOAT b, MD_SIMD_FLOAT c) { return simd_add(simd_mul(a, b), c); }
|
||||
static inline MD_SIMD_FLOAT simd_masked_add(MD_SIMD_FLOAT a, MD_SIMD_FLOAT b, MD_SIMD_MASK m) { return simd_add(a, _mm256_and_pd(b, m)); }
|
||||
static inline MD_SIMD_MASK simd_mask_cond_lt(MD_SIMD_FLOAT a, MD_SIMD_FLOAT b) { return _mm256_cmp_pd(a, b, _CMP_LT_OQ); }
|
||||
static inline MD_SIMD_MASK simd_mask_int_cond_lt(MD_SIMD_INT a, MD_SIMD_INT b) { return _mm256_cvtepi32_pd(_mm_cmplt_epi32(a, b)); }
|
||||
static inline MD_SIMD_MASK simd_mask_and(MD_SIMD_MASK a, MD_SIMD_MASK b) { return _mm256_and_pd(a, b); }
|
||||
// TODO: Initialize all diagonal cases and just select the proper one (all bits set or diagonal) based on cond0
|
||||
static inline MD_SIMD_MASK simd_mask_from_u32(unsigned int a) {
|
||||
@@ -96,6 +97,7 @@ static inline MD_SIMD_FLOAT simd_reciprocal(MD_SIMD_FLOAT a) { return _mm256_rcp
|
||||
static inline MD_SIMD_FLOAT simd_fma(MD_SIMD_FLOAT a, MD_SIMD_FLOAT b, MD_SIMD_FLOAT c) { return _mm256_fmadd_pd(a, b, c); }
|
||||
static inline MD_SIMD_FLOAT simd_masked_add(MD_SIMD_FLOAT a, MD_SIMD_FLOAT b, MD_SIMD_MASK m) { return _mm256_mask_add_pd(a, m, a, b); }
|
||||
static inline MD_SIMD_MASK simd_mask_cond_lt(MD_SIMD_FLOAT a, MD_SIMD_FLOAT b) { return _mm256_cmp_pd_mask(a, b, _CMP_LT_OQ); }
|
||||
static inline MD_SIMD_MASK simd_mask_int_cond_lt(MD_SIMD_INT a, MD_SIMD_INT b) { return _mm_cmplt_epi32_mask(a, b); }
|
||||
static inline MD_SIMD_MASK simd_mask_and(MD_SIMD_MASK a, MD_SIMD_MASK b) { return _kand_mask8(a, b); }
|
||||
static inline MD_SIMD_MASK simd_mask_from_u32(unsigned int a) { return _cvtu32_mask8(a); }
|
||||
static inline unsigned int simd_mask_to_u32(MD_SIMD_MASK a) { return _cvtmask8_u32(a); }
|
||||
@@ -109,13 +111,13 @@ static inline MD_FLOAT simd_h_reduce_sum(MD_SIMD_FLOAT a) {
|
||||
return *((MD_FLOAT *) &a0);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
static inline void simd_h_decr3(MD_FLOAT *m, MD_SIMD_FLOAT a0, MD_SIMD_FLOAT a1, MD_SIMD_FLOAT a2) {
|
||||
fprintf(stderr, "simd_h_decr3(): Not implemented for AVX/AVX2 with double precision!");
|
||||
exit(-1);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
// Functions used in LAMMPS kernel
|
||||
static inline MD_SIMD_FLOAT simd_gather(MD_SIMD_INT vidx, const MD_FLOAT *m, int s) { return _mm256_i32gather_pd(m, vidx, s); }
|
||||
static inline MD_SIMD_INT simd_int_broadcast(int scalar) { return _mm_set1_epi32(scalar); }
|
||||
@@ -125,4 +127,3 @@ static inline MD_SIMD_INT simd_int_load(const int *m) { return _mm_load_si128((_
|
||||
static inline MD_SIMD_INT simd_int_add(MD_SIMD_INT a, MD_SIMD_INT b) { return _mm_add_epi32(a, b); }
|
||||
static inline MD_SIMD_INT simd_int_mul(MD_SIMD_INT a, MD_SIMD_INT b) { return _mm_mul_epi32(a, b); }
|
||||
static inline MD_SIMD_INT simd_int_mask_load(const int *m, MD_SIMD_MASK k) { return simd_int_load(m) & _mm256_cvtpd_epi32(k); }
|
||||
static inline MD_SIMD_MASK simd_mask_int_cond_lt(MD_SIMD_INT a, MD_SIMD_INT b) { return _mm256_cvtepi32_pd(_mm_cmplt_epi32(a, b)); }
|
||||
|
@@ -44,5 +44,6 @@ extern void random_reset(int *seed, int ibase, double *coord);
|
||||
extern int str2ff(const char *string);
|
||||
extern const char* ff2str(int ff);
|
||||
extern int get_num_threads();
|
||||
extern void readline(char *line, FILE *fp);
|
||||
|
||||
#endif
|
||||
|
Reference in New Issue
Block a user