Write LAMMPS kernel with SIMD intrinsics and implement AVX512 with double-precision functions

Signed-off-by: Rafael Ravedutti <rafaelravedutti@gmail.com>
This commit is contained in:
Rafael Ravedutti
2022-04-05 02:57:23 +02:00
parent af1756bfe4
commit ab2eb1ff50
6 changed files with 113 additions and 4 deletions

View File

@@ -21,11 +21,20 @@
* =======================================================================================
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <immintrin.h>
#include <zmmintrin.h>
#ifndef CLUSTER_M
# define CLUSTER_M 1
#endif
#ifndef CLUSTER_N
# define CLUSTER_N 1
#endif
#ifdef AVX512
# if PRECISION == 2
# include "simd/avx512_double.h"

View File

@@ -26,6 +26,7 @@
#define MD_SIMD_FLOAT __m512d
#define MD_SIMD_MASK __mmask8
#define MD_SIMD_INT __m256i
static inline MD_SIMD_FLOAT simd_broadcast(MD_FLOAT scalar) { return _mm512_set1_pd(scalar); }
static inline MD_SIMD_FLOAT simd_zero() { return _mm512_set1_pd(0.0); }
@@ -110,3 +111,14 @@ static inline void simd_h_decr3(MD_FLOAT *m, MD_SIMD_FLOAT a0, MD_SIMD_FLOAT a1,
simd_h_decr(m + CLUSTER_N, a1);
simd_h_decr(m + CLUSTER_N * 2, a2);
}
// Functions used in LAMMPS kernel
static inline MD_SIMD_FLOAT simd_gather(MD_SIMD_INT vidx, const MD_FLOAT *m, int s) { return _mm512_i32gather_pd(vidx, m, s); }
static inline MD_SIMD_INT simd_int_broadcast(int scalar) { return _mm256_set1_epi32(scalar); }
static inline MD_SIMD_INT simd_int_zero() { return _mm256_setzero_si256(); }
static inline MD_SIMD_INT simd_int_seq() { return _mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0); }
static inline MD_SIMD_INT simd_int_load(const int *m) { return _mm256_load_epi32(m); }
static inline MD_SIMD_INT simd_int_add(MD_SIMD_INT a, MD_SIMD_INT b) { return _mm256_add_epi32(a, b); }
static inline MD_SIMD_INT simd_int_mul(MD_SIMD_INT a, MD_SIMD_INT b) { return _mm256_mul_epi32(a, b); }
static inline MD_SIMD_INT simd_int_mask_load(const int *m, MD_SIMD_MASK k) { return _mm256_mask_load_epi32(simd_int_zero(), k, m); }
static inline MD_SIMD_MASK simd_mask_int_cond_lt(MD_SIMD_INT a, MD_SIMD_INT b) { return _mm256_cmp_epi32_mask(a, b, _MM_CMPINT_LT); }