Fix GROMACS AVX2 code
Signed-off-by: Rafael Ravedutti <rafaelravedutti@gmail.com>
This commit is contained in:
parent
17e239ed6d
commit
c438fc6832
@ -48,11 +48,13 @@ static inline MD_FLOAT simd_incr_reduced_sum(MD_FLOAT *m, MD_SIMD_FLOAT v0, MD_S
|
|||||||
t2 = _mm256_permute2f128_pd(t0, t1, 0x21);
|
t2 = _mm256_permute2f128_pd(t0, t1, 0x21);
|
||||||
t0 = _mm256_add_pd(t0, t2);
|
t0 = _mm256_add_pd(t0, t2);
|
||||||
t1 = _mm256_add_pd(t1, t2);
|
t1 = _mm256_add_pd(t1, t2);
|
||||||
t0 = _mm256_blend_pd(t0, t1, 0b1100);
|
t0 = _mm256_blend_pd(t0, t1, 0xC);
|
||||||
|
//t0 = _mm256_blend_pd(t0, t1, 0b1100);
|
||||||
t1 = _mm256_add_pd(t0, _mm256_load_pd(m));
|
t1 = _mm256_add_pd(t0, _mm256_load_pd(m));
|
||||||
_mm256_store_pd(m, t1);
|
_mm256_store_pd(m, t1);
|
||||||
|
|
||||||
t0 = _mm256_add_pd(t0, _mm256_permute_pd(t0, 0b0101));
|
t0 = _mm256_add_pd(t0, _mm256_permute_pd(t0, 0x5));
|
||||||
|
//t0 = _mm256_add_pd(t0, _mm256_permute_pd(t0, 0b0101));
|
||||||
a0 = _mm256_castpd256_pd128(t0);
|
a0 = _mm256_castpd256_pd128(t0);
|
||||||
a1 = _mm256_extractf128_pd(t0, 0x1);
|
a1 = _mm256_extractf128_pd(t0, 0x1);
|
||||||
a0 = _mm_add_sd(a0, a1);
|
a0 = _mm_add_sd(a0, a1);
|
||||||
@ -91,7 +93,7 @@ static inline void simd_h_decr3(MD_FLOAT *m, MD_SIMD_FLOAT a0, MD_SIMD_FLOAT a1,
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Functions used in LAMMPS kernel
|
// Functions used in LAMMPS kernel
|
||||||
static inline MD_SIMD_FLOAT simd_gather(MD_SIMD_INT vidx, const MD_FLOAT *m, int s) { return _mm256_i32gather_pd(m, vidx, s); }
|
// Gathers packed doubles from m using the 32-bit indices in vidx and the byte scale s.
// Kept as a macro so that s is forwarded to the intrinsic exactly as written
// (NOTE(review): the gather scale is presumably required to be a compile-time
// immediate -- confirm against the intrinsic's documentation).
// The expansion has no trailing semicolon and parenthesizes every argument, so it
// can be used as a sub-expression or as an argument to another call.
#define simd_gather(vidx, m, s) _mm256_i32gather_pd((m), (vidx), (s))
|
||||||
static inline MD_SIMD_INT simd_int_broadcast(int scalar) { return _mm_set1_epi32(scalar); }
|
static inline MD_SIMD_INT simd_int_broadcast(int scalar) { return _mm_set1_epi32(scalar); }
|
||||||
static inline MD_SIMD_INT simd_int_zero() { return _mm_setzero_si128(); }
|
static inline MD_SIMD_INT simd_int_zero() { return _mm_setzero_si128(); }
|
||||||
static inline MD_SIMD_INT simd_int_seq() { return _mm_set_epi32(3, 2, 1, 0); }
|
static inline MD_SIMD_INT simd_int_seq() { return _mm_set_epi32(3, 2, 1, 0); }
|
||||||
|
@ -58,6 +58,7 @@ void initNeighbor(Neighbor *neighbor, Parameter *param) {
|
|||||||
neighbor->numneigh = NULL;
|
neighbor->numneigh = NULL;
|
||||||
neighbor->numneigh_masked = NULL;
|
neighbor->numneigh_masked = NULL;
|
||||||
neighbor->neighbors = NULL;
|
neighbor->neighbors = NULL;
|
||||||
|
neighbor->neighbors_imask = NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
void setupNeighbor(Parameter *param, Atom *atom) {
|
void setupNeighbor(Parameter *param, Atom *atom) {
|
||||||
@ -229,7 +230,9 @@ void buildNeighbor(Atom *atom, Neighbor *neighbor) {
|
|||||||
if(atom->Nclusters_local > nmax) {
|
if(atom->Nclusters_local > nmax) {
|
||||||
nmax = atom->Nclusters_local;
|
nmax = atom->Nclusters_local;
|
||||||
if(neighbor->numneigh) free(neighbor->numneigh);
|
if(neighbor->numneigh) free(neighbor->numneigh);
|
||||||
|
if(neighbor->numneigh_masked) free(neighbor->numneigh_masked);
|
||||||
if(neighbor->neighbors) free(neighbor->neighbors);
|
if(neighbor->neighbors) free(neighbor->neighbors);
|
||||||
|
if(neighbor->neighbors_imask) free(neighbor->neighbors_imask);
|
||||||
neighbor->numneigh = (int*) malloc(nmax * sizeof(int));
|
neighbor->numneigh = (int*) malloc(nmax * sizeof(int));
|
||||||
neighbor->numneigh_masked = (int*) malloc(nmax * sizeof(int));
|
neighbor->numneigh_masked = (int*) malloc(nmax * sizeof(int));
|
||||||
neighbor->neighbors = (int*) malloc(nmax * neighbor->maxneighs * sizeof(int));
|
neighbor->neighbors = (int*) malloc(nmax * neighbor->maxneighs * sizeof(int));
|
||||||
@ -326,15 +329,17 @@ void buildNeighbor(Atom *atom, Neighbor *neighbor) {
|
|||||||
imask = get_imask_simd_4xn(1, ci, cj);
|
imask = get_imask_simd_4xn(1, ci, cj);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
if(imask == NBNXN_INTERACTION_MASK_ALL) {
|
if(n < neighbor->maxneighs) {
|
||||||
neighptr[n] = cj;
|
if(imask == NBNXN_INTERACTION_MASK_ALL) {
|
||||||
neighptr_imask[n] = imask;
|
neighptr[n] = cj;
|
||||||
} else {
|
neighptr_imask[n] = imask;
|
||||||
neighptr[n] = neighptr[nmasked];
|
} else {
|
||||||
neighptr_imask[n] = neighptr_imask[nmasked];
|
neighptr[n] = neighptr[nmasked];
|
||||||
neighptr[nmasked] = cj;
|
neighptr_imask[n] = neighptr_imask[nmasked];
|
||||||
neighptr_imask[nmasked] = imask;
|
neighptr[nmasked] = cj;
|
||||||
nmasked++;
|
neighptr_imask[nmasked] = imask;
|
||||||
|
nmasked++;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
n++;
|
n++;
|
||||||
@ -377,11 +382,12 @@ void buildNeighbor(Atom *atom, Neighbor *neighbor) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
if(resize) {
|
if(resize) {
|
||||||
fprintf(stdout, "RESIZE %d\n", neighbor->maxneighs);
|
|
||||||
neighbor->maxneighs = new_maxneighs * 1.2;
|
neighbor->maxneighs = new_maxneighs * 1.2;
|
||||||
|
fprintf(stdout, "RESIZE %d\n", neighbor->maxneighs);
|
||||||
free(neighbor->neighbors);
|
free(neighbor->neighbors);
|
||||||
neighbor->neighbors = (int *) malloc(atom->Nmax * neighbor->maxneighs * sizeof(int));
|
free(neighbor->neighbors_imask);
|
||||||
neighbor->neighbors_imask = (unsigned int *) malloc(atom->Nmax * neighbor->maxneighs * sizeof(unsigned int));
|
neighbor->neighbors = (int *) malloc(nmax * neighbor->maxneighs * sizeof(int));
|
||||||
|
neighbor->neighbors_imask = (unsigned int *) malloc(nmax * neighbor->maxneighs * sizeof(unsigned int));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user