Fix GROMACS AVX2 code

Signed-off-by: Rafael Ravedutti <rafaelravedutti@gmail.com>
This commit is contained in:
		| @@ -48,11 +48,13 @@ static inline MD_FLOAT simd_incr_reduced_sum(MD_FLOAT *m, MD_SIMD_FLOAT v0, MD_S | |||||||
|     t2 = _mm256_permute2f128_pd(t0, t1, 0x21); |     t2 = _mm256_permute2f128_pd(t0, t1, 0x21); | ||||||
|     t0 = _mm256_add_pd(t0, t2); |     t0 = _mm256_add_pd(t0, t2); | ||||||
|     t1 = _mm256_add_pd(t1, t2); |     t1 = _mm256_add_pd(t1, t2); | ||||||
|     t0 = _mm256_blend_pd(t0, t1, 0b1100); |     t0 = _mm256_blend_pd(t0, t1, 0xC); | ||||||
|  |     //t0 = _mm256_blend_pd(t0, t1, 0b1100); | ||||||
|     t1 = _mm256_add_pd(t0, _mm256_load_pd(m)); |     t1 = _mm256_add_pd(t0, _mm256_load_pd(m)); | ||||||
|     _mm256_store_pd(m, t1); |     _mm256_store_pd(m, t1); | ||||||
|  |  | ||||||
|     t0 = _mm256_add_pd(t0, _mm256_permute_pd(t0, 0b0101)); |     t0 = _mm256_add_pd(t0, _mm256_permute_pd(t0, 0x5)); | ||||||
|  |     //t0 = _mm256_add_pd(t0, _mm256_permute_pd(t0, 0b0101)); | ||||||
|     a0 = _mm256_castpd256_pd128(t0); |     a0 = _mm256_castpd256_pd128(t0); | ||||||
|     a1 = _mm256_extractf128_pd(t0, 0x1); |     a1 = _mm256_extractf128_pd(t0, 0x1); | ||||||
|     a0 = _mm_add_sd(a0, a1); |     a0 = _mm_add_sd(a0, a1); | ||||||
| @@ -91,7 +93,7 @@ static inline void simd_h_decr3(MD_FLOAT *m, MD_SIMD_FLOAT a0, MD_SIMD_FLOAT a1, | |||||||
| } | } | ||||||
|  |  | ||||||
| // Functions used in LAMMPS kernel | // Functions used in LAMMPS kernel | ||||||
| static inline MD_SIMD_FLOAT simd_gather(MD_SIMD_INT vidx, const MD_FLOAT *m, int s) { return _mm256_i32gather_pd(m, vidx, s); } | #define simd_gather(vidx, m, s)     _mm256_i32gather_pd(m, vidx, s); | ||||||
| static inline MD_SIMD_INT simd_int_broadcast(int scalar) { return _mm_set1_epi32(scalar); } | static inline MD_SIMD_INT simd_int_broadcast(int scalar) { return _mm_set1_epi32(scalar); } | ||||||
| static inline MD_SIMD_INT simd_int_zero() { return _mm_setzero_si128(); } | static inline MD_SIMD_INT simd_int_zero() { return _mm_setzero_si128(); } | ||||||
| static inline MD_SIMD_INT simd_int_seq() { return _mm_set_epi32(3, 2, 1, 0); } | static inline MD_SIMD_INT simd_int_seq() { return _mm_set_epi32(3, 2, 1, 0); } | ||||||
|   | |||||||
| @@ -58,6 +58,7 @@ void initNeighbor(Neighbor *neighbor, Parameter *param) { | |||||||
|     neighbor->numneigh = NULL; |     neighbor->numneigh = NULL; | ||||||
|     neighbor->numneigh_masked = NULL; |     neighbor->numneigh_masked = NULL; | ||||||
|     neighbor->neighbors = NULL; |     neighbor->neighbors = NULL; | ||||||
|  |     neighbor->neighbors_imask = NULL; | ||||||
| } | } | ||||||
|  |  | ||||||
| void setupNeighbor(Parameter *param, Atom *atom) { | void setupNeighbor(Parameter *param, Atom *atom) { | ||||||
| @@ -229,7 +230,9 @@ void buildNeighbor(Atom *atom, Neighbor *neighbor) { | |||||||
|     if(atom->Nclusters_local > nmax) { |     if(atom->Nclusters_local > nmax) { | ||||||
|         nmax = atom->Nclusters_local; |         nmax = atom->Nclusters_local; | ||||||
|         if(neighbor->numneigh) free(neighbor->numneigh); |         if(neighbor->numneigh) free(neighbor->numneigh); | ||||||
|  |         if(neighbor->numneigh_masked) free(neighbor->numneigh_masked); | ||||||
|         if(neighbor->neighbors) free(neighbor->neighbors); |         if(neighbor->neighbors) free(neighbor->neighbors); | ||||||
|  |         if(neighbor->neighbors_imask) free(neighbor->neighbors_imask); | ||||||
|         neighbor->numneigh = (int*) malloc(nmax * sizeof(int)); |         neighbor->numneigh = (int*) malloc(nmax * sizeof(int)); | ||||||
|         neighbor->numneigh_masked = (int*) malloc(nmax * sizeof(int)); |         neighbor->numneigh_masked = (int*) malloc(nmax * sizeof(int)); | ||||||
|         neighbor->neighbors = (int*) malloc(nmax * neighbor->maxneighs * sizeof(int)); |         neighbor->neighbors = (int*) malloc(nmax * neighbor->maxneighs * sizeof(int)); | ||||||
| @@ -326,6 +329,7 @@ void buildNeighbor(Atom *atom, Neighbor *neighbor) { | |||||||
|                                     imask = get_imask_simd_4xn(1, ci, cj); |                                     imask = get_imask_simd_4xn(1, ci, cj); | ||||||
|                                     #endif |                                     #endif | ||||||
|  |  | ||||||
|  |                                     if(n < neighbor->maxneighs) { | ||||||
|                                         if(imask == NBNXN_INTERACTION_MASK_ALL) { |                                         if(imask == NBNXN_INTERACTION_MASK_ALL) { | ||||||
|                                             neighptr[n] = cj; |                                             neighptr[n] = cj; | ||||||
|                                             neighptr_imask[n] = imask; |                                             neighptr_imask[n] = imask; | ||||||
| @@ -336,6 +340,7 @@ void buildNeighbor(Atom *atom, Neighbor *neighbor) { | |||||||
|                                             neighptr_imask[nmasked] = imask; |                                             neighptr_imask[nmasked] = imask; | ||||||
|                                             nmasked++; |                                             nmasked++; | ||||||
|                                         } |                                         } | ||||||
|  |                                     } | ||||||
|  |  | ||||||
|                                     n++; |                                     n++; | ||||||
|                                 } |                                 } | ||||||
| @@ -377,11 +382,12 @@ void buildNeighbor(Atom *atom, Neighbor *neighbor) { | |||||||
|         } |         } | ||||||
|  |  | ||||||
|         if(resize) { |         if(resize) { | ||||||
|             fprintf(stdout, "RESIZE %d\n", neighbor->maxneighs); |  | ||||||
|             neighbor->maxneighs = new_maxneighs * 1.2; |             neighbor->maxneighs = new_maxneighs * 1.2; | ||||||
|  |             fprintf(stdout, "RESIZE %d\n", neighbor->maxneighs); | ||||||
|             free(neighbor->neighbors); |             free(neighbor->neighbors); | ||||||
|             neighbor->neighbors = (int *) malloc(atom->Nmax * neighbor->maxneighs * sizeof(int)); |             free(neighbor->neighbors_imask); | ||||||
|             neighbor->neighbors_imask = (unsigned int *) malloc(atom->Nmax * neighbor->maxneighs * sizeof(unsigned int)); |             neighbor->neighbors = (int *) malloc(nmax * neighbor->maxneighs * sizeof(int)); | ||||||
|  |             neighbor->neighbors_imask = (unsigned int *) malloc(nmax * neighbor->maxneighs * sizeof(unsigned int)); | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
|  |  | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user