Add diagonal checks
Signed-off-by: Rafael Ravedutti <rafaelravedutti@gmail.com>
This commit is contained in:
parent
296a4c4e01
commit
d138f975f6
@ -51,6 +51,8 @@ void createAtom(Atom *atom, Parameter *param) {
|
|||||||
atom->cutforcesq = allocate(ALIGNMENT, atom->ntypes * atom->ntypes * sizeof(MD_FLOAT));
|
atom->cutforcesq = allocate(ALIGNMENT, atom->ntypes * atom->ntypes * sizeof(MD_FLOAT));
|
||||||
atom->cutneighsq = allocate(ALIGNMENT, atom->ntypes * atom->ntypes * sizeof(MD_FLOAT));
|
atom->cutneighsq = allocate(ALIGNMENT, atom->ntypes * atom->ntypes * sizeof(MD_FLOAT));
|
||||||
atom->exclusion_filter = allocate(ALIGNMENT, CLUSTER_M * VECTOR_WIDTH * sizeof(MD_UINT));
|
atom->exclusion_filter = allocate(ALIGNMENT, CLUSTER_M * VECTOR_WIDTH * sizeof(MD_UINT));
|
||||||
|
atom->diagonal_4xn_j_minus_i = allocate(ALIGNMENT, MAX(CLUSTER_M, VECTOR_WIDTH) * sizeof(MD_UINT));
|
||||||
|
atom->diagonal_2xnn_j_minus_i = allocate(ALIGNMENT, VECTOR_WIDTH * sizeof(MD_UINT));
|
||||||
|
|
||||||
for(int i = 0; i < atom->ntypes * atom->ntypes; i++) {
|
for(int i = 0; i < atom->ntypes * atom->ntypes; i++) {
|
||||||
atom->epsilon[i] = param->epsilon;
|
atom->epsilon[i] = param->epsilon;
|
||||||
@ -59,6 +61,15 @@ void createAtom(Atom *atom, Parameter *param) {
|
|||||||
atom->cutforcesq[i] = param->cutforce * param->cutforce;
|
atom->cutforcesq[i] = param->cutforce * param->cutforce;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
for(int j = 0; j < MAX(CLUSTER_M, VECTOR_WIDTH); j++) {
|
||||||
|
atom->diagonal_4xn_j_minus_i[j] = j - 0.5;
|
||||||
|
}
|
||||||
|
|
||||||
|
for(int j = 0; j < VECTOR_WIDTH / 2; j++) {
|
||||||
|
atom->diagonal_2xnn_j_minus_i[j] = j - 0.5;
|
||||||
|
atom->diagonal_2xnn_j_minus_i[VECTOR_WIDTH / 2 + j] = j - 1 - 0.5;
|
||||||
|
}
|
||||||
|
|
||||||
for(int i = 0; i < CLUSTER_M * VECTOR_WIDTH; i++) {
|
for(int i = 0; i < CLUSTER_M * VECTOR_WIDTH; i++) {
|
||||||
atom->exclusion_filter[i] = (1U << i);
|
atom->exclusion_filter[i] = (1U << i);
|
||||||
}
|
}
|
||||||
|
@ -23,10 +23,8 @@ static inline void gmx_load_simd_2xnn_interactions(
|
|||||||
|
|
||||||
//SimdInt32 mask_pr_S(excl);
|
//SimdInt32 mask_pr_S(excl);
|
||||||
MD_SIMD_INT32 mask_pr_S = simd_int32_broadcast(excl);
|
MD_SIMD_INT32 mask_pr_S = simd_int32_broadcast(excl);
|
||||||
*interact0 = cvtIB2B(simd_test_bits(mask_pr_S));
|
*interact0 = cvtIB2B(simd_test_bits(mask_pr_S & filter0));
|
||||||
*interact2 = cvtIB2B(simd_test_bits(mask_pr_S));
|
*interact2 = cvtIB2B(simd_test_bits(mask_pr_S & filter2));
|
||||||
//*interact0 = cvtIB2B(simd_test_bits(mask_pr_S & filter0));
|
|
||||||
//*interact2 = cvtIB2B(simd_test_bits(mask_pr_S & filter2));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void gmx_load_simd_4xn_interactions(
|
static inline void gmx_load_simd_4xn_interactions(
|
||||||
@ -188,6 +186,32 @@ double computeForceLJ_2xnn_half(Parameter *param, Atom *atom, Neighbor *neighbor
|
|||||||
MD_SIMD_BITMASK filter0 = simd_load_bitmask((const int *) &atom->exclusion_filter[0 * (VECTOR_WIDTH / UNROLL_J)]);
|
MD_SIMD_BITMASK filter0 = simd_load_bitmask((const int *) &atom->exclusion_filter[0 * (VECTOR_WIDTH / UNROLL_J)]);
|
||||||
MD_SIMD_BITMASK filter2 = simd_load_bitmask((const int *) &atom->exclusion_filter[2 * (VECTOR_WIDTH / UNROLL_J)]);
|
MD_SIMD_BITMASK filter2 = simd_load_bitmask((const int *) &atom->exclusion_filter[2 * (VECTOR_WIDTH / UNROLL_J)]);
|
||||||
|
|
||||||
|
MD_SIMD_FLOAT diagonal_jmi_S = simd_load(atom->diagonal_2xnn_j_minus_i);
|
||||||
|
MD_SIMD_FLOAT zero_S = simd_broadcast(0.0);
|
||||||
|
MD_SIMD_FLOAT one_S = simd_broadcast(1.0);
|
||||||
|
|
||||||
|
/*
|
||||||
|
#if UNROLL_I == UNROLL_J
|
||||||
|
MD_SIMD_MASK diagonal_mask_S0, diagonal_mask_S2;
|
||||||
|
diagonal_mask0 = (zero_S < diagonal_jmi_S);
|
||||||
|
diagonal_jmi_S = diagonal_jmi_S - one_S;
|
||||||
|
diagonal_jmi_S = diagonal_jmi_S - one_S;
|
||||||
|
diagonal_mask2 = (zero_S < diagonal_jmi_S);
|
||||||
|
#elif 2 * UNROLL_I == UNROLL_J
|
||||||
|
*/
|
||||||
|
MD_SIMD_MASK diagonal_mask00, diagonal_mask02, diagonal_mask10, diagonal_mask12;
|
||||||
|
diagonal_mask00 = simd_mask_cond_lt(zero_S, diagonal_jmi_S);
|
||||||
|
diagonal_jmi_S = diagonal_jmi_S - one_S;
|
||||||
|
diagonal_jmi_S = diagonal_jmi_S - one_S;
|
||||||
|
diagonal_mask02 = simd_mask_cond_lt(zero_S, diagonal_jmi_S);
|
||||||
|
diagonal_jmi_S = diagonal_jmi_S - one_S;
|
||||||
|
diagonal_jmi_S = diagonal_jmi_S - one_S;
|
||||||
|
diagonal_mask10 = simd_mask_cond_lt(zero_S, diagonal_jmi_S);
|
||||||
|
diagonal_jmi_S = diagonal_jmi_S - one_S;
|
||||||
|
diagonal_jmi_S = diagonal_jmi_S - one_S;
|
||||||
|
diagonal_mask12 = simd_mask_cond_lt(zero_S, diagonal_jmi_S);
|
||||||
|
//#endif
|
||||||
|
|
||||||
#pragma omp for
|
#pragma omp for
|
||||||
for(int ci = 0; ci < atom->Nclusters_local; ci++) {
|
for(int ci = 0; ci < atom->Nclusters_local; ci++) {
|
||||||
int ci_cj0 = CJ0_FROM_CI(ci);
|
int ci_cj0 = CJ0_FROM_CI(ci);
|
||||||
@ -222,6 +246,8 @@ double computeForceLJ_2xnn_half(Parameter *param, Atom *atom, Neighbor *neighbor
|
|||||||
MD_SIMD_MASK interact0;
|
MD_SIMD_MASK interact0;
|
||||||
MD_SIMD_MASK interact2;
|
MD_SIMD_MASK interact2;
|
||||||
|
|
||||||
|
gmx_load_simd_2xnn_interactions((int)imask, filter0, filter2, &interact0, &interact2);
|
||||||
|
|
||||||
MD_SIMD_FLOAT xj_tmp = simd_load_h_duplicate(&cj_x[CL_X_OFFSET]);
|
MD_SIMD_FLOAT xj_tmp = simd_load_h_duplicate(&cj_x[CL_X_OFFSET]);
|
||||||
MD_SIMD_FLOAT yj_tmp = simd_load_h_duplicate(&cj_x[CL_Y_OFFSET]);
|
MD_SIMD_FLOAT yj_tmp = simd_load_h_duplicate(&cj_x[CL_Y_OFFSET]);
|
||||||
MD_SIMD_FLOAT zj_tmp = simd_load_h_duplicate(&cj_x[CL_Z_OFFSET]);
|
MD_SIMD_FLOAT zj_tmp = simd_load_h_duplicate(&cj_x[CL_Z_OFFSET]);
|
||||||
@ -231,14 +257,28 @@ double computeForceLJ_2xnn_half(Parameter *param, Atom *atom, Neighbor *neighbor
|
|||||||
MD_SIMD_FLOAT delx2 = simd_sub(xi2_tmp, xj_tmp);
|
MD_SIMD_FLOAT delx2 = simd_sub(xi2_tmp, xj_tmp);
|
||||||
MD_SIMD_FLOAT dely2 = simd_sub(yi2_tmp, yj_tmp);
|
MD_SIMD_FLOAT dely2 = simd_sub(yi2_tmp, yj_tmp);
|
||||||
MD_SIMD_FLOAT delz2 = simd_sub(zi2_tmp, zj_tmp);
|
MD_SIMD_FLOAT delz2 = simd_sub(zi2_tmp, zj_tmp);
|
||||||
|
|
||||||
gmx_load_simd_2xnn_interactions((int) imask, filter0, filter2, &interact0, &interact2);
|
|
||||||
|
|
||||||
MD_SIMD_FLOAT rsq0 = simd_fma(delx0, delx0, simd_fma(dely0, dely0, simd_mul(delz0, delz0)));
|
MD_SIMD_FLOAT rsq0 = simd_fma(delx0, delx0, simd_fma(dely0, dely0, simd_mul(delz0, delz0)));
|
||||||
MD_SIMD_FLOAT rsq2 = simd_fma(delx2, delx2, simd_fma(dely2, dely2, simd_mul(delz2, delz2)));
|
MD_SIMD_FLOAT rsq2 = simd_fma(delx2, delx2, simd_fma(dely2, dely2, simd_mul(delz2, delz2)));
|
||||||
|
|
||||||
MD_SIMD_MASK cutoff_mask0 = simd_mask_and(interact0, simd_mask_cond_lt(rsq0, cutforcesq_vec));
|
MD_SIMD_MASK cutoff_mask0 = simd_mask_cond_lt(rsq0, cutforcesq_vec);
|
||||||
MD_SIMD_MASK cutoff_mask2 = simd_mask_and(interact2, simd_mask_cond_lt(rsq2, cutforcesq_vec));
|
MD_SIMD_MASK cutoff_mask2 = simd_mask_cond_lt(rsq2, cutforcesq_vec);
|
||||||
|
|
||||||
|
/*#if UNROLL_J == UNROLL_I
|
||||||
|
if(cj == ci) {
|
||||||
|
cutoff_mask0 = cutoff_mask0 && diagonal_mask0;
|
||||||
|
cutoff_mask2 = cutoff_mask2 && diagonal_mask2;
|
||||||
|
}
|
||||||
|
#elif UNROLL_J == 2 * UNROLL_I*/
|
||||||
|
if(cj * 2 == ci) {
|
||||||
|
cutoff_mask0 = cutoff_mask0 && diagonal_mask00;
|
||||||
|
cutoff_mask2 = cutoff_mask2 && diagonal_mask02;
|
||||||
|
} else if (cj * 2 + 1 == ci) {
|
||||||
|
cutoff_mask0 = cutoff_mask0 && diagonal_mask10;
|
||||||
|
cutoff_mask2 = cutoff_mask2 && diagonal_mask12;
|
||||||
|
}
|
||||||
|
/*else
|
||||||
|
# error "Invalid cluster configuration!"
|
||||||
|
#endif*/
|
||||||
|
|
||||||
MD_SIMD_FLOAT sr2_0 = simd_reciprocal(rsq0);
|
MD_SIMD_FLOAT sr2_0 = simd_reciprocal(rsq0);
|
||||||
MD_SIMD_FLOAT sr2_2 = simd_reciprocal(rsq2);
|
MD_SIMD_FLOAT sr2_2 = simd_reciprocal(rsq2);
|
||||||
|
@ -33,12 +33,14 @@
|
|||||||
# if VECTOR_WIDTH > CLUSTER_M * 2
|
# if VECTOR_WIDTH > CLUSTER_M * 2
|
||||||
# define KERNEL_NAME "Simd2xNN"
|
# define KERNEL_NAME "Simd2xNN"
|
||||||
# define CLUSTER_N (VECTOR_WIDTH / 2)
|
# define CLUSTER_N (VECTOR_WIDTH / 2)
|
||||||
|
# define UNROLL_I 4
|
||||||
# define UNROLL_J 2
|
# define UNROLL_J 2
|
||||||
# define computeForceLJ computeForceLJ_2xnn
|
# define computeForceLJ computeForceLJ_2xnn
|
||||||
// Simd4xN
|
// Simd4xN
|
||||||
# else
|
# else
|
||||||
# define KERNEL_NAME "Simd4xN"
|
# define KERNEL_NAME "Simd4xN"
|
||||||
# define CLUSTER_N VECTOR_WIDTH
|
# define CLUSTER_N VECTOR_WIDTH
|
||||||
|
# define UNROLL_I 4
|
||||||
# define UNROLL_J 1
|
# define UNROLL_J 1
|
||||||
# define computeForceLJ computeForceLJ_4xn
|
# define computeForceLJ computeForceLJ_4xn
|
||||||
# endif
|
# endif
|
||||||
@ -120,6 +122,8 @@ typedef struct {
|
|||||||
int *icluster_bin;
|
int *icluster_bin;
|
||||||
int dummy_cj;
|
int dummy_cj;
|
||||||
MD_UINT *exclusion_filter;
|
MD_UINT *exclusion_filter;
|
||||||
|
MD_FLOAT *diagonal_4xn_j_minus_i;
|
||||||
|
MD_FLOAT *diagonal_2xnn_j_minus_i;
|
||||||
} Atom;
|
} Atom;
|
||||||
|
|
||||||
extern void initAtom(Atom*);
|
extern void initAtom(Atom*);
|
||||||
|
Loading…
Reference in New Issue
Block a user