Adjust time and likwid measurements on 4xN kernels

Signed-off-by: Rafael Ravedutti <rafaelravedutti@gmail.com>
This commit is contained in:
Rafael Ravedutti 2023-01-02 14:19:59 +01:00
parent ae1cfa2800
commit fe86c948a8

View File

@ -423,8 +423,6 @@ double computeForceLJ_4xn_half(Parameter *param, Atom *atom, Neighbor *neighbor,
MD_SIMD_FLOAT eps_vec = simd_broadcast(epsilon); MD_SIMD_FLOAT eps_vec = simd_broadcast(epsilon);
MD_SIMD_FLOAT c48_vec = simd_broadcast(48.0); MD_SIMD_FLOAT c48_vec = simd_broadcast(48.0);
MD_SIMD_FLOAT c05_vec = simd_broadcast(0.5); MD_SIMD_FLOAT c05_vec = simd_broadcast(0.5);
double S = getTimeStamp();
LIKWID_MARKER_START("force");
for(int ci = 0; ci < atom->Nclusters_local; ci++) { for(int ci = 0; ci < atom->Nclusters_local; ci++) {
int ci_vec_base = CI_VECTOR_BASE_INDEX(ci); int ci_vec_base = CI_VECTOR_BASE_INDEX(ci);
@ -436,6 +434,9 @@ double computeForceLJ_4xn_half(Parameter *param, Atom *atom, Neighbor *neighbor,
} }
} }
double S = getTimeStamp();
LIKWID_MARKER_START("force");
#pragma omp parallel for #pragma omp parallel for
for(int ci = 0; ci < atom->Nclusters_local; ci++) { for(int ci = 0; ci < atom->Nclusters_local; ci++) {
int ci_cj0 = CJ0_FROM_CI(ci); int ci_cj0 = CJ0_FROM_CI(ci);
@ -607,8 +608,6 @@ double computeForceLJ_4xn_full(Parameter *param, Atom *atom, Neighbor *neighbor,
MD_SIMD_FLOAT eps_vec = simd_broadcast(epsilon); MD_SIMD_FLOAT eps_vec = simd_broadcast(epsilon);
MD_SIMD_FLOAT c48_vec = simd_broadcast(48.0); MD_SIMD_FLOAT c48_vec = simd_broadcast(48.0);
MD_SIMD_FLOAT c05_vec = simd_broadcast(0.5); MD_SIMD_FLOAT c05_vec = simd_broadcast(0.5);
double S = getTimeStamp();
LIKWID_MARKER_START("force");
for(int ci = 0; ci < atom->Nclusters_local; ci++) { for(int ci = 0; ci < atom->Nclusters_local; ci++) {
int ci_vec_base = CI_VECTOR_BASE_INDEX(ci); int ci_vec_base = CI_VECTOR_BASE_INDEX(ci);
@ -620,6 +619,9 @@ double computeForceLJ_4xn_full(Parameter *param, Atom *atom, Neighbor *neighbor,
} }
} }
double S = getTimeStamp();
LIKWID_MARKER_START("force");
#pragma omp parallel for #pragma omp parallel for
for(int ci = 0; ci < atom->Nclusters_local; ci++) { for(int ci = 0; ci < atom->Nclusters_local; ci++) {
int ci_cj0 = CJ0_FROM_CI(ci); int ci_cj0 = CJ0_FROM_CI(ci);