Merge pull request #5 from scarboot/master

Move likwid marker calls into OpenMP parallel region
This commit is contained in:
rafaelravedutti 2023-01-22 17:20:02 +01:00 committed by GitHub
commit 01cc05a5d6
3 changed files with 63 additions and 9 deletions

View File

@ -35,9 +35,12 @@ double computeForceLJ_ref(Parameter *param, Atom *atom, Neighbor *neighbor, Stat
} }
double S = getTimeStamp(); double S = getTimeStamp();
#pragma omp parallel
{
LIKWID_MARKER_START("force"); LIKWID_MARKER_START("force");
#pragma omp parallel for #pragma omp for
for(int ci = 0; ci < atom->Nclusters_local; ci++) { for(int ci = 0; ci < atom->Nclusters_local; ci++) {
int ci_cj0 = CJ0_FROM_CI(ci); int ci_cj0 = CJ0_FROM_CI(ci);
int ci_cj1 = CJ1_FROM_CI(ci); int ci_cj1 = CJ1_FROM_CI(ci);
@ -119,6 +122,8 @@ double computeForceLJ_ref(Parameter *param, Atom *atom, Neighbor *neighbor, Stat
} }
LIKWID_MARKER_STOP("force"); LIKWID_MARKER_STOP("force");
}
double E = getTimeStamp(); double E = getTimeStamp();
DEBUG_MESSAGE("computeForceLJ end\n"); DEBUG_MESSAGE("computeForceLJ end\n");
return E-S; return E-S;
@ -149,9 +154,12 @@ double computeForceLJ_2xnn_half(Parameter *param, Atom *atom, Neighbor *neighbor
} }
double S = getTimeStamp(); double S = getTimeStamp();
#pragma omp parallel
{
LIKWID_MARKER_START("force"); LIKWID_MARKER_START("force");
#pragma omp parallel for #pragma omp for
for(int ci = 0; ci < atom->Nclusters_local; ci++) { for(int ci = 0; ci < atom->Nclusters_local; ci++) {
int ci_cj0 = CJ0_FROM_CI(ci); int ci_cj0 = CJ0_FROM_CI(ci);
#if CLUSTER_M > CLUSTER_N #if CLUSTER_M > CLUSTER_N
@ -266,6 +274,8 @@ double computeForceLJ_2xnn_half(Parameter *param, Atom *atom, Neighbor *neighbor
} }
LIKWID_MARKER_STOP("force"); LIKWID_MARKER_STOP("force");
}
double E = getTimeStamp(); double E = getTimeStamp();
DEBUG_MESSAGE("computeForceLJ_2xnn end\n"); DEBUG_MESSAGE("computeForceLJ_2xnn end\n");
return E-S; return E-S;
@ -296,9 +306,12 @@ double computeForceLJ_2xnn_full(Parameter *param, Atom *atom, Neighbor *neighbor
} }
double S = getTimeStamp(); double S = getTimeStamp();
#pragma omp parallel
{
LIKWID_MARKER_START("force"); LIKWID_MARKER_START("force");
#pragma omp parallel for #pragma omp for
for(int ci = 0; ci < atom->Nclusters_local; ci++) { for(int ci = 0; ci < atom->Nclusters_local; ci++) {
int ci_cj0 = CJ0_FROM_CI(ci); int ci_cj0 = CJ0_FROM_CI(ci);
#if CLUSTER_M > CLUSTER_N #if CLUSTER_M > CLUSTER_N
@ -398,6 +411,8 @@ double computeForceLJ_2xnn_full(Parameter *param, Atom *atom, Neighbor *neighbor
} }
LIKWID_MARKER_STOP("force"); LIKWID_MARKER_STOP("force");
}
double E = getTimeStamp(); double E = getTimeStamp();
DEBUG_MESSAGE("computeForceLJ_2xnn end\n"); DEBUG_MESSAGE("computeForceLJ_2xnn end\n");
return E-S; return E-S;
@ -435,9 +450,12 @@ double computeForceLJ_4xn_half(Parameter *param, Atom *atom, Neighbor *neighbor,
} }
double S = getTimeStamp(); double S = getTimeStamp();
#pragma omp parallel
{
LIKWID_MARKER_START("force"); LIKWID_MARKER_START("force");
#pragma omp parallel for #pragma omp for
for(int ci = 0; ci < atom->Nclusters_local; ci++) { for(int ci = 0; ci < atom->Nclusters_local; ci++) {
int ci_cj0 = CJ0_FROM_CI(ci); int ci_cj0 = CJ0_FROM_CI(ci);
#if CLUSTER_M > CLUSTER_N #if CLUSTER_M > CLUSTER_N
@ -591,6 +609,8 @@ double computeForceLJ_4xn_half(Parameter *param, Atom *atom, Neighbor *neighbor,
} }
LIKWID_MARKER_STOP("force"); LIKWID_MARKER_STOP("force");
}
double E = getTimeStamp(); double E = getTimeStamp();
DEBUG_MESSAGE("computeForceLJ_4xn end\n"); DEBUG_MESSAGE("computeForceLJ_4xn end\n");
return E-S; return E-S;
@ -620,9 +640,12 @@ double computeForceLJ_4xn_full(Parameter *param, Atom *atom, Neighbor *neighbor,
} }
double S = getTimeStamp(); double S = getTimeStamp();
#pragma omp parallel
{
LIKWID_MARKER_START("force"); LIKWID_MARKER_START("force");
#pragma omp parallel for #pragma omp for
for(int ci = 0; ci < atom->Nclusters_local; ci++) { for(int ci = 0; ci < atom->Nclusters_local; ci++) {
int ci_cj0 = CJ0_FROM_CI(ci); int ci_cj0 = CJ0_FROM_CI(ci);
#if CLUSTER_M > CLUSTER_N #if CLUSTER_M > CLUSTER_N
@ -751,6 +774,8 @@ double computeForceLJ_4xn_full(Parameter *param, Atom *atom, Neighbor *neighbor,
} }
LIKWID_MARKER_STOP("force"); LIKWID_MARKER_STOP("force");
}
double E = getTimeStamp(); double E = getTimeStamp();
DEBUG_MESSAGE("computeForceLJ_4xn end\n"); DEBUG_MESSAGE("computeForceLJ_4xn end\n");
return E-S; return E-S;

View File

@ -31,8 +31,12 @@ double computeForceEam(Eam* eam, Parameter* param, Atom *atom, Neighbor *neighbo
int nrho = eam->nrho; int nrho_tot = eam->nrho_tot; int nrho = eam->nrho; int nrho_tot = eam->nrho_tot;
double S = getTimeStamp(); double S = getTimeStamp();
#pragma omp parallel
{
LIKWID_MARKER_START("force_eam_fp"); LIKWID_MARKER_START("force_eam_fp");
#pragma omp parallel for
#pragma omp for
for(int i = 0; i < Nlocal; i++) { for(int i = 0; i < Nlocal; i++) {
neighs = &neighbor->neighbors[i * neighbor->maxneighs]; neighs = &neighbor->neighbors[i * neighbor->maxneighs];
int numneighs = neighbor->numneigh[i]; int numneighs = neighbor->numneigh[i];
@ -95,13 +99,19 @@ double computeForceEam(Eam* eam, Parameter* param, Atom *atom, Neighbor *neighbo
} }
LIKWID_MARKER_STOP("force_eam_fp"); LIKWID_MARKER_STOP("force_eam_fp");
}
// We still need to update fp for PBC atoms // We still need to update fp for PBC atoms
for(int i = 0; i < atom->Nghost; i++) { for(int i = 0; i < atom->Nghost; i++) {
fp[Nlocal + i] = fp[atom->border_map[i]]; fp[Nlocal + i] = fp[atom->border_map[i]];
} }
#pragma omp parallel
{
LIKWID_MARKER_START("force_eam"); LIKWID_MARKER_START("force_eam");
#pragma omp for
for(int i = 0; i < Nlocal; i++) { for(int i = 0; i < Nlocal; i++) {
neighs = &neighbor->neighbors[i * neighbor->maxneighs]; neighs = &neighbor->neighbors[i * neighbor->maxneighs];
int numneighs = neighbor->numneigh[i]; int numneighs = neighbor->numneigh[i];
@ -192,6 +202,8 @@ double computeForceEam(Eam* eam, Parameter* param, Atom *atom, Neighbor *neighbo
} }
LIKWID_MARKER_STOP("force_eam"); LIKWID_MARKER_STOP("force_eam");
}
double E = getTimeStamp(); double E = getTimeStamp();
return E-S; return E-S;
} }

View File

@ -34,9 +34,12 @@ double computeForceLJFullNeigh_plain_c(Parameter *param, Atom *atom, Neighbor *n
} }
double S = getTimeStamp(); double S = getTimeStamp();
#pragma omp parallel
{
LIKWID_MARKER_START("force"); LIKWID_MARKER_START("force");
#pragma omp parallel for #pragma omp for
for(int i = 0; i < Nlocal; i++) { for(int i = 0; i < Nlocal; i++) {
neighs = &neighbor->neighbors[i * neighbor->maxneighs]; neighs = &neighbor->neighbors[i * neighbor->maxneighs];
int numneighs = neighbor->numneigh[i]; int numneighs = neighbor->numneigh[i];
@ -90,6 +93,8 @@ double computeForceLJFullNeigh_plain_c(Parameter *param, Atom *atom, Neighbor *n
} }
LIKWID_MARKER_STOP("force"); LIKWID_MARKER_STOP("force");
}
double E = getTimeStamp(); double E = getTimeStamp();
return E-S; return E-S;
} }
@ -110,8 +115,12 @@ double computeForceLJHalfNeigh(Parameter *param, Atom *atom, Neighbor *neighbor,
} }
double S = getTimeStamp(); double S = getTimeStamp();
#pragma omp parallel
{
LIKWID_MARKER_START("forceLJ-halfneigh"); LIKWID_MARKER_START("forceLJ-halfneigh");
#pragma omp for
for(int i = 0; i < Nlocal; i++) { for(int i = 0; i < Nlocal; i++) {
neighs = &neighbor->neighbors[i * neighbor->maxneighs]; neighs = &neighbor->neighbors[i * neighbor->maxneighs];
int numneighs = neighbor->numneigh[i]; int numneighs = neighbor->numneigh[i];
@ -171,6 +180,8 @@ double computeForceLJHalfNeigh(Parameter *param, Atom *atom, Neighbor *neighbor,
} }
LIKWID_MARKER_STOP("forceLJ-halfneigh"); LIKWID_MARKER_STOP("forceLJ-halfneigh");
}
double E = getTimeStamp(); double E = getTimeStamp();
return E-S; return E-S;
} }
@ -189,7 +200,6 @@ double computeForceLJFullNeigh_simd(Parameter *param, Atom *atom, Neighbor *neig
} }
double S = getTimeStamp(); double S = getTimeStamp();
LIKWID_MARKER_START("force");
#ifndef __SIMD_KERNEL__ #ifndef __SIMD_KERNEL__
fprintf(stderr, "Error: SIMD kernel not implemented for specified instruction set!"); fprintf(stderr, "Error: SIMD kernel not implemented for specified instruction set!");
@ -201,7 +211,12 @@ double computeForceLJFullNeigh_simd(Parameter *param, Atom *atom, Neighbor *neig
MD_SIMD_FLOAT c48_vec = simd_broadcast(48.0); MD_SIMD_FLOAT c48_vec = simd_broadcast(48.0);
MD_SIMD_FLOAT c05_vec = simd_broadcast(0.5); MD_SIMD_FLOAT c05_vec = simd_broadcast(0.5);
#pragma omp parallel for
#pragma omp parallel
{
LIKWID_MARKER_START("force");
#pragma omp for
for(int i = 0; i < Nlocal; i++) { for(int i = 0; i < Nlocal; i++) {
neighs = &neighbor->neighbors[i * neighbor->maxneighs]; neighs = &neighbor->neighbors[i * neighbor->maxneighs];
int numneighs = neighbor->numneigh[i]; int numneighs = neighbor->numneigh[i];
@ -245,6 +260,8 @@ double computeForceLJFullNeigh_simd(Parameter *param, Atom *atom, Neighbor *neig
#endif #endif
LIKWID_MARKER_STOP("force"); LIKWID_MARKER_STOP("force");
}
double E = getTimeStamp(); double E = getTimeStamp();
return E-S; return E-S;
} }