From 4cfa664533c3bd255c83074c5f08a91c45785609 Mon Sep 17 00:00:00 2001 From: TejeshPala Date: Thu, 11 Jan 2024 17:09:18 +0100 Subject: [PATCH] Add schedule(runtime) to force kernels and print the schedule in main Signed-off-by: TejeshPala --- gromacs/force_lj.c | 10 +++++----- gromacs/main.c | 16 ++++++++++++++++ lammps/force_lj.c | 6 +++--- lammps/main.c | 19 +++++++++++++++++-- 4 files changed, 41 insertions(+), 10 deletions(-) diff --git a/gromacs/force_lj.c b/gromacs/force_lj.c index ca280ea..84e384a 100644 --- a/gromacs/force_lj.c +++ b/gromacs/force_lj.c @@ -66,7 +66,7 @@ double computeForceLJ_ref(Parameter *param, Atom *atom, Neighbor *neighbor, Stat { LIKWID_MARKER_START("force"); - #pragma omp for + #pragma omp for schedule(runtime) for(int ci = 0; ci < atom->Nclusters_local; ci++) { int ci_cj0 = CJ0_FROM_CI(ci); int ci_cj1 = CJ1_FROM_CI(ci); @@ -213,7 +213,7 @@ double computeForceLJ_2xnn_half(Parameter *param, Atom *atom, Neighbor *neighbor #endif */ - #pragma omp for + #pragma omp for schedule(runtime) for(int ci = 0; ci < atom->Nclusters_local; ci++) { int ci_cj0 = CJ0_FROM_CI(ci); #if CLUSTER_M > CLUSTER_N @@ -427,7 +427,7 @@ double computeForceLJ_2xnn_full(Parameter *param, Atom *atom, Neighbor *neighbor { LIKWID_MARKER_START("force"); - #pragma omp for + #pragma omp for schedule(runtime) for(int ci = 0; ci < atom->Nclusters_local; ci++) { int ci_cj0 = CJ0_FROM_CI(ci); #if CLUSTER_M > CLUSTER_N @@ -595,7 +595,7 @@ double computeForceLJ_4xn_half(Parameter *param, Atom *atom, Neighbor *neighbor, { LIKWID_MARKER_START("force"); - #pragma omp for + #pragma omp for schedule(runtime) for(int ci = 0; ci < atom->Nclusters_local; ci++) { int ci_cj0 = CJ0_FROM_CI(ci); #if CLUSTER_M > CLUSTER_N @@ -869,7 +869,7 @@ double computeForceLJ_4xn_full(Parameter *param, Atom *atom, Neighbor *neighbor, { LIKWID_MARKER_START("force"); - #pragma omp for + #pragma omp for schedule(runtime) for(int ci = 0; ci < atom->Nclusters_local; ci++) { int ci_cj0 = CJ0_FROM_CI(ci); #if CLUSTER_M 
> CLUSTER_N diff --git a/gromacs/main.c b/gromacs/main.c index e849b12..50ff3df 100644 --- a/gromacs/main.c +++ b/gromacs/main.c @@ -5,6 +5,7 @@ * license that can be found in the LICENSE file. */ #include +#include #include #include //-- @@ -311,12 +312,27 @@ int main(int argc, char** argv) { printf(HLINE); int nthreads = 0; + int chunkSize = 0; + omp_sched_t schedKind; + char schedType[10]; #pragma omp parallel +#pragma omp master { + omp_get_schedule(&schedKind, &chunkSize); + + switch (schedKind) + { + case omp_sched_static: strcpy(schedType, "static"); break; + case omp_sched_dynamic: strcpy(schedType, "dynamic"); break; + case omp_sched_guided: strcpy(schedType, "guided"); break; + case omp_sched_auto: strcpy(schedType, "auto"); break; + } + nthreads = omp_get_num_threads(); } printf("Num threads: %d\n", nthreads); + printf("Schedule: (%s,%d)\n", schedType, chunkSize); printf("Performance: %.2f million atom updates per second\n", 1e-6 * (double) atom.Natoms * param.ntimes / timer[TOTAL]); diff --git a/lammps/force_lj.c b/lammps/force_lj.c index ac34297..599d14d 100644 --- a/lammps/force_lj.c +++ b/lammps/force_lj.c @@ -41,7 +41,7 @@ double computeForceLJFullNeigh_plain_c(Parameter *param, Atom *atom, Neighbor *n { LIKWID_MARKER_START("force"); - #pragma omp for + #pragma omp for schedule(runtime) for(int i = 0; i < Nlocal; i++) { neighs = &neighbor->neighbors[i * neighbor->maxneighs]; int numneighs = neighbor->numneigh[i]; @@ -131,7 +131,7 @@ double computeForceLJHalfNeigh(Parameter *param, Atom *atom, Neighbor *neighbor, { LIKWID_MARKER_START("forceLJ-halfneigh"); - #pragma omp for + #pragma omp for schedule(runtime) for(int i = 0; i < Nlocal; i++) { neighs = &neighbor->neighbors[i * neighbor->maxneighs]; int numneighs = neighbor->numneigh[i]; @@ -227,7 +227,7 @@ double computeForceLJFullNeigh_simd(Parameter *param, Atom *atom, Neighbor *neig { LIKWID_MARKER_START("force"); - #pragma omp for + #pragma omp for schedule(runtime) for(int i = 0; i < Nlocal; i++) 
{ neighs = &neighbor->neighbors[i * neighbor->maxneighs]; int numneighs = neighbor->numneigh[i]; diff --git a/lammps/main.c b/lammps/main.c index 145c812..d769a65 100644 --- a/lammps/main.c +++ b/lammps/main.c @@ -295,13 +295,28 @@ int main(int argc, char** argv) { printf(HLINE); int nthreads = 0; + int chunkSize = 0; + omp_sched_t schedKind; + char schedType[10]; #pragma omp parallel +#pragma omp master { - nthreads = omp_get_num_threads(); + omp_get_schedule(&schedKind, &chunkSize); + + switch (schedKind) + { + case omp_sched_static: strcpy(schedType, "static"); break; + case omp_sched_dynamic: strcpy(schedType, "dynamic"); break; + case omp_sched_guided: strcpy(schedType, "guided"); break; + case omp_sched_auto: strcpy(schedType, "auto"); break; + } + + nthreads = omp_get_num_threads(); } printf("Num threads: %d\n", nthreads); - + printf("Schedule: (%s,%d)\n", schedType, chunkSize); + printf("Performance: %.2f million atom updates per second\n", 1e-6 * (double) atom.Natoms * param.ntimes / timer[TOTAL]); #ifdef COMPUTE_STATS