Merge pull request #7 from RRZE-HPC/mucosim23

Mucosim23
This commit is contained in:
rafaelravedutti 2024-01-17 15:14:08 +01:00 committed by GitHub
commit a6a269703d
5 changed files with 43 additions and 12 deletions

View File

@ -66,7 +66,7 @@ double computeForceLJ_ref(Parameter *param, Atom *atom, Neighbor *neighbor, Stat
{ {
LIKWID_MARKER_START("force"); LIKWID_MARKER_START("force");
#pragma omp for #pragma omp for schedule(runtime)
for(int ci = 0; ci < atom->Nclusters_local; ci++) { for(int ci = 0; ci < atom->Nclusters_local; ci++) {
int ci_cj0 = CJ0_FROM_CI(ci); int ci_cj0 = CJ0_FROM_CI(ci);
int ci_cj1 = CJ1_FROM_CI(ci); int ci_cj1 = CJ1_FROM_CI(ci);
@ -213,7 +213,7 @@ double computeForceLJ_2xnn_half(Parameter *param, Atom *atom, Neighbor *neighbor
#endif #endif
*/ */
#pragma omp for #pragma omp for schedule(runtime)
for(int ci = 0; ci < atom->Nclusters_local; ci++) { for(int ci = 0; ci < atom->Nclusters_local; ci++) {
int ci_cj0 = CJ0_FROM_CI(ci); int ci_cj0 = CJ0_FROM_CI(ci);
#if CLUSTER_M > CLUSTER_N #if CLUSTER_M > CLUSTER_N
@ -427,7 +427,7 @@ double computeForceLJ_2xnn_full(Parameter *param, Atom *atom, Neighbor *neighbor
{ {
LIKWID_MARKER_START("force"); LIKWID_MARKER_START("force");
#pragma omp for #pragma omp for schedule(runtime)
for(int ci = 0; ci < atom->Nclusters_local; ci++) { for(int ci = 0; ci < atom->Nclusters_local; ci++) {
int ci_cj0 = CJ0_FROM_CI(ci); int ci_cj0 = CJ0_FROM_CI(ci);
#if CLUSTER_M > CLUSTER_N #if CLUSTER_M > CLUSTER_N
@ -595,7 +595,7 @@ double computeForceLJ_4xn_half(Parameter *param, Atom *atom, Neighbor *neighbor,
{ {
LIKWID_MARKER_START("force"); LIKWID_MARKER_START("force");
#pragma omp for #pragma omp for schedule(runtime)
for(int ci = 0; ci < atom->Nclusters_local; ci++) { for(int ci = 0; ci < atom->Nclusters_local; ci++) {
int ci_cj0 = CJ0_FROM_CI(ci); int ci_cj0 = CJ0_FROM_CI(ci);
#if CLUSTER_M > CLUSTER_N #if CLUSTER_M > CLUSTER_N
@ -869,7 +869,7 @@ double computeForceLJ_4xn_full(Parameter *param, Atom *atom, Neighbor *neighbor,
{ {
LIKWID_MARKER_START("force"); LIKWID_MARKER_START("force");
#pragma omp for #pragma omp for schedule(runtime)
for(int ci = 0; ci < atom->Nclusters_local; ci++) { for(int ci = 0; ci < atom->Nclusters_local; ci++) {
int ci_cj0 = CJ0_FROM_CI(ci); int ci_cj0 = CJ0_FROM_CI(ci);
#if CLUSTER_M > CLUSTER_N #if CLUSTER_M > CLUSTER_N

View File

@ -5,6 +5,7 @@
* license that can be found in the LICENSE file. * license that can be found in the LICENSE file.
*/ */
#include <stdio.h> #include <stdio.h>
#include <string.h>
#include <math.h> #include <math.h>
#include <omp.h> #include <omp.h>
//-- //--
@ -311,12 +312,27 @@ int main(int argc, char** argv) {
printf(HLINE); printf(HLINE);
int nthreads = 0; int nthreads = 0;
int chunkSize = 0;
omp_sched_t schedKind;
char schedType[10];
#pragma omp parallel #pragma omp parallel
#pragma omp master
{ {
nthreads = omp_get_num_threads(); omp_get_schedule(&schedKind, &chunkSize);
switch (schedKind)
{
case omp_sched_static: strcpy(schedType, "static"); break;
case omp_sched_dynamic: strcpy(schedType, "dynamic"); break;
case omp_sched_guided: strcpy(schedType, "guided"); break;
case omp_sched_auto: strcpy(schedType, "auto"); break;
}
nthreads = omp_get_max_threads();
} }
printf("Num threads: %d\n", nthreads); printf("Num threads: %d\n", nthreads);
printf("Schedule: (%s,%d)\n", schedType, chunkSize);
printf("Performance: %.2f million atom updates per second\n", printf("Performance: %.2f million atom updates per second\n",
1e-6 * (double) atom.Natoms * param.ntimes / timer[TOTAL]); 1e-6 * (double) atom.Natoms * param.ntimes / timer[TOTAL]);

View File

@ -1,7 +1,7 @@
CC = icc CC = icc
LINKER = $(CC) LINKER = $(CC)
OPENMP = #-qopenmp OPENMP = -qopenmp
PROFILE = #-profile-functions -g -pg PROFILE = #-profile-functions -g -pg
ifeq ($(ISA),AVX512) ifeq ($(ISA),AVX512)

View File

@ -41,7 +41,7 @@ double computeForceLJFullNeigh_plain_c(Parameter *param, Atom *atom, Neighbor *n
{ {
LIKWID_MARKER_START("force"); LIKWID_MARKER_START("force");
#pragma omp for #pragma omp for schedule(runtime)
for(int i = 0; i < Nlocal; i++) { for(int i = 0; i < Nlocal; i++) {
neighs = &neighbor->neighbors[i * neighbor->maxneighs]; neighs = &neighbor->neighbors[i * neighbor->maxneighs];
int numneighs = neighbor->numneigh[i]; int numneighs = neighbor->numneigh[i];
@ -131,7 +131,7 @@ double computeForceLJHalfNeigh(Parameter *param, Atom *atom, Neighbor *neighbor,
{ {
LIKWID_MARKER_START("forceLJ-halfneigh"); LIKWID_MARKER_START("forceLJ-halfneigh");
#pragma omp for #pragma omp for schedule(runtime)
for(int i = 0; i < Nlocal; i++) { for(int i = 0; i < Nlocal; i++) {
neighs = &neighbor->neighbors[i * neighbor->maxneighs]; neighs = &neighbor->neighbors[i * neighbor->maxneighs];
int numneighs = neighbor->numneigh[i]; int numneighs = neighbor->numneigh[i];
@ -227,7 +227,7 @@ double computeForceLJFullNeigh_simd(Parameter *param, Atom *atom, Neighbor *neig
{ {
LIKWID_MARKER_START("force"); LIKWID_MARKER_START("force");
#pragma omp for #pragma omp for schedule(runtime)
for(int i = 0; i < Nlocal; i++) { for(int i = 0; i < Nlocal; i++) {
neighs = &neighbor->neighbors[i * neighbor->maxneighs]; neighs = &neighbor->neighbors[i * neighbor->maxneighs];
int numneighs = neighbor->numneigh[i]; int numneighs = neighbor->numneigh[i];

View File

@ -295,12 +295,27 @@ int main(int argc, char** argv) {
printf(HLINE); printf(HLINE);
int nthreads = 0; int nthreads = 0;
int chunkSize = 0;
omp_sched_t schedKind;
char schedType[10];
#pragma omp parallel #pragma omp parallel
#pragma omp master
{ {
nthreads = omp_get_num_threads(); omp_get_schedule(&schedKind, &chunkSize);
switch (schedKind)
{
case omp_sched_static: strcpy(schedType, "static"); break;
case omp_sched_dynamic: strcpy(schedType, "dynamic"); break;
case omp_sched_guided: strcpy(schedType, "guided"); break;
case omp_sched_auto: strcpy(schedType, "auto"); break;
}
nthreads = omp_get_max_threads();
} }
printf("Num threads: %d\n", nthreads); printf("Num threads: %d\n", nthreads);
printf("Schedule: (%s,%d)\n", schedType, chunkSize);
printf("Performance: %.2f million atom updates per second\n", printf("Performance: %.2f million atom updates per second\n",
1e-6 * (double) atom.Natoms * param.ntimes / timer[TOTAL]); 1e-6 * (double) atom.Natoms * param.ntimes / timer[TOTAL]);