Merge pull request #7 from RRZE-HPC/mucosim23

Mucosim23
omp_get_max_threads instead of omp_get_num_threads for gcc compiler adaption
2024-01-17 15:14:08 +01:00 · 2024-01-13 15:09:03 +01:00 · 2024-01-11 17:16:17 +01:00 · 2024-01-11 17:09:18 +01:00 · 2023-12-13 10:52:55 +01:00 · 2023-11-21 17:11:18 +01:00
5 changed files with 60 additions and 9 deletions
@@ -66,7 +66,7 @@ double computeForceLJ_ref(Parameter *param, Atom *atom, Neighbor *neighbor, Stat
    {
    LIKWID_MARKER_START("force");

-    #pragma omp for
+    #pragma omp for schedule(runtime)
    for(int ci = 0; ci < atom->Nclusters_local; ci++) {
        int ci_cj0 = CJ0_FROM_CI(ci);
        int ci_cj1 = CJ1_FROM_CI(ci);
@@ -213,7 +213,7 @@ double computeForceLJ_2xnn_half(Parameter *param, Atom *atom, Neighbor *neighbor
    #endif
    */

-    #pragma omp for
+    #pragma omp for schedule(runtime)
    for(int ci = 0; ci < atom->Nclusters_local; ci++) {
        int ci_cj0 = CJ0_FROM_CI(ci);
        #if CLUSTER_M > CLUSTER_N
@@ -427,7 +427,7 @@ double computeForceLJ_2xnn_full(Parameter *param, Atom *atom, Neighbor *neighbor
    {
    LIKWID_MARKER_START("force");

-    #pragma omp for
+    #pragma omp for schedule(runtime)
    for(int ci = 0; ci < atom->Nclusters_local; ci++) {
        int ci_cj0 = CJ0_FROM_CI(ci);
        #if CLUSTER_M > CLUSTER_N
@@ -595,7 +595,7 @@ double computeForceLJ_4xn_half(Parameter *param, Atom *atom, Neighbor *neighbor,
    {
    LIKWID_MARKER_START("force");

-    #pragma omp for
+    #pragma omp for schedule(runtime)
    for(int ci = 0; ci < atom->Nclusters_local; ci++) {
        int ci_cj0 = CJ0_FROM_CI(ci);
        #if CLUSTER_M > CLUSTER_N
@@ -869,7 +869,7 @@ double computeForceLJ_4xn_full(Parameter *param, Atom *atom, Neighbor *neighbor,
    {
    LIKWID_MARKER_START("force");

-    #pragma omp for
+    #pragma omp for schedule(runtime)
    for(int ci = 0; ci < atom->Nclusters_local; ci++) {
        int ci_cj0 = CJ0_FROM_CI(ci);
        #if CLUSTER_M > CLUSTER_N
@@ -5,7 +5,9 @@
 * license that can be found in the LICENSE file.
 */
 #include <stdio.h>
+#include <string.h>
 #include <math.h>
+#include <omp.h>
 //--
 #include <likwid-marker.h>
 //--
@@ -308,6 +310,30 @@ int main(int argc, char** argv) {
    printf("TOTAL %.2fs FORCE %.2fs NEIGH %.2fs REST %.2fs\n",
            timer[TOTAL], timer[FORCE], timer[NEIGH], timer[TOTAL]-timer[FORCE]-timer[NEIGH]);
    printf(HLINE);
+    
+    int nthreads = 0;
+    int chunkSize = 0;
+    omp_sched_t schedKind;
+    char schedType[10];
+#pragma omp parallel
+#pragma omp master
+    {
+	omp_get_schedule(&schedKind, &chunkSize);
+
+    	switch (schedKind)
+    	{
+        	case omp_sched_static:  strcpy(schedType, "static"); break;
+        	case omp_sched_dynamic: strcpy(schedType, "dynamic"); break;
+        	case omp_sched_guided:  strcpy(schedType, "guided"); break;
+        	case omp_sched_auto:    strcpy(schedType, "auto"); break;
+    	}
+
+    	nthreads = omp_get_max_threads();
+    }
+
+    printf("Num threads: %d\n", nthreads);
+    printf("Schedule: (%s,%d)\n", schedType, chunkSize);
+
    printf("Performance: %.2f million atom updates per second\n",
            1e-6 * (double) atom.Natoms * param.ntimes / timer[TOTAL]);
    #ifdef COMPUTE_STATS
@@ -1,7 +1,7 @@
 CC  = icc
 LINKER = $(CC)

-OPENMP  = #-qopenmp
+OPENMP  = -qopenmp
 PROFILE  = #-profile-functions -g  -pg

 ifeq ($(ISA),AVX512)
@@ -41,7 +41,7 @@ double computeForceLJFullNeigh_plain_c(Parameter *param, Atom *atom, Neighbor *n
    {
    LIKWID_MARKER_START("force");

-    #pragma omp for
+    #pragma omp for schedule(runtime)
    for(int i = 0; i < Nlocal; i++) {
        neighs = &neighbor->neighbors[i * neighbor->maxneighs];
        int numneighs = neighbor->numneigh[i];
@@ -131,7 +131,7 @@ double computeForceLJHalfNeigh(Parameter *param, Atom *atom, Neighbor *neighbor,
    {
    LIKWID_MARKER_START("forceLJ-halfneigh");

-    #pragma omp for
+    #pragma omp for schedule(runtime)
    for(int i = 0; i < Nlocal; i++) {
        neighs = &neighbor->neighbors[i * neighbor->maxneighs];
        int numneighs = neighbor->numneigh[i];
@@ -227,7 +227,7 @@ double computeForceLJFullNeigh_simd(Parameter *param, Atom *atom, Neighbor *neig
    {
    LIKWID_MARKER_START("force");

-    #pragma omp for
+    #pragma omp for schedule(runtime)
    for(int i = 0; i < Nlocal; i++) {
        neighs = &neighbor->neighbors[i * neighbor->maxneighs];
        int numneighs = neighbor->numneigh[i];
@@ -11,6 +11,7 @@
 #include <limits.h>
 #include <math.h>
 #include <float.h>
+#include <omp.h>

 #include <likwid-marker.h>

@@ -292,6 +293,30 @@ int main(int argc, char** argv) {
    printf("TOTAL %.2fs FORCE %.2fs NEIGH %.2fs REST %.2fs\n",
            timer[TOTAL], timer[FORCE], timer[NEIGH], timer[TOTAL]-timer[FORCE]-timer[NEIGH]);
    printf(HLINE);
+
+    int nthreads = 0;
+    int chunkSize = 0;
+    omp_sched_t schedKind;
+    char schedType[10];
+#pragma omp parallel
+#pragma omp master
+    {
+    	omp_get_schedule(&schedKind, &chunkSize);
+
+    	switch (schedKind)
+    	{
+        	case omp_sched_static:  strcpy(schedType, "static"); break;
+        	case omp_sched_dynamic: strcpy(schedType, "dynamic"); break;
+        	case omp_sched_guided:  strcpy(schedType, "guided"); break;
+        	case omp_sched_auto:    strcpy(schedType, "auto"); break;
+    	}
+	
+	nthreads = omp_get_max_threads();
+    }
+
+    printf("Num threads: %d\n", nthreads);
+    printf("Schedule: (%s,%d)\n", schedType, chunkSize);
+    
    printf("Performance: %.2f million atom updates per second\n",
            1e-6 * (double) atom.Natoms * param.ntimes / timer[TOTAL]);
 #ifdef COMPUTE_STATS
Author	SHA1	Message	Date
rafaelravedutti	a6a269703d	Merge pull request #7 from RRZE-HPC/mucosim23 Mucosim23	2024-01-17 15:14:08 +01:00
TejeshPala	7ee250161a	omp_get_max_threads instead of omp_get_num_threads for gcc compiler adaption Signed-off-by: TejeshPala <tejesh.pala@fau.de>	2024-01-13 15:09:03 +01:00
TejeshPala	c73efea786	include openmp in ICC Signed-off-by: TejeshPala <tejesh.pala@fau.de>	2024-01-11 17:16:17 +01:00
TejeshPala	4cfa664533	schedule options for force kernels and to print in main fn Signed-off-by: TejeshPala <tejesh.pala@fau.de>	2024-01-11 17:09:18 +01:00
Rafael Ravedutti	1837403326	Merge branch 'master' of github.com:RRZE-HPC/MD-Bench	2023-12-13 10:52:55 +01:00
TEJESH PALA	ce00aa0042	Merge pull request #6 from RRZE-HPC/mucosim23 omp print threads	2023-11-21 17:11:18 +01:00
TejeshPala	c4e5e87265	omp print threads	2023-11-21 15:31:27 +01:00