From ba8cb1ae01c4576098baf48864e9f325e9f2ede4 Mon Sep 17 00:00:00 2001
From: Erik Fabrizzi <erik.fabrizzi@fau.de>
Date: Sun, 1 Jun 2025 18:58:54 +0200
Subject: [PATCH] Add creation time, node count and off_cache metadata to benchmark data

---
 launch_bench_singlenode.py | 50 ++++++++++++++++++++------------------
 postprocess_data.py        | 43 +++++++++++++++++++++++++++-----
 templates/bench.template   | 12 ++++++---
 3 files changed, 73 insertions(+), 32 deletions(-)

diff --git a/launch_bench_singlenode.py b/launch_bench_singlenode.py
index 78a2c1e..fb662ea 100755
--- a/launch_bench_singlenode.py
+++ b/launch_bench_singlenode.py
@@ -15,19 +15,20 @@ def write_batch(batch_fpath: str, batch_content: str):
         _ = handle.write(batch_content)
 
 
-collectives = ["Reduce",
-               "Reduce_scatter",
-               "Allreduce",
-               "Allgather",
-               "Allgatherv",
-               "Scatter",
-               "Scatterv",
-               "Gather",
-               "Gatherv",
-               "Alltoall",
-               "Bcast",
-               # "Barrier"
-               ]
+collectives = [
+    "Reduce",
+    "Reduce_scatter",
+    "Allreduce",
+    "Allgather",
+    "Allgatherv",
+    "Scatter",
+    "Scatterv",
+    "Gather",
+    "Gatherv",
+    "Alltoall",
+    "Bcast",
+    # "Barrier"
+]
 
 procnt = [
     18,
@@ -35,18 +36,21 @@ procnt = [
     54,
     72
 ]
+
 mpi1_bin = "/home/hpc/ihpc/ihpc136h/workspace/prototyping/bin/IMB-MPI1"
 slurm_template = load_template("templates/bench.template")
 
-template_parameter = {"time_stamp": datetime.now().strftime("%y_%m_%d_%H-%M-%S"),
-                      "job_name": "",
-                      "output_dir": os.getcwd()+"/output/",
-                      "err_dir": os.getcwd()+"/error/",
-                      "data_dir": os.getcwd()+"/data/",
-                      "n_procs": 18,
-                      "off_mem_flag": "",
-                      "bin": mpi1_bin
-                      }
+template_parameter = {
+    "time_stamp": datetime.now().strftime("%y_%m_%d_%H-%M-%S"),  # evaluated once, when this dict is built
+    "job_name": "",
+    "output_dir": os.getcwd()+"/output/",
+    "err_dir": os.getcwd()+"/error/",
+    "data_dir": os.getcwd()+"/data/",
+    "n_procs": 18,
+    "off_mem_flag": "-off_cache 50",  # fills {off_mem_flag} at the end of the srun line in templates/bench.template
+    "bin": mpi1_bin,
+    "n_nodes": 1  # fills "#SBATCH --nodes={n_nodes}" in templates/bench.template
+}
 
 output_dir = os.getcwd()+"/output/"
 err_dir = os.getcwd()+"/error/"
@@ -76,4 +80,4 @@ for n_procs in procnt:
         log += "\tSTDOUT:" + result.stdout + "\n"
         log += "\tSTDERR:" + result.stderr + "\n"
 print(log)
-_ = subprocess.run(["./clean.sh"])
+# _ = subprocess.run(["./clean.sh"])
diff --git a/postprocess_data.py b/postprocess_data.py
index ec6bf58..df7a88b 100755
--- a/postprocess_data.py
+++ b/postprocess_data.py
@@ -1,3 +1,4 @@
+# Collect the benchmark result files under data/ into a single data.csv table.
 import pandas as pd
 import os
 
@@ -11,6 +12,9 @@ data_markers = {
     "mpi_red_datatype": "# MPI_Datatype for reductions    :",
     "mpi_red_op": "# MPI_Op",
     "end_of_table": "# All processes entering MPI_Finalize",
+    "creation_time": "# CREATION_TIME :",
+    "n_nodes": "# N_NODES       :",
+    "off_mem_flag": "# OFF_MEM_FLAG  :"
 }
 
 column_names = [
@@ -24,20 +28,29 @@ column_names = [
     "mpi_datatype",
     "mpi_red_datatype",
     "mpi_red_op",
+    "creation_time",
+    "n_nodes",
+    "off_mem_flag",
 ]
 
 data = list()
+
 for file in os.listdir("data/"):
     with open("data/"+file, 'r') as f:
         lines = f.readlines()
+
         past_preheader = False
         in_header = False
         in_body = False
-        btype = None
-        proc_num = None
-        mpi_datatype = None
-        mpi_red_datatype = None
-        mpi_red_op = None
+
+        btype = "NA"
+        proc_num = "NA"
+        mpi_datatype = "NA"
+        mpi_red_datatype = "NA"
+        mpi_red_op = "NA"
+        creation_time = "NA"
+        n_nodes = "NA"
+        off_mem_flag = "NA"
 
         for line in lines:
             if data_markers["block_separator"] in line:
@@ -55,6 +68,16 @@ for file in os.listdir("data/"):
                 elif data_markers["mpi_red_op"] in line:
                     mpi_red_op = line.split()[-1]
 
+            if not in_header and not in_body and not past_preheader:
+                if data_markers["n_nodes"] in line:
+                    n_nodes = line.split()[-1]
+                if data_markers["creation_time"] in line:
+                    creation_time = line.split()[-1]
+                if data_markers["off_mem_flag"] in line:
+                    off_mem_flag = line.split(":")[-1].replace("-off_cache", "").strip()
+                    if not off_mem_flag:  # flag absent, or passed without a value
+                        off_mem_flag = "NA"
+
             if past_preheader and in_header:
                 if data_markers["benchmark_type"] in line:
                     btype = line.split()[2]
@@ -66,7 +89,15 @@ for file in os.listdir("data/"):
                 if data_markers["end_of_table"] in line:
                     break
                 data.append([btype, proc_num]+[int(s) if s.isdigit()
-                            else float(s) for s in line.split()] + [mpi_datatype, mpi_red_datatype, mpi_red_op])
+                            else float(s) for s in line.split()] +
+                            [
+                                mpi_datatype,
+                                mpi_red_datatype,
+                                mpi_red_op,
+                                creation_time,
+                                n_nodes,
+                                off_mem_flag,
+                ])
 
 df = pd.DataFrame(data, columns=column_names)
 df.to_csv("data.csv", index=False)
diff --git a/templates/bench.template b/templates/bench.template
index 960f894..333f51c 100644
--- a/templates/bench.template
+++ b/templates/bench.template
@@ -2,7 +2,7 @@
 #SBATCH --job-name={job_name}_{n_procs}
 #SBATCH --output={output_dir}{job_name}_{n_procs}.out
 #SBATCH --error={err_dir}{job_name}_{n_procs}.err
-#SBATCH --nodes=1
+#SBATCH --nodes={n_nodes}
 #SBATCH --time=00:10:00
 #SBATCH --export=NONE
 
@@ -12,7 +12,13 @@ module load intel intelmpi likwid
 
 unset I_MPI_PMI_LIBRARY
 export  LIKWID_SILENT=1
-echo CREATION_TIME {time_stamp}
 
-srun --cpu-freq=2000000-2000000:performance  ./likwid-mpirun -np {n_procs} -mpi intelmpi -omp intel -nperdomain M:18 {bin} {job_name} -npmin {n_procs} {off_mem_flag}  > {data_dir}/{job_name}_{n_procs}.dat
+OUTPUT_FILENAME="{data_dir}/{job_name}_$SLURM_JOB_ID.dat"
+
+echo  "# CREATION_TIME : {time_stamp}" > "$OUTPUT_FILENAME"
+echo  "# N_NODES       : {n_nodes}" >> "$OUTPUT_FILENAME"
+echo  "# OFF_MEM_FLAG  : {off_mem_flag}" >> "$OUTPUT_FILENAME"
+
+srun --cpu-freq=2000000-2000000:performance  ./likwid-mpirun -np {n_procs} -mpi intelmpi -omp intel -nperdomain M:18 {bin} {job_name} -npmin {n_procs} {off_mem_flag}  >> "$OUTPUT_FILENAME"
+