Algorithm benchmarking runner and results post-processor
This commit is contained in:
@@ -3,8 +3,6 @@ import subprocess
|
|||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
|
|
||||||
################ HELPER FUNCTIONS ################
|
################ HELPER FUNCTIONS ################
|
||||||
|
|
||||||
|
|
||||||
def load_template(template_path: str):
|
def load_template(template_path: str):
|
||||||
output_template = ""
|
output_template = ""
|
||||||
with open(template_path, "r") as handle:
|
with open(template_path, "r") as handle:
|
||||||
@@ -16,7 +14,6 @@ def write_batch(batch_fpath: str, batch_content: str):
|
|||||||
with open(batch_fpath, "w") as handle:
|
with open(batch_fpath, "w") as handle:
|
||||||
_ = handle.write(batch_content)
|
_ = handle.write(batch_content)
|
||||||
|
|
||||||
|
|
||||||
################### SETUP DIRS ###################
|
################### SETUP DIRS ###################
|
||||||
output_dir = os.getcwd()+"/output/"
|
output_dir = os.getcwd()+"/output/"
|
||||||
err_dir = os.getcwd()+"/error/"
|
err_dir = os.getcwd()+"/error/"
|
||||||
@@ -33,7 +30,7 @@ if os.path.isdir(batch_files_dir) == False:
|
|||||||
os.mkdir(batch_files_dir)
|
os.mkdir(batch_files_dir)
|
||||||
|
|
||||||
################ GLOBAL DEFAULTS #################
|
################ GLOBAL DEFAULTS #################
|
||||||
mpi1_bin = "/home/hpc/ihpc/ihpc136h/workspace/prototyping/bin/IMB-MPI1"
|
mpi1_bin = "/home/hpc/ihpc/ihpc136h/workspace/mpi-benchmark-tool/bin/IMB-MPI1"
|
||||||
default_parameter = {
|
default_parameter = {
|
||||||
"time_stamp": datetime.now().strftime("%y_%m_%d_%H-%M-%S"),
|
"time_stamp": datetime.now().strftime("%y_%m_%d_%H-%M-%S"),
|
||||||
"job_name": "",
|
"job_name": "",
|
||||||
@@ -198,9 +195,9 @@ for flag in off_cache_flags:
|
|||||||
f"{collective}_{alg.strip().replace('`','').replace(' ','_').replace('/','_')}.sh")
|
f"{collective}_{alg.strip().replace('`','').replace(' ','_').replace('/','_')}.sh")
|
||||||
write_batch(batch_file,
|
write_batch(batch_file,
|
||||||
multiple_node_template.format(**multiple_node_parameter))
|
multiple_node_template.format(**multiple_node_parameter))
|
||||||
# result = subprocess.run(["sbatch", batch_files_dir+collective+".sh"],
|
result = subprocess.run(["sbatch", batch_file],
|
||||||
# capture_output=True, text=True)
|
capture_output=True, text=True)
|
||||||
# log += f"#{collective} {n_procs}" + "\n"
|
log += f"#{collective} {n_procs}" + "\n"
|
||||||
# log += "\tSTDOUT:" + result.stdout + "\n"
|
log += "\tSTDOUT:" + result.stdout + "\n"
|
||||||
# log += "\tSTDERR:" + result.stderr + "\n"
|
log += "\tSTDERR:" + result.stderr + "\n"
|
||||||
print(log)
|
print(log)
|
||||||
|
|||||||
112
postprocess_data_algs.py
Executable file
112
postprocess_data_algs.py
Executable file
@@ -0,0 +1,112 @@
|
|||||||
|
"""Post-process Intel MPI Benchmarks (IMB) output files into one CSV.

Reads every file in ``data/``, parses the IMB text output (pre-header
metadata written by the batch runner, the per-benchmark header, then the
result table) and collects all rows into ``data.csv``.

Parser state machine, driven by the ``#----`` separator lines:
  * pre-header  -- custom ``# KEY :`` metadata lines emitted by the runner
  * header      -- IMB's own ``# Benchmarking`` / ``# #processes`` block
  * body        -- the whitespace-separated result table
"""

import os

import pandas as pd

# Literal marker strings that delimit sections of an IMB output file.
data_markers = {
    "block_separator": "#----------------------------------------------------------------",
    "benchmark_type": "# Benchmarking",
    "processes_num": "# #processes = ",
    "min_bytelen": "# Minimum message length in bytes",
    "max_bytelen": "# Maximum message length in bytes",
    "mpi_datatype": "# MPI_Datatype :",
    "mpi_red_datatype": "# MPI_Datatype for reductions :",
    "mpi_red_op": "# MPI_Op",
    "end_of_table": "# All processes entering MPI_Finalize",
    "creation_time": "# CREATION_TIME :",
    "n_nodes": "# N_NODES :",
    "off_cache_flag": "# OFF_CACHE_FLAG :",
    "algorithm": "# ALGORITHM :",
}

# Output column order: two context columns, the five numeric table
# columns as printed by IMB, then the per-file metadata columns.
column_names = [
    "benchmark_type",
    "proc_num",
    "msg_size_bytes",
    "repetitions",
    "t_min_usec",
    "t_max_usec",
    "t_avg_usec",
    "mpi_datatype",
    "mpi_red_datatype",
    "mpi_red_op",
    "creation_time",
    "n_nodes",
    "off_cache_flag",
    "algorithm",
]

data = []

for fname in os.listdir("data/"):
    with open("data/" + fname, "r") as f:
        lines = f.readlines()

    # Section-tracking flags; a separator line toggles in_header and
    # advances past_preheader -> in_body on the appropriate toggles.
    past_preheader = False
    in_header = False
    in_body = False

    # Per-file metadata; stays "NA" when a marker never appears.
    btype = "NA"
    proc_num = "NA"
    mpi_datatype = "NA"
    mpi_red_datatype = "NA"
    mpi_red_op = "NA"
    creation_time = "NA"
    n_nodes = "NA"
    off_cache_flag = "NA"
    algorithm = "NA"

    for line in lines:
        if data_markers["block_separator"] in line:
            if in_header and not past_preheader:
                # Closing separator of the very first header block.
                past_preheader = True
            elif in_header and past_preheader:
                # Closing separator of the benchmark header: the result
                # table follows.
                in_body = True
            in_header = not in_header
            continue

        # Between header blocks, after the pre-header: IMB's global
        # datatype / reduction settings.
        if not in_header and not in_body and past_preheader:
            if data_markers["mpi_datatype"] in line:
                mpi_datatype = line.split()[-1]
            elif data_markers["mpi_red_datatype"] in line:
                mpi_red_datatype = line.split()[-1]
            elif data_markers["mpi_red_op"] in line:
                mpi_red_op = line.split()[-1]

        # Pre-header: "# KEY : value" lines written by the batch runner.
        if not in_header and not in_body and not past_preheader:
            if data_markers["n_nodes"] in line:
                n_nodes = line.split()[-1]
            if data_markers["creation_time"] in line:
                creation_time = line.split()[-1]
            if data_markers["off_cache_flag"] in line:
                off_cache_flag = line.split(":")[-1].strip()
                if off_cache_flag == "":
                    off_cache_flag = "NA"
                else:
                    # Keep only the flag's argument, not the option name.
                    off_cache_flag = off_cache_flag.replace("-off_cache", "")
            if data_markers["algorithm"] in line:
                algorithm = line.split(":")[-1].strip()

        # Benchmark header: collective name and process count.
        if past_preheader and in_header:
            if data_markers["benchmark_type"] in line:
                btype = line.split()[2]
            if data_markers["processes_num"] in line:
                proc_num = int(line.split()[3])

        if in_body:
            # BUG FIX: test for the end-of-table marker BEFORE the
            # generic comment skip -- the marker starts with '#', so the
            # previous order made this break unreachable and the parser
            # ran past the end of the table.
            if data_markers["end_of_table"] in line:
                break
            # Skip comment lines and blank lines inside the table.
            if "#" in line or "".join(line.split()) == "":
                continue
            # Skip rows IMB flags as invalid measurements.
            if "int-overflow" in line:
                continue
            if "out-of-mem" in line:
                continue
            # Numeric row: msg size / repetitions parse as int,
            # timings as float.
            data.append(
                [btype, proc_num]
                + [int(s) if s.isdigit() else float(s) for s in line.split()]
                + [
                    mpi_datatype,
                    mpi_red_datatype,
                    mpi_red_op,
                    creation_time,
                    n_nodes,
                    off_cache_flag,
                    algorithm,
                ]
            )

df = pd.DataFrame(data, columns=column_names)
df.to_csv("data.csv", index=False)
|
||||||
Reference in New Issue
Block a user