From d494228d77902d504bb7f61d335657e4702fecea Mon Sep 17 00:00:00 2001
From: Erik Fabrizzi
Date: Wed, 5 Nov 2025 12:08:34 +0100
Subject: [PATCH] alg benchmarking runner and processor

---
 launch_alg_bench.py      |  15 +++---
 postprocess_data_algs.py | 133 +++++++++++++++++++++++++++++++++++++++
 2 files changed, 139 insertions(+), 9 deletions(-)
 create mode 100755 postprocess_data_algs.py

diff --git a/launch_alg_bench.py b/launch_alg_bench.py
index 4b10d29..dbf27ab 100755
--- a/launch_alg_bench.py
+++ b/launch_alg_bench.py
@@ -3,8 +3,6 @@ import subprocess
 from datetime import datetime
 
 ################ HELPER FUNCTIONS ################
-
-
 def load_template(template_path: str):
     output_template = ""
     with open(template_path, "r") as handle:
@@ -16,7 +14,6 @@ def write_batch(batch_fpath: str, batch_content: str):
     with open(batch_fpath, "w") as handle:
         _ = handle.write(batch_content)
 
-
 ################### SETUP DIRS ###################
 output_dir = os.getcwd()+"/output/"
 err_dir = os.getcwd()+"/error/"
@@ -33,7 +30,7 @@ if os.path.isdir(batch_files_dir) == False:
     os.mkdir(batch_files_dir)
 
 ################ GLOBAL DEFAULTS #################
-mpi1_bin = "/home/hpc/ihpc/ihpc136h/workspace/prototyping/bin/IMB-MPI1"
+mpi1_bin = "/home/hpc/ihpc/ihpc136h/workspace/mpi-benchmark-tool/bin/IMB-MPI1"
 default_parameter = {
     "time_stamp": datetime.now().strftime("%y_%m_%d_%H-%M-%S"),
     "job_name": "",
@@ -198,9 +195,9 @@ for flag in off_cache_flags:
                     f"{collective}_{alg.strip().replace('`','').replace(' ','_').replace('/','_')}.sh")
                 write_batch(batch_file,
                             multiple_node_template.format(**multiple_node_parameter))
-                # result = subprocess.run(["sbatch", batch_files_dir+collective+".sh"],
-                #                         capture_output=True, text=True)
-                # log += f"#{collective} {n_procs}" + "\n"
-                # log += "\tSTDOUT:" + result.stdout + "\n"
-                # log += "\tSTDERR:" + result.stderr + "\n"
+                result = subprocess.run(["sbatch", batch_file],
+                                        capture_output=True, text=True)
+                log += f"#{collective} {n_procs}" + "\n"
+                log += "\tSTDOUT:" + result.stdout + "\n"
+                log += "\tSTDERR:" + result.stderr + "\n"
 print(log)
diff --git a/postprocess_data_algs.py b/postprocess_data_algs.py
new file mode 100755
index 0000000..e383dd6
--- /dev/null
+++ b/postprocess_data_algs.py
@@ -0,0 +1,133 @@
+#!/usr/bin/env python3
+"""Collect benchmark result tables from ``data/`` into ``data.csv``.
+
+Every file in ``data/`` is scanned line by line.  Lines containing one of the
+``data_markers`` strings supply metadata, and every table row found in a
+benchmark body is stored as one CSV row together with that metadata.
+"""
+import os
+
+import pandas as pd
+
+# Substrings that identify the meaningful lines of a result file.
+data_markers = {
+    "block_separator": "#----------------------------------------------------------------",
+    "benchmark_type": "# Benchmarking",
+    "processes_num": "# #processes = ",
+    "min_bytelen": "# Minimum message length in bytes",
+    "max_bytelen": "# Maximum message length in bytes",
+    "mpi_datatype": "# MPI_Datatype :",
+    "mpi_red_datatype": "# MPI_Datatype for reductions :",
+    "mpi_red_op": "# MPI_Op",
+    "end_of_table": "# All processes entering MPI_Finalize",
+    "creation_time": "# CREATION_TIME :",
+    "n_nodes": "# N_NODES :",
+    "off_cache_flag": "# OFF_CACHE_FLAG :",
+    "algorithm": "# ALGORITHM :",
+}
+
+# Column order of data.csv; must match the rows appended to ``data`` below.
+column_names = [
+    "benchmark_type",
+    "proc_num",
+    "msg_size_bytes",
+    "repetitions",
+    "t_min_usec",
+    "t_max_usec",
+    "t_avg_usec",
+    "mpi_datatype",
+    "mpi_red_datatype",
+    "mpi_red_op",
+    "creation_time",
+    "n_nodes",
+    "off_cache_flag",
+    "algorithm",
+]
+
+data = []
+
+# Sorted so the row order of data.csv does not depend on the file system.
+for file in sorted(os.listdir("data/")):
+    with open("data/" + file, "r") as f:
+        lines = f.readlines()
+
+    # Parser state: every separator line toggles in_header.  The first header
+    # block that closes is the pre-header; each later one opens a table body.
+    past_preheader = False
+    in_header = False
+    in_body = False
+
+    # Metadata defaults for markers that never show up in this file.
+    btype = "NA"
+    proc_num = "NA"
+    mpi_datatype = "NA"
+    mpi_red_datatype = "NA"
+    mpi_red_op = "NA"
+    creation_time = "NA"
+    n_nodes = "NA"
+    off_cache_flag = "NA"
+    algorithm = "NA"
+
+    for line in lines:
+        if data_markers["block_separator"] in line:
+            if in_header and not past_preheader:
+                past_preheader = True
+            elif in_header and past_preheader:
+                in_body = True
+            in_header = not in_header
+            continue
+
+        # Between the pre-header and the first benchmark header.
+        if not in_header and not in_body and past_preheader:
+            if data_markers["mpi_datatype"] in line:
+                mpi_datatype = line.split()[-1]
+            elif data_markers["mpi_red_datatype"] in line:
+                mpi_red_datatype = line.split()[-1]
+            elif data_markers["mpi_red_op"] in line:
+                mpi_red_op = line.split()[-1]
+
+        # Before the first separator: the custom job-level markers.
+        if not in_header and not in_body and not past_preheader:
+            if data_markers["n_nodes"] in line:
+                n_nodes = line.split()[-1]
+            if data_markers["creation_time"] in line:
+                creation_time = line.split()[-1]
+            if data_markers["off_cache_flag"] in line:
+                off_cache_flag = line.split(":")[-1].strip()
+                if off_cache_flag == "":
+                    off_cache_flag = "NA"
+                else:
+                    off_cache_flag = off_cache_flag.replace("-off_cache", "")
+            if data_markers["algorithm"] in line:
+                algorithm = line.split(":")[-1].strip()
+
+        # Inside a benchmark header block.
+        if past_preheader and in_header:
+            if data_markers["benchmark_type"] in line:
+                btype = line.split()[2]
+            if data_markers["processes_num"] in line:
+                proc_num = int(line.split()[3])
+
+        if in_body:
+            # Checked before the "#" filter: the marker itself contains "#",
+            # so testing it afterwards would make this break unreachable.
+            if data_markers["end_of_table"] in line:
+                break
+            if "#" in line or not line.strip():
+                continue
+            if "int-overflow" in line or "out-of-mem" in line:
+                continue
+            values = [int(s) if s.isdigit() else float(s)
+                      for s in line.split()]
+            data.append([btype, proc_num] + values + [
+                mpi_datatype,
+                mpi_red_datatype,
+                mpi_red_op,
+                creation_time,
+                n_nodes,
+                off_cache_flag,
+                algorithm,
+            ])
+
+df = pd.DataFrame(data, columns=column_names)
+df.to_csv("data.csv", index=False)