From ba8cb1ae01c4576098baf48864e9f325e9f2ede4 Mon Sep 17 00:00:00 2001 From: Erik Fabrizzi Date: Sun, 1 Jun 2025 18:58:54 +0200 Subject: [PATCH] Added some Metadata --- launch_bench_singlenode.py | 50 ++++++++++++++++++++------------------ postprocess_data.py | 43 +++++++++++++++++++++++++++----- templates/bench.template | 12 ++++++--- 3 files changed, 73 insertions(+), 32 deletions(-) diff --git a/launch_bench_singlenode.py b/launch_bench_singlenode.py index 78a2c1e..fb662ea 100755 --- a/launch_bench_singlenode.py +++ b/launch_bench_singlenode.py @@ -15,19 +15,20 @@ def write_batch(batch_fpath: str, batch_content: str): _ = handle.write(batch_content) -collectives = ["Reduce", - "Reduce_scatter", - "Allreduce", - "Allgather", - "Allgatherv", - "Scatter", - "Scatterv", - "Gather", - "Gatherv", - "Alltoall", - "Bcast", - # "Barrier" - ] +collectives = [ + "Reduce", + "Reduce_scatter", + "Allreduce", + "Allgather", + "Allgatherv", + "Scatter", + "Scatterv", + "Gather", + "Gatherv", + "Alltoall", + "Bcast", + # "Barrier" +] procnt = [ 18, @@ -35,18 +36,21 @@ procnt = [ 54, 72 ] + mpi1_bin = "/home/hpc/ihpc/ihpc136h/workspace/prototyping/bin/IMB-MPI1" slurm_template = load_template("templates/bench.template") -template_parameter = {"time_stamp": datetime.now().strftime("%y_%m_%d_%H-%M-%S"), - "job_name": "", - "output_dir": os.getcwd()+"/output/", - "err_dir": os.getcwd()+"/error/", - "data_dir": os.getcwd()+"/data/", - "n_procs": 18, - "off_mem_flag": "", - "bin": mpi1_bin - } +template_parameter = { + "time_stamp": datetime.now().strftime("%y_%m_%d_%H-%M-%S"), + "job_name": "", + "output_dir": os.getcwd()+"/output/", + "err_dir": os.getcwd()+"/error/", + "data_dir": os.getcwd()+"/data/", + "n_procs": 18, + "off_mem_flag": "-off_cache 50", + "bin": mpi1_bin, + "n_nodes": 1 +} output_dir = os.getcwd()+"/output/" err_dir = os.getcwd()+"/error/" @@ -76,4 +80,4 @@ for n_procs in procnt: log += "\tSTDOUT:" + result.stdout + "\n" log += "\tSTDERR:" + result.stderr + "\n" print(log) -_ = subprocess.run(["./clean.sh"]) +# _ = subprocess.run(["./clean.sh"]) diff --git a/postprocess_data.py b/postprocess_data.py index ec6bf58..df7a88b 100755 --- a/postprocess_data.py +++ b/postprocess_data.py @@ -1,3 +1,4 @@ +from venv import create import pandas as pd import os @@ -11,6 +12,9 @@ data_markers = { "mpi_red_datatype": "# MPI_Datatype for reductions :", "mpi_red_op": "# MPI_Op", "end_of_table": "# All processes entering MPI_Finalize", + "creation_time": "# CREATION_TIME :", + "n_nodes": "# N_NODES :", + "off_mem_flag": "# OFF_MEM_FLAG :" } column_names = [ @@ -24,20 +28,29 @@ column_names = [ "mpi_datatype", "mpi_red_datatype", "mpi_red_op", + "creation_time", + "n_nodes", + "off_mem_flag", ] data = list() + for file in os.listdir("data/"): with open("data/"+file, 'r') as f: lines = f.readlines() + past_preheader = False in_header = False in_body = False - btype = None - proc_num = None - mpi_datatype = None - mpi_red_datatype = None - mpi_red_op = None + + btype = "NA" + proc_num = "NA" + mpi_datatype = "NA" + mpi_red_datatype = "NA" + mpi_red_op = "NA" + creation_time = "NA" + n_nodes = "NA" + off_mem_flag = "NA" for line in lines: if data_markers["block_separator"] in line: @@ -55,6 +68,16 @@ for file in os.listdir("data/"): elif data_markers["mpi_red_op"] in line: mpi_red_op = line.split()[-1] + if not in_header and not in_body and not past_preheader: + if data_markers["n_nodes"] in line: + n_nodes = line.split()[-1] + if data_markers["creation_time"] in line: + creation_time = line.split()[-1] + if data_markers["off_mem_flag"] in line: + off_mem_flag = line.split(":")[-1].strip() + if off_mem_flag == "": off_mem_flag = "NA" + else: off_mem_flag = off_mem_flag.replace("-off_cache","") + if past_preheader and in_header: if data_markers["benchmark_type"] in line: btype = line.split()[2] @@ -66,7 +89,15 @@ for file in os.listdir("data/"): if data_markers["end_of_table"] in line: break data.append([btype, proc_num]+[int(s) if s.isdigit() - else float(s) for s in line.split()] + [mpi_datatype, mpi_red_datatype, mpi_red_op]) + else float(s) for s in line.split()] + + [ + mpi_datatype, + mpi_red_datatype, + mpi_red_op, + creation_time, + n_nodes, + off_mem_flag, + ]) df = pd.DataFrame(data, columns=column_names) df.to_csv("data.csv", index=False) diff --git a/templates/bench.template b/templates/bench.template index 960f894..333f51c 100644 --- a/templates/bench.template +++ b/templates/bench.template @@ -2,7 +2,7 @@ #SBATCH --job-name={job_name}_{n_procs} #SBATCH --output={output_dir}{job_name}_{n_procs}.out #SBATCH --error={err_dir}{job_name}_{n_procs}.err -#SBATCH --nodes=1 +#SBATCH --nodes={n_nodes} #SBATCH --time=00:10:00 #SBATCH --export=NONE @@ -12,7 +12,13 @@ module load intel intelmpi likwid unset I_MPI_PMI_LIBRARY export LIKWID_SILENT=1 -echo CREATION_TIME {time_stamp} -srun --cpu-freq=2000000-2000000:performance ./likwid-mpirun -np {n_procs} -mpi intelmpi -omp intel -nperdomain M:18 {bin} {job_name} -npmin {n_procs} {off_mem_flag} > {data_dir}/{job_name}_{n_procs}.dat +OUTPUT_FILENAME="{data_dir}/{job_name}_$SLURM_JOB_ID.dat" + +echo "# CREATION_TIME : {time_stamp}" > $OUTPUT_FILENAME +echo "# N_NODES : {n_nodes}" >> $OUTPUT_FILENAME +echo "# OFF_MEM_FLAG : {off_mem_flag}">> $OUTPUT_FILENAME + +srun --cpu-freq=2000000-2000000:performance ./likwid-mpirun -np {n_procs} -mpi intelmpi -omp intel -nperdomain M:18 {bin} {job_name} -npmin {n_procs} {off_mem_flag} >> $OUTPUT_FILENAME +