initial_commit
commit 8b80f1fd28
.gitignore vendored · Normal file · 11 lines
@@ -0,0 +1,11 @@
# Ignore everything
*

# But not these!
!.gitignore
!README.md
!*.py
!*.template
# Keep subdirectories, so the patterns above also reach nested files

!*/
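The .gitignore uses the deny-all-then-whitelist pattern: "*" ignores everything, the "!" rules re-include only what the repository should track, and "!*/" un-ignores directories so git descends into them. A minimal sketch for verifying the rules, assuming git is on PATH; the paths are illustrative:

import subprocess

# git check-ignore -v prints the matching pattern for an ignored path and
# exits non-zero with no output for a path that is not ignored.
for path in ["bin/stray_binary", "launch_bench_multinode.py", "templates/bench.template"]:
    result = subprocess.run(["git", "check-ignore", "-v", path],
                            capture_output=True, text=True)
    print(path, "->", result.stdout.strip() or "not ignored")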
launch_bench_multinode.py · Executable file · 79 lines
@@ -0,0 +1,79 @@
import os
import subprocess
from datetime import datetime


def load_template(template_path: str):
    output_template = ""
    with open(template_path, "r") as handle:
        output_template = handle.read()
    return output_template


def write_batch(batch_fpath: str, batch_content: str):
    with open(batch_fpath, "w") as handle:
        _ = handle.write(batch_content)


collectives = ["Reduce",
               # "Reduce_scatter",
               # "Reduce_scatter_block",
               # "Allreduce",
               # "Allgather",
               # "Allgatherv",
               # "Scatter",
               # "Scatterv",
               # "Gather",
               # "Gatherv",
               # "Alltoall",
               # "Bcast",
               # "Barrier"
               ]

procnt = [
    18,
    # 36,
    # 54,
    # 72
]
mpi1_bin = "/home/hpc/ihpc/ihpc136h/workspace/prototyping/bin"
slurm_template = load_template("templates/bench.template")

template_parameter = {"time_stamp": datetime.now().strftime("%y_%m_%d_%H-%M-%S"),
                      "job_name": "",
                      "output_dir": os.getcwd() + "/output/",
                      "err_dir": os.getcwd() + "/error/",
                      "data_dir": os.getcwd() + "/data/",
                      "n_procs": 18,
                      "off_mem_flag": "",
                      "bin": mpi1_bin
                      }

output_dir = os.getcwd() + "/output/"
err_dir = os.getcwd() + "/error/"
batch_files_dir = os.getcwd() + "/batchs/"
data_dir = os.getcwd() + "/data/"

if not os.path.isdir(output_dir):
    os.mkdir(output_dir)
if not os.path.isdir(err_dir):
    os.mkdir(err_dir)
if not os.path.isdir(data_dir):
    os.mkdir(data_dir)
if not os.path.isdir(batch_files_dir):
    os.mkdir(batch_files_dir)

log = ""

for n_procs in procnt:
    template_parameter["n_procs"] = n_procs
    for collective in collectives:
        template_parameter["job_name"] = collective
        write_batch(batch_files_dir + collective + ".sh",
                    slurm_template.format(**template_parameter))
        result = subprocess.run(["sbatch", batch_files_dir + collective + ".sh"],
                                capture_output=True, text=True)
        log += f"#{collective} {n_procs}" + "\n"
        log += "\tSTDOUT:" + result.stdout + "\n"
        log += "\tSTDERR:" + result.stderr + "\n"
print(log)
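The four isdir/mkdir pairs above can race if two launchers start at the same time. A behavior-equivalent alternative (a sketch, not part of the commit) is os.makedirs with exist_ok:

import os

# Creates each directory and any missing parents; no error if it already exists.
for d in ("output/", "error/", "data/", "batchs/"):
    os.makedirs(os.path.join(os.getcwd(), d), exist_ok=True)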
launch_bench_singlenode.py · Executable file · 79 lines
@@ -0,0 +1,79 @@
import os
import subprocess
from datetime import datetime


def load_template(template_path: str):
    output_template = ""
    with open(template_path, "r") as handle:
        output_template = handle.read()
    return output_template


def write_batch(batch_fpath: str, batch_content: str):
    with open(batch_fpath, "w") as handle:
        _ = handle.write(batch_content)


collectives = ["Reduce",
               "Reduce_scatter",
               "Allreduce",
               "Allgather",
               "Allgatherv",
               "Scatter",
               "Scatterv",
               "Gather",
               "Gatherv",
               "Alltoall",
               "Bcast",
               # "Barrier"
               ]

procnt = [
    18,
    36,
    54,
    72
]
mpi1_bin = "/home/hpc/ihpc/ihpc136h/workspace/prototyping/bin/IMB-MPI1"
slurm_template = load_template("templates/bench.template")

template_parameter = {"time_stamp": datetime.now().strftime("%y_%m_%d_%H-%M-%S"),
                      "job_name": "",
                      "output_dir": os.getcwd() + "/output/",
                      "err_dir": os.getcwd() + "/error/",
                      "data_dir": os.getcwd() + "/data/",
                      "n_procs": 18,
                      "off_mem_flag": "",
                      "bin": mpi1_bin
                      }

output_dir = os.getcwd() + "/output/"
err_dir = os.getcwd() + "/error/"
batch_files_dir = os.getcwd() + "/batchs/"
data_dir = os.getcwd() + "/data/"

if not os.path.isdir(output_dir):
    os.mkdir(output_dir)
if not os.path.isdir(err_dir):
    os.mkdir(err_dir)
if not os.path.isdir(data_dir):
    os.mkdir(data_dir)
if not os.path.isdir(batch_files_dir):
    os.mkdir(batch_files_dir)

log = ""

for n_procs in procnt:
    template_parameter["n_procs"] = n_procs
    for collective in collectives:
        template_parameter["job_name"] = collective
        write_batch(batch_files_dir + collective + ".sh",
                    slurm_template.format(**template_parameter))
        result = subprocess.run(["sbatch", batch_files_dir + collective + ".sh"],
                                capture_output=True, text=True)
        log += f"#{collective} {n_procs}" + "\n"
        log += "\tSTDOUT:" + result.stdout + "\n"
        log += "\tSTDERR:" + result.stderr + "\n"
print(log)
_ = subprocess.run(["./clean.sh"])
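On success, sbatch prints a line of the form "Submitted batch job <id>" to stdout, so the log collected above already contains every job id. A hypothetical helper (not in the commit) to recover them for later bookkeeping:

import re

def job_id_from_sbatch(stdout):
    # sbatch reports "Submitted batch job <id>" on successful submission.
    match = re.search(r"Submitted batch job (\d+)", stdout)
    return match.group(1) if match else None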
postprocess_data.py · Executable file · 72 lines
@@ -0,0 +1,72 @@
import pandas as pd
import os

data_markers = {
    "block_separator": "#----------------------------------------------------------------",
    "benchmark_type": "# Benchmarking",
    "processes_num": "# #processes = ",
    "min_bytelen": "# Minimum message length in bytes",
    "max_bytelen": "# Maximum message length in bytes",
    "mpi_datatype": "# MPI_Datatype :",
    "mpi_red_datatype": "# MPI_Datatype for reductions :",
    "mpi_red_op": "# MPI_Op",
    "end_of_table": "# All processes entering MPI_Finalize",
}

column_names = [
    "benchmark_type",
    "proc_num",
    "msg_size_bytes",
    "repetitions",
    "t_min_usec",
    "t_max_usec",
    "t_avg_usec",
    "mpi_datatype",
    "mpi_red_datatype",
    "mpi_red_op",
]

data = list()
for file in os.listdir("data/"):
    with open("data/" + file, "r") as f:
        lines = f.readlines()
    past_preheader = False
    in_header = False
    in_body = False
    btype = None
    proc_num = None
    mpi_datatype = None
    mpi_red_datatype = None
    mpi_red_op = None

    for line in lines:
        if data_markers["block_separator"] in line:
            if in_header and not past_preheader:
                past_preheader = True
            elif in_header and past_preheader:
                in_body = True
            in_header = not in_header
            continue
        if not in_header and not in_body and past_preheader:
            if data_markers["mpi_datatype"] in line:
                mpi_datatype = line.split()[-1]
            elif data_markers["mpi_red_datatype"] in line:
                mpi_red_datatype = line.split()[-1]
            elif data_markers["mpi_red_op"] in line:
                mpi_red_op = line.split()[-1]

        if past_preheader and in_header:
            if data_markers["benchmark_type"] in line:
                btype = line.split()[2]
            if data_markers["processes_num"] in line:
                proc_num = int(line.split()[3])
        if in_body:
            if data_markers["end_of_table"] in line:
                break
            if "#" in line or "".join(line.split()) == "":
                continue
            data.append([btype, proc_num] + [int(s) if s.isdigit()
                        else float(s) for s in line.split()] + [mpi_datatype, mpi_red_datatype, mpi_red_op])

df = pd.DataFrame(data, columns=column_names)
df.to_csv("data.csv", index=False)
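The per-row conversion relies on str.isdigit to separate the integer columns of an IMB table row (message size, repetitions) from the floating-point timings. A small illustration with a made-up table line:

line = "      1024         1000         0.05         0.12         0.08"  # illustrative values
row = [int(s) if s.isdigit() else float(s) for s in line.split()]
print(row)  # [1024, 1000, 0.05, 0.12, 0.08]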
templates/bench.template · Normal file · 18 lines
@@ -0,0 +1,18 @@
#!/bin/bash -l
#SBATCH --job-name={job_name}_{n_procs}
#SBATCH --output={output_dir}{job_name}_{n_procs}.out
#SBATCH --error={err_dir}{job_name}_{n_procs}.err
#SBATCH --nodes=1
#SBATCH --time=00:10:00
#SBATCH --export=NONE

unset SLURM_EXPORT_ENV

module load intel intelmpi likwid

unset I_MPI_PMI_LIBRARY
export LIKWID_SILENT=1
echo CREATION_TIME {time_stamp}

srun --cpu-freq=2000000-2000000:performance ./likwid-mpirun -np {n_procs} -mpi intelmpi -omp intel -nperdomain M:18 {bin} {job_name} -npmin {n_procs} {off_mem_flag} > {data_dir}/{job_name}_{n_procs}.dat
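The braces in the template are plain Python str.format placeholders, filled by the launchers via slurm_template.format(**template_parameter). A minimal rendering sketch with assumed values:

header = ("#SBATCH --job-name={job_name}_{n_procs}\n"
          "#SBATCH --output={output_dir}{job_name}_{n_procs}.out")
print(header.format(job_name="Reduce", n_procs=18, output_dir="output/"))
# #SBATCH --job-name=Reduce_18
# #SBATCH --output=output/Reduce_18.out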