# IMB-Benchmarking-tools/launch_alg_bench.py

import os
import subprocess
from datetime import datetime

################ HELPER FUNCTIONS ################


def load_template(template_path: str) -> str:
    """Return the complete text of the file at ``template_path``.

    The file is opened in text mode and read in one go; any error raised by
    ``open`` or ``read`` propagates to the caller.
    """
    with open(template_path, "r") as template_file:
        return template_file.read()


def write_batch(batch_fpath: str, batch_content: str) -> None:
    """Write ``batch_content`` to ``batch_fpath``.

    The file is opened in "w" mode, so an existing file at that path is
    truncated and replaced.
    """
    with open(batch_fpath, "w") as batch_file:
        batch_file.write(batch_content)


################### SETUP DIRS ###################
# Working directories, all located directly under the current working
# directory. The trailing slash is part of each value.
output_dir = os.getcwd()+"/output/"
err_dir = os.getcwd()+"/error/"
batch_files_dir = os.getcwd()+"/batchs/"
data_dir = os.getcwd()+"/data/"

# makedirs(exist_ok=True) creates a directory only when it is missing and,
# unlike a separate isdir() check followed by mkdir(), does not fail when the
# directory appears between the check and the creation.
for _work_dir in (output_dir, err_dir, data_dir, batch_files_dir):
    os.makedirs(_work_dir, exist_ok=True)

################ GLOBAL DEFAULTS #################
# Path stored under the "bin" key of the default parameters.
mpi1_bin = "/home/hpc/ihpc136h/workspace/mpi-benchmark-tool/bin/IMB-MPI1"

# Baseline parameter set; the launch code below copies it and passes the copy
# to str.format() on the batch template. "time_stamp" is evaluated once, when
# this statement runs, so every job generated in one run shares the same value.
default_parameter = dict(
    time_stamp=datetime.now().strftime("%y_%m_%d_%H-%M-%S"),
    job_name="",
    output_dir=f"{os.getcwd()}/output/",
    err_dir=f"{os.getcwd()}/error/",
    data_dir=f"{os.getcwd()}/data/",
    n_procs=18,
    off_cache_flag="",
    bin=mpi1_bin,
    n_nodes=1,
)

# One entry per collective:
#   "name" - collective name; the launch loop uses it as the job name and as
#            the prefix of the generated batch file name.
#   "flag" - string handed to the template as "alg_flag".
#   "algs" - algorithm display names (each keeps its trailing space). The
#            position of a name in this list is what the launch loop passes
#            to the template as "alg_idx", so the order must not be changed.
algs_dic = [
    {
        "name": "Allgather",
        "flag": "I_MPI_ADJUST_ALLGATHER",
        "algs": [
            "Recursive doubling ",
            "Bruck`s ",
            "Ring ",
            "Topology aware Gatherv + Bcast ",
            "Knomial ",
        ],
    },
    {
        "name": "Allreduce",
        "flag": "I_MPI_ADJUST_ALLREDUCE",
        "algs": [
            "Recursive doubling ",
            "Rabenseifner`s ",
            "Reduce + Bcast ",
            "Topology aware Reduce + Bcast ",
            "Binomial gather + scatter ",
            "Topology aware binominal gather + scatter ",
            "Shumilin`s ring ",
            "Ring ",
            "Knomial ",
            "Topology aware SHM-based flat ",
            "Topology aware SHM-based Knomial ",
            "Topology aware SHM-based Knary ",
        ],
    },
    {
        "name": "Alltoall",
        "flag": "I_MPI_ADJUST_ALLTOALL",
        "algs": [
            "Bruck`s ",
            "Isend/Irecv + waitall ",
            "Pair wise exchange ",
            "Plum`s ",
        ],
    },
    {
        "name": "Barrier",
        "flag": "I_MPI_ADJUST_BARRIER",
        "algs": [
            "Dissemination ",
            "Recursive doubling ",
            "Topology aware dissemination ",
            "Topology aware recursive doubling ",
            "Binominal gather + scatter ",
            "Topology aware binominal gather + scatter ",
            "Topology aware SHM-based flat ",
            "Topology aware SHM-based Knomial ",
            "Topology aware SHM-based Knary ",
        ],
    },
    {
        "name": "Bcast",
        "flag": "I_MPI_ADJUST_BCAST",
        "algs": [
            "Binomial ",
            "Recursive doubling ",
            "Ring ",
            "Topology aware binomial ",
            "Topology aware recursive doubling ",
            "Topology aware ring ",
            "Shumilin`s ",
            "Knomial ",
            "Topology aware SHM-based flat ",
            "Topology aware SHM-based Knomial ",
            "Topology aware SHM-based Knary ",
            "NUMA aware SHM-based (SSE4.2) ",
            "NUMA aware SHM-based (AVX2) ",
            "NUMA aware SHM-based (AVX512) ",
        ],
    },
    {
        "name": "Gather",
        "flag": "I_MPI_ADJUST_GATHER",
        "algs": [
            "Binomial ",
            "Topology aware binomial ",
            "Shumilin`s ",
            "Binomial with segmentation ",
        ],
    },
    {
        "name": "Reduce_scatter",
        "flag": "I_MPI_ADJUST_REDUCE_SCATTER",
        "algs": [
            "Recursive halving ",
            "Pair wise exchange ",
            "Recursive doubling ",
            "Reduce + Scatterv ",
            "Topology aware Reduce + Scatterv ",
        ],
    },
    {
        "name": "Reduce",
        "flag": "I_MPI_ADJUST_REDUCE",
        "algs": [
            "Shumilin`s ",
            "Binomial ",
            "Topology aware Shumilin`s ",
            "Topology aware binomial ",
            "Rabenseifner`s ",
            "Topology aware Rabenseifner`s ",
            "Knomial ",
            "Topology aware SHM-based flat ",
            "Topology aware SHM-based Knomial ",
            "Topology aware SHM-based Knary ",
            "Topology aware SHM-based binomial ",
        ],
    },
    {
        "name": "Scatter",
        "flag": "I_MPI_ADJUST_SCATTER",
        "algs": [
            "Binomial ",
            "Topology aware binomial ",
            "Shumilin`s ",
        ],
    },
]
# One text fragment per submitted job; joined into `log` after the loops so
# the report is not rebuilt by repeated string concatenation.
log_entries = []

############## MULTIPLE-NODE LAUNCH ##############
# Values substituted for the "off_cache_flag" template field; the whole sweep
# below is repeated once per entry.
off_cache_flags = [
    "-off_cache -1",
    "-off_cache 50",
    ""
]

# Node counts to generate and submit jobs for.
ndcnt = [
    2,
    3,
    4,
    5,
    6,
    7,
    8,
    9,
    10
]

proc_per_node = 72
# Shallow copy, so the per-job values set below never modify default_parameter.
multiple_node_parameter = dict(default_parameter)
multiple_node_template = load_template("./templates/multinode_algs.template")

# Sweep: off-cache flag x node count x collective x algorithm. The parameter
# dict is reused and updated in place; each loop level sets only its own keys.
for flag in off_cache_flags:
    multiple_node_parameter["off_cache_flag"] = flag
    for n_nodes in ndcnt:
        n_procs = n_nodes * proc_per_node
        multiple_node_parameter["n_procs"] = n_procs
        multiple_node_parameter["n_nodes"] = n_nodes
        for alg_conf in algs_dic:
            collective = alg_conf['name']
            multiple_node_parameter["job_name"] = collective
            multiple_node_parameter["alg_flag"] = alg_conf['flag']
            algs = alg_conf["algs"]
            # NOTE(review): enumerate() counts from 0, while the Intel MPI
            # reference documents I_MPI_ADJUST_* algorithm ids as starting at
            # 1 (0 selects the default logic). Confirm the template accounts
            # for this; otherwise enumerate(algs, start=1) is needed.
            for idx, alg in enumerate(algs):
                multiple_node_parameter["alg_name"] = alg
                multiple_node_parameter["alg_idx"] = idx
                # Turn the display name into a file-name-safe stem: drop the
                # surrounding whitespace and backticks, replace spaces and
                # slashes with underscores.
                alg_stem = alg.strip().replace('`', '').replace(' ', '_').replace('/', '_')
                # NOTE(review): the name depends only on collective and
                # algorithm, so every (flag, node count) pass overwrites the
                # script written for the same algorithm by the previous pass.
                batch_file = os.path.join(batch_files_dir,
                                          f"{collective}_{alg_stem}.sh")
                write_batch(batch_file,
                            multiple_node_template.format(**multiple_node_parameter))
                # Submit the script; sbatch's output is captured for the
                # report instead of being echoed immediately. A non-zero exit
                # status does not raise here.
                result = subprocess.run(["sbatch", batch_file],
                                        capture_output=True, text=True)
                log_entries.append(
                    f"#{collective} {n_procs}\n"
                    f"\tSTDOUT:{result.stdout}\n"
                    f"\tSTDERR:{result.stderr}\n"
                )

log = "".join(log_entries)
print(log)