"""Generate and submit Slurm batch jobs for MPI collective benchmarks.

For every combination of off-cache flag, node count, collective and
algorithm listed below, a batch script is rendered from a template,
written to the ``batchs/`` directory and submitted with ``sbatch``.
The captured stdout/stderr of every submission is printed at the end.
"""
import os
import subprocess
from datetime import datetime


################ HELPER FUNCTIONS ################
def load_template(template_path: str) -> str:
    """Return the full text of the file at *template_path*."""
    with open(template_path, "r") as handle:
        return handle.read()


def write_batch(batch_fpath: str, batch_content: str) -> None:
    """Write *batch_content* to *batch_fpath*, replacing any existing file."""
    with open(batch_fpath, "w") as handle:
        handle.write(batch_content)


def batch_script_name(collective: str, alg: str) -> str:
    """Return the batch script file name for one collective/algorithm pair.

    Surrounding whitespace and backticks are dropped from *alg*; spaces and
    slashes become underscores so the name is a single path component.
    """
    slug = alg.strip().replace('`', '').replace(' ', '_').replace('/', '_')
    return f"{collective}_{slug}.sh"


def format_log_entry(collective: str, n_procs: int,
                     stdout: str, stderr: str) -> str:
    """Return the log text recorded for a single ``sbatch`` submission."""
    return (
        f"#{collective} {n_procs}" + "\n"
        + "\tSTDOUT:" + stdout + "\n"
        + "\tSTDERR:" + stderr + "\n"
    )


def ensure_directories() -> None:
    """Create the output, error, data and batch directories if missing."""
    for directory in (output_dir, err_dir, data_dir, batch_files_dir):
        # exist_ok avoids the check-then-create race of isdir() + mkdir().
        os.makedirs(directory, exist_ok=True)


################### SETUP DIRS ###################
output_dir = os.getcwd() + "/output/"
err_dir = os.getcwd() + "/error/"
batch_files_dir = os.getcwd() + "/batchs/"
data_dir = os.getcwd() + "/data/"

################ GLOBAL DEFAULTS #################
mpi1_bin = "/home/hpc/ihpc/ihpc136h/workspace/mpi-benchmark-tool/bin/IMB-MPI1"

# Values substituted into the batch template via str.format(); the
# multi-node launch overrides several of them per job.
default_parameter = {
    "time_stamp": datetime.now().strftime("%y_%m_%d_%H-%M-%S"),
    "job_name": "",
    "output_dir": output_dir,
    "err_dir": err_dir,
    "data_dir": data_dir,
    "n_procs": 18,
    "off_cache_flag": "",
    "bin": mpi1_bin,
    "n_nodes": 1,
}

# One entry per collective: its name, the environment variable name passed
# to the template as "alg_flag", and the algorithm names to iterate over.
# NOTE(review): the position of each name in 'algs' is passed to the
# template as a 0-based "alg_idx". These lists look like Intel MPI's
# I_MPI_ADJUST_* algorithm tables, which are numbered from 1 -- confirm
# that the template compensates for the offset.
algs_dic = [
    {'name': "Allgather",
     'flag': "I_MPI_ADJUST_ALLGATHER",
     'algs': [
         "Recursive doubling ",
         "Bruck`s ",
         "Ring ",
         "Topology aware Gatherv + Bcast ",
         "Knomial ",
     ]},
    {'name': "Allreduce",
     'flag': "I_MPI_ADJUST_ALLREDUCE",
     'algs': [
         "Recursive doubling ",
         "Rabenseifner`s ",
         "Reduce + Bcast ",
         "Topology aware Reduce + Bcast ",
         "Binomial gather + scatter ",
         "Topology aware binominal gather + scatter ",
         "Shumilin`s ring ",
         "Ring ",
         "Knomial ",
         "Topology aware SHM-based flat ",
         "Topology aware SHM-based Knomial ",
         "Topology aware SHM-based Knary ",
     ]},
    {'name': "Alltoall",
     'flag': "I_MPI_ADJUST_ALLTOALL",
     'algs': [
         "Bruck`s ",
         "Isend/Irecv + waitall ",
         "Pair wise exchange ",
         "Plum`s ",
     ]},
    {'name': "Barrier",
     'flag': "I_MPI_ADJUST_BARRIER",
     'algs': [
         "Dissemination ",
         "Recursive doubling ",
         "Topology aware dissemination ",
         "Topology aware recursive doubling ",
         "Binominal gather + scatter ",
         "Topology aware binominal gather + scatter ",
         "Topology aware SHM-based flat ",
         "Topology aware SHM-based Knomial ",
         "Topology aware SHM-based Knary ",
     ]},
    {'name': "Bcast",
     'flag': "I_MPI_ADJUST_BCAST",
     'algs': [
         "Binomial ",
         "Recursive doubling ",
         "Ring ",
         "Topology aware binomial ",
         "Topology aware recursive doubling ",
         "Topology aware ring ",
         "Shumilin`s ",
         "Knomial ",
         "Topology aware SHM-based flat ",
         "Topology aware SHM-based Knomial ",
         "Topology aware SHM-based Knary ",
         "NUMA aware SHM-based (SSE4.2) ",
         "NUMA aware SHM-based (AVX2) ",
         "NUMA aware SHM-based (AVX512) ",
     ]},
    {'name': "Gather",
     'flag': "I_MPI_ADJUST_GATHER",
     'algs': [
         "Binomial ",
         "Topology aware binomial ",
         "Shumilin`s ",
         "Binomial with segmentation ",
     ]},
    {'name': "Reduce_scatter",
     'flag': "I_MPI_ADJUST_REDUCE_SCATTER",
     'algs': [
         "Recursive halving ",
         "Pair wise exchange ",
         "Recursive doubling ",
         "Reduce + Scatterv ",
         "Topology aware Reduce + Scatterv ",
     ]},
    {'name': "Reduce",
     'flag': "I_MPI_ADJUST_REDUCE",
     'algs': [
         "Shumilin`s ",
         "Binomial ",
         "Topology aware Shumilin`s ",
         "Topology aware binomial ",
         "Rabenseifner`s ",
         "Topology aware Rabenseifner`s ",
         "Knomial ",
         "Topology aware SHM-based flat ",
         "Topology aware SHM-based Knomial ",
         "Topology aware SHM-based Knary ",
         "Topology aware SHM-based binomial ",
     ]},
    {'name': "Scatter",
     'flag': "I_MPI_ADJUST_SCATTER",
     'algs': [
         "Binomial ",
         "Topology aware binomial ",
         "Shumilin`s ",
     ]},
]

############## MULTIPLE-NODE LAUNCH ##############
off_cache_flags = [
    "-off_cache -1",
    "-off_cache 50",
    "",
]

ndcnt = [2, 3, 4, 5, 6, 7, 8, 9, 10]

proc_per_node = 72


def submit_multinode_jobs(template: str) -> str:
    """Render, write and submit one batch job per parameter combination.

    Iterates over every off-cache flag, node count, collective and
    algorithm defined at module level, fills *template* with the resulting
    parameters via ``str.format``, writes the script into
    ``batch_files_dir`` and submits it with ``sbatch``.

    Args:
        template: Batch script text containing ``str.format`` placeholders.

    Returns:
        The concatenated log of all submissions (captured stdout/stderr).

    Raises:
        KeyError: If *template* references a placeholder that is not set.
        FileNotFoundError: If the ``sbatch`` executable cannot be found.
    """
    # Work on a copy so the module-level defaults stay untouched.
    parameters = dict(default_parameter)
    # Collect entries in a list and join once instead of repeated "+=".
    log_entries = []

    for flag in off_cache_flags:
        parameters["off_cache_flag"] = flag
        for n_nodes in ndcnt:
            n_procs = n_nodes * proc_per_node
            parameters["n_procs"] = n_procs
            parameters["n_nodes"] = n_nodes
            for alg_conf in algs_dic:
                collective = alg_conf['name']
                parameters["job_name"] = collective
                parameters["alg_flag"] = alg_conf['flag']
                for idx, alg in enumerate(alg_conf["algs"]):
                    parameters["alg_name"] = alg
                    parameters["alg_idx"] = idx

                    # NOTE(review): the file name does not encode the node
                    # count or off-cache flag, so the script on disk is
                    # overwritten for each of those combinations and only
                    # the last rendered variant remains after the run.
                    batch_file = os.path.join(
                        batch_files_dir, batch_script_name(collective, alg)
                    )
                    write_batch(batch_file, template.format(**parameters))

                    result = subprocess.run(
                        ["sbatch", batch_file],
                        capture_output=True,
                        text=True,
                    )
                    log_entries.append(
                        format_log_entry(
                            collective, n_procs, result.stdout, result.stderr
                        )
                    )

    return "".join(log_entries)


if __name__ == "__main__":
    # Keep the side effects (directory creation, job submission) out of
    # import time so the helpers above can be imported and tested.
    ensure_directories()
    multiple_node_template = load_template("./templates/multinode_algs.template")
    log = submit_multinode_jobs(multiple_node_template)
    print(log)