"""Collect the result tables of MPI benchmark text logs into one CSV file.

Every file found in ``data/`` is scanned line by line; each result row is
combined with the metadata found in the same file and written to
``data.csv``.  The conversion runs when this file is executed as a script.
"""

import os
from venv import create  # not used below; kept from the original import list

import pandas as pd

# Substrings that identify the interesting lines of a log file.
data_markers = {
    "block_separator": "#----------------------------------------------------------------",
    "benchmark_type": "# Benchmarking",
    "processes_num": "# #processes = ",
    "min_bytelen": "# Minimum message length in bytes",
    "max_bytelen": "# Maximum message length in bytes",
    "mpi_datatype": "# MPI_Datatype :",
    "mpi_red_datatype": "# MPI_Datatype for reductions :",
    "mpi_red_op": "# MPI_Op",
    "end_of_table": "# All processes entering MPI_Finalize",
    "creation_time": "# CREATION_TIME :",
    "n_nodes": "# N_NODES :",
    "off_mem_flag": "# OFF_MEM_FLAG :",
}

# Column order of the produced CSV; must match the row layout built in
# ``parse_benchmark_lines``.
column_names = [
    "benchmark_type",
    "proc_num",
    "msg_size_bytes",
    "repetitions",
    "t_min_usec",
    "t_max_usec",
    "t_avg_usec",
    "mpi_datatype",
    "mpi_red_datatype",
    "mpi_red_op",
    "creation_time",
    "n_nodes",
    "off_mem_flag",
]


def _parse_number(token):
    """Return *token* as ``int`` when it is all digits, otherwise as ``float``.

    Raises:
        ValueError: if *token* is not a valid number.
    """
    return int(token) if token.isdigit() else float(token)


def parse_benchmark_lines(lines):
    """Extract the result rows from the lines of one benchmark log.

    The log is read as a small state machine driven by the separator lines
    (``data_markers["block_separator"]``):

    * before the first separator: ``CREATION_TIME``, ``N_NODES`` and
      ``OFF_MEM_FLAG`` lines are read;
    * the first separator pair encloses a pre-header whose content is ignored;
    * after the pre-header: the MPI datatype / reduction datatype / reduction
      operation lines are read;
    * every following separator pair encloses a benchmark header (benchmark
      name and process count); the lines after it are table rows.

    Args:
        lines: iterable of text lines (a list of strings or an open file).

    Returns:
        A list of rows.  Each row is ``[benchmark_type, proc_num]`` followed
        by the numeric values of the table line and then ``mpi_datatype``,
        ``mpi_red_datatype``, ``mpi_red_op``, ``creation_time``, ``n_nodes``
        and ``off_mem_flag``.  Metadata that was not found is ``"NA"``.

    Raises:
        ValueError: if a table line contains a non-numeric token.
    """
    past_preheader = False
    in_header = False
    in_body = False

    btype = "NA"
    proc_num = "NA"
    mpi_datatype = "NA"
    mpi_red_datatype = "NA"
    mpi_red_op = "NA"
    creation_time = "NA"
    n_nodes = "NA"
    off_mem_flag = "NA"

    rows = []
    for line in lines:
        if data_markers["block_separator"] in line:
            # A separator that *closes* a block advances the state: closing
            # the first block ends the pre-header, closing any later block
            # means a result table follows.
            if in_header:
                if past_preheader:
                    in_body = True
                else:
                    past_preheader = True
            in_header = not in_header
            continue

        if not in_header and not in_body:
            if past_preheader:
                # Global settings printed between pre-header and first benchmark.
                if data_markers["mpi_datatype"] in line:
                    mpi_datatype = line.split()[-1]
                elif data_markers["mpi_red_datatype"] in line:
                    mpi_red_datatype = line.split()[-1]
                elif data_markers["mpi_red_op"] in line:
                    mpi_red_op = line.split()[-1]
            else:
                # Lines in front of the very first separator.
                if data_markers["n_nodes"] in line:
                    n_nodes = line.split()[-1]
                if data_markers["creation_time"] in line:
                    creation_time = line.split()[-1]
                if data_markers["off_mem_flag"] in line:
                    off_mem_flag = line.split(":")[-1].strip()
                    if off_mem_flag == "":
                        off_mem_flag = "NA"
                    else:
                        # Only the option name is removed; whatever surrounds
                        # it (including whitespace) is kept unchanged.
                        off_mem_flag = off_mem_flag.replace("-off_cache", "")

        if past_preheader and in_header:
            if data_markers["benchmark_type"] in line:
                btype = line.split()[2]
            if data_markers["processes_num"] in line:
                proc_num = int(line.split()[3])

        if in_body:
            # The end marker is itself a '#' line, so it has to be tested
            # before comment lines are skipped; otherwise it is never seen
            # and everything after it would still be parsed as table rows.
            if data_markers["end_of_table"] in line:
                break
            if "#" in line or not line.split():
                continue
            rows.append(
                [btype, proc_num]
                + [_parse_number(token) for token in line.split()]
                + [
                    mpi_datatype,
                    mpi_red_datatype,
                    mpi_red_op,
                    creation_time,
                    n_nodes,
                    off_mem_flag,
                ]
            )
    return rows


def main(data_dir="data/", output_csv="data.csv"):
    """Parse every file in *data_dir* and write all rows to *output_csv*.

    Files are processed in sorted name order so that the row order of the
    CSV is the same on every run (``os.listdir`` alone gives no ordering
    guarantee).

    Args:
        data_dir: directory containing the benchmark logs.
        output_csv: path of the CSV file to create.

    Raises:
        ValueError: if a table line is malformed or the rows do not match
            ``column_names``.
        OSError: if *data_dir* or one of its entries cannot be read.
    """
    data = []
    for file_name in sorted(os.listdir(data_dir)):
        with open(os.path.join(data_dir, file_name), "r") as f:
            data.extend(parse_benchmark_lines(f))

    df = pd.DataFrame(data, columns=column_names)
    df.to_csv(output_csv, index=False)


if __name__ == "__main__":
    main()