From 459f12b086f8d4c4ba335a640e641f2cb52b2a5b Mon Sep 17 00:00:00 2001
From: Erik Fabrizzi
Date: Fri, 17 Oct 2025 13:57:23 +0200
Subject: [PATCH] Added data,plots and plotting scripts

---
Review note: the scripts below were revised (combined boolean masks,
creation of the plots directory, shared surface-plot helper, docstrings).
The blob ids on the "index" lines are carried over from the original patch
and do not match the revised file contents.

 .../python/scripts/plot_all.py                | 106 +++++++++++++++++
 .../python/scripts/plot_fast_group.py         | 105 +++++++++++++++++
 .../python/scripts/plot_mid_group.py          | 105 +++++++++++++++++
 .../python/scripts/plot_slow_group.py         | 107 ++++++++++++++++++
 4 files changed, 423 insertions(+)
 create mode 100644 results-and-plotting/python/scripts/plot_all.py
 create mode 100644 results-and-plotting/python/scripts/plot_fast_group.py
 create mode 100644 results-and-plotting/python/scripts/plot_mid_group.py
 create mode 100644 results-and-plotting/python/scripts/plot_slow_group.py

diff --git a/results-and-plotting/python/scripts/plot_all.py b/results-and-plotting/python/scripts/plot_all.py
new file mode 100644
index 0000000..82639d1
--- /dev/null
+++ b/results-and-plotting/python/scripts/plot_all.py
@@ -0,0 +1,106 @@
+"""Bar plot of average benchmark time per benchmark type and message size.
+
+Reads data/data-multi-defand100cflag.csv, keeps the rows with
+off_cache_flag == 100 and msg_size_bytes > 1000, drops the benchmark types
+whose name ends in 'v' as well as Allgather and Alltoall, and saves the plot
+to ./plots/benchmark_avg_time_barplot.png. Paths are relative to the current
+working directory.
+"""
+import os
+
+import pandas as pd
+import numpy as np
+import matplotlib.pyplot as plt
+import seaborn as sns
+from scipy.optimize import curve_fit
+import matplotlib.cm as cm
+
+
+def max_transfer_size(msg_size, np_procs, benchmark_type, procs_per_node=72):
+    """Return the modelled transfer volume of one benchmark run.
+
+    The per-benchmark formulas are kept as written; the ones tagged "?" were
+    marked as uncertain by the original author.
+
+    Args:
+        msg_size: Value of the msg_size_bytes column.
+        np_procs: Value of the proc_num column.
+        benchmark_type: Value of the benchmark_type column, e.g. 'Bcast'.
+        procs_per_node: Constant used by the formulas. Defaults to the
+            previously hard-coded 72 (presumably processes per node;
+            TODO confirm).
+
+    Returns:
+        The modelled transfer size, or NaN when benchmark_type has no
+        formula (the original fell through and returned None).
+    """
+    remote_procs = np_procs - procs_per_node
+    if benchmark_type == 'Allgather':
+        return remote_procs * msg_size
+    elif benchmark_type == 'Scatter':
+        return remote_procs * msg_size  # ?
+    elif benchmark_type == 'Alltoall':
+        return procs_per_node * remote_procs * msg_size
+    elif benchmark_type == 'Bcast':
+        return msg_size
+    elif benchmark_type == 'Gather':
+        return np_procs * msg_size  # ?
+    elif benchmark_type in ('Reduce_scatter', 'Allreduce', 'Reduce'):
+        # Only the Reduce_scatter formula was tagged "?" in the original.
+        return 0.25 * remote_procs * (1 / procs_per_node) * msg_size
+    return np.nan
+
+
+data_file = "data/data-multi-defand100cflag.csv"
+df_multinode = pd.read_csv(data_file, delimiter=',')
+df_multinode_offdef = df_multinode[df_multinode['off_cache_flag'] == 100]
+benchmarks = df_multinode_offdef['benchmark_type'].unique().tolist()
+# Skip the benchmark types whose name ends in 'v'.
+benchmarks = [x for x in benchmarks if not x.endswith('v')]
+print(benchmarks)
+# Build one combined mask from the frame that is being indexed. Chaining
+# df[mask_a][mask_b] applied mask_b to an already filtered frame, which makes
+# pandas reindex the mask and emit a UserWarning. The copy avoids a
+# SettingWithCopyWarning when the derived columns are added below.
+keep = (df_multinode_offdef['benchmark_type'].isin(benchmarks)
+        & (df_multinode_offdef['msg_size_bytes'] > 1000))
+df_multinode_offdef = df_multinode_offdef[keep].copy()
+
+df_multinode_offdef["max_transfer"] = df_multinode_offdef.apply(
+    lambda row: max_transfer_size(
+        msg_size=row["msg_size_bytes"],
+        np_procs=row["proc_num"],
+        benchmark_type=row["benchmark_type"]
+    ),
+    axis=1
+)
+
+df_multinode_offdef["bytes/usec"] = df_multinode_offdef["max_transfer"] / \
+    df_multinode_offdef["t_avg_usec"]
+# NOTE: the column selection below drops "max_transfer" and "bytes/usec"
+# again, so the plot does not use them.
+df_multinode_offdef = df_multinode_offdef[
+    ~df_multinode_offdef['benchmark_type'].isin(['Allgather', 'Alltoall'])]
+df_multinode_offdef = df_multinode_offdef[
+    ['benchmark_type', 'msg_size_bytes', 't_avg_usec', 'proc_num']]
+
+plt.figure(figsize=(16, 9))
+sns.barplot(
+    data=df_multinode_offdef,
+    x="benchmark_type",
+    y="t_avg_usec",
+    dodge=True,
+    hue=df_multinode_offdef["msg_size_bytes"].astype(str),
+)
+# plt.yscale("log")
+plt.title("Average Time (usec) per Benchmark Type and Message Size")
+plt.ylabel("Average Time (usec)")
+plt.xlabel("Benchmark Type")
+plt.xticks(rotation=45)
+plt.legend(title="Message Size (bytes)")
+plt.tight_layout()
+# plt.show()
+# savefig does not create missing directories.
+os.makedirs("./plots", exist_ok=True)
+plt.savefig("./plots/benchmark_avg_time_barplot.png", dpi=300)
+plt.close()
diff --git a/results-and-plotting/python/scripts/plot_fast_group.py b/results-and-plotting/python/scripts/plot_fast_group.py
new file mode 100644
index 0000000..e52d616
--- /dev/null
+++ b/results-and-plotting/python/scripts/plot_fast_group.py
@@ -0,0 +1,105 @@
+"""Plots for the fast group: Allreduce, Bcast, Reduce, Reduce_scatter.
+
+Reads data/data-multi-defand100cflag.csv, keeps the rows with
+off_cache_flag == 100 and msg_size_bytes > 1000, and writes a grouped bar
+plot plus an Allreduce surface plot into ./plots. Paths are relative to the
+current working directory.
+"""
+import os
+
+import pandas as pd
+import matplotlib.pyplot as plt
+import seaborn as sns
+import numpy as np
+# Name unused; the import registers the '3d' projection on Matplotlib < 3.2.
+from mpl_toolkits.mplot3d import Axes3D
+
+
+def plot_surface(df, benchmark, out_file):
+    """Save a 3D surface of average time over process count and message size.
+
+    Only rows with msg_size_bytes > 2**17 are plotted.
+
+    Args:
+        df: Frame with the columns benchmark_type, msg_size_bytes,
+            t_avg_usec and proc_num.
+        benchmark: benchmark_type value to plot; also used as the title.
+        out_file: Path of the image file to write.
+
+    Raises:
+        ValueError: From DataFrame.pivot when a (msg_size_bytes, proc_num)
+            pair occurs more than once for the benchmark.
+    """
+    df_bench = df[df['benchmark_type'] == benchmark]
+    df_bench = df_bench[['msg_size_bytes', 't_avg_usec', 'proc_num']]
+    df_bench = df_bench[df_bench['msg_size_bytes'] > 2**17]
+    pivot = df_bench.pivot(
+        index="msg_size_bytes", columns="proc_num", values="t_avg_usec")
+    X = pivot.columns.values  # proc_num
+    Y = pivot.index.values  # msg_size_bytes
+    X, Y = np.meshgrid(X, Y)
+    Z = pivot.values
+
+    fig = plt.figure(figsize=(16, 9))
+    ax = fig.add_subplot(111, projection='3d')
+    surf = ax.plot_surface(X, Y, Z, cmap="viridis", edgecolor='k')
+    cbar = fig.colorbar(surf, ax=ax, shrink=0.6, pad=0.01, location='left')
+    cbar.set_label("Average Time (μs)")
+    ax.set_xlabel("Process Count")
+    ax.set_ylabel("Message Size (B)")
+    ax.set_zlabel("Average Time (μs)")
+    ax.set_title(benchmark)
+    ax.set_xticks(pivot.columns.values)  # use the actual process count values
+    ax.set_xticklabels(pivot.columns.values)
+    ax.set_yticks(Y[:, 0])
+    ymin, ymax = ax.get_ylim()
+    # Scale the lower y-limit by 0.8; the upper limit stays unchanged.
+    ax.set_ylim(ymin*0.8, ymax)
+    # log2 is truncated, so the labels assume power-of-two message sizes.
+    ax.set_yticklabels([f"$2^{{{int(np.log2(v))}}}$" for v in Y[:, 0]])
+    plt.tight_layout()
+    plt.savefig(out_file, dpi=300)
+    plt.close()
+
+
+data_file = "data/data-multi-defand100cflag.csv"
+df_multinode = pd.read_csv(data_file, delimiter=',')
+df_multinode_offdef = df_multinode[df_multinode['off_cache_flag'] == 100]
+df_multinode_offdef = df_multinode_offdef[
+    ['benchmark_type', 'msg_size_bytes', 't_avg_usec', 'proc_num']]
+
+benchmarks = df_multinode_offdef['benchmark_type'].unique().tolist()
+# Skip the benchmark types whose name ends in 'v'.
+benchmarks = [x for x in benchmarks if not x.endswith('v')]
+fast_benchmarks = ["Allreduce", "Bcast", "Reduce", "Reduce_scatter"]
+# One combined mask built from the frame being indexed; chaining
+# df[mask_a][mask_b] made pandas reindex mask_b and emit a UserWarning.
+keep = (df_multinode_offdef['benchmark_type'].isin(benchmarks)
+        & (df_multinode_offdef['msg_size_bytes'] > 1000)
+        & df_multinode_offdef['benchmark_type'].isin(fast_benchmarks))
+df_multinode_offdef = df_multinode_offdef[keep]
+
+# savefig does not create missing directories.
+os.makedirs("./plots", exist_ok=True)
+
+plt.figure(figsize=(16, 9))
+sns.barplot(
+    data=df_multinode_offdef,
+    x="benchmark_type",
+    y="t_avg_usec",
+    dodge=True,
+    hue=df_multinode_offdef["msg_size_bytes"].astype(str),
+)
+
+plt.ylim(0)
+plt.title("Average Time (usec) per Benchmark Type and Message Size")
+plt.ylabel("Average Time (usec)")
+plt.xlabel("Benchmark Type")
+plt.xticks(rotation=45)
+plt.legend(title="Message Size (bytes)")
+plt.tight_layout()
+plt.savefig("./plots/fbenchmarks_avg_time_barplot.png", dpi=300)
+plt.close()
+
+plot_surface(
+    df_multinode_offdef, "Allreduce", "./plots/allreduce_surface.png")
diff --git a/results-and-plotting/python/scripts/plot_mid_group.py b/results-and-plotting/python/scripts/plot_mid_group.py
new file mode 100644
index 0000000..2973ad4
--- /dev/null
+++ b/results-and-plotting/python/scripts/plot_mid_group.py
@@ -0,0 +1,105 @@
+"""Plots for the medium group: Gather and Scatter.
+
+Reads data/data-multi-defand100cflag.csv, keeps the rows with
+off_cache_flag == 100 and msg_size_bytes > 1000, and writes a grouped bar
+plot plus a Gather surface plot into ./plots. Paths are relative to the
+current working directory.
+"""
+import os
+
+import pandas as pd
+import matplotlib.pyplot as plt
+import seaborn as sns
+import numpy as np
+# Name unused; the import registers the '3d' projection on Matplotlib < 3.2.
+from mpl_toolkits.mplot3d import Axes3D
+
+
+def plot_surface(df, benchmark, out_file):
+    """Save a 3D surface of average time over process count and message size.
+
+    Only rows with msg_size_bytes > 2**17 are plotted.
+
+    Args:
+        df: Frame with the columns benchmark_type, msg_size_bytes,
+            t_avg_usec and proc_num.
+        benchmark: benchmark_type value to plot; also used as the title.
+        out_file: Path of the image file to write.
+
+    Raises:
+        ValueError: From DataFrame.pivot when a (msg_size_bytes, proc_num)
+            pair occurs more than once for the benchmark.
+    """
+    df_bench = df[df['benchmark_type'] == benchmark]
+    df_bench = df_bench[['msg_size_bytes', 't_avg_usec', 'proc_num']]
+    df_bench = df_bench[df_bench['msg_size_bytes'] > 2**17]
+    pivot = df_bench.pivot(
+        index="msg_size_bytes", columns="proc_num", values="t_avg_usec")
+    X = pivot.columns.values  # proc_num
+    Y = pivot.index.values  # msg_size_bytes
+    X, Y = np.meshgrid(X, Y)
+    Z = pivot.values
+
+    fig = plt.figure(figsize=(16, 9))
+    ax = fig.add_subplot(111, projection='3d')
+    surf = ax.plot_surface(X, Y, Z, cmap="viridis", edgecolor='k')
+    cbar = fig.colorbar(surf, ax=ax, shrink=0.6, pad=0.01, location='left')
+    cbar.set_label("Average Time (μs)")
+    ax.set_xlabel("Process Count")
+    ax.set_ylabel("Message Size (B)")
+    ax.set_zlabel("Average Time (μs)")
+    ax.set_title(benchmark)
+    ax.set_xticks(pivot.columns.values)  # use the actual process count values
+    ax.set_xticklabels(pivot.columns.values)
+    ax.set_yticks(Y[:, 0])
+    ymin, ymax = ax.get_ylim()
+    # Scale the lower y-limit by 0.8; the upper limit stays unchanged.
+    ax.set_ylim(ymin*0.8, ymax)
+    # log2 is truncated, so the labels assume power-of-two message sizes.
+    ax.set_yticklabels([f"$2^{{{int(np.log2(v))}}}$" for v in Y[:, 0]])
+    plt.tight_layout()
+    plt.savefig(out_file, dpi=300)
+    plt.close()
+
+
+data_file = "data/data-multi-defand100cflag.csv"
+df_multinode = pd.read_csv(data_file, delimiter=',')
+df_multinode_offdef = df_multinode[df_multinode['off_cache_flag'] == 100]
+df_multinode_offdef = df_multinode_offdef[
+    ['benchmark_type', 'msg_size_bytes', 't_avg_usec', 'proc_num']]
+
+benchmarks = df_multinode_offdef['benchmark_type'].unique().tolist()
+# Skip the benchmark types whose name ends in 'v'.
+benchmarks = [x for x in benchmarks if not x.endswith('v')]
+medium_benchmarks = ["Gather", "Scatter"]
+# One combined mask built from the frame being indexed; chaining
+# df[mask_a][mask_b] made pandas reindex mask_b and emit a UserWarning.
+keep = (df_multinode_offdef['benchmark_type'].isin(benchmarks)
+        & (df_multinode_offdef['msg_size_bytes'] > 1000)
+        & df_multinode_offdef['benchmark_type'].isin(medium_benchmarks))
+df_multinode_offdef = df_multinode_offdef[keep]
+
+# savefig does not create missing directories.
+os.makedirs("./plots", exist_ok=True)
+
+plt.figure(figsize=(16, 9))
+sns.barplot(
+    data=df_multinode_offdef,
+    x="benchmark_type",
+    y="t_avg_usec",
+    dodge=True,
+    hue=df_multinode_offdef["msg_size_bytes"].astype(str),
+)
+
+plt.ylim(0)
+plt.title("Average Time (usec) per Benchmark Type and Message Size")
+plt.ylabel("Average Time (usec)")
+plt.xlabel("Benchmark Type")
+plt.xticks(rotation=45)
+plt.legend(title="Message Size (bytes)")
+plt.tight_layout()
+plt.savefig("./plots/mbenchmarks_avg_time_barplot.png", dpi=300)
+plt.close()
+
+plot_surface(
+    df_multinode_offdef, "Gather", "./plots/gather_surface.png")
diff --git a/results-and-plotting/python/scripts/plot_slow_group.py b/results-and-plotting/python/scripts/plot_slow_group.py
new file mode 100644
index 0000000..f31f9cb
--- /dev/null
+++ b/results-and-plotting/python/scripts/plot_slow_group.py
@@ -0,0 +1,107 @@
+"""Plots for the slow group: Alltoall and Allgather.
+
+Reads data/data-multi-defand100cflag.csv, keeps the rows with
+off_cache_flag == 100 and msg_size_bytes > 1000, and writes a grouped bar
+plot plus Alltoall and Allgather surface plots into ./plots. Paths are
+relative to the current working directory.
+"""
+import os
+
+import pandas as pd
+import matplotlib.pyplot as plt
+import seaborn as sns
+import numpy as np
+# Name unused; the import registers the '3d' projection on Matplotlib < 3.2.
+from mpl_toolkits.mplot3d import Axes3D
+
+
+def plot_surface(df, benchmark, out_file):
+    """Save a 3D surface of average time over process count and message size.
+
+    Only rows with msg_size_bytes > 2**17 are plotted.
+
+    Args:
+        df: Frame with the columns benchmark_type, msg_size_bytes,
+            t_avg_usec and proc_num.
+        benchmark: benchmark_type value to plot; also used as the title.
+        out_file: Path of the image file to write.
+
+    Raises:
+        ValueError: From DataFrame.pivot when a (msg_size_bytes, proc_num)
+            pair occurs more than once for the benchmark.
+    """
+    df_bench = df[df['benchmark_type'] == benchmark]
+    df_bench = df_bench[['msg_size_bytes', 't_avg_usec', 'proc_num']]
+    df_bench = df_bench[df_bench['msg_size_bytes'] > 2**17]
+    pivot = df_bench.pivot(
+        index="msg_size_bytes", columns="proc_num", values="t_avg_usec")
+    X = pivot.columns.values  # proc_num
+    Y = pivot.index.values  # msg_size_bytes
+    X, Y = np.meshgrid(X, Y)
+    Z = pivot.values
+
+    fig = plt.figure(figsize=(16, 9))
+    ax = fig.add_subplot(111, projection='3d')
+    surf = ax.plot_surface(X, Y, Z, cmap="viridis", edgecolor='k')
+    cbar = fig.colorbar(surf, ax=ax, shrink=0.6, pad=0.01, location='left')
+    cbar.set_label("Average Time (μs)")
+    ax.set_xlabel("Process Count")
+    ax.set_ylabel("Message Size (B)")
+    ax.set_zlabel("Average Time (μs)")
+    ax.set_title(benchmark)
+    ax.set_xticks(pivot.columns.values)  # use the actual process count values
+    ax.set_xticklabels(pivot.columns.values)
+    ax.set_yticks(Y[:, 0])
+    ymin, ymax = ax.get_ylim()
+    # Scale the lower y-limit by 0.8; the upper limit stays unchanged.
+    ax.set_ylim(ymin*0.8, ymax)
+    # log2 is truncated, so the labels assume power-of-two message sizes.
+    ax.set_yticklabels([f"$2^{{{int(np.log2(v))}}}$" for v in Y[:, 0]])
+    plt.tight_layout()
+    plt.savefig(out_file, dpi=300)
+    plt.close()
+
+
+data_file = "data/data-multi-defand100cflag.csv"
+df_multinode = pd.read_csv(data_file, delimiter=',')
+df_multinode_offdef = df_multinode[df_multinode['off_cache_flag'] == 100]
+df_multinode_offdef = df_multinode_offdef[
+    ['benchmark_type', 'msg_size_bytes', 't_avg_usec', 'proc_num']]
+
+benchmarks = df_multinode_offdef['benchmark_type'].unique().tolist()
+# Skip the benchmark types whose name ends in 'v'.
+benchmarks = [x for x in benchmarks if not x.endswith('v')]
+slow_benchmarks = ["Alltoall", "Allgather"]
+# One combined mask built from the frame being indexed; chaining
+# df[mask_a][mask_b] made pandas reindex mask_b and emit a UserWarning.
+keep = (df_multinode_offdef['benchmark_type'].isin(benchmarks)
+        & (df_multinode_offdef['msg_size_bytes'] > 1000)
+        & df_multinode_offdef['benchmark_type'].isin(slow_benchmarks))
+df_multinode_offdef = df_multinode_offdef[keep]
+
+# savefig does not create missing directories.
+os.makedirs("./plots", exist_ok=True)
+
+plt.figure(figsize=(16, 9))
+sns.barplot(
+    data=df_multinode_offdef,
+    x="benchmark_type",
+    y="t_avg_usec",
+    dodge=True,
+    hue=df_multinode_offdef["msg_size_bytes"].astype(str),
+)
+
+plt.ylim(0)
+plt.title("Average Time (usec) per Benchmark Type and Message Size")
+plt.ylabel("Average Time (usec)")
+plt.xlabel("Benchmark Type")
+plt.xticks(rotation=45)
+plt.legend(title="Message Size (bytes)")
+plt.tight_layout()
+plt.savefig("./plots/sbenchmarks_avg_time_barplot.png", dpi=300)
+plt.close()
+
+plot_surface(
+    df_multinode_offdef, "Alltoall", "./plots/alltoall_surface.png")
+plot_surface(
+    df_multinode_offdef, "Allgather", "./plots/allgather_surface.png")