Compare commits

2 Commits

Author SHA1 Message Date
Erik Fabrizzi
0f7db21d6f forgotten due to .gitignore 2025-10-17 14:02:53 +02:00
Erik Fabrizzi
459f12b086 Added data,plots and plotting scripts 2025-10-17 13:57:23 +02:00
42 changed files with 15906 additions and 11 deletions

11
.gitignore vendored
View File

@@ -1,11 +0,0 @@
# Ignore everything
*
# But not these!
!.gitignore
!README.md
!*.py
!*.template
# Optional: Keep subdirectories and their Python files
!*/

Binary file not shown.

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

Binary file not shown.

Binary file not shown.

After

Width:  |  Height:  |  Size: 645 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 696 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 683 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.1 MiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.1 MiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.2 MiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 376 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 389 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 243 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 266 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 232 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 581 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 203 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 197 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 657 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 637 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 580 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 623 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 586 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 588 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 644 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 527 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 572 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 662 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 648 KiB

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@@ -0,0 +1,175 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "da7c16b4",
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"import matplotlib.pyplot as plt\n",
"import seaborn as sns\n",
"from scipy.optimize import curve_fit\n",
"from matplotlib.pyplot import get_cmap"
]
},
{
"cell_type": "markdown",
"id": "47341b1d",
"metadata": {},
"source": [
"# Alltoall "
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "1cc39aab",
"metadata": {},
"outputs": [],
"source": [
"df_multinode = pd.read_csv(\"../data/data-multi-defand100cflag.csv\", delimiter=\",\")\n",
"\n",
"# Build one combined mask on the full frame instead of chaining [...] lookups:\n",
"# chaining applies masks indexed on df_multinode to an already-filtered frame,\n",
"# which pandas has to re-align (and warns about).\n",
"# NOTE(review): the frame is called df_gather and the section is titled Alltoall,\n",
"# but the filter selects \"Bcast\" rows - confirm the intended benchmark.\n",
"selected_rows = (\n",
"    (df_multinode[\"benchmark_type\"] == \"Bcast\")\n",
"    & (df_multinode[\"msg_size_bytes\"] > 1024)\n",
"    & (df_multinode[\"off_cache_flag\"] == -1)\n",
")\n",
"df_gather = df_multinode[selected_rows]\n",
"df_gather.columns.tolist()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "4336d3c6",
"metadata": {},
"outputs": [],
"source": [
"def model(proc_num, alpha, beta, msg_size):\n",
"    \"\"\"Model time in the units of t_avg_usec: an alpha-scaled transfer term plus the offset 1e6*beta.\n",
"\n",
"    The transfer term is msg_size * (proc_num - 72) * 72 divided by 12.5e3.\n",
"    NOTE(review): 72 and 12.5e3 look like ranks per node and a link rate - confirm.\n",
"    \"\"\"\n",
"    return (alpha * msg_size * (proc_num - 72) * 72) / (12.5 * 1e3) + 1e6*beta\n",
"\n",
"results = []\n",
"msg_sizes = sorted(df_gather['msg_size_bytes'].unique())\n",
"# Lay the panels out on a grid that is at most three columns wide.\n",
"n_rows = int(np.ceil(len(msg_sizes) / 3))\n",
"n_cols = min(len(msg_sizes), 3)\n",
"fig, axes = plt.subplots(n_rows, n_cols, figsize=(5*n_cols, 4*n_rows), squeeze=False)\n",
"cmap = get_cmap('tab10')\n",
"\n",
"for idx, (msg_size, group) in enumerate(df_gather.groupby('msg_size_bytes')):\n",
"    x = group['proc_num'].values.copy()\n",
"    y = group['t_avg_usec'].values.copy()\n",
"    # Sort by process count so the line plot is drawn left to right.\n",
"    sorted_indices = np.argsort(x)\n",
"    x = x[sorted_indices]\n",
"    y = y[sorted_indices]\n",
"    # Fix msg_size for this group; only alpha and beta are fitted (alpha >= 1, beta >= 0).\n",
"    fit_func = lambda proc_num, alpha, beta: model(proc_num, alpha, beta, msg_size)\n",
"    popt, _ = curve_fit(fit_func, x, y, bounds=([1, 0], [np.inf, np.inf]))\n",
"    alpha, beta = popt\n",
"    results.append({'msg_size_bytes': msg_size, 'alpha': alpha, 'beta': beta})\n",
"\n",
"    # Only used by the commented-out overlay / annotation code below.\n",
"    x_fit = np.linspace(min(x), max(x), 100)\n",
"    y_fit = fit_func(x_fit, alpha, beta)\n",
"    y_speed = model(x_fit,1,0,msg_size)\n",
"    row, col = divmod(idx, n_cols)\n",
"    ax = axes[row][col]\n",
"\n",
"    color = cmap(idx % 10)\n",
"    # ax.scatter(x, y/1e6, label='Data', color=color)\n",
"    ax.plot(x, y/1e6, label='Data', color=color)\n",
"    # ax.plot(x_fit, y_fit/1e6, linestyle='--', color=color, alpha=0.5, label='Fit')\n",
"    # ax.plot(x_fit, y_speed/1e6, linestyle='--', color='red', alpha=0.1, label='Fit')\n",
"    ax.set_title(f'msg_size: {msg_size} bytes')\n",
"    ax.set_xlabel('num. proc.')\n",
"    ax.set_ylabel('Average Time [s]')\n",
"    ax.set_xticks(x)\n",
"    ax.grid(True)\n",
"    max_data =(x[-1]-72)*72*msg_size\n",
"    min_data =(x[0]-72)*72*msg_size\n",
"\n",
"    textstr = \"\"\n",
"    # if(max_data > 1e9):\n",
"    #     textstr+=f\"max data = {max_data/1e9:0.2f}GB\\n\" \n",
"    # else:\n",
"    #     textstr+=f\"max data = {max_data/1e6:0.2f}MB\\n\" \n",
"\n",
"    # if(min_data > 1e9):\n",
"    #     textstr+=f\"min data = {min_data/1e9:0.2f}GB\\n\" \n",
"    # else:\n",
"    #     textstr+=f\"min data = {min_data/1e6:0.2f}MB\\n\" \n",
"    # textstr += r\"$\\alpha$\" +f\"= {alpha:.3e}\\n\"+r\"$b_{eff}=$\"+f\"{12.5/alpha:0.3f}Gbps\\n\"+\\\n",
"    #             r\"$\\beta$\"+f\"= {beta:.3e} s\"\n",
"    # ax.text(0.95, 0.05, textstr, transform=ax.transAxes,\n",
"    #         fontsize=10, verticalalignment='bottom',\n",
"    #         horizontalalignment='right',\n",
"    #         bbox=dict(boxstyle='round', facecolor='white', alpha=0.8))\n",
"\n",
"# Hide grid slots that have no message size to show, so the last row has no empty axes.\n",
"for unused_ax in axes.flat[len(msg_sizes):]:\n",
"    unused_ax.set_visible(False)\n",
"\n",
"# NOTE(review): the title mentions dots and dashed fits, but with the overlay lines\n",
"# commented out above only the measured data is drawn, as a solid line.\n",
"fig.suptitle('Alltoall Time Fit per Message Size\\nDots = Data Points | Dashed Lines = Fits\\n off_mem=-1', fontsize=14)\n",
"fig.tight_layout(rect=[0, 0.03, 1, 0.95])\n",
"# plt.savefig(\"plots/alltoall_analysis.png\",dpi=300)\n",
"plt.show()\n",
"\n",
"fit_results = pd.DataFrame(results)\n",
"fit_results['inv_alpha'] = 1 / fit_results['alpha']\n",
"print(fit_results)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "ce632d6f",
"metadata": {},
"outputs": [],
"source": [
"df_gather[df_gather['msg_size_bytes']==1048576]"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "data",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.13.2"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@@ -0,0 +1,69 @@
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.optimize import curve_fit
import matplotlib.cm as cm
def max_transfer_size(msg_size, np_procs, benchmark_type):
    """Estimate the number of bytes moved by one benchmark configuration.

    Parameters
    ----------
    msg_size : number
        Message size in bytes.
    np_procs : number
        Total process count of the run.
    benchmark_type : str
        Benchmark name, e.g. 'Allgather', 'Alltoall', 'Bcast'.

    Returns
    -------
    number
        The estimated transfer size in bytes, or NaN when ``benchmark_type``
        is not one of the known names. NaN (rather than an implicit ``None``)
        keeps a column built with ``DataFrame.apply`` numeric, so it can be
        divided by the timings afterwards.

    Notes
    -----
    The constant 72 is presumably the number of ranks per node (TODO confirm).
    The formulas for Scatter, Gather and Reduce_scatter were marked as
    uncertain ("?") by the original author.
    """
    remote_procs = np_procs - 72

    if benchmark_type in ('Allgather', 'Scatter'):  # Scatter: unverified
        return remote_procs * msg_size
    if benchmark_type == 'Alltoall':
        return 72 * remote_procs * msg_size
    if benchmark_type == 'Bcast':
        return msg_size
    if benchmark_type == 'Gather':  # unverified
        return np_procs * msg_size
    if benchmark_type in ('Reduce_scatter', 'Allreduce', 'Reduce'):  # Reduce_scatter: unverified
        # Same operand order as before so the floating-point result is unchanged.
        return 0.25 * remote_procs * (1 / 72) * msg_size

    # Unknown benchmark: explicit "no value" marker instead of falling through to None.
    return float('nan')
# Bar plot of the average time per benchmark type and message size for the
# runs recorded with off_cache_flag == 100.
data_file = "data/data-multi-defand100cflag.csv"
df_multinode = pd.read_csv(data_file, delimiter=',')
df_multinode_offdef = df_multinode[df_multinode['off_cache_flag'] == 100]

# Drop benchmark names ending in 'v' (presumably the vector variants - TODO confirm).
benchmarks = df_multinode_offdef['benchmark_type'].unique().tolist()
benchmarks = [x for x in benchmarks if not x.endswith('v')]
print(benchmarks)

# One combined mask instead of chained [...][...] lookups: chaining applies a
# mask built on the unfiltered frame to an already-filtered one, which pandas
# has to re-align (and warns about). The copy makes the column assignments
# below operate on an independent frame rather than on a slice.
keep_rows = (
    df_multinode_offdef['benchmark_type'].isin(benchmarks)
    & (df_multinode_offdef['msg_size_bytes'] > 1000)
)
df_multinode_offdef = df_multinode_offdef.loc[keep_rows].copy()

df_multinode_offdef["max_transfer"] = df_multinode_offdef.apply(
    lambda row: max_transfer_size(
        msg_size=row["msg_size_bytes"],
        np_procs=row["proc_num"],
        benchmark_type=row["benchmark_type"]
    ),
    axis=1
)
df_multinode_offdef["bytes/usec"] = (
    df_multinode_offdef["max_transfer"] / df_multinode_offdef["t_avg_usec"]
)

# Exclude Allgather and Alltoall from this figure and keep only the plotted columns.
# NOTE(review): "max_transfer" and "bytes/usec" are not in this column list, so
# they do not reach the figure.
not_excluded = ~df_multinode_offdef['benchmark_type'].isin(['Allgather', 'Alltoall'])
df_multinode_offdef = df_multinode_offdef.loc[
    not_excluded, ['benchmark_type', 'msg_size_bytes', 't_avg_usec', 'proc_num']
]

plt.figure(figsize=(16, 9))
sns.barplot(
    data=df_multinode_offdef,
    x="benchmark_type",
    y="t_avg_usec",
    dodge=True,
    # String hue values give one discrete bar per message size.
    hue=df_multinode_offdef["msg_size_bytes"].astype(str),
)
# plt.yscale("log")
plt.title("Average Time (usec) per Benchmark Type and Message Size")
plt.ylabel("Average Time (usec)")
plt.xlabel("Benchmark Type")
plt.xticks(rotation=45)
plt.legend(title="Message Size (bytes)")
plt.tight_layout()
# plt.show()
plt.savefig("./plots/benchmark_avg_time_barplot.png", dpi=300)
plt.close()

View File

@@ -0,0 +1,64 @@
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from mpl_toolkits.mplot3d import Axes3D
# Bar plot of the "fast" collectives plus a 3D surface of Allreduce timings,
# both for the runs recorded with off_cache_flag == 100.
data_file = "data/data-multi-defand100cflag.csv"
df_multinode = pd.read_csv(data_file, delimiter=',')
df_multinode_offdef = df_multinode[df_multinode['off_cache_flag'] == 100]
df_multinode_offdef = df_multinode_offdef[['benchmark_type','msg_size_bytes','t_avg_usec','proc_num']]

# Drop benchmark names ending in 'v' (presumably the vector variants - TODO confirm).
benchmarks = df_multinode_offdef['benchmark_type'].unique().tolist()
benchmarks = [x for x in benchmarks if not x.endswith('v')]
fast_benchmarks = ["Allreduce","Bcast","Reduce","Reduce_scatter"]

# One combined mask instead of chained [...][...] lookups: chaining applies a
# mask built on the unfiltered frame to an already-filtered one, which pandas
# has to re-align (and warns about).
keep_rows = (
    df_multinode_offdef['benchmark_type'].isin(benchmarks)
    & (df_multinode_offdef['msg_size_bytes'] > 1000)
    & df_multinode_offdef['benchmark_type'].isin(fast_benchmarks)
)
df_multinode_offdef = df_multinode_offdef[keep_rows]

plt.figure(figsize=(16, 9))
sns.barplot(
    data=df_multinode_offdef,
    x="benchmark_type",
    y="t_avg_usec",
    dodge=True,
    # String hue values give one discrete bar per message size.
    hue=df_multinode_offdef["msg_size_bytes"].astype(str),
)
plt.ylim(0)
plt.title("Average Time (usec) per Benchmark Type and Message Size")
plt.ylabel("Average Time (usec)")
plt.xlabel("Benchmark Type")
plt.xticks(rotation=45)
plt.legend(title="Message Size (bytes)")
plt.tight_layout()
plt.savefig("./plots/fbenchmarks_avg_time_barplot.png", dpi=300)
plt.close()

# Surface plot: Allreduce time over (process count, message size), restricted
# to message sizes above 2**17 bytes.
df_allreduce = df_multinode_offdef[df_multinode_offdef["benchmark_type"]=="Allreduce"]
df_allreduce = df_allreduce[['msg_size_bytes','t_avg_usec','proc_num']]
df_allreduce = df_allreduce[df_allreduce['msg_size_bytes']>2**17]
# pivot() requires exactly one row per (msg_size_bytes, proc_num) pair.
pivot = df_allreduce.pivot(index="msg_size_bytes", columns="proc_num", values="t_avg_usec")
X = pivot.columns.values  # proc_num
Y = pivot.index.values    # msg_size_bytes
X, Y = np.meshgrid(X, Y)
Z = pivot.values

fig = plt.figure(figsize=(16, 9))
ax = fig.add_subplot(111, projection='3d')
surf = ax.plot_surface(X, Y, Z, cmap="viridis", edgecolor='k')
cbar = fig.colorbar(surf, ax=ax, shrink=0.6, pad=0.01, location='left')
cbar.set_label("Average Time (μs)")
ax.set_xlabel("Process Count")
ax.set_ylabel("Message Size (B)")
ax.set_zlabel("Average Time (μs)")
ax.set_title("Allreduce")
ax.set_xticks(pivot.columns.values)  # use the actual process count values
ax.set_xticklabels(pivot.columns.values)
ax.set_yticks(Y[:, 0])
ymin, ymax = ax.get_ylim()
# Scale the lower y-limit by 0.8; for a positive limit this leaves extra room
# below the smallest message size. The upper limit is unchanged.
ax.set_ylim(ymin*0.8, ymax)
# Label the message-size ticks as powers of two.
ax.set_yticklabels([f"$2^{{{int(np.log2(v))}}}$" for v in Y[:, 0]])
plt.tight_layout()
plt.savefig("./plots/allreduce_surface.png", dpi=300)
plt.close()

View File

@@ -0,0 +1,66 @@
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from mpl_toolkits.mplot3d import Axes3D
# Bar plot of the "medium" collectives (Gather, Scatter) plus a 3D surface of
# Gather timings, both for the runs recorded with off_cache_flag == 100.
data_file = "data/data-multi-defand100cflag.csv"
df_multinode = pd.read_csv(data_file, delimiter=',')
df_multinode_offdef = df_multinode[df_multinode['off_cache_flag'] == 100]
df_multinode_offdef = df_multinode_offdef[['benchmark_type','msg_size_bytes','t_avg_usec','proc_num']]

# Drop benchmark names ending in 'v' (presumably the vector variants - TODO confirm).
benchmarks = df_multinode_offdef['benchmark_type'].unique().tolist()
benchmarks = [x for x in benchmarks if not x.endswith('v')]
# fast_benchmarks = ["Allreduce","Bcast","Reduce","Reduce_scatter"]
medium_benchmarks = ["Gather","Scatter"]

# One combined mask instead of chained [...][...] lookups: chaining applies a
# mask built on the unfiltered frame to an already-filtered one, which pandas
# has to re-align (and warns about).
keep_rows = (
    df_multinode_offdef['benchmark_type'].isin(benchmarks)
    & (df_multinode_offdef['msg_size_bytes'] > 1000)
    & df_multinode_offdef['benchmark_type'].isin(medium_benchmarks)
)
df_multinode_offdef = df_multinode_offdef[keep_rows]

plt.figure(figsize=(16, 9))
sns.barplot(
    data=df_multinode_offdef,
    x="benchmark_type",
    y="t_avg_usec",
    dodge=True,
    # String hue values give one discrete bar per message size.
    hue=df_multinode_offdef["msg_size_bytes"].astype(str),
)
plt.ylim(0)
plt.title("Average Time (usec) per Benchmark Type and Message Size")
plt.ylabel("Average Time (usec)")
plt.xlabel("Benchmark Type")
plt.xticks(rotation=45)
plt.legend(title="Message Size (bytes)")
plt.tight_layout()
plt.savefig("./plots/mbenchmarks_avg_time_barplot.png", dpi=300)
plt.close()

# Surface plot: Gather time over (process count, message size), restricted to
# message sizes above 2**17 bytes.
df_gather = df_multinode_offdef[df_multinode_offdef['benchmark_type']=='Gather']
df_gather = df_gather[['msg_size_bytes','t_avg_usec','proc_num']]
df_gather = df_gather[df_gather['msg_size_bytes']>2**17]
# pivot() requires exactly one row per (msg_size_bytes, proc_num) pair.
pivot = df_gather.pivot(index="msg_size_bytes", columns="proc_num", values="t_avg_usec")
X = pivot.columns.values  # proc_num
Y = pivot.index.values    # msg_size_bytes
X, Y = np.meshgrid(X, Y)
Z = pivot.values

fig = plt.figure(figsize=(16, 9))
ax = fig.add_subplot(111, projection='3d')
surf = ax.plot_surface(X, Y, Z, cmap="viridis", edgecolor='k')
cbar = fig.colorbar(surf, ax=ax, shrink=0.6, pad=0.01, location='left')
cbar.set_label("Average Time (μs)")
ax.set_xlabel("Process Count")
ax.set_ylabel("Message Size (B)")
ax.set_zlabel("Average Time (μs)")
ax.set_title("Gather")
ax.set_xticks(pivot.columns.values)  # use the actual process count values
ax.set_xticklabels(pivot.columns.values)
ax.set_yticks(Y[:, 0])
ymin, ymax = ax.get_ylim()
# Scale the lower y-limit by 0.8; for a positive limit this leaves extra room
# below the smallest message size. The upper limit is unchanged.
ax.set_ylim(ymin*0.8, ymax)
# Label the message-size ticks as powers of two.
ax.set_yticklabels([f"$2^{{{int(np.log2(v))}}}$" for v in Y[:, 0]])
plt.tight_layout()
plt.savefig("./plots/gather_surface.png", dpi=300)
plt.close()

View File

@@ -0,0 +1,93 @@
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from mpl_toolkits.mplot3d import Axes3D
# Bar plot of the "slow" collectives (Alltoall, Allgather) plus one 3D surface
# per benchmark, all for the runs recorded with off_cache_flag == 100.


def save_surface_plot(df, benchmark, out_path):
    """Save a 3D surface of average time over process count and message size.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with the columns 'benchmark_type', 'msg_size_bytes',
        't_avg_usec' and 'proc_num'.
    benchmark : str
        Value of 'benchmark_type' to plot; also used as the axes title.
    out_path : str
        File the figure is written to (300 dpi).

    Returns
    -------
    pandas.DataFrame
        The rows that were plotted: the given benchmark, message sizes above
        2**17 bytes, columns 'msg_size_bytes', 't_avg_usec', 'proc_num'.

    Raises
    ------
    ValueError
        From ``DataFrame.pivot`` if a (msg_size_bytes, proc_num) pair occurs
        more than once.
    """
    df_bench = df[df['benchmark_type'] == benchmark]
    df_bench = df_bench[['msg_size_bytes', 't_avg_usec', 'proc_num']]
    df_bench = df_bench[df_bench['msg_size_bytes'] > 2**17]
    pivot = df_bench.pivot(index="msg_size_bytes", columns="proc_num", values="t_avg_usec")
    X = pivot.columns.values  # proc_num
    Y = pivot.index.values    # msg_size_bytes
    X, Y = np.meshgrid(X, Y)
    Z = pivot.values

    fig = plt.figure(figsize=(16, 9))
    ax = fig.add_subplot(111, projection='3d')
    surf = ax.plot_surface(X, Y, Z, cmap="viridis", edgecolor='k')
    cbar = fig.colorbar(surf, ax=ax, shrink=0.6, pad=0.01, location='left')
    cbar.set_label("Average Time (μs)")
    ax.set_xlabel("Process Count")
    ax.set_ylabel("Message Size (B)")
    ax.set_zlabel("Average Time (μs)")
    ax.set_title(benchmark)
    ax.set_xticks(pivot.columns.values)  # use the actual process count values
    ax.set_xticklabels(pivot.columns.values)
    ax.set_yticks(Y[:, 0])
    ymin, ymax = ax.get_ylim()
    # Scale the lower y-limit by 0.8; for a positive limit this leaves extra
    # room below the smallest message size. The upper limit is unchanged.
    ax.set_ylim(ymin*0.8, ymax)
    # Label the message-size ticks as powers of two.
    ax.set_yticklabels([f"$2^{{{int(np.log2(v))}}}$" for v in Y[:, 0]])
    plt.tight_layout()
    plt.savefig(out_path, dpi=300)
    plt.close()
    return df_bench


data_file = "data/data-multi-defand100cflag.csv"
df_multinode = pd.read_csv(data_file, delimiter=',')
df_multinode_offdef = df_multinode[df_multinode['off_cache_flag'] == 100]
df_multinode_offdef = df_multinode_offdef[['benchmark_type','msg_size_bytes','t_avg_usec','proc_num']]

# Drop benchmark names ending in 'v' (presumably the vector variants - TODO confirm).
benchmarks = df_multinode_offdef['benchmark_type'].unique().tolist()
benchmarks = [x for x in benchmarks if not x.endswith('v')]
slow_benchmarks = ["Alltoall","Allgather"]

# One combined mask instead of chained [...][...] lookups: chaining applies a
# mask built on the unfiltered frame to an already-filtered one, which pandas
# has to re-align (and warns about).
keep_rows = (
    df_multinode_offdef['benchmark_type'].isin(benchmarks)
    & (df_multinode_offdef['msg_size_bytes'] > 1000)
    & df_multinode_offdef['benchmark_type'].isin(slow_benchmarks)
)
df_multinode_offdef = df_multinode_offdef[keep_rows]

plt.figure(figsize=(16, 9))
sns.barplot(
    data=df_multinode_offdef,
    x="benchmark_type",
    y="t_avg_usec",
    dodge=True,
    # String hue values give one discrete bar per message size.
    hue=df_multinode_offdef["msg_size_bytes"].astype(str),
)
plt.ylim(0)
plt.title("Average Time (usec) per Benchmark Type and Message Size")
plt.ylabel("Average Time (usec)")
plt.xlabel("Benchmark Type")
plt.xticks(rotation=45)
plt.legend(title="Message Size (bytes)")
plt.tight_layout()
plt.savefig("./plots/sbenchmarks_avg_time_barplot.png", dpi=300)
plt.close()

# Same surface figure for both slow collectives; only the benchmark and the
# output file differ.
df_alltoall = save_surface_plot(df_multinode_offdef, 'Alltoall', "./plots/alltoall_surface.png")
df_allgather = save_surface_plot(df_multinode_offdef, 'Allgather', "./plots/allgather_surface.png")