Initial commit

This commit is contained in:
Bole Ma
2026-02-05 23:18:26 +01:00
commit 747c92ac6b
31 changed files with 4220 additions and 0 deletions

View File

@@ -0,0 +1,37 @@
{
"model_name": "Qwen/Qwen3-4B",
"gpu_name": "NVIDIA H200",
"attention_implementation": "sdpa",
"num_requests": 10,
"prompt_length": 512,
"generation_length": 100,
"prefill": {
"stage_name": "prefill",
"duration_ms": 247.9969559935853,
"tokens_processed": 5120,
"tokens_per_second": 20645.414696672466,
"energy_joules": 73.83399999141693,
"energy_per_token": 0.014420703123323619,
"avg_power_watts": 222.33737204549297,
"peak_memory_gb": 46.1165771484375,
"avg_gpu_util_percent": 40.0
},
"decode": {
"stage_name": "decode",
"duration_ms": 23003.622506046668,
"tokens_processed": 1000,
"tokens_per_second": 43.47141411041425,
"energy_joules": 4033.3500000089407,
"energy_per_token": 4.033350000008941,
"avg_power_watts": 174.6335604209662,
"peak_memory_gb": 46.1165771484375,
"avg_gpu_util_percent": 40.0
},
"e2e_latency_ms": 2325.1619462040253,
"e2e_tokens_per_second": 263.20747292425324,
"e2e_energy_joules": 4107.184000000358,
"e2e_energy_per_token": 0.6711084967320846,
"ttft_ms": 24.79969559935853,
"itl_ms": 23.003622506046668,
"timestamp": 1769149520.7919798
}

View File

@@ -0,0 +1,47 @@
{
"model_name": "Qwen/Qwen3-4B",
"gpu_name": "NVIDIA H200",
"attention_implementation": "sdpa",
"batch_size": 3,
"sequence_length": 2048,
"num_steps": 10,
"forward": {
"stage_name": "forward",
"duration_ms": 1615.8598741167225,
"tokens_processed": 61440,
"tokens_per_second": 38023.09902248482,
"energy_joules": 873.9250000119209,
"energy_per_token": 0.014224039713735693,
"avg_power_watts": 541.9081076256928,
"peak_memory_gb": 76.5540771484375,
"avg_gpu_util_percent": 100.0
},
"backward": {
"stage_name": "backward",
"duration_ms": 3462.180594098754,
"tokens_processed": 61440,
"tokens_per_second": 17746.04135460864,
"energy_joules": 1696.024000003934,
"energy_per_token": 0.027604557291730693,
"avg_power_watts": 472.8399628680292,
"peak_memory_gb": 76.5540771484375,
"avg_gpu_util_percent": 100.0
},
"optimizer": {
"stage_name": "optimizer",
"duration_ms": 551.849422918167,
"tokens_processed": 61440,
"tokens_per_second": 111334.71821915968,
"energy_joules": 316.88299998641014,
"energy_per_token": 0.005157600911237144,
"avg_power_watts": 499.2301039455484,
"peak_memory_gb": 76.5540771484375,
"avg_gpu_util_percent": 100.0
},
"total_duration_ms": 5629.889891133644,
"total_tokens": 61440,
"total_tokens_per_second": 10913.179687005982,
"total_energy_joules": 2886.832000002265,
"total_energy_per_token": 0.04698619791670353,
"timestamp": 1769149487.0005488
}