Initial commit

Bole Ma
2026-02-05 23:18:26 +01:00
commit 747c92ac6b
31 changed files with 4220 additions and 0 deletions

@@ -0,0 +1,37 @@
{
  "model_name": "Qwen/Qwen3-4B",
  "gpu_name": "NVIDIA H200",
  "attention_implementation": "flash_attention_3_hopper",
  "num_requests": 10,
  "prompt_length": 512,
  "generation_length": 100,
  "prefill": {
    "stage_name": "prefill",
    "duration_ms": 323.8773119999223,
    "tokens_processed": 5120,
    "tokens_per_second": 15808.455270868828,
    "energy_joules": 98.1449999999968,
    "energy_per_token": 0.019168945312499373,
    "avg_power_watts": 250.96736239598317,
    "peak_memory_gb": 46.1302490234375,
    "avg_gpu_util_percent": 32.2
  },
  "decode": {
    "stage_name": "decode",
    "duration_ms": 30558.618001000013,
    "tokens_processed": 1000,
    "tokens_per_second": 32.72399294913388,
    "energy_joules": 4828.459999999999,
    "energy_per_token": 4.828459999999999,
    "avg_power_watts": 157.61927190444868,
    "peak_memory_gb": 46.1302490234375,
    "avg_gpu_util_percent": 32.2
  },
  "e2e_latency_ms": 3088.2495312999936,
  "e2e_tokens_per_second": 198.17051497855476,
  "e2e_energy_joules": 4926.604999999996,
  "e2e_energy_per_token": 0.8050008169934634,
  "ttft_ms": 32.38773119999223,
  "itl_ms": 30.558618001000013,
  "timestamp": 1768541964.4743361
}
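
The end-to-end fields in this record appear to be derived from the two stage blocks: ttft_ms matches prefill duration_ms / num_requests, itl_ms matches decode duration_ms / tokens_processed, and the e2e_* fields match per-request and summed stage values. A minimal Python sketch of those assumed derivations, usable as a consistency check on records like the one above (the file name is hypothetical; avg_power_watts is left out because it does not equal energy_joules / duration here, so it is presumably a mean of discrete power samples rather than a derived field):

import json

def check_inference_record(r, tol=1e-6):
    prefill, decode = r["prefill"], r["decode"]
    n = r["num_requests"]
    total_tokens = prefill["tokens_processed"] + decode["tokens_processed"]
    total_ms = prefill["duration_ms"] + decode["duration_ms"]
    checks = {
        # Per-stage throughput and energy intensity.
        "prefill_tokens_per_second": (
            prefill["tokens_per_second"],
            prefill["tokens_processed"] / (prefill["duration_ms"] / 1e3)),
        "decode_energy_per_token": (
            decode["energy_per_token"],
            decode["energy_joules"] / decode["tokens_processed"]),
        # End-to-end fields, assumed to be per-request / summed over stages.
        "ttft_ms": (r["ttft_ms"], prefill["duration_ms"] / n),
        "itl_ms": (r["itl_ms"],
                   decode["duration_ms"] / decode["tokens_processed"]),
        "e2e_latency_ms": (r["e2e_latency_ms"], total_ms / n),
        "e2e_tokens_per_second": (
            r["e2e_tokens_per_second"], total_tokens / (total_ms / 1e3)),
        "e2e_energy_joules": (
            r["e2e_energy_joules"],
            prefill["energy_joules"] + decode["energy_joules"]),
        "e2e_energy_per_token": (
            r["e2e_energy_per_token"],
            r["e2e_energy_joules"] / total_tokens),
    }
    for name, (reported, derived) in checks.items():
        assert abs(reported - derived) <= tol * max(1.0, abs(derived)), name

with open("inference_result.json") as f:  # hypothetical file name
    check_inference_record(json.load(f))

All eight checks pass on the record above, which supports the assumed derivations without confirming them.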

@@ -0,0 +1,47 @@
{
  "model_name": "Qwen/Qwen3-4B",
  "gpu_name": "NVIDIA H200",
  "attention_implementation": "flash_attention_3_hopper",
  "batch_size": 3,
  "sequence_length": 2048,
  "num_steps": 10,
  "forward": {
    "stage_name": "forward",
    "duration_ms": 1605.9521619997668,
    "tokens_processed": 61440,
    "tokens_per_second": 38257.67756587068,
    "energy_joules": 817.7539999999863,
    "energy_per_token": 0.01330979817708311,
    "avg_power_watts": 476.6091506406698,
    "peak_memory_gb": 76.5540771484375,
    "avg_gpu_util_percent": 95.1
  },
  "backward": {
    "stage_name": "backward",
    "duration_ms": 3448.8081949999696,
    "tokens_processed": 61440,
    "tokens_per_second": 17814.849804948502,
    "energy_joules": 1765.182000000008,
    "energy_per_token": 0.02873017578125013,
    "avg_power_watts": 498.84691252245983,
    "peak_memory_gb": 76.5540771484375,
    "avg_gpu_util_percent": 95.1
  },
  "optimizer": {
    "stage_name": "optimizer",
    "duration_ms": 545.701982000196,
    "tokens_processed": 61440,
    "tokens_per_second": 112588.92587268984,
    "energy_joules": 332.4770000000135,
    "energy_per_token": 0.005411409505208553,
    "avg_power_watts": 521.4900438388863,
    "peak_memory_gb": 76.5540771484375,
    "avg_gpu_util_percent": 95.1
  },
  "total_duration_ms": 5600.462338999932,
  "total_tokens": 61440,
  "total_tokens_per_second": 10970.522839186035,
  "total_energy_joules": 2915.4130000000077,
  "total_energy_per_token": 0.047451383463541795,
  "timestamp": 1768541921.6000674
}
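
This training record aggregates the same way: total_duration_ms and total_energy_joules are the sums of the forward, backward, and optimizer blocks, and total_energy_per_token is that energy sum divided by total_tokens. How the energy_joules and avg_power_watts figures themselves were collected is not shown in this commit; a plausible sketch, assuming NVML power polling integrated over wall time, which would also explain why avg_power_watts need not equal energy_joules / duration for a short stage like the prefill above:

import threading
import time
import pynvml

class PowerSampler:
    """Polls GPU power draw in a background thread while one stage runs.

    An assumed measurement scheme, not the harness from this commit.
    """

    def __init__(self, device_index=0, interval_s=0.05):
        pynvml.nvmlInit()
        self.handle = pynvml.nvmlDeviceGetHandleByIndex(device_index)
        self.interval_s = interval_s
        self.samples = []  # (timestamp_s, watts)
        self._stop = threading.Event()
        self._thread = threading.Thread(target=self._poll, daemon=True)

    def _poll(self):
        while not self._stop.is_set():
            watts = pynvml.nvmlDeviceGetPowerUsage(self.handle) / 1000.0  # mW -> W
            self.samples.append((time.perf_counter(), watts))
            time.sleep(self.interval_s)

    def __enter__(self):
        self._thread.start()
        return self

    def __exit__(self, *exc):
        self._stop.set()
        self._thread.join()
        pynvml.nvmlShutdown()

    def energy_joules(self):
        # Trapezoidal integration of the sampled power trace.
        return sum(0.5 * (p0 + p1) * (t1 - t0)
                   for (t0, p0), (t1, p1) in zip(self.samples, self.samples[1:]))

    def avg_power_watts(self):
        # Mean of the samples, not energy / duration: for a stage only a
        # few polling intervals long the two can differ noticeably.
        return sum(w for _, w in self.samples) / max(1, len(self.samples))

# Hypothetical usage around one training stage:
# with PowerSampler() as ps:
#     run_forward_pass()
# print(ps.energy_joules(), ps.avg_power_watts())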