Initial commit

This commit is contained in:
Bole Ma
2026-02-05 23:18:26 +01:00
commit 747c92ac6b
31 changed files with 4220 additions and 0 deletions

View File

@@ -0,0 +1,37 @@
{
"model_name": "Qwen/Qwen3-4B",
"gpu_name": "NVIDIA H100",
"attention_implementation": "sdpa",
"num_requests": 10,
"prompt_length": 512,
"generation_length": 100,
"prefill": {
"stage_name": "prefill",
"duration_ms": 253.97859653458,
"tokens_processed": 5120,
"tokens_per_second": 20159.179040517676,
"energy_joules": 0.0,
"energy_per_token": 0.0,
"avg_power_watts": 0.0,
"peak_memory_gb": 46.01458740234375,
"avg_gpu_util_percent": 48.8
},
"decode": {
"stage_name": "decode",
"duration_ms": 23519.252635538578,
"tokens_processed": 1000,
"tokens_per_second": 42.51835785330007,
"energy_joules": 4544.901999980211,
"energy_per_token": 4.544901999980211,
"avg_power_watts": 192.5432634001641,
"peak_memory_gb": 46.01458740234375,
"avg_gpu_util_percent": 48.8
},
"e2e_latency_ms": 2377.323123207316,
"e2e_tokens_per_second": 257.43240118504923,
"e2e_energy_joules": 4544.901999980211,
"e2e_energy_per_token": 0.7426310457484006,
"ttft_ms": 25.397859653458,
"itl_ms": 23.519252635538578,
"timestamp": 1769149269.5228984
}

View File

@@ -0,0 +1,47 @@
{
"model_name": "Qwen/Qwen3-4B",
"gpu_name": "NVIDIA H100",
"attention_implementation": "sdpa",
"batch_size": 3,
"sequence_length": 2048,
"num_steps": 10,
"forward": {
"stage_name": "forward",
"duration_ms": 1790.2467511594296,
"tokens_processed": 61440,
"tokens_per_second": 34319.29143857359,
"energy_joules": 981.029000043869,
"energy_per_token": 0.01596726888092235,
"avg_power_watts": 520.9058508009567,
"peak_memory_gb": 76.45208740234375,
"avg_gpu_util_percent": 100.0
},
"backward": {
"stage_name": "backward",
"duration_ms": 3854.5540031045675,
"tokens_processed": 61440,
"tokens_per_second": 15939.587290906931,
"energy_joules": 1953.71099999547,
"energy_per_token": 0.03179868164055127,
"avg_power_watts": 491.5443624439596,
"peak_memory_gb": 76.45208740234375,
"avg_gpu_util_percent": 100.0
},
"optimizer": {
"stage_name": "optimizer",
"duration_ms": 899.9840868636966,
"tokens_processed": 61440,
"tokens_per_second": 68267.87372886644,
"energy_joules": 365.9209999740124,
"energy_per_token": 0.005955745442285358,
"avg_power_watts": 377.8756124501158,
"peak_memory_gb": 76.45208740234375,
"avg_gpu_util_percent": 100.0
},
"total_duration_ms": 6544.784841127694,
"total_tokens": 61440,
"total_tokens_per_second": 9387.627170553957,
"total_energy_joules": 3300.6610000133514,
"total_energy_per_token": 0.053721695963758975,
"timestamp": 1769149234.99943
}