{
  "model_name": "Qwen/Qwen3-4B",
  "gpu_name": "NVIDIA H200",
  "attention_implementation": "sdpa",
  "num_requests": 10,
  "prompt_length": 512,
  "generation_length": 100,
  "prefill": {
    "stage_name": "prefill",
    "duration_ms": 247.9969559935853,
    "tokens_processed": 5120,
    "tokens_per_second": 20645.414696672466,
    "energy_joules": 73.83399999141693,
    "energy_per_token": 0.014420703123323619,
    "avg_power_watts": 222.33737204549297,
    "peak_memory_gb": 46.1165771484375,
    "avg_gpu_util_percent": 40.0
  },
  "decode": {
    "stage_name": "decode",
    "duration_ms": 23003.622506046668,
    "tokens_processed": 1000,
    "tokens_per_second": 43.47141411041425,
    "energy_joules": 4033.3500000089407,
    "energy_per_token": 4.033350000008941,
    "avg_power_watts": 174.6335604209662,
    "peak_memory_gb": 46.1165771484375,
    "avg_gpu_util_percent": 40.0
  },
  "e2e_latency_ms": 2325.1619462040253,
  "e2e_tokens_per_second": 263.20747292425324,
  "e2e_energy_joules": 4107.184000000358,
  "e2e_energy_per_token": 0.6711084967320846,
  "ttft_ms": 24.79969559935853,
  "itl_ms": 23.003622506046668,
  "timestamp": 1769149520.7919798
}