Files
cocogoat/results/a100/inference_NVIDIA_A100-SXM4-80GB_flash_attention_2.json
2026-02-05 23:18:26 +01:00

37 lines
1.2 KiB
JSON

{
"model_name": "Qwen/Qwen3-4B",
"gpu_name": "NVIDIA A100-SXM4-80GB",
"attention_implementation": "flash_attention_2",
"num_requests": 10,
"prompt_length": 512,
"generation_length": 100,
"prefill": {
"stage_name": "prefill",
"duration_ms": 475.62581300735474,
"tokens_processed": 5120,
"tokens_per_second": 10764.76477932628,
"energy_joules": 21.409000039100647,
"energy_per_token": 0.004181445320136845,
"avg_power_watts": 68.91171083870925,
"peak_memory_gb": 45.87115478515625,
"avg_gpu_util_percent": 38.1
},
"decode": {
"stage_name": "decode",
"duration_ms": 41460.768724791706,
"tokens_processed": 1000,
"tokens_per_second": 24.119186179055195,
"energy_joules": 4684.697999954224,
"energy_per_token": 4.684697999954223,
"avg_power_watts": 112.85507087682042,
"peak_memory_gb": 45.87115478515625,
"avg_gpu_util_percent": 38.1
},
"e2e_latency_ms": 4193.639453779906,
"e2e_tokens_per_second": 145.93529242204605,
"e2e_energy_joules": 4706.106999993324,
"e2e_energy_per_token": 0.768971732025053,
"ttft_ms": 47.562581300735474,
"itl_ms": 41.460768724791706,
"timestamp": 1768519487.5402663
}