{ "model_name": "Qwen/Qwen3-4B", "gpu_name": "NVIDIA H200", "attention_implementation": "flash_attention_3_hopper", "num_requests": 10, "prompt_length": 512, "generation_length": 100, "prefill": { "stage_name": "prefill", "duration_ms": 323.8773119999223, "tokens_processed": 5120, "tokens_per_second": 15808.455270868828, "energy_joules": 98.1449999999968, "energy_per_token": 0.019168945312499373, "avg_power_watts": 250.96736239598317, "peak_memory_gb": 46.1302490234375, "avg_gpu_util_percent": 32.2 }, "decode": { "stage_name": "decode", "duration_ms": 30558.618001000013, "tokens_processed": 1000, "tokens_per_second": 32.72399294913388, "energy_joules": 4828.459999999999, "energy_per_token": 4.828459999999999, "avg_power_watts": 157.61927190444868, "peak_memory_gb": 46.1302490234375, "avg_gpu_util_percent": 32.2 }, "e2e_latency_ms": 3088.2495312999936, "e2e_tokens_per_second": 198.17051497855476, "e2e_energy_joules": 4926.604999999996, "e2e_energy_per_token": 0.8050008169934634, "ttft_ms": 32.38773119999223, "itl_ms": 30.558618001000013, "timestamp": 1768541964.4743361 }