{ "model_name": "Qwen/Qwen3-4B", "gpu_name": "NVIDIA H200", "attention_implementation": "sdpa", "num_requests": 10, "prompt_length": 512, "generation_length": 100, "prefill": { "stage_name": "prefill", "duration_ms": 247.9969559935853, "tokens_processed": 5120, "tokens_per_second": 20645.414696672466, "energy_joules": 73.83399999141693, "energy_per_token": 0.014420703123323619, "avg_power_watts": 222.33737204549297, "peak_memory_gb": 46.1165771484375, "avg_gpu_util_percent": 40.0 }, "decode": { "stage_name": "decode", "duration_ms": 23003.622506046668, "tokens_processed": 1000, "tokens_per_second": 43.47141411041425, "energy_joules": 4033.3500000089407, "energy_per_token": 4.033350000008941, "avg_power_watts": 174.6335604209662, "peak_memory_gb": 46.1165771484375, "avg_gpu_util_percent": 40.0 }, "e2e_latency_ms": 2325.1619462040253, "e2e_tokens_per_second": 263.20747292425324, "e2e_energy_joules": 4107.184000000358, "e2e_energy_per_token": 0.6711084967320846, "ttft_ms": 24.79969559935853, "itl_ms": 23.003622506046668, "timestamp": 1769149520.7919798 }