{ "model_name": "Qwen/Qwen3-4B", "gpu_name": "NVIDIA H100", "attention_implementation": "flash_attention_3_hopper", "num_requests": 10, "prompt_length": 512, "generation_length": 100, "prefill": { "stage_name": "prefill", "duration_ms": 323.99015384726226, "tokens_processed": 5120, "tokens_per_second": 15802.949377324925, "energy_joules": 17.092000007629395, "energy_per_token": 0.0033382812514901163, "avg_power_watts": 93.64442380045372, "peak_memory_gb": 46.02825927734375, "avg_gpu_util_percent": 40.0 }, "decode": { "stage_name": "decode", "duration_ms": 30513.75844143331, "tokens_processed": 1000, "tokens_per_second": 32.772101867403634, "energy_joules": 4915.5139999985695, "energy_per_token": 4.915513999998569, "avg_power_watts": 161.199160874206, "peak_memory_gb": 46.02825927734375, "avg_gpu_util_percent": 40.0 }, "e2e_latency_ms": 3083.7748595280573, "e2e_tokens_per_second": 198.4580677506596, "e2e_energy_joules": 4932.606000006199, "e2e_energy_per_token": 0.8059813725500325, "ttft_ms": 32.399015384726226, "itl_ms": 30.51375844143331, "timestamp": 1768541839.3186588 }