{ "model_name": "Qwen/Qwen3-4B", "gpu_name": "NVIDIA A100-SXM4-80GB", "attention_implementation": "flash_attention_2", "num_requests": 10, "prompt_length": 512, "generation_length": 100, "prefill": { "stage_name": "prefill", "duration_ms": 475.62581300735474, "tokens_processed": 5120, "tokens_per_second": 10764.76477932628, "energy_joules": 21.409000039100647, "energy_per_token": 0.004181445320136845, "avg_power_watts": 68.91171083870925, "peak_memory_gb": 45.87115478515625, "avg_gpu_util_percent": 38.1 }, "decode": { "stage_name": "decode", "duration_ms": 41460.768724791706, "tokens_processed": 1000, "tokens_per_second": 24.119186179055195, "energy_joules": 4684.697999954224, "energy_per_token": 4.684697999954223, "avg_power_watts": 112.85507087682042, "peak_memory_gb": 45.87115478515625, "avg_gpu_util_percent": 38.1 }, "e2e_latency_ms": 4193.639453779906, "e2e_tokens_per_second": 145.93529242204605, "e2e_energy_joules": 4706.106999993324, "e2e_energy_per_token": 0.768971732025053, "ttft_ms": 47.562581300735474, "itl_ms": 41.460768724791706, "timestamp": 1768519487.5402663 }