{ "model_name": "Qwen/Qwen3-4B", "gpu_name": "NVIDIA H100", "attention_implementation": "sdpa", "num_requests": 10, "prompt_length": 512, "generation_length": 100, "prefill": { "stage_name": "prefill", "duration_ms": 253.97859653458, "tokens_processed": 5120, "tokens_per_second": 20159.179040517676, "energy_joules": 0.0, "energy_per_token": 0.0, "avg_power_watts": 0.0, "peak_memory_gb": 46.01458740234375, "avg_gpu_util_percent": 48.8 }, "decode": { "stage_name": "decode", "duration_ms": 23519.252635538578, "tokens_processed": 1000, "tokens_per_second": 42.51835785330007, "energy_joules": 4544.901999980211, "energy_per_token": 4.544901999980211, "avg_power_watts": 192.5432634001641, "peak_memory_gb": 46.01458740234375, "avg_gpu_util_percent": 48.8 }, "e2e_latency_ms": 2377.323123207316, "e2e_tokens_per_second": 257.43240118504923, "e2e_energy_joules": 4544.901999980211, "e2e_energy_per_token": 0.7426310457484006, "ttft_ms": 25.397859653458, "itl_ms": 23.519252635538578, "timestamp": 1769149269.5228984 }