{ "model_name": "Qwen/Qwen3-4B", "gpu_name": "NVIDIA H100", "attention_implementation": "sdpa", "batch_size": 3, "sequence_length": 2048, "num_steps": 10, "forward": { "stage_name": "forward", "duration_ms": 1790.2467511594296, "tokens_processed": 61440, "tokens_per_second": 34319.29143857359, "energy_joules": 981.029000043869, "energy_per_token": 0.01596726888092235, "avg_power_watts": 520.9058508009567, "peak_memory_gb": 76.45208740234375, "avg_gpu_util_percent": 100.0 }, "backward": { "stage_name": "backward", "duration_ms": 3854.5540031045675, "tokens_processed": 61440, "tokens_per_second": 15939.587290906931, "energy_joules": 1953.71099999547, "energy_per_token": 0.03179868164055127, "avg_power_watts": 491.5443624439596, "peak_memory_gb": 76.45208740234375, "avg_gpu_util_percent": 100.0 }, "optimizer": { "stage_name": "optimizer", "duration_ms": 899.9840868636966, "tokens_processed": 61440, "tokens_per_second": 68267.87372886644, "energy_joules": 365.9209999740124, "energy_per_token": 0.005955745442285358, "avg_power_watts": 377.8756124501158, "peak_memory_gb": 76.45208740234375, "avg_gpu_util_percent": 100.0 }, "total_duration_ms": 6544.784841127694, "total_tokens": 61440, "total_tokens_per_second": 9387.627170553957, "total_energy_joules": 3300.6610000133514, "total_energy_per_token": 0.053721695963758975, "timestamp": 1769149234.99943 }