{ "model_name": "Qwen/Qwen3-4B", "gpu_name": "NVIDIA H200", "attention_implementation": "sdpa", "batch_size": 3, "sequence_length": 2048, "num_steps": 10, "forward": { "stage_name": "forward", "duration_ms": 1615.8598741167225, "tokens_processed": 61440, "tokens_per_second": 38023.09902248482, "energy_joules": 873.9250000119209, "energy_per_token": 0.014224039713735693, "avg_power_watts": 541.9081076256928, "peak_memory_gb": 76.5540771484375, "avg_gpu_util_percent": 100.0 }, "backward": { "stage_name": "backward", "duration_ms": 3462.180594098754, "tokens_processed": 61440, "tokens_per_second": 17746.04135460864, "energy_joules": 1696.024000003934, "energy_per_token": 0.027604557291730693, "avg_power_watts": 472.8399628680292, "peak_memory_gb": 76.5540771484375, "avg_gpu_util_percent": 100.0 }, "optimizer": { "stage_name": "optimizer", "duration_ms": 551.849422918167, "tokens_processed": 61440, "tokens_per_second": 111334.71821915968, "energy_joules": 316.88299998641014, "energy_per_token": 0.005157600911237144, "avg_power_watts": 499.2301039455484, "peak_memory_gb": 76.5540771484375, "avg_gpu_util_percent": 100.0 }, "total_duration_ms": 5629.889891133644, "total_tokens": 61440, "total_tokens_per_second": 10913.179687005982, "total_energy_joules": 2886.832000002265, "total_energy_per_token": 0.04698619791670353, "timestamp": 1769149487.0005488 }