47 lines
1.5 KiB
JSON
47 lines
1.5 KiB
JSON
{
|
|
"model_name": "Qwen/Qwen3-4B",
|
|
"gpu_name": "NVIDIA H100",
|
|
"attention_implementation": "sdpa",
|
|
"batch_size": 3,
|
|
"sequence_length": 2048,
|
|
"num_steps": 10,
|
|
"forward": {
|
|
"stage_name": "forward",
|
|
"duration_ms": 1790.2467511594296,
|
|
"tokens_processed": 61440,
|
|
"tokens_per_second": 34319.29143857359,
|
|
"energy_joules": 981.029000043869,
|
|
"energy_per_token": 0.01596726888092235,
|
|
"avg_power_watts": 520.9058508009567,
|
|
"peak_memory_gb": 76.45208740234375,
|
|
"avg_gpu_util_percent": 100.0
|
|
},
|
|
"backward": {
|
|
"stage_name": "backward",
|
|
"duration_ms": 3854.5540031045675,
|
|
"tokens_processed": 61440,
|
|
"tokens_per_second": 15939.587290906931,
|
|
"energy_joules": 1953.71099999547,
|
|
"energy_per_token": 0.03179868164055127,
|
|
"avg_power_watts": 491.5443624439596,
|
|
"peak_memory_gb": 76.45208740234375,
|
|
"avg_gpu_util_percent": 100.0
|
|
},
|
|
"optimizer": {
|
|
"stage_name": "optimizer",
|
|
"duration_ms": 899.9840868636966,
|
|
"tokens_processed": 61440,
|
|
"tokens_per_second": 68267.87372886644,
|
|
"energy_joules": 365.9209999740124,
|
|
"energy_per_token": 0.005955745442285358,
|
|
"avg_power_watts": 377.8756124501158,
|
|
"peak_memory_gb": 76.45208740234375,
|
|
"avg_gpu_util_percent": 100.0
|
|
},
|
|
"total_duration_ms": 6544.784841127694,
|
|
"total_tokens": 61440,
|
|
"total_tokens_per_second": 9387.627170553957,
|
|
"total_energy_joules": 3300.6610000133514,
|
|
"total_energy_per_token": 0.053721695963758975,
|
|
"timestamp": 1769149234.99943
|
|
} |