47 lines
1.5 KiB
JSON
47 lines
1.5 KiB
JSON
{
|
|
"model_name": "Qwen/Qwen3-4B",
|
|
"gpu_name": "NVIDIA H200",
|
|
"attention_implementation": "sdpa",
|
|
"batch_size": 3,
|
|
"sequence_length": 2048,
|
|
"num_steps": 10,
|
|
"forward": {
|
|
"stage_name": "forward",
|
|
"duration_ms": 1615.8598741167225,
|
|
"tokens_processed": 61440,
|
|
"tokens_per_second": 38023.09902248482,
|
|
"energy_joules": 873.9250000119209,
|
|
"energy_per_token": 0.014224039713735693,
|
|
"avg_power_watts": 541.9081076256928,
|
|
"peak_memory_gb": 76.5540771484375,
|
|
"avg_gpu_util_percent": 100.0
|
|
},
|
|
"backward": {
|
|
"stage_name": "backward",
|
|
"duration_ms": 3462.180594098754,
|
|
"tokens_processed": 61440,
|
|
"tokens_per_second": 17746.04135460864,
|
|
"energy_joules": 1696.024000003934,
|
|
"energy_per_token": 0.027604557291730693,
|
|
"avg_power_watts": 472.8399628680292,
|
|
"peak_memory_gb": 76.5540771484375,
|
|
"avg_gpu_util_percent": 100.0
|
|
},
|
|
"optimizer": {
|
|
"stage_name": "optimizer",
|
|
"duration_ms": 551.849422918167,
|
|
"tokens_processed": 61440,
|
|
"tokens_per_second": 111334.71821915968,
|
|
"energy_joules": 316.88299998641014,
|
|
"energy_per_token": 0.005157600911237144,
|
|
"avg_power_watts": 499.2301039455484,
|
|
"peak_memory_gb": 76.5540771484375,
|
|
"avg_gpu_util_percent": 100.0
|
|
},
|
|
"total_duration_ms": 5629.889891133644,
|
|
"total_tokens": 61440,
|
|
"total_tokens_per_second": 10913.179687005982,
|
|
"total_energy_joules": 2886.832000002265,
|
|
"total_energy_per_token": 0.04698619791670353,
|
|
"timestamp": 1769149487.0005488
|
|
} |