47 lines
1.5 KiB
JSON
47 lines
1.5 KiB
JSON
{
|
|
"model_name": "Qwen/Qwen3-4B",
|
|
"gpu_name": "NVIDIA H200",
|
|
"attention_implementation": "flash_attention_3_hopper",
|
|
"batch_size": 3,
|
|
"sequence_length": 2048,
|
|
"num_steps": 10,
|
|
"forward": {
|
|
"stage_name": "forward",
|
|
"duration_ms": 1605.9521619997668,
|
|
"tokens_processed": 61440,
|
|
"tokens_per_second": 38257.67756587068,
|
|
"energy_joules": 817.7539999999863,
|
|
"energy_per_token": 0.01330979817708311,
|
|
"avg_power_watts": 476.6091506406698,
|
|
"peak_memory_gb": 76.5540771484375,
|
|
"avg_gpu_util_percent": 95.1
|
|
},
|
|
"backward": {
|
|
"stage_name": "backward",
|
|
"duration_ms": 3448.8081949999696,
|
|
"tokens_processed": 61440,
|
|
"tokens_per_second": 17814.849804948502,
|
|
"energy_joules": 1765.182000000008,
|
|
"energy_per_token": 0.02873017578125013,
|
|
"avg_power_watts": 498.84691252245983,
|
|
"peak_memory_gb": 76.5540771484375,
|
|
"avg_gpu_util_percent": 95.1
|
|
},
|
|
"optimizer": {
|
|
"stage_name": "optimizer",
|
|
"duration_ms": 545.701982000196,
|
|
"tokens_processed": 61440,
|
|
"tokens_per_second": 112588.92587268984,
|
|
"energy_joules": 332.4770000000135,
|
|
"energy_per_token": 0.005411409505208553,
|
|
"avg_power_watts": 521.4900438388863,
|
|
"peak_memory_gb": 76.5540771484375,
|
|
"avg_gpu_util_percent": 95.1
|
|
},
|
|
"total_duration_ms": 5600.462338999932,
|
|
"total_tokens": 61440,
|
|
"total_tokens_per_second": 10970.522839186035,
|
|
"total_energy_joules": 2915.4130000000077,
|
|
"total_energy_per_token": 0.047451383463541795,
|
|
"timestamp": 1768541921.6000674
|
|
} |