Files
cocogoat/results/h200/pretrain_NVIDIA_H200_flash_attention_3_hopper.json
2026-02-05 23:18:26 +01:00

47 lines
1.5 KiB
JSON

{
"model_name": "Qwen/Qwen3-4B",
"gpu_name": "NVIDIA H200",
"attention_implementation": "flash_attention_3_hopper",
"batch_size": 3,
"sequence_length": 2048,
"num_steps": 10,
"forward": {
"stage_name": "forward",
"duration_ms": 1605.9521619997668,
"tokens_processed": 61440,
"tokens_per_second": 38257.67756587068,
"energy_joules": 817.7539999999863,
"energy_per_token": 0.01330979817708311,
"avg_power_watts": 476.6091506406698,
"peak_memory_gb": 76.5540771484375,
"avg_gpu_util_percent": 95.1
},
"backward": {
"stage_name": "backward",
"duration_ms": 3448.8081949999696,
"tokens_processed": 61440,
"tokens_per_second": 17814.849804948502,
"energy_joules": 1765.182000000008,
"energy_per_token": 0.02873017578125013,
"avg_power_watts": 498.84691252245983,
"peak_memory_gb": 76.5540771484375,
"avg_gpu_util_percent": 95.1
},
"optimizer": {
"stage_name": "optimizer",
"duration_ms": 545.701982000196,
"tokens_processed": 61440,
"tokens_per_second": 112588.92587268984,
"energy_joules": 332.4770000000135,
"energy_per_token": 0.005411409505208553,
"avg_power_watts": 521.4900438388863,
"peak_memory_gb": 76.5540771484375,
"avg_gpu_util_percent": 95.1
},
"total_duration_ms": 5600.462338999932,
"total_tokens": 61440,
"total_tokens_per_second": 10970.522839186035,
"total_energy_joules": 2915.4130000000077,
"total_energy_per_token": 0.047451383463541795,
"timestamp": 1768541921.6000674
}