Initial commit
This commit is contained in:
@@ -0,0 +1,37 @@
|
||||
{
|
||||
"model_name": "Qwen/Qwen3-4B",
|
||||
"gpu_name": "NVIDIA H100",
|
||||
"attention_implementation": "flash_attention_3_hopper",
|
||||
"num_requests": 10,
|
||||
"prompt_length": 512,
|
||||
"generation_length": 100,
|
||||
"prefill": {
|
||||
"stage_name": "prefill",
|
||||
"duration_ms": 323.99015384726226,
|
||||
"tokens_processed": 5120,
|
||||
"tokens_per_second": 15802.949377324925,
|
||||
"energy_joules": 17.092000007629395,
|
||||
"energy_per_token": 0.0033382812514901163,
|
||||
"avg_power_watts": 93.64442380045372,
|
||||
"peak_memory_gb": 46.02825927734375,
|
||||
"avg_gpu_util_percent": 40.0
|
||||
},
|
||||
"decode": {
|
||||
"stage_name": "decode",
|
||||
"duration_ms": 30513.75844143331,
|
||||
"tokens_processed": 1000,
|
||||
"tokens_per_second": 32.772101867403634,
|
||||
"energy_joules": 4915.5139999985695,
|
||||
"energy_per_token": 4.915513999998569,
|
||||
"avg_power_watts": 161.199160874206,
|
||||
"peak_memory_gb": 46.02825927734375,
|
||||
"avg_gpu_util_percent": 40.0
|
||||
},
|
||||
"e2e_latency_ms": 3083.7748595280573,
|
||||
"e2e_tokens_per_second": 198.4580677506596,
|
||||
"e2e_energy_joules": 4932.606000006199,
|
||||
"e2e_energy_per_token": 0.8059813725500325,
|
||||
"ttft_ms": 32.399015384726226,
|
||||
"itl_ms": 30.51375844143331,
|
||||
"timestamp": 1768541839.3186588
|
||||
}
|
||||
@@ -0,0 +1,47 @@
|
||||
{
|
||||
"model_name": "Qwen/Qwen3-4B",
|
||||
"gpu_name": "NVIDIA H100",
|
||||
"attention_implementation": "flash_attention_3_hopper",
|
||||
"batch_size": 3,
|
||||
"sequence_length": 2048,
|
||||
"num_steps": 10,
|
||||
"forward": {
|
||||
"stage_name": "forward",
|
||||
"duration_ms": 1748.5067250672728,
|
||||
"tokens_processed": 61440,
|
||||
"tokens_per_second": 35138.55515633555,
|
||||
"energy_joules": 946.9269999563694,
|
||||
"energy_per_token": 0.015412223306581534,
|
||||
"avg_power_watts": 501.76439870614394,
|
||||
"peak_memory_gb": 76.45208740234375,
|
||||
"avg_gpu_util_percent": 97.0
|
||||
},
|
||||
"backward": {
|
||||
"stage_name": "backward",
|
||||
"duration_ms": 3761.718863155693,
|
||||
"tokens_processed": 61440,
|
||||
"tokens_per_second": 16332.959010248362,
|
||||
"energy_joules": 1904.104000031948,
|
||||
"energy_per_token": 0.030991276042186655,
|
||||
"avg_power_watts": 491.250130606127,
|
||||
"peak_memory_gb": 76.45208740234375,
|
||||
"avg_gpu_util_percent": 97.0
|
||||
},
|
||||
"optimizer": {
|
||||
"stage_name": "optimizer",
|
||||
"duration_ms": 896.0564862936735,
|
||||
"tokens_processed": 61440,
|
||||
"tokens_per_second": 68567.1059133025,
|
||||
"energy_joules": 349.722000002861,
|
||||
"energy_per_token": 0.0056920898437965665,
|
||||
"avg_power_watts": 356.92130879075387,
|
||||
"peak_memory_gb": 76.45208740234375,
|
||||
"avg_gpu_util_percent": 97.0
|
||||
},
|
||||
"total_duration_ms": 6406.282074516639,
|
||||
"total_tokens": 61440,
|
||||
"total_tokens_per_second": 9590.586128637759,
|
||||
"total_energy_joules": 3200.7529999911785,
|
||||
"total_energy_per_token": 0.052095589192564754,
|
||||
"timestamp": 1768541796.4011748
|
||||
}
|
||||
Reference in New Issue
Block a user