Initial commit
This commit is contained in:
@@ -0,0 +1,37 @@
|
||||
{
|
||||
"model_name": "Qwen/Qwen3-4B",
|
||||
"gpu_name": "NVIDIA A100-SXM4-80GB",
|
||||
"attention_implementation": "flash_attention_2",
|
||||
"num_requests": 10,
|
||||
"prompt_length": 512,
|
||||
"generation_length": 100,
|
||||
"prefill": {
|
||||
"stage_name": "prefill",
|
||||
"duration_ms": 475.62581300735474,
|
||||
"tokens_processed": 5120,
|
||||
"tokens_per_second": 10764.76477932628,
|
||||
"energy_joules": 21.409000039100647,
|
||||
"energy_per_token": 0.004181445320136845,
|
||||
"avg_power_watts": 68.91171083870925,
|
||||
"peak_memory_gb": 45.87115478515625,
|
||||
"avg_gpu_util_percent": 38.1
|
||||
},
|
||||
"decode": {
|
||||
"stage_name": "decode",
|
||||
"duration_ms": 41460.768724791706,
|
||||
"tokens_processed": 1000,
|
||||
"tokens_per_second": 24.119186179055195,
|
||||
"energy_joules": 4684.697999954224,
|
||||
"energy_per_token": 4.684697999954223,
|
||||
"avg_power_watts": 112.85507087682042,
|
||||
"peak_memory_gb": 45.87115478515625,
|
||||
"avg_gpu_util_percent": 38.1
|
||||
},
|
||||
"e2e_latency_ms": 4193.639453779906,
|
||||
"e2e_tokens_per_second": 145.93529242204605,
|
||||
"e2e_energy_joules": 4706.106999993324,
|
||||
"e2e_energy_per_token": 0.768971732025053,
|
||||
"ttft_ms": 47.562581300735474,
|
||||
"itl_ms": 41.460768724791706,
|
||||
"timestamp": 1768519487.5402663
|
||||
}
|
||||
@@ -0,0 +1,47 @@
|
||||
{
|
||||
"model_name": "Qwen/Qwen3-4B",
|
||||
"gpu_name": "NVIDIA A100-SXM4-80GB",
|
||||
"attention_implementation": "flash_attention_2",
|
||||
"batch_size": 3,
|
||||
"sequence_length": 2048,
|
||||
"num_steps": 10,
|
||||
"forward": {
|
||||
"stage_name": "forward",
|
||||
"duration_ms": 3359.0412912890315,
|
||||
"tokens_processed": 61440,
|
||||
"tokens_per_second": 18290.933237210196,
|
||||
"energy_joules": 1292.2280000448227,
|
||||
"energy_per_token": 0.021032356771562868,
|
||||
"avg_power_watts": 387.19580415542595,
|
||||
"peak_memory_gb": 79.66021728515625,
|
||||
"avg_gpu_util_percent": 97.8
|
||||
},
|
||||
"backward": {
|
||||
"stage_name": "backward",
|
||||
"duration_ms": 6954.944152384996,
|
||||
"tokens_processed": 61440,
|
||||
"tokens_per_second": 8834.003358449821,
|
||||
"energy_joules": 2729.588000059128,
|
||||
"energy_per_token": 0.0444268880217957,
|
||||
"avg_power_watts": 394.24766095856324,
|
||||
"peak_memory_gb": 79.66021728515625,
|
||||
"avg_gpu_util_percent": 97.8
|
||||
},
|
||||
"optimizer": {
|
||||
"stage_name": "optimizer",
|
||||
"duration_ms": 1153.845101594925,
|
||||
"tokens_processed": 61440,
|
||||
"tokens_per_second": 53248.048559614595,
|
||||
"energy_joules": 362.6529998779297,
|
||||
"energy_per_token": 0.005902555336554845,
|
||||
"avg_power_watts": 299.1223537953503,
|
||||
"peak_memory_gb": 79.66021728515625,
|
||||
"avg_gpu_util_percent": 97.8
|
||||
},
|
||||
"total_duration_ms": 11467.830545268953,
|
||||
"total_tokens": 61440,
|
||||
"total_tokens_per_second": 5357.595733340081,
|
||||
"total_energy_joules": 4384.46899998188,
|
||||
"total_energy_per_token": 0.07136180012991342,
|
||||
"timestamp": 1768519431.5985208
|
||||
}
|
||||
Reference in New Issue
Block a user