Initial commit
This commit is contained in:
37
results/h100_sdpa/inference_NVIDIA_H100_sdpa.json
Normal file
37
results/h100_sdpa/inference_NVIDIA_H100_sdpa.json
Normal file
@@ -0,0 +1,37 @@
|
||||
{
|
||||
"model_name": "Qwen/Qwen3-4B",
|
||||
"gpu_name": "NVIDIA H100",
|
||||
"attention_implementation": "sdpa",
|
||||
"num_requests": 10,
|
||||
"prompt_length": 512,
|
||||
"generation_length": 100,
|
||||
"prefill": {
|
||||
"stage_name": "prefill",
|
||||
"duration_ms": 253.97859653458,
|
||||
"tokens_processed": 5120,
|
||||
"tokens_per_second": 20159.179040517676,
|
||||
"energy_joules": 0.0,
|
||||
"energy_per_token": 0.0,
|
||||
"avg_power_watts": 0.0,
|
||||
"peak_memory_gb": 46.01458740234375,
|
||||
"avg_gpu_util_percent": 48.8
|
||||
},
|
||||
"decode": {
|
||||
"stage_name": "decode",
|
||||
"duration_ms": 23519.252635538578,
|
||||
"tokens_processed": 1000,
|
||||
"tokens_per_second": 42.51835785330007,
|
||||
"energy_joules": 4544.901999980211,
|
||||
"energy_per_token": 4.544901999980211,
|
||||
"avg_power_watts": 192.5432634001641,
|
||||
"peak_memory_gb": 46.01458740234375,
|
||||
"avg_gpu_util_percent": 48.8
|
||||
},
|
||||
"e2e_latency_ms": 2377.323123207316,
|
||||
"e2e_tokens_per_second": 257.43240118504923,
|
||||
"e2e_energy_joules": 4544.901999980211,
|
||||
"e2e_energy_per_token": 0.7426310457484006,
|
||||
"ttft_ms": 25.397859653458,
|
||||
"itl_ms": 23.519252635538578,
|
||||
"timestamp": 1769149269.5228984
|
||||
}
|
||||
Reference in New Issue
Block a user