Initial commit
This commit is contained in:
@@ -0,0 +1,37 @@
|
||||
{
|
||||
"model_name": "Qwen/Qwen3-4B",
|
||||
"gpu_name": "NVIDIA A100-SXM4-80GB",
|
||||
"attention_implementation": "flash_attention_2",
|
||||
"num_requests": 10,
|
||||
"prompt_length": 512,
|
||||
"generation_length": 100,
|
||||
"prefill": {
|
||||
"stage_name": "prefill",
|
||||
"duration_ms": 475.62581300735474,
|
||||
"tokens_processed": 5120,
|
||||
"tokens_per_second": 10764.76477932628,
|
||||
"energy_joules": 21.409000039100647,
|
||||
"energy_per_token": 0.004181445320136845,
|
||||
"avg_power_watts": 68.91171083870925,
|
||||
"peak_memory_gb": 45.87115478515625,
|
||||
"avg_gpu_util_percent": 38.1
|
||||
},
|
||||
"decode": {
|
||||
"stage_name": "decode",
|
||||
"duration_ms": 41460.768724791706,
|
||||
"tokens_processed": 1000,
|
||||
"tokens_per_second": 24.119186179055195,
|
||||
"energy_joules": 4684.697999954224,
|
||||
"energy_per_token": 4.684697999954223,
|
||||
"avg_power_watts": 112.85507087682042,
|
||||
"peak_memory_gb": 45.87115478515625,
|
||||
"avg_gpu_util_percent": 38.1
|
||||
},
|
||||
"e2e_latency_ms": 4193.639453779906,
|
||||
"e2e_tokens_per_second": 145.93529242204605,
|
||||
"e2e_energy_joules": 4706.106999993324,
|
||||
"e2e_energy_per_token": 0.768971732025053,
|
||||
"ttft_ms": 47.562581300735474,
|
||||
"itl_ms": 41.460768724791706,
|
||||
"timestamp": 1768519487.5402663
|
||||
}
|
||||
@@ -0,0 +1,47 @@
|
||||
{
|
||||
"model_name": "Qwen/Qwen3-4B",
|
||||
"gpu_name": "NVIDIA A100-SXM4-80GB",
|
||||
"attention_implementation": "flash_attention_2",
|
||||
"batch_size": 3,
|
||||
"sequence_length": 2048,
|
||||
"num_steps": 10,
|
||||
"forward": {
|
||||
"stage_name": "forward",
|
||||
"duration_ms": 3359.0412912890315,
|
||||
"tokens_processed": 61440,
|
||||
"tokens_per_second": 18290.933237210196,
|
||||
"energy_joules": 1292.2280000448227,
|
||||
"energy_per_token": 0.021032356771562868,
|
||||
"avg_power_watts": 387.19580415542595,
|
||||
"peak_memory_gb": 79.66021728515625,
|
||||
"avg_gpu_util_percent": 97.8
|
||||
},
|
||||
"backward": {
|
||||
"stage_name": "backward",
|
||||
"duration_ms": 6954.944152384996,
|
||||
"tokens_processed": 61440,
|
||||
"tokens_per_second": 8834.003358449821,
|
||||
"energy_joules": 2729.588000059128,
|
||||
"energy_per_token": 0.0444268880217957,
|
||||
"avg_power_watts": 394.24766095856324,
|
||||
"peak_memory_gb": 79.66021728515625,
|
||||
"avg_gpu_util_percent": 97.8
|
||||
},
|
||||
"optimizer": {
|
||||
"stage_name": "optimizer",
|
||||
"duration_ms": 1153.845101594925,
|
||||
"tokens_processed": 61440,
|
||||
"tokens_per_second": 53248.048559614595,
|
||||
"energy_joules": 362.6529998779297,
|
||||
"energy_per_token": 0.005902555336554845,
|
||||
"avg_power_watts": 299.1223537953503,
|
||||
"peak_memory_gb": 79.66021728515625,
|
||||
"avg_gpu_util_percent": 97.8
|
||||
},
|
||||
"total_duration_ms": 11467.830545268953,
|
||||
"total_tokens": 61440,
|
||||
"total_tokens_per_second": 5357.595733340081,
|
||||
"total_energy_joules": 4384.46899998188,
|
||||
"total_energy_per_token": 0.07136180012991342,
|
||||
"timestamp": 1768519431.5985208
|
||||
}
|
||||
@@ -0,0 +1,37 @@
|
||||
{
|
||||
"model_name": "Qwen/Qwen3-4B",
|
||||
"gpu_name": "NVIDIA H100",
|
||||
"attention_implementation": "flash_attention_3_hopper",
|
||||
"num_requests": 10,
|
||||
"prompt_length": 512,
|
||||
"generation_length": 100,
|
||||
"prefill": {
|
||||
"stage_name": "prefill",
|
||||
"duration_ms": 323.99015384726226,
|
||||
"tokens_processed": 5120,
|
||||
"tokens_per_second": 15802.949377324925,
|
||||
"energy_joules": 17.092000007629395,
|
||||
"energy_per_token": 0.0033382812514901163,
|
||||
"avg_power_watts": 93.64442380045372,
|
||||
"peak_memory_gb": 46.02825927734375,
|
||||
"avg_gpu_util_percent": 40.0
|
||||
},
|
||||
"decode": {
|
||||
"stage_name": "decode",
|
||||
"duration_ms": 30513.75844143331,
|
||||
"tokens_processed": 1000,
|
||||
"tokens_per_second": 32.772101867403634,
|
||||
"energy_joules": 4915.5139999985695,
|
||||
"energy_per_token": 4.915513999998569,
|
||||
"avg_power_watts": 161.199160874206,
|
||||
"peak_memory_gb": 46.02825927734375,
|
||||
"avg_gpu_util_percent": 40.0
|
||||
},
|
||||
"e2e_latency_ms": 3083.7748595280573,
|
||||
"e2e_tokens_per_second": 198.4580677506596,
|
||||
"e2e_energy_joules": 4932.606000006199,
|
||||
"e2e_energy_per_token": 0.8059813725500325,
|
||||
"ttft_ms": 32.399015384726226,
|
||||
"itl_ms": 30.51375844143331,
|
||||
"timestamp": 1768541839.3186588
|
||||
}
|
||||
@@ -0,0 +1,47 @@
|
||||
{
|
||||
"model_name": "Qwen/Qwen3-4B",
|
||||
"gpu_name": "NVIDIA H100",
|
||||
"attention_implementation": "flash_attention_3_hopper",
|
||||
"batch_size": 3,
|
||||
"sequence_length": 2048,
|
||||
"num_steps": 10,
|
||||
"forward": {
|
||||
"stage_name": "forward",
|
||||
"duration_ms": 1748.5067250672728,
|
||||
"tokens_processed": 61440,
|
||||
"tokens_per_second": 35138.55515633555,
|
||||
"energy_joules": 946.9269999563694,
|
||||
"energy_per_token": 0.015412223306581534,
|
||||
"avg_power_watts": 501.76439870614394,
|
||||
"peak_memory_gb": 76.45208740234375,
|
||||
"avg_gpu_util_percent": 97.0
|
||||
},
|
||||
"backward": {
|
||||
"stage_name": "backward",
|
||||
"duration_ms": 3761.718863155693,
|
||||
"tokens_processed": 61440,
|
||||
"tokens_per_second": 16332.959010248362,
|
||||
"energy_joules": 1904.104000031948,
|
||||
"energy_per_token": 0.030991276042186655,
|
||||
"avg_power_watts": 491.250130606127,
|
||||
"peak_memory_gb": 76.45208740234375,
|
||||
"avg_gpu_util_percent": 97.0
|
||||
},
|
||||
"optimizer": {
|
||||
"stage_name": "optimizer",
|
||||
"duration_ms": 896.0564862936735,
|
||||
"tokens_processed": 61440,
|
||||
"tokens_per_second": 68567.1059133025,
|
||||
"energy_joules": 349.722000002861,
|
||||
"energy_per_token": 0.0056920898437965665,
|
||||
"avg_power_watts": 356.92130879075387,
|
||||
"peak_memory_gb": 76.45208740234375,
|
||||
"avg_gpu_util_percent": 97.0
|
||||
},
|
||||
"total_duration_ms": 6406.282074516639,
|
||||
"total_tokens": 61440,
|
||||
"total_tokens_per_second": 9590.586128637759,
|
||||
"total_energy_joules": 3200.7529999911785,
|
||||
"total_energy_per_token": 0.052095589192564754,
|
||||
"timestamp": 1768541796.4011748
|
||||
}
|
||||
37
results/h100_sdpa/inference_NVIDIA_H100_sdpa.json
Normal file
37
results/h100_sdpa/inference_NVIDIA_H100_sdpa.json
Normal file
@@ -0,0 +1,37 @@
|
||||
{
|
||||
"model_name": "Qwen/Qwen3-4B",
|
||||
"gpu_name": "NVIDIA H100",
|
||||
"attention_implementation": "sdpa",
|
||||
"num_requests": 10,
|
||||
"prompt_length": 512,
|
||||
"generation_length": 100,
|
||||
"prefill": {
|
||||
"stage_name": "prefill",
|
||||
"duration_ms": 253.97859653458,
|
||||
"tokens_processed": 5120,
|
||||
"tokens_per_second": 20159.179040517676,
|
||||
"energy_joules": 0.0,
|
||||
"energy_per_token": 0.0,
|
||||
"avg_power_watts": 0.0,
|
||||
"peak_memory_gb": 46.01458740234375,
|
||||
"avg_gpu_util_percent": 48.8
|
||||
},
|
||||
"decode": {
|
||||
"stage_name": "decode",
|
||||
"duration_ms": 23519.252635538578,
|
||||
"tokens_processed": 1000,
|
||||
"tokens_per_second": 42.51835785330007,
|
||||
"energy_joules": 4544.901999980211,
|
||||
"energy_per_token": 4.544901999980211,
|
||||
"avg_power_watts": 192.5432634001641,
|
||||
"peak_memory_gb": 46.01458740234375,
|
||||
"avg_gpu_util_percent": 48.8
|
||||
},
|
||||
"e2e_latency_ms": 2377.323123207316,
|
||||
"e2e_tokens_per_second": 257.43240118504923,
|
||||
"e2e_energy_joules": 4544.901999980211,
|
||||
"e2e_energy_per_token": 0.7426310457484006,
|
||||
"ttft_ms": 25.397859653458,
|
||||
"itl_ms": 23.519252635538578,
|
||||
"timestamp": 1769149269.5228984
|
||||
}
|
||||
47
results/h100_sdpa/pretrain_NVIDIA_H100_sdpa.json
Normal file
47
results/h100_sdpa/pretrain_NVIDIA_H100_sdpa.json
Normal file
@@ -0,0 +1,47 @@
|
||||
{
|
||||
"model_name": "Qwen/Qwen3-4B",
|
||||
"gpu_name": "NVIDIA H100",
|
||||
"attention_implementation": "sdpa",
|
||||
"batch_size": 3,
|
||||
"sequence_length": 2048,
|
||||
"num_steps": 10,
|
||||
"forward": {
|
||||
"stage_name": "forward",
|
||||
"duration_ms": 1790.2467511594296,
|
||||
"tokens_processed": 61440,
|
||||
"tokens_per_second": 34319.29143857359,
|
||||
"energy_joules": 981.029000043869,
|
||||
"energy_per_token": 0.01596726888092235,
|
||||
"avg_power_watts": 520.9058508009567,
|
||||
"peak_memory_gb": 76.45208740234375,
|
||||
"avg_gpu_util_percent": 100.0
|
||||
},
|
||||
"backward": {
|
||||
"stage_name": "backward",
|
||||
"duration_ms": 3854.5540031045675,
|
||||
"tokens_processed": 61440,
|
||||
"tokens_per_second": 15939.587290906931,
|
||||
"energy_joules": 1953.71099999547,
|
||||
"energy_per_token": 0.03179868164055127,
|
||||
"avg_power_watts": 491.5443624439596,
|
||||
"peak_memory_gb": 76.45208740234375,
|
||||
"avg_gpu_util_percent": 100.0
|
||||
},
|
||||
"optimizer": {
|
||||
"stage_name": "optimizer",
|
||||
"duration_ms": 899.9840868636966,
|
||||
"tokens_processed": 61440,
|
||||
"tokens_per_second": 68267.87372886644,
|
||||
"energy_joules": 365.9209999740124,
|
||||
"energy_per_token": 0.005955745442285358,
|
||||
"avg_power_watts": 377.8756124501158,
|
||||
"peak_memory_gb": 76.45208740234375,
|
||||
"avg_gpu_util_percent": 100.0
|
||||
},
|
||||
"total_duration_ms": 6544.784841127694,
|
||||
"total_tokens": 61440,
|
||||
"total_tokens_per_second": 9387.627170553957,
|
||||
"total_energy_joules": 3300.6610000133514,
|
||||
"total_energy_per_token": 0.053721695963758975,
|
||||
"timestamp": 1769149234.99943
|
||||
}
|
||||
@@ -0,0 +1,37 @@
|
||||
{
|
||||
"model_name": "Qwen/Qwen3-4B",
|
||||
"gpu_name": "NVIDIA H200",
|
||||
"attention_implementation": "flash_attention_3_hopper",
|
||||
"num_requests": 10,
|
||||
"prompt_length": 512,
|
||||
"generation_length": 100,
|
||||
"prefill": {
|
||||
"stage_name": "prefill",
|
||||
"duration_ms": 323.8773119999223,
|
||||
"tokens_processed": 5120,
|
||||
"tokens_per_second": 15808.455270868828,
|
||||
"energy_joules": 98.1449999999968,
|
||||
"energy_per_token": 0.019168945312499373,
|
||||
"avg_power_watts": 250.96736239598317,
|
||||
"peak_memory_gb": 46.1302490234375,
|
||||
"avg_gpu_util_percent": 32.2
|
||||
},
|
||||
"decode": {
|
||||
"stage_name": "decode",
|
||||
"duration_ms": 30558.618001000013,
|
||||
"tokens_processed": 1000,
|
||||
"tokens_per_second": 32.72399294913388,
|
||||
"energy_joules": 4828.459999999999,
|
||||
"energy_per_token": 4.828459999999999,
|
||||
"avg_power_watts": 157.61927190444868,
|
||||
"peak_memory_gb": 46.1302490234375,
|
||||
"avg_gpu_util_percent": 32.2
|
||||
},
|
||||
"e2e_latency_ms": 3088.2495312999936,
|
||||
"e2e_tokens_per_second": 198.17051497855476,
|
||||
"e2e_energy_joules": 4926.604999999996,
|
||||
"e2e_energy_per_token": 0.8050008169934634,
|
||||
"ttft_ms": 32.38773119999223,
|
||||
"itl_ms": 30.558618001000013,
|
||||
"timestamp": 1768541964.4743361
|
||||
}
|
||||
@@ -0,0 +1,47 @@
|
||||
{
|
||||
"model_name": "Qwen/Qwen3-4B",
|
||||
"gpu_name": "NVIDIA H200",
|
||||
"attention_implementation": "flash_attention_3_hopper",
|
||||
"batch_size": 3,
|
||||
"sequence_length": 2048,
|
||||
"num_steps": 10,
|
||||
"forward": {
|
||||
"stage_name": "forward",
|
||||
"duration_ms": 1605.9521619997668,
|
||||
"tokens_processed": 61440,
|
||||
"tokens_per_second": 38257.67756587068,
|
||||
"energy_joules": 817.7539999999863,
|
||||
"energy_per_token": 0.01330979817708311,
|
||||
"avg_power_watts": 476.6091506406698,
|
||||
"peak_memory_gb": 76.5540771484375,
|
||||
"avg_gpu_util_percent": 95.1
|
||||
},
|
||||
"backward": {
|
||||
"stage_name": "backward",
|
||||
"duration_ms": 3448.8081949999696,
|
||||
"tokens_processed": 61440,
|
||||
"tokens_per_second": 17814.849804948502,
|
||||
"energy_joules": 1765.182000000008,
|
||||
"energy_per_token": 0.02873017578125013,
|
||||
"avg_power_watts": 498.84691252245983,
|
||||
"peak_memory_gb": 76.5540771484375,
|
||||
"avg_gpu_util_percent": 95.1
|
||||
},
|
||||
"optimizer": {
|
||||
"stage_name": "optimizer",
|
||||
"duration_ms": 545.701982000196,
|
||||
"tokens_processed": 61440,
|
||||
"tokens_per_second": 112588.92587268984,
|
||||
"energy_joules": 332.4770000000135,
|
||||
"energy_per_token": 0.005411409505208553,
|
||||
"avg_power_watts": 521.4900438388863,
|
||||
"peak_memory_gb": 76.5540771484375,
|
||||
"avg_gpu_util_percent": 95.1
|
||||
},
|
||||
"total_duration_ms": 5600.462338999932,
|
||||
"total_tokens": 61440,
|
||||
"total_tokens_per_second": 10970.522839186035,
|
||||
"total_energy_joules": 2915.4130000000077,
|
||||
"total_energy_per_token": 0.047451383463541795,
|
||||
"timestamp": 1768541921.6000674
|
||||
}
|
||||
37
results/h200_sdpa/inference_NVIDIA_H200_sdpa.json
Normal file
37
results/h200_sdpa/inference_NVIDIA_H200_sdpa.json
Normal file
@@ -0,0 +1,37 @@
|
||||
{
|
||||
"model_name": "Qwen/Qwen3-4B",
|
||||
"gpu_name": "NVIDIA H200",
|
||||
"attention_implementation": "sdpa",
|
||||
"num_requests": 10,
|
||||
"prompt_length": 512,
|
||||
"generation_length": 100,
|
||||
"prefill": {
|
||||
"stage_name": "prefill",
|
||||
"duration_ms": 247.9969559935853,
|
||||
"tokens_processed": 5120,
|
||||
"tokens_per_second": 20645.414696672466,
|
||||
"energy_joules": 73.83399999141693,
|
||||
"energy_per_token": 0.014420703123323619,
|
||||
"avg_power_watts": 222.33737204549297,
|
||||
"peak_memory_gb": 46.1165771484375,
|
||||
"avg_gpu_util_percent": 40.0
|
||||
},
|
||||
"decode": {
|
||||
"stage_name": "decode",
|
||||
"duration_ms": 23003.622506046668,
|
||||
"tokens_processed": 1000,
|
||||
"tokens_per_second": 43.47141411041425,
|
||||
"energy_joules": 4033.3500000089407,
|
||||
"energy_per_token": 4.033350000008941,
|
||||
"avg_power_watts": 174.6335604209662,
|
||||
"peak_memory_gb": 46.1165771484375,
|
||||
"avg_gpu_util_percent": 40.0
|
||||
},
|
||||
"e2e_latency_ms": 2325.1619462040253,
|
||||
"e2e_tokens_per_second": 263.20747292425324,
|
||||
"e2e_energy_joules": 4107.184000000358,
|
||||
"e2e_energy_per_token": 0.6711084967320846,
|
||||
"ttft_ms": 24.79969559935853,
|
||||
"itl_ms": 23.003622506046668,
|
||||
"timestamp": 1769149520.7919798
|
||||
}
|
||||
47
results/h200_sdpa/pretrain_NVIDIA_H200_sdpa.json
Normal file
47
results/h200_sdpa/pretrain_NVIDIA_H200_sdpa.json
Normal file
@@ -0,0 +1,47 @@
|
||||
{
|
||||
"model_name": "Qwen/Qwen3-4B",
|
||||
"gpu_name": "NVIDIA H200",
|
||||
"attention_implementation": "sdpa",
|
||||
"batch_size": 3,
|
||||
"sequence_length": 2048,
|
||||
"num_steps": 10,
|
||||
"forward": {
|
||||
"stage_name": "forward",
|
||||
"duration_ms": 1615.8598741167225,
|
||||
"tokens_processed": 61440,
|
||||
"tokens_per_second": 38023.09902248482,
|
||||
"energy_joules": 873.9250000119209,
|
||||
"energy_per_token": 0.014224039713735693,
|
||||
"avg_power_watts": 541.9081076256928,
|
||||
"peak_memory_gb": 76.5540771484375,
|
||||
"avg_gpu_util_percent": 100.0
|
||||
},
|
||||
"backward": {
|
||||
"stage_name": "backward",
|
||||
"duration_ms": 3462.180594098754,
|
||||
"tokens_processed": 61440,
|
||||
"tokens_per_second": 17746.04135460864,
|
||||
"energy_joules": 1696.024000003934,
|
||||
"energy_per_token": 0.027604557291730693,
|
||||
"avg_power_watts": 472.8399628680292,
|
||||
"peak_memory_gb": 76.5540771484375,
|
||||
"avg_gpu_util_percent": 100.0
|
||||
},
|
||||
"optimizer": {
|
||||
"stage_name": "optimizer",
|
||||
"duration_ms": 551.849422918167,
|
||||
"tokens_processed": 61440,
|
||||
"tokens_per_second": 111334.71821915968,
|
||||
"energy_joules": 316.88299998641014,
|
||||
"energy_per_token": 0.005157600911237144,
|
||||
"avg_power_watts": 499.2301039455484,
|
||||
"peak_memory_gb": 76.5540771484375,
|
||||
"avg_gpu_util_percent": 100.0
|
||||
},
|
||||
"total_duration_ms": 5629.889891133644,
|
||||
"total_tokens": 61440,
|
||||
"total_tokens_per_second": 10913.179687005982,
|
||||
"total_energy_joules": 2886.832000002265,
|
||||
"total_energy_per_token": 0.04698619791670353,
|
||||
"timestamp": 1769149487.0005488
|
||||
}
|
||||
Reference in New Issue
Block a user