Initial commit

This commit is contained in:
Bole Ma
2026-02-05 23:18:26 +01:00
commit 747c92ac6b
31 changed files with 4220 additions and 0 deletions

26
configs/a100.yaml Normal file
View File

@@ -0,0 +1,26 @@
---
# A100 Configuration
# Defaults for the NVIDIA A100 80GB: attention implementation, pretraining
# and inference settings, and reference hardware specs.
gpu_type: a100
gpu_model: "NVIDIA A100 80GB"

# Default attention implementation
default_attention: flash_attention_2

# Pretraining defaults
pretrain:
  batch_size: 8
  # presumably measured in tokens -- TODO confirm against the consumer
  sequence_length: 8192
  num_steps: 10
  # NOTE(review): not visible here whether warmup steps are counted inside
  # num_steps or run in addition to them -- confirm
  warmup_steps: 3

# Inference defaults
inference:
  num_requests: 10
  # presumably token counts, like sequence_length above -- TODO confirm
  prompt_length: 512
  generation_length: 100
  warmup_requests: 2

# Hardware specs (for reference)
hardware:
  memory_gb: 80
  tdp_watts: 400
  # Quoted on purpose: unquoted 8.0 would load as a float, not a string.
  compute_capability: "8.0"

26
configs/h100.yaml Normal file
View File

@@ -0,0 +1,26 @@
---
# H100 Configuration
# Defaults for the NVIDIA H100 80GB: attention implementation, pretraining
# and inference settings, and reference hardware specs.
gpu_type: h100
gpu_model: "NVIDIA H100 80GB"

# Default attention implementation
default_attention: flash_attention_3_hopper

# Pretraining defaults
pretrain:
  batch_size: 8
  # presumably measured in tokens -- TODO confirm against the consumer
  sequence_length: 8192
  num_steps: 10
  # NOTE(review): not visible here whether warmup steps are counted inside
  # num_steps or run in addition to them -- confirm
  warmup_steps: 3

# Inference defaults
inference:
  num_requests: 10
  # presumably token counts, like sequence_length above -- TODO confirm
  prompt_length: 512
  generation_length: 100
  warmup_requests: 2

# Hardware specs (for reference)
hardware:
  memory_gb: 80
  tdp_watts: 700
  # Quoted on purpose: unquoted 9.0 would load as a float, not a string.
  compute_capability: "9.0"

26
configs/h200.yaml Normal file
View File

@@ -0,0 +1,26 @@
---
# H200 Configuration
# Defaults for the NVIDIA H200 141GB: attention implementation, pretraining
# and inference settings, and reference hardware specs.
gpu_type: h200
gpu_model: "NVIDIA H200 141GB"

# Default attention implementation
default_attention: flash_attention_3_hopper

# Pretraining defaults
pretrain:
  batch_size: 8
  # presumably measured in tokens -- TODO confirm against the consumer
  sequence_length: 8192
  num_steps: 10
  # NOTE(review): not visible here whether warmup steps are counted inside
  # num_steps or run in addition to them -- confirm
  warmup_steps: 3

# Inference defaults
inference:
  num_requests: 10
  # presumably token counts, like sequence_length above -- TODO confirm
  prompt_length: 512
  generation_length: 100
  warmup_requests: 2

# Hardware specs (for reference)
hardware:
  memory_gb: 141
  tdp_watts: 700
  # Quoted on purpose: unquoted 9.0 would load as a float, not a string.
  compute_capability: "9.0"

26
configs/mi300x.yaml Normal file
View File

@@ -0,0 +1,26 @@
---
# MI300X Configuration
# Defaults for the AMD Instinct MI300X: attention implementation, pretraining
# and inference settings, and reference hardware specs.
gpu_type: mi300x
gpu_model: "AMD Instinct MI300X"

# Default attention implementation
default_attention: flash_attention_2

# Pretraining defaults
pretrain:
  batch_size: 8
  # presumably measured in tokens -- TODO confirm against the consumer
  sequence_length: 8192
  num_steps: 10
  # NOTE(review): not visible here whether warmup steps are counted inside
  # num_steps or run in addition to them -- confirm
  warmup_steps: 3

# Inference defaults
inference:
  num_requests: 10
  # presumably token counts, like sequence_length above -- TODO confirm
  prompt_length: 512
  generation_length: 100
  warmup_requests: 2

# Hardware specs (for reference)
hardware:
  memory_gb: 192
  tdp_watts: 750
  # The key name is kept for parity with the NVIDIA configs; on AMD it holds
  # the LLVM gfx target rather than a CUDA compute capability. ROCm lists
  # the MI300X as gfx942 (was "gfx940" here).
  compute_capability: "gfx942"