Initial commit
This commit is contained in:
42
slurm_mi300x.sh
Executable file
42
slurm_mi300x.sh
Executable file
@@ -0,0 +1,42 @@
|
||||
#!/bin/bash
#
# Slurm batch script: runs run_benchmark.py (mode "both") for Qwen/Qwen3-4B
# with the sdpa attention implementation inside the Apptainer sandbox at
# ../rocm_sandbox/ and writes results to ./results/mi300x_sdpa.
#
# Usage (from the directory containing run_benchmark.py):
#   mkdir -p logs && sbatch slurm_mi300x.sh
#
# NOTE: Slurm opens the --output/--error files before this script starts, so
# the logs/ directory has to exist at submission time; the mkdir further down
# only guarantees it for subsequent submissions.
#
#SBATCH --job-name=llm_bench_mi300x
#SBATCH --nodes=1
# Pin the job to node aquavan1 (presumably the MI300X host -- confirm).
# The long form is used because short options do not take '=': "-w=aquavan1"
# would be parsed as the node name "=aquavan1".
#SBATCH --nodelist=aquavan1
#SBATCH --time=02:00:00
#SBATCH --output=logs/benchmark_mi300x_%j.out
#SBATCH --error=logs/benchmark_mi300x_%j.err

# Abort on errors, unset variables and failed pipeline stages so a broken run
# is not reported to Slurm as a successful job.
set -euo pipefail

# Create logs directory (see the note in the header about submission time).
mkdir -p logs

# Print job info. The ":-" defaults keep this working under "set -u" when the
# script is executed outside of Slurm.
echo "========================================="
echo "Job ID: ${SLURM_JOB_ID:-}"
echo "Job Name: ${SLURM_JOB_NAME:-}"
echo "Node: ${SLURM_NODELIST:-}"
echo "Date: $(date)"
echo "========================================="

# Set cache paths. Both variables point at the same directory below the
# current working directory.
# NOTE(review): --model-path below uses ./model_cache, not ./models -- confirm
# that the two locations are meant to differ.
cache_dir="${PWD}/models"
export TRANSFORMERS_CACHE="${cache_dir}"
export HF_HOME="${cache_dir}"

# Path to apptainer image (currently unused; the sandbox directory below is
# executed instead).
#APPTAINER_IMAGE="/home/woody/ihpc/ihpc125h/pytorch_25.10_updated_ao.sif"

# Benchmark arguments, kept in an array so each option stays a single word.
benchmark_args=(
  --mode both
  --model-path ./model_cache
  --model-name Qwen/Qwen3-4B
  --attn-implementation sdpa
  --batch-size 3
  --sequence-length 2048
  --num-steps 10
  --num-requests 10
  --prompt-length 512
  --generation-length 100
  --output-dir ./results/mi300x_sdpa
)

# Run the benchmark and capture its exit status explicitly so that it can be
# reported and handed back to Slurm unchanged.
status=0
apptainer exec --writable ../rocm_sandbox/ \
  python run_benchmark.py "${benchmark_args[@]}" || status=$?

if (( status != 0 )); then
  printf 'Benchmark failed with exit status %d\n' "${status}" >&2
  exit "${status}"
fi

echo "========================================="
echo "Benchmark Complete!"
echo "========================================="
|
||||
Reference in New Issue
Block a user