#!/bin/bash
#SBATCH --job-name=llm_bench_h100
#SBATCH --partition=h100                      # Adjust to your H100 partition name
#SBATCH --nodes=1
#SBATCH --gres=gpu:h100:1                     # Request 1 H100 GPU
#SBATCH --time=02:00:00
#SBATCH --output=logs/benchmark_h100_%j.out
#SBATCH --error=logs/benchmark_h100_%j.err

# Fail fast: abort on command errors, unset variables, and pipeline failures.
set -euo pipefail

# Create the logs directory.
# NOTE(review): SLURM opens the --output/--error files *before* this script
# starts, so logs/ must already exist at submission time for the first run;
# this mkdir only guarantees it for subsequent jobs from a fresh checkout.
mkdir -p logs
# Print job metadata (ID, name, node, timestamp) framed by separator lines.
sep="========================================="
printf '%s\n' "$sep"
printf 'Job ID: %s\n' "$SLURM_JOB_ID"
printf 'Job Name: %s\n' "$SLURM_JOB_NAME"
printf 'Node: %s\n' "$SLURM_NODELIST"
printf 'Date: %s\n' "$(date)"
printf '%s\n' "$sep"
# Cache paths: keep Hugging Face model downloads local to the working
# directory instead of $HOME (often quota-limited on clusters).
# TRANSFORMERS_CACHE is deprecated in recent transformers releases but kept
# for compatibility; HF_HOME is the current umbrella variable.
# Command substitutions are quoted so paths with spaces survive splitting.
export TRANSFORMERS_CACHE="$(pwd)/model_cache"
export HF_HOME="$(pwd)/model_cache"

# Path to the Apptainer (Singularity) image with PyTorch + torchao.
readonly APPTAINER_IMAGE="/hnvme/workspace/ihpc125h-llm-profiler/pytorch_25.10_updated_ao.sif"
# Run the benchmark inside the container; --nv exposes the host NVIDIA
# driver and GPU devices to the container.
# To benchmark FlashAttention-3 on Hopper instead of PyTorch SDPA, replace
# the --attn-implementation value below with flash_attention_3_hopper
# (kept as the commented line after the command for reference).
# $APPTAINER_IMAGE is quoted to survive word-splitting/globbing (SC2086).
apptainer exec --nv "$APPTAINER_IMAGE" python run_benchmark.py \
  --mode both \
  --model-path ./model_cache \
  --model-name Qwen/Qwen3-4B \
  --attn-implementation sdpa \
  --batch-size 3 \
  --sequence-length 2048 \
  --num-steps 10 \
  --num-requests 10 \
  --prompt-length 512 \
  --generation-length 100 \
  --output-dir ./results/h100_sdpa

# --attn-implementation flash_attention_3_hopper \
# Signal successful completion in the job log.
printf '=========================================\n'
printf 'Benchmark Complete!\n'
printf '=========================================\n'