#!/bin/bash
#SBATCH --job-name=llm_bench_a100
# Adjust to your A100 partition name
#SBATCH --partition=a100
#SBATCH --nodes=1
# Request 1 A100 GPU
#SBATCH --gres=gpu:a100:1
#SBATCH -C a100_80
#SBATCH --time=02:00:00
#SBATCH --output=logs/benchmark_a100_sdpa_%j.out
#SBATCH --error=logs/benchmark_a100_sdpa_%j.err
#
# Slurm batch job: runs run_benchmark.py inside an Apptainer image on one
# A100 GPU, using the SDPA attention implementation, and writes results to
# ./results/a100.
#
# Usage (from the directory containing run_benchmark.py and model_cache/):
#   mkdir -p logs && sbatch <this-script>
#
# NOTE(review): the --output/--error paths above point into logs/. Slurm
# presumably opens those files before this script body runs, so logs/ should
# already exist at submission time; the mkdir further down only guarantees it
# for later submissions. Confirm against your cluster's behaviour.

# Abort on command failure, unset variables, and failed pipeline stages so a
# broken run is not reported as a successful job.
set -euo pipefail

# Prints the separator line used around the informational banners.
print_rule() {
  echo "========================================="
}

# Create logs directory
mkdir -p logs

# Print job info. The SLURM_* variables fall back to an empty string so the
# script still works (with blank fields) when run outside of Slurm.
print_rule
echo "Job ID: ${SLURM_JOB_ID:-}"
echo "Job Name: ${SLURM_JOB_NAME:-}"
echo "Node: ${SLURM_NODELIST:-}"
echo "Date: $(date)"
print_rule

# Set cache paths. The working directory is resolved once, in a separate
# assignment, so a failing `pwd` is not masked by `export`.
work_dir=$(pwd)
# TRANSFORMERS_CACHE is kept for older transformers releases; newer releases
# reportedly prefer HF_HOME — TODO confirm which one the image actually reads.
export TRANSFORMERS_CACHE="${work_dir}/model_cache"
export HF_HOME="${work_dir}/model_cache"

# Path to apptainer image
readonly APPTAINER_IMAGE="/anvme/workspace/ihpc125h-llm-profiles/pytorch_25.10_updated_ao.sif"

# Arguments forwarded to run_benchmark.py, kept in an array so each option
# stays a separate word without relying on backslash continuations.
benchmark_args=(
  --mode both
  --model-path ./model_cache
  --model-name Qwen/Qwen3-4B
  --attn-implementation sdpa
  --batch-size 3
  --sequence-length 2048
  --num-steps 10
  --num-requests 10
  --prompt-length 512
  --generation-length 100
  --output-dir ./results/a100
)

# Run benchmark inside apptainer. The exit status is captured explicitly so
# the job fails with the benchmark's own status instead of printing the
# completion banner and exiting 0.
benchmark_status=0
apptainer exec --nv "$APPTAINER_IMAGE" \
  python run_benchmark.py "${benchmark_args[@]}" || benchmark_status=$?

if ((benchmark_status != 0)); then
  echo "Benchmark failed with exit code ${benchmark_status}" >&2
  exit "$benchmark_status"
fi

print_rule
echo "Benchmark Complete!"
print_rule