#!/bin/bash
# Quick Start Script for LLM Benchmark Suite
#
# This script helps you get started quickly with the benchmark suite.
# It will:
# 1. Check dependencies
# 2. Cache the model if needed
# 3. Run a quick test benchmark
#
# Usage: ./quick_start.sh [--skip-cache]
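#
# Note: run this script from the repository root; it calls requirements.txt,
# cache_model.py, and run_benchmark.py by relative path.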

set -e  # Exit on error

echo "========================================="
echo "LLM Benchmark Suite - Quick Start"
echo "========================================="

# Parse arguments
SKIP_CACHE=false
if [[ "$1" == "--skip-cache" ]]; then
    SKIP_CACHE=true
fi

# Check Python
echo ""
echo "[1/5] Checking Python..."
if ! command -v python &> /dev/null; then
    echo "✗ Python not found. Please install Python 3.8+"
    exit 1
fi
PYTHON_VERSION=$(python --version 2>&1 | awk '{print $2}')
echo " ✓ Python $PYTHON_VERSION found"
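
# Note: the check above assumes "python" on PATH resolves to Python 3 (for
# example from an activated virtualenv or conda environment); systems that only
# provide "python3" will fail it until such an environment is activated.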

# Check dependencies
echo ""
echo "[2/5] Checking dependencies..."
MISSING_DEPS=()

if ! python -c "import torch" 2>/dev/null; then
    MISSING_DEPS+=("torch")
fi

if ! python -c "import transformers" 2>/dev/null; then
    MISSING_DEPS+=("transformers")
fi

if [ "${#MISSING_DEPS[@]}" -gt 0 ]; then
    echo " ⚠ Missing dependencies: ${MISSING_DEPS[*]}"
    echo " Installing dependencies..."
    pip install -r requirements.txt
else
    echo " ✓ All dependencies installed"
fi
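
# Note: "pip" is assumed to install into the same environment as the "python"
# found above. If several Python installs are present, the module form avoids
# mixing interpreters:
#   python -m pip install -r requirements.txt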

# Check GPU
echo ""
echo "[3/5] Checking GPU..."
if python -c "import torch; assert torch.cuda.is_available()" 2>/dev/null; then
    GPU_NAME=$(python -c "import torch; print(torch.cuda.get_device_name(0))")
    echo " ✓ GPU found: $GPU_NAME"
else
    echo " ✗ No GPU found or CUDA not available"
    echo " This benchmark requires a GPU to run."
    exit 1
fi
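
# Note: ROCm builds of PyTorch expose the same torch.cuda API, so this check is
# expected to also pass on AMD GPUs (e.g. the MI300X SLURM target below), not
# only on NVIDIA CUDA devices.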

# Cache model
if [ "$SKIP_CACHE" = false ]; then
    echo ""
    echo "[4/5] Caching model..."
    if [ -d "./model_cache" ] && [ "$(ls -A ./model_cache)" ]; then
        echo " ✓ Model cache already exists at ./model_cache"
        echo " To re-download, remove the directory and run again."
    else
        echo " Downloading Qwen/Qwen3-4B..."
        echo " (This may take several minutes depending on your connection)"
        python cache_model.py --model-name Qwen/Qwen3-4B --cache-dir ./model_cache
    fi
else
    echo ""
    echo "[4/5] Skipping model cache (--skip-cache specified)"
fi
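
# Note: even with --skip-cache, the benchmark below still points --model-path
# at ./model_cache, so the model is assumed to have been cached there by a
# previous run.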

# Run quick test
echo ""
echo "[5/5] Running quick test benchmark..."
echo " This will run a minimal benchmark to verify everything works."
echo " Parameters: 2 steps, batch size 2, sequence length 512"
echo ""
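
# The values below are intentionally tiny so the run finishes quickly; with
# "--mode both", the training-style knobs (--num-steps, --batch-size,
# --sequence-length) and the inference knobs (--num-requests, --prompt-length,
# --generation-length) are presumably both exercised.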

python run_benchmark.py \
    --mode both \
    --model-path ./model_cache \
    --model-name Qwen/Qwen3-4B \
    --batch-size 2 \
    --sequence-length 512 \
    --num-steps 2 \
    --num-requests 2 \
    --prompt-length 256 \
    --generation-length 20 \
    --output-dir ./results/test
echo ""
|
|
echo "========================================="
|
|
echo "Quick Start Complete!"
|
|
echo "========================================="
|
|
echo ""
|
|
echo "Next steps:"
|
|
echo " 1. Run full benchmarks:"
|
|
echo " python run_benchmark.py --mode both"
|
|
echo ""
|
|
echo " 2. Run on different GPUs using SLURM:"
|
|
echo " sbatch slurm_a100.sh"
|
|
echo " sbatch slurm_h100.sh"
|
|
echo " sbatch slurm_h200.sh"
|
|
echo " sbatch slurm_mi300x.sh"
|
|
echo ""
|
|
echo " 3. View results:"
|
|
echo " ls -l results/"
|
|
echo ""
|
|
echo "For more information, see README.md"
|
|
echo ""
|