Introduce slurm cluster

This commit is contained in:
2023-06-23 08:38:15 +02:00
parent d6517a2797
commit fa2287c661
17 changed files with 1528 additions and 17 deletions

19
slurm/database/Dockerfile Normal file
View File

@@ -0,0 +1,19 @@
FROM scidas/slurm.base:19.05.1
MAINTAINER Michael J. Stealey <stealey@renci.org>
ENV DBD_ADDR=database \
DBD_HOST=database \
DBD_PORT=6819 \
STORAGE_HOST=database.local.dev \
STORAGE_PORT=3306 \
STORAGE_PASS=password \
STORAGE_USER=slurm
# clean up
RUN rm -f /packages/slurm-*.rpm /packages/openmpi-*.rpm \
&& yum clean all \
&& rm -rf /var/cache/yum
COPY docker-entrypoint.sh /docker-entrypoint.sh
ENTRYPOINT ["/usr/local/bin/tini", "--", "/docker-entrypoint.sh"]

14
slurm/database/Makefile Normal file
View File

@@ -0,0 +1,14 @@
SLURM_VERSION = 19.05.1
IMAGE = scidas/slurm.database
.PHONY: all build clean test
all: build
build:
docker build -t $(IMAGE):$(SLURM_VERSION) .
clean:
@[ -z $(docker images -q $(IMAGE):$(SLURM_VERSION)) ] || docker rmi $(IMAGE):$(SLURM_VERSION)

3
slurm/database/README.md Normal file
View File

@@ -0,0 +1,3 @@
# Slurm Database
TODO

View File

@@ -0,0 +1,143 @@
#!/usr/bin/env bash
set -e
SLURM_ACCT_DB_SQL=/slurm_acct_db.sql
# start sshd server
_sshd_host() {
if [ ! -d /var/run/sshd ]; then
mkdir /var/run/sshd
ssh-keygen -t rsa -f /etc/ssh/ssh_host_rsa_key -N ''
fi
/usr/sbin/sshd
}
# slurm database user settings
_slurm_acct_db() {
{
echo "create database slurm_acct_db;"
echo "create user '${STORAGE_USER}'@'${STORAGE_HOST}';"
echo "set password for '${STORAGE_USER}'@'${STORAGE_HOST}' = password('${STORAGE_PASS}');"
echo "grant usage on *.* to '${STORAGE_USER}'@'${STORAGE_HOST}';"
echo "grant all privileges on slurm_acct_db.* to '${STORAGE_USER}'@'${STORAGE_HOST}';"
echo "flush privileges;"
} >> $SLURM_ACCT_DB_SQL
}
# start database
_mariadb_start() {
# mariadb somehow expects `resolveip` to be found under this path; see https://github.com/SciDAS/slurm-in-docker/issues/26
ln -s /usr/bin/resolveip /usr/libexec/resolveip
mysql_install_db
chown -R mysql: /var/lib/mysql/ /var/log/mariadb/ /var/run/mariadb
cd /var/lib/mysql
mysqld_safe --user=mysql &
cd /
_slurm_acct_db
sleep 5s
mysql -uroot < $SLURM_ACCT_DB_SQL
}
# start munge using existing key
_munge_start_using_key() {
if [ ! -f /.secret/munge.key ]; then
echo -n "cheking for munge.key"
while [ ! -f /.secret/munge.key ]; do
echo -n "."
sleep 1
done
echo ""
fi
cp /.secret/munge.key /etc/munge/munge.key
chown -R munge: /etc/munge /var/lib/munge /var/log/munge /var/run/munge
chmod 0700 /etc/munge
chmod 0711 /var/lib/munge
chmod 0700 /var/log/munge
chmod 0755 /var/run/munge
sudo -u munge /sbin/munged
munge -n
munge -n | unmunge
remunge
}
# wait for worker user in shared /home volume
_wait_for_worker() {
if [ ! -f /home/worker/.ssh/id_rsa.pub ]; then
echo -n "cheking for id_rsa.pub"
while [ ! -f /home/worker/.ssh/id_rsa.pub ]; do
echo -n "."
sleep 1
done
echo ""
fi
}
# generate slurmdbd.conf
_generate_slurmdbd_conf() {
cat > /etc/slurm/slurmdbd.conf <<EOF
#
# Example slurmdbd.conf file.
#
# See the slurmdbd.conf man page for more information.
#
# Archive info
#ArchiveJobs=yes
#ArchiveDir="/tmp"
#ArchiveSteps=yes
#ArchiveScript=
#JobPurge=12
#StepPurge=1
#
# Authentication info
AuthType=auth/munge
AuthInfo=/var/run/munge/munge.socket.2
#
# slurmDBD info
DbdAddr=$DBD_ADDR
DbdHost=$DBD_HOST
DbdPort=$DBD_PORT
SlurmUser=slurm
#MessageTimeout=300
DebugLevel=4
#DefaultQOS=normal,standby
LogFile=/var/log/slurm/slurmdbd.log
PidFile=/var/run/slurmdbd.pid
#PluginDir=/usr/lib/slurm
#PrivateData=accounts,users,usage,jobs
#TrackWCKey=yes
#
# Database info
StorageType=accounting_storage/mysql
StorageHost=$STORAGE_HOST
StoragePort=$STORAGE_PORT
StoragePass=$STORAGE_PASS
StorageUser=$STORAGE_USER
StorageLoc=slurm_acct_db
EOF
}
# run slurmdbd
_slurmdbd() {
mkdir -p /var/spool/slurm/d \
/var/log/slurm
chown slurm: /var/spool/slurm/d \
/var/log/slurm
if [[ ! -f /home/config/slurmdbd.conf ]]; then
echo "### generate slurmdbd.conf ###"
_generate_slurmdbd_conf
else
echo "### use provided slurmdbd.conf ###"
cp /home/config/slurmdbd.conf /etc/slurm/slurmdbd.conf
fi
/usr/sbin/slurmdbd
cp /etc/slurm/slurmdbd.conf /.secret/slurmdbd.conf
}
### main ###
_sshd_host
_mariadb_start
_munge_start_using_key
_wait_for_worker
_slurmdbd
tail -f /dev/null