mirror of
https://github.com/ClusterCockpit/cc-docker.git
synced 2025-03-15 03:15:56 +01:00
Slurm version update and rest service add
This commit is contained in:
parent
2d15d513c6
commit
2b68597724
@ -69,8 +69,6 @@ services:
|
||||
build:
|
||||
context: ./slurm/controller
|
||||
privileged: true
|
||||
ports:
|
||||
- "6817:6817"
|
||||
volumes:
|
||||
- ${DATADIR}/slurm/home:/home
|
||||
- ${DATADIR}/slurm/secret:/.secret
|
||||
@ -78,6 +76,8 @@ services:
|
||||
- /etc/timezone:/etc/timezone:ro
|
||||
- /etc/localtime:/etc/localtime:ro
|
||||
- ${DATADIR}/slurm/state:/var/lib/slurm/d
|
||||
ports:
|
||||
- "6817:6817"
|
||||
|
||||
slurmdbd:
|
||||
container_name: slurmdbd
|
||||
@ -88,14 +88,15 @@ services:
|
||||
- mariadb
|
||||
- slurmctld
|
||||
privileged: true
|
||||
ports:
|
||||
- "6819:6819"
|
||||
volumes:
|
||||
- ${DATADIR}/slurm/home:/home
|
||||
- ${DATADIR}/slurm/secret:/.secret
|
||||
- ./slurm/database/slurmdbd.conf:/home/config/slurmdbd.conf
|
||||
- /etc/timezone:/etc/timezone:ro
|
||||
- /etc/localtime:/etc/localtime:ro
|
||||
- ${DATADIR}/slurm/state:/var/lib/slurm/d
|
||||
ports:
|
||||
- "6819:6819"
|
||||
|
||||
node01:
|
||||
container_name: node01
|
||||
@ -114,3 +115,21 @@ services:
|
||||
- /etc/localtime:/etc/localtime:ro
|
||||
ports:
|
||||
- "6818:6818"
|
||||
|
||||
slurmrestd:
|
||||
container_name: slurmrestd
|
||||
hostname: slurmrestd
|
||||
build:
|
||||
context: ./slurm/rest
|
||||
depends_on:
|
||||
- slurmctld
|
||||
privileged: true
|
||||
volumes:
|
||||
- ${DATADIR}/slurm/home:/home
|
||||
- ${DATADIR}/slurm/secret:/.secret
|
||||
- ./slurm/controller/slurm.conf:/home/config/slurm.conf
|
||||
- ./slurm/rest/slurmrestd.conf:/home/config/slurmrestd.conf
|
||||
- /etc/timezone:/etc/timezone:ro
|
||||
- /etc/localtime:/etc/localtime:ro
|
||||
ports:
|
||||
- "6820:6820"
|
@ -88,6 +88,7 @@ if [ ! -f docker-compose.yml ]; then
|
||||
fi
|
||||
|
||||
docker-compose down
|
||||
docker-compose down --remove-orphans
|
||||
|
||||
cd slurm/base/
|
||||
make
|
||||
|
@ -1,9 +1,11 @@
|
||||
FROM rockylinux:8
|
||||
LABEL org.opencontainers.image.authors="jan.eitzinger@fau.de"
|
||||
|
||||
ENV SLURM_VERSION=22.05.6
|
||||
ENV SLURM_VERSION=24.05.3
|
||||
ENV HTTP_PARSER_VERSION=2.8.0
|
||||
|
||||
RUN yum install -y https://dl.fedoraproject.org/pub/epel/epel-release-latest-8.noarch.rpm
|
||||
RUN ARCH=$(uname -m) && yum install -y https://rpmfind.net/linux/almalinux/8.10/PowerTools/x86_64/os/Packages/http-parser-devel-2.8.0-9.el8.$ARCH.rpm
|
||||
|
||||
RUN groupadd -g 981 munge \
|
||||
&& useradd -m -c "MUNGE Uid 'N' Gid Emporium" -d /var/lib/munge -u 981 -g munge -s /sbin/nologin munge \
|
||||
@ -17,6 +19,7 @@ RUN yum install -y munge munge-libs rng-tools \
|
||||
openssh-server openssh-clients dbus-devel \
|
||||
pam-devel numactl numactl-devel hwloc sudo \
|
||||
lua readline-devel ncurses-devel man2html \
|
||||
autoconf automake json-c-devel \
|
||||
libibmad libibumad rpm-build perl-ExtUtils-MakeMaker.noarch rpm-build make wget
|
||||
|
||||
RUN dnf --enablerepo=powertools install -y munge-devel rrdtool-devel lua-devel hwloc-devel mariadb-server mariadb-devel
|
||||
@ -24,13 +27,13 @@ RUN dnf --enablerepo=powertools install -y munge-devel rrdtool-devel lua-devel h
|
||||
RUN mkdir -p /usr/local/slurm-tmp \
|
||||
&& cd /usr/local/slurm-tmp \
|
||||
&& wget https://download.schedmd.com/slurm/slurm-${SLURM_VERSION}.tar.bz2 \
|
||||
&& rpmbuild -ta slurm-${SLURM_VERSION}.tar.bz2
|
||||
&& rpmbuild -ta --with slurmrestd slurm-${SLURM_VERSION}.tar.bz2
|
||||
|
||||
RUN ARCH=$(uname -m) \
|
||||
&& yum -y --nogpgcheck localinstall \
|
||||
/root/rpmbuild/RPMS/$ARCH/slurm-${SLURM_VERSION}-1.el8.$ARCH.rpm \
|
||||
/root/rpmbuild/RPMS/$ARCH/slurm-perlapi-${SLURM_VERSION}-1.el8.$ARCH.rpm \
|
||||
/root/rpmbuild/RPMS/$ARCH/slurm-slurmctld-${SLURM_VERSION}-1.el8.$ARCH.rpm
|
||||
/root/rpmbuild/RPMS/$ARCH/slurm-${SLURM_VERSION}*.$ARCH.rpm \
|
||||
/root/rpmbuild/RPMS/$ARCH/slurm-perlapi-${SLURM_VERSION}*.$ARCH.rpm \
|
||||
/root/rpmbuild/RPMS/$ARCH/slurm-slurmctld-${SLURM_VERSION}*.$ARCH.rpm
|
||||
|
||||
VOLUME ["/home", "/.secret"]
|
||||
# 22: SSH
|
||||
@ -38,4 +41,5 @@ VOLUME ["/home", "/.secret"]
|
||||
# 6817: SlurmCtlD
|
||||
# 6818: SlurmD
|
||||
# 6819: SlurmDBD
|
||||
EXPOSE 22 6817 6818 6819
|
||||
# 6820: SlurmRestD
|
||||
EXPOSE 22 6817 6818 6819 6820
|
||||
|
@ -1,6 +1,6 @@
|
||||
include ../../.env
|
||||
IMAGE = clustercockpit/slurm.base
|
||||
|
||||
SLURM_VERSION = 24.05.3
|
||||
.PHONY: build clean
|
||||
|
||||
build:
|
||||
|
@ -1,4 +1,4 @@
|
||||
FROM clustercockpit/slurm.base:22.05.6
|
||||
FROM clustercockpit/slurm.base:24.05.3
|
||||
LABEL org.opencontainers.image.authors="jan.eitzinger@fau.de"
|
||||
|
||||
# clean up
|
||||
|
@ -3,6 +3,7 @@ set -e
|
||||
|
||||
# Determine the system architecture dynamically
|
||||
ARCH=$(uname -m)
|
||||
SLURM_VERSION="24.05.3"
|
||||
|
||||
_delete_secrets() {
|
||||
if [ -f /.secret/munge.key ]; then
|
||||
@ -90,11 +91,12 @@ _copy_secrets() {
|
||||
# run slurmctld
|
||||
_slurmctld() {
|
||||
cd /root/rpmbuild/RPMS/$ARCH
|
||||
yum -y --nogpgcheck localinstall slurm-22.05.6-1.el8.$ARCH.rpm \
|
||||
slurm-perlapi-22.05.6-1.el8.$ARCH.rpm \
|
||||
slurm-slurmd-22.05.6-1.el8.$ARCH.rpm \
|
||||
slurm-torque-22.05.6-1.el8.$ARCH.rpm \
|
||||
slurm-slurmctld-22.05.6-1.el8.$ARCH.rpm
|
||||
|
||||
yum -y --nogpgcheck localinstall slurm-$SLURM_VERSION*.$ARCH.rpm \
|
||||
slurm-perlapi-$SLURM_VERSION*.$ARCH.rpm \
|
||||
slurm-slurmd-$SLURM_VERSION*.$ARCH.rpm \
|
||||
slurm-torque-$SLURM_VERSION*.$ARCH.rpm \
|
||||
slurm-slurmctld-$SLURM_VERSION*.$ARCH.rpm
|
||||
echo "checking for slurmdbd.conf"
|
||||
while [ ! -f /.secret/slurmdbd.conf ]; do
|
||||
echo -n "."
|
||||
|
@ -56,7 +56,7 @@ SchedulerType=sched/backfill
|
||||
#SchedulerAuth=
|
||||
#SchedulerPort=
|
||||
#SchedulerRootFilter=
|
||||
SelectType=select/cons_res
|
||||
# SelectType=select/cons_res
|
||||
SelectTypeParameters=CR_CPU_Memory
|
||||
# FastSchedule=1
|
||||
#PriorityType=priority/multifactor
|
||||
|
@ -1,4 +1,4 @@
|
||||
FROM clustercockpit/slurm.base:22.05.6
|
||||
FROM clustercockpit/slurm.base:24.05.3
|
||||
LABEL org.opencontainers.image.authors="jan.eitzinger@fau.de"
|
||||
|
||||
# clean up
|
||||
|
@ -3,6 +3,7 @@ set -e
|
||||
|
||||
# Determine the system architecture dynamically
|
||||
ARCH=$(uname -m)
|
||||
SLURM_VERSION="24.05.3"
|
||||
|
||||
SLURM_ACCT_DB_SQL=/slurm_acct_db.sql
|
||||
|
||||
@ -52,9 +53,9 @@ _wait_for_worker() {
|
||||
# run slurmdbd
|
||||
_slurmdbd() {
|
||||
cd /root/rpmbuild/RPMS/$ARCH
|
||||
yum -y --nogpgcheck localinstall slurm-22.05.6-1.el8.$ARCH.rpm \
|
||||
slurm-perlapi-22.05.6-1.el8.$ARCH.rpm \
|
||||
slurm-slurmdbd-22.05.6-1.el8.$ARCH.rpm
|
||||
yum -y --nogpgcheck localinstall slurm-$SLURM_VERSION*.$ARCH.rpm \
|
||||
slurm-perlapi-$SLURM_VERSION*.$ARCH.rpm \
|
||||
slurm-slurmdbd-$SLURM_VERSION*.$ARCH.rpm
|
||||
mkdir -p /var/spool/slurm/d /var/log/slurm /etc/slurm
|
||||
chown slurm: /var/spool/slurm/d /var/log/slurm
|
||||
if [[ ! -f /home/config/slurmdbd.conf ]]; then
|
||||
|
@ -1,4 +1,4 @@
|
||||
FROM clustercockpit/slurm.base:22.05.6
|
||||
FROM clustercockpit/slurm.base:24.05.3
|
||||
LABEL org.opencontainers.image.authors="jan.eitzinger@fau.de"
|
||||
|
||||
# clean up
|
||||
|
@ -3,6 +3,7 @@ set -e
|
||||
|
||||
# Determine the system architecture dynamically
|
||||
ARCH=$(uname -m)
|
||||
SLURM_VERSION="24.05.3"
|
||||
|
||||
# start sshd server
|
||||
_sshd_host() {
|
||||
@ -13,99 +14,62 @@ _sshd_host() {
|
||||
/usr/sbin/sshd
|
||||
}
|
||||
|
||||
# setup worker ssh to be passwordless
|
||||
_ssh_worker() {
|
||||
if [[ ! -d /home/worker ]]; then
|
||||
mkdir -p /home/worker
|
||||
chown -R worker:worker /home/worker
|
||||
fi
|
||||
cat > /home/worker/setup-worker-ssh.sh <<EOF2
|
||||
mkdir -p ~/.ssh
|
||||
chmod 0700 ~/.ssh
|
||||
ssh-keygen -b 2048 -t rsa -f ~/.ssh/id_rsa -q -N "" -C "$(whoami)@$(hostname)-$(date -I)"
|
||||
cat ~/.ssh/id_rsa.pub > ~/.ssh/authorized_keys
|
||||
chmod 0640 ~/.ssh/authorized_keys
|
||||
cat >> ~/.ssh/config <<EOF
|
||||
Host *
|
||||
StrictHostKeyChecking no
|
||||
UserKnownHostsFile /dev/null
|
||||
LogLevel QUIET
|
||||
EOF
|
||||
chmod 0644 ~/.ssh/config
|
||||
cd ~/
|
||||
tar -czvf ~/worker-secret.tar.gz .ssh
|
||||
cd -
|
||||
EOF2
|
||||
chmod +x /home/worker/setup-worker-ssh.sh
|
||||
chown worker: /home/worker/setup-worker-ssh.sh
|
||||
sudo -u worker /home/worker/setup-worker-ssh.sh
|
||||
}
|
||||
|
||||
# start munge and generate key
|
||||
_munge_start() {
|
||||
# start munge using existing key
|
||||
_munge_start_using_key() {
|
||||
if [ ! -f /.secret/munge.key ]; then
|
||||
echo -n "checking for munge.key"
|
||||
while [ ! -f /.secret/munge.key ]; do
|
||||
echo -n "."
|
||||
sleep 1
|
||||
done
|
||||
echo ""
|
||||
fi
|
||||
cp /.secret/munge.key /etc/munge/munge.key
|
||||
chown -R munge: /etc/munge /var/lib/munge /var/log/munge /var/run/munge
|
||||
chmod 0700 /etc/munge
|
||||
chmod 0711 /var/lib/munge
|
||||
chmod 0700 /var/log/munge
|
||||
chmod 0755 /var/run/munge
|
||||
/sbin/create-munge-key -f
|
||||
rngd -r /dev/urandom
|
||||
/usr/sbin/create-munge-key -r -f
|
||||
sh -c "dd if=/dev/urandom bs=1 count=1024 > /etc/munge/munge.key"
|
||||
chown munge: /etc/munge/munge.key
|
||||
chmod 400 /etc/munge/munge.key
|
||||
sudo -u munge /sbin/munged
|
||||
munge -n
|
||||
munge -n | unmunge
|
||||
remunge
|
||||
}
|
||||
|
||||
# copy secrets to /.secret directory for other nodes
|
||||
_copy_secrets() {
|
||||
cp /home/worker/worker-secret.tar.gz /.secret/worker-secret.tar.gz
|
||||
cp thome/worker/setup-worker-ssh.sh /.secret/setup-worker-ssh.sh
|
||||
cp /etc/munge/munge.key /.secret/munge.key
|
||||
rm -f /home/worker/worker-secret.tar.gz
|
||||
rm -f /home/worker/setup-worker-ssh.sh
|
||||
}
|
||||
|
||||
# run slurmctld
|
||||
_slurmctld() {
|
||||
# run slurmrestd
|
||||
_slurmrestd() {
|
||||
cd /root/rpmbuild/RPMS/$ARCH
|
||||
yum -y --nogpgcheck localinstall slurm-22.05.6-1.el8.$ARCH.rpm \
|
||||
slurm-perlapi-22.05.6-1.el8.$ARCH.rpm \
|
||||
slurm-slurmd-22.05.6-1.el8.$ARCH.rpm \
|
||||
slurm-torque-22.05.6-1.el8.$ARCH.rpm \
|
||||
slurm-slurmctld-22.05.6-1.el8.$ARCH.rpm \
|
||||
slurm-slurmrestd-22.05.6-1.el8.$ARCH.rpm
|
||||
yum -y --nogpgcheck localinstall slurm-$SLURM_VERSION*.$ARCH.rpm \
|
||||
slurm-perlapi-$SLURM_VERSION*.$ARCH.rpm \
|
||||
slurm-slurmd-$SLURM_VERSION*.$ARCH.rpm \
|
||||
slurm-torque-$SLURM_VERSION*.$ARCH.rpm \
|
||||
slurm-slurmctld-$SLURM_VERSION*.$ARCH.rpm \
|
||||
slurm-slurmrestd-$SLURM_VERSION*.$ARCH.rpm
|
||||
echo -n "checking for slurmdbd.conf"
|
||||
while [ ! -f /.secret/slurmdbd.conf ]; do
|
||||
echo -n "."
|
||||
sleep 1
|
||||
done
|
||||
echo ""
|
||||
mkdir -p /var/spool/slurm/ctld /var/spool/slurm/d /var/log/slurm /etc/slurm
|
||||
chown -R slurm: /var/spool/slurm/ctld /var/spool/slurm/d /var/log/slurm
|
||||
touch /var/log/slurmctld.log
|
||||
chown slurm: /var/log/slurmctld.log
|
||||
if [[ ! -f /home/config/slurm.conf ]]; then
|
||||
# mkdir -p /var/spool/slurm/ctld /var/spool/slurm/d /var/log/slurm /etc/slurm
|
||||
# chown -R slurm: /var/spool/slurm/ctld /var/spool/slurm/d /var/log/slurm
|
||||
touch /var/log/slurmrestd.log
|
||||
chown slurm: /var/log/slurmrestd.log
|
||||
if [[ ! -f /home/config/slurmrestd.conf ]]; then
|
||||
echo "### Missing slurm.conf ###"
|
||||
exit
|
||||
else
|
||||
echo "### use provided slurm.conf ###"
|
||||
cp /home/config/slurm.conf /etc/slurm/slurm.conf
|
||||
echo "### use provided slurmrestd.conf ###"
|
||||
cp /home/config/slurmrestd.conf /etc/config/slurmrestd.conf
|
||||
fi
|
||||
sacctmgr -i add cluster "snowflake"
|
||||
sleep 2s
|
||||
/usr/sbin/slurmctld
|
||||
cp -f /etc/slurm/slurm.conf /.secret/
|
||||
/usr/sbin/slurmrestd -f /etc/config/slurmrestd.conf 0.0.0.0:6820 -Dvv
|
||||
}
|
||||
|
||||
### main ###
|
||||
_sshd_host
|
||||
_ssh_worker
|
||||
_munge_start
|
||||
_copy_secrets
|
||||
_slurmctld
|
||||
_munge_start_using_key
|
||||
_slurmrestd
|
||||
|
||||
tail -f /dev/null
|
||||
|
6
slurm/rest/slurmrestd.conf
Normal file
6
slurm/rest/slurmrestd.conf
Normal file
@ -0,0 +1,6 @@
|
||||
#
|
||||
# Example slurmrestd.conf file.
|
||||
#
|
||||
include /etc/slurm/slurm.conf
|
||||
|
||||
AuthType=auth/munge
|
@ -1,4 +1,4 @@
|
||||
FROM clustercockpit/slurm.base:22.05.6
|
||||
FROM clustercockpit/slurm.base:24.05.3
|
||||
LABEL org.opencontainers.image.authors="jan.eitzinger@fau.de"
|
||||
|
||||
# clean up
|
||||
|
@ -3,6 +3,7 @@ set -e
|
||||
|
||||
# Determine the system architecture dynamically
|
||||
ARCH=$(uname -m)
|
||||
SLURM_VERSION="24.05.3"
|
||||
|
||||
# start sshd server
|
||||
_sshd_host() {
|
||||
@ -61,10 +62,10 @@ _start_dbus() {
|
||||
# run slurmd
|
||||
_slurmd() {
|
||||
cd /root/rpmbuild/RPMS/$ARCH
|
||||
yum -y --nogpgcheck localinstall slurm-22.05.6-1.el8.$ARCH.rpm \
|
||||
slurm-perlapi-22.05.6-1.el8.$ARCH.rpm \
|
||||
slurm-slurmd-22.05.6-1.el8.$ARCH.rpm \
|
||||
slurm-torque-22.05.6-1.el8.$ARCH.rpm
|
||||
yum -y --nogpgcheck localinstall slurm-$SLURM_VERSION*.$ARCH.rpm \
|
||||
slurm-perlapi-$SLURM_VERSION*.$ARCH.rpm \
|
||||
slurm-slurmd-$SLURM_VERSION*.$ARCH.rpm \
|
||||
slurm-torque-$SLURM_VERSION*.$ARCH.rpm
|
||||
|
||||
echo "checking for slurm.conf"
|
||||
if [ ! -f /.secret/slurm.conf ]; then
|
||||
|
Loading…
x
Reference in New Issue
Block a user