diff --git a/docker-compose.yml b/docker-compose.yml index 11db075..70f0a1e 100755 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -69,8 +69,6 @@ services: build: context: ./slurm/controller privileged: true - ports: - - "6817:6817" volumes: - ${DATADIR}/slurm/home:/home - ${DATADIR}/slurm/secret:/.secret @@ -78,6 +76,8 @@ services: - /etc/timezone:/etc/timezone:ro - /etc/localtime:/etc/localtime:ro - ${DATADIR}/slurm/state:/var/lib/slurm/d + ports: + - "6817:6817" slurmdbd: container_name: slurmdbd @@ -88,14 +88,15 @@ services: - mariadb - slurmctld privileged: true - ports: - - "6819:6819" volumes: - ${DATADIR}/slurm/home:/home - ${DATADIR}/slurm/secret:/.secret - ./slurm/database/slurmdbd.conf:/home/config/slurmdbd.conf - /etc/timezone:/etc/timezone:ro - /etc/localtime:/etc/localtime:ro + - ${DATADIR}/slurm/state:/var/lib/slurm/d + ports: + - "6819:6819" node01: container_name: node01 @@ -113,4 +114,22 @@ services: - /etc/timezone:/etc/timezone:ro - /etc/localtime:/etc/localtime:ro ports: - - "6818:6818" \ No newline at end of file + - "6818:6818" + + slurmrestd: + container_name: slurmrestd + hostname: slurmrestd + build: + context: ./slurm/rest + depends_on: + - slurmctld + privileged: true + volumes: + - ${DATADIR}/slurm/home:/home + - ${DATADIR}/slurm/secret:/.secret + - ./slurm/controller/slurm.conf:/home/config/slurm.conf + - ./slurm/rest/slurmrestd.conf:/home/config/slurmrestd.conf + - /etc/timezone:/etc/timezone:ro + - /etc/localtime:/etc/localtime:ro + ports: + - "6820:6820" \ No newline at end of file diff --git a/setupDev.sh b/setupDev.sh index 81eeee6..1141138 100755 --- a/setupDev.sh +++ b/setupDev.sh @@ -88,6 +88,7 @@ if [ ! 
-f docker-compose.yml ]; then fi docker-compose down +docker-compose down --remove-orphans cd slurm/base/ make diff --git a/slurm/base/Dockerfile b/slurm/base/Dockerfile index 5f19b55..caa8fde 100644 --- a/slurm/base/Dockerfile +++ b/slurm/base/Dockerfile @@ -1,9 +1,11 @@ FROM rockylinux:8 LABEL org.opencontainers.image.authors="jan.eitzinger@fau.de" -ENV SLURM_VERSION=22.05.6 +ENV SLURM_VERSION=24.05.3 +ENV HTTP_PARSER_VERSION=2.8.0 RUN yum install -y https://dl.fedoraproject.org/pub/epel/epel-release-latest-8.noarch.rpm +RUN ARCH=$(uname -m) && yum install -y https://rpmfind.net/linux/almalinux/8.10/PowerTools/$ARCH/os/Packages/http-parser-devel-${HTTP_PARSER_VERSION}-9.el8.$ARCH.rpm RUN groupadd -g 981 munge \ && useradd -m -c "MUNGE Uid 'N' Gid Emporium" -d /var/lib/munge -u 981 -g munge -s /sbin/nologin munge \ @@ -17,20 +19,21 @@ RUN yum install -y munge munge-libs rng-tools \ openssh-server openssh-clients dbus-devel \ pam-devel numactl numactl-devel hwloc sudo \ lua readline-devel ncurses-devel man2html \ - libibmad libibumad rpm-build perl-ExtUtils-MakeMaker.noarch rpm-build make wget + autoconf automake json-c-devel \ + libibmad libibumad rpm-build perl-ExtUtils-MakeMaker.noarch rpm-build make wget RUN dnf --enablerepo=powertools install -y munge-devel rrdtool-devel lua-devel hwloc-devel mariadb-server mariadb-devel -RUN mkdir -p /usr/local/slurm-tmp \ +RUN mkdir -p /usr/local/slurm-tmp \ && cd /usr/local/slurm-tmp \ && wget https://download.schedmd.com/slurm/slurm-${SLURM_VERSION}.tar.bz2 \ - && rpmbuild -ta slurm-${SLURM_VERSION}.tar.bz2 + && rpmbuild -ta --with slurmrestd slurm-${SLURM_VERSION}.tar.bz2 RUN ARCH=$(uname -m) \ && yum -y --nogpgcheck localinstall \ - /root/rpmbuild/RPMS/$ARCH/slurm-${SLURM_VERSION}-1.el8.$ARCH.rpm \ - /root/rpmbuild/RPMS/$ARCH/slurm-perlapi-${SLURM_VERSION}-1.el8.$ARCH.rpm \ - /root/rpmbuild/RPMS/$ARCH/slurm-slurmctld-${SLURM_VERSION}-1.el8.$ARCH.rpm + /root/rpmbuild/RPMS/$ARCH/slurm-${SLURM_VERSION}*.$ARCH.rpm \ + 
/root/rpmbuild/RPMS/$ARCH/slurm-perlapi-${SLURM_VERSION}*.$ARCH.rpm \ + /root/rpmbuild/RPMS/$ARCH/slurm-slurmctld-${SLURM_VERSION}*.$ARCH.rpm VOLUME ["/home", "/.secret"] # 22: SSH @@ -38,4 +41,5 @@ VOLUME ["/home", "/.secret"] # 6817: SlurmCtlD # 6818: SlurmD # 6819: SlurmDBD -EXPOSE 22 6817 6818 6819 +# 6820: SlurmRestD +EXPOSE 22 6817 6818 6819 6820 diff --git a/slurm/base/Makefile b/slurm/base/Makefile index dc0dff3..01029b8 100644 --- a/slurm/base/Makefile +++ b/slurm/base/Makefile @@ -1,6 +1,6 @@ include ../../.env IMAGE = clustercockpit/slurm.base - +SLURM_VERSION = 24.05.3 .PHONY: build clean build: diff --git a/slurm/controller/Dockerfile b/slurm/controller/Dockerfile index b236b5b..a111d6b 100644 --- a/slurm/controller/Dockerfile +++ b/slurm/controller/Dockerfile @@ -1,4 +1,4 @@ -FROM clustercockpit/slurm.base:22.05.6 +FROM clustercockpit/slurm.base:24.05.3 LABEL org.opencontainers.image.authors="jan.eitzinger@fau.de" # clean up diff --git a/slurm/controller/docker-entrypoint.sh b/slurm/controller/docker-entrypoint.sh index 279412b..135f6c9 100755 --- a/slurm/controller/docker-entrypoint.sh +++ b/slurm/controller/docker-entrypoint.sh @@ -3,6 +3,7 @@ set -e # Determine the system architecture dynamically ARCH=$(uname -m) +SLURM_VERSION="24.05.3" _delete_secrets() { if [ -f /.secret/munge.key ]; then @@ -90,11 +91,12 @@ _copy_secrets() { # run slurmctld _slurmctld() { cd /root/rpmbuild/RPMS/$ARCH - yum -y --nogpgcheck localinstall slurm-22.05.6-1.el8.$ARCH.rpm \ - slurm-perlapi-22.05.6-1.el8.$ARCH.rpm \ - slurm-slurmd-22.05.6-1.el8.$ARCH.rpm \ - slurm-torque-22.05.6-1.el8.$ARCH.rpm \ - slurm-slurmctld-22.05.6-1.el8.$ARCH.rpm + + yum -y --nogpgcheck localinstall slurm-$SLURM_VERSION*.$ARCH.rpm \ + slurm-perlapi-$SLURM_VERSION*.$ARCH.rpm \ + slurm-slurmd-$SLURM_VERSION*.$ARCH.rpm \ + slurm-torque-$SLURM_VERSION*.$ARCH.rpm \ + slurm-slurmctld-$SLURM_VERSION*.$ARCH.rpm echo "checking for slurmdbd.conf" while [ ! -f /.secret/slurmdbd.conf ]; do echo -n "." 
diff --git a/slurm/controller/slurm.conf b/slurm/controller/slurm.conf index ab5172a..f41d0f5 100644 --- a/slurm/controller/slurm.conf +++ b/slurm/controller/slurm.conf @@ -56,7 +56,7 @@ SchedulerType=sched/backfill #SchedulerAuth= #SchedulerPort= #SchedulerRootFilter= -SelectType=select/cons_res +SelectType=select/cons_tres SelectTypeParameters=CR_CPU_Memory # FastSchedule=1 #PriorityType=priority/multifactor diff --git a/slurm/database/Dockerfile b/slurm/database/Dockerfile index b236b5b..a111d6b 100644 --- a/slurm/database/Dockerfile +++ b/slurm/database/Dockerfile @@ -1,4 +1,4 @@ -FROM clustercockpit/slurm.base:22.05.6 +FROM clustercockpit/slurm.base:24.05.3 LABEL org.opencontainers.image.authors="jan.eitzinger@fau.de" # clean up diff --git a/slurm/database/docker-entrypoint.sh b/slurm/database/docker-entrypoint.sh index 504ecd1..3f74437 100755 --- a/slurm/database/docker-entrypoint.sh +++ b/slurm/database/docker-entrypoint.sh @@ -3,6 +3,7 @@ set -e # Determine the system architecture dynamically ARCH=$(uname -m) +SLURM_VERSION="24.05.3" SLURM_ACCT_DB_SQL=/slurm_acct_db.sql @@ -52,9 +53,9 @@ _wait_for_worker() { # run slurmdbd _slurmdbd() { cd /root/rpmbuild/RPMS/$ARCH - yum -y --nogpgcheck localinstall slurm-22.05.6-1.el8.$ARCH.rpm \ - slurm-perlapi-22.05.6-1.el8.$ARCH.rpm \ - slurm-slurmdbd-22.05.6-1.el8.$ARCH.rpm + yum -y --nogpgcheck localinstall slurm-$SLURM_VERSION*.$ARCH.rpm \ + slurm-perlapi-$SLURM_VERSION*.$ARCH.rpm \ + slurm-slurmdbd-$SLURM_VERSION*.$ARCH.rpm mkdir -p /var/spool/slurm/d /var/log/slurm /etc/slurm chown slurm: /var/spool/slurm/d /var/log/slurm if [[ ! 
-f /home/config/slurmdbd.conf ]]; then diff --git a/slurm/rest/Dockerfile b/slurm/rest/Dockerfile index b236b5b..a111d6b 100644 --- a/slurm/rest/Dockerfile +++ b/slurm/rest/Dockerfile @@ -1,4 +1,4 @@ -FROM clustercockpit/slurm.base:22.05.6 +FROM clustercockpit/slurm.base:24.05.3 LABEL org.opencontainers.image.authors="jan.eitzinger@fau.de" # clean up diff --git a/slurm/rest/docker-entrypoint.sh b/slurm/rest/docker-entrypoint.sh index 549b92c..0cdca57 100755 --- a/slurm/rest/docker-entrypoint.sh +++ b/slurm/rest/docker-entrypoint.sh @@ -3,109 +3,73 @@ set -e # Determine the system architecture dynamically ARCH=$(uname -m) +SLURM_VERSION="24.05.3" # start sshd server _sshd_host() { - if [ ! -d /var/run/sshd ]; then - mkdir /var/run/sshd - ssh-keygen -t rsa -f /etc/ssh/ssh_host_rsa_key -N '' - fi - /usr/sbin/sshd -} - -# setup worker ssh to be passwordless -_ssh_worker() { - if [[ ! -d /home/worker ]]; then - mkdir -p /home/worker - chown -R worker:worker /home/worker + if [ ! -d /var/run/sshd ]; then + mkdir /var/run/sshd + ssh-keygen -t rsa -f /etc/ssh/ssh_host_rsa_key -N '' fi - cat > /home/worker/setup-worker-ssh.sh < ~/.ssh/authorized_keys -chmod 0640 ~/.ssh/authorized_keys -cat >> ~/.ssh/config < /etc/munge/munge.key" - chown munge: /etc/munge/munge.key - chmod 400 /etc/munge/munge.key sudo -u munge /sbin/munged munge -n munge -n | unmunge remunge } -# copy secrets to /.secret directory for other nodes -_copy_secrets() { - cp /home/worker/worker-secret.tar.gz /.secret/worker-secret.tar.gz - cp thome/worker/setup-worker-ssh.sh /.secret/setup-worker-ssh.sh - cp /etc/munge/munge.key /.secret/munge.key - rm -f /home/worker/worker-secret.tar.gz - rm -f /home/worker/setup-worker-ssh.sh -} - -# run slurmctld -_slurmctld() { +# run slurmrestd +_slurmrestd() { cd /root/rpmbuild/RPMS/$ARCH - yum -y --nogpgcheck localinstall slurm-22.05.6-1.el8.$ARCH.rpm \ - slurm-perlapi-22.05.6-1.el8.$ARCH.rpm \ - slurm-slurmd-22.05.6-1.el8.$ARCH.rpm \ - 
slurm-torque-22.05.6-1.el8.$ARCH.rpm \ - slurm-slurmctld-22.05.6-1.el8.$ARCH.rpm \ - slurm-slurmrestd-22.05.6-1.el8.$ARCH.rpm + yum -y --nogpgcheck localinstall slurm-$SLURM_VERSION*.$ARCH.rpm \ + slurm-perlapi-$SLURM_VERSION*.$ARCH.rpm \ + slurm-slurmd-$SLURM_VERSION*.$ARCH.rpm \ + slurm-torque-$SLURM_VERSION*.$ARCH.rpm \ + slurm-slurmctld-$SLURM_VERSION*.$ARCH.rpm \ + slurm-slurmrestd-$SLURM_VERSION*.$ARCH.rpm echo -n "checking for slurmdbd.conf" while [ ! -f /.secret/slurmdbd.conf ]; do echo -n "." sleep 1 done echo "" - mkdir -p /var/spool/slurm/ctld /var/spool/slurm/d /var/log/slurm /etc/slurm - chown -R slurm: /var/spool/slurm/ctld /var/spool/slurm/d /var/log/slurm - touch /var/log/slurmctld.log - chown slurm: /var/log/slurmctld.log - if [[ ! -f /home/config/slurm.conf ]]; then + # mkdir -p /var/spool/slurm/ctld /var/spool/slurm/d /var/log/slurm /etc/slurm + # chown -R slurm: /var/spool/slurm/ctld /var/spool/slurm/d /var/log/slurm + touch /var/log/slurmrestd.log + chown slurm: /var/log/slurmrestd.log + if [[ ! -f /home/config/slurmrestd.conf ]]; then - echo "### Missing slurm.conf ###" - exit + echo "### Missing slurmrestd.conf ###" + exit 1 else - echo "### use provided slurm.conf ###" - cp /home/config/slurm.conf /etc/slurm/slurm.conf + echo "### use provided slurmrestd.conf ###" + install -D -m 0644 /home/config/slurmrestd.conf /etc/config/slurmrestd.conf fi - sacctmgr -i add cluster "snowflake" sleep 2s - /usr/sbin/slurmctld - cp -f /etc/slurm/slurm.conf /.secret/ + /usr/sbin/slurmrestd -f /etc/config/slurmrestd.conf 0.0.0.0:6820 -Dvv } ### main ### _sshd_host -_ssh_worker -_munge_start -_copy_secrets -_slurmctld +_munge_start_using_key +_slurmrestd tail -f /dev/null diff --git a/slurm/rest/slurmrestd.conf b/slurm/rest/slurmrestd.conf new file mode 100644 index 0000000..a747d11 --- /dev/null +++ b/slurm/rest/slurmrestd.conf @@ -0,0 +1,6 @@ +# +# Example slurmrestd.conf file. 
+# +include /etc/slurm/slurm.conf + +AuthType=auth/munge \ No newline at end of file diff --git a/slurm/worker/Dockerfile b/slurm/worker/Dockerfile index 2fb1c11..556fcbc 100644 --- a/slurm/worker/Dockerfile +++ b/slurm/worker/Dockerfile @@ -1,4 +1,4 @@ -FROM clustercockpit/slurm.base:22.05.6 +FROM clustercockpit/slurm.base:24.05.3 LABEL org.opencontainers.image.authors="jan.eitzinger@fau.de" # clean up diff --git a/slurm/worker/docker-entrypoint.sh b/slurm/worker/docker-entrypoint.sh index e8bc2a9..dfadc46 100755 --- a/slurm/worker/docker-entrypoint.sh +++ b/slurm/worker/docker-entrypoint.sh @@ -3,6 +3,7 @@ set -e # Determine the system architecture dynamically ARCH=$(uname -m) +SLURM_VERSION="24.05.3" # start sshd server _sshd_host() { @@ -61,10 +62,10 @@ _start_dbus() { # run slurmd _slurmd() { cd /root/rpmbuild/RPMS/$ARCH - yum -y --nogpgcheck localinstall slurm-22.05.6-1.el8.$ARCH.rpm \ - slurm-perlapi-22.05.6-1.el8.$ARCH.rpm \ - slurm-slurmd-22.05.6-1.el8.$ARCH.rpm \ - slurm-torque-22.05.6-1.el8.$ARCH.rpm + yum -y --nogpgcheck localinstall slurm-$SLURM_VERSION*.$ARCH.rpm \ + slurm-perlapi-$SLURM_VERSION*.$ARCH.rpm \ + slurm-slurmd-$SLURM_VERSION*.$ARCH.rpm \ + slurm-torque-$SLURM_VERSION*.$ARCH.rpm echo "checking for slurm.conf" if [ ! -f /.secret/slurm.conf ]; then