diff --git a/curl_slurmrestd.sh b/curl_slurmrestd.sh new file mode 100755 index 0000000..e3826ee --- /dev/null +++ b/curl_slurmrestd.sh @@ -0,0 +1,3 @@ +JWT="eyJhbGciOiJSUzI1NiIsICJ0eXAiOiJKV1QifQ.eyJpc3MiOiJzbHVybSJ9.dzAHf1Ojoa149uRCCWY1eP3vDyCIZCOZ3h554R-KJJ8-OP0CJ0ymvSkFISLcYcyd9vVKmaYdSN3tWEF6bNZEmyX7G560i1MbkNFvhkhNVSPLKEKNPs38h5ra3ZlTlLlxAlDzXRAAn6UEEgKdm5vx4Jhec7ptaRL_zeSFpTS5fJPc0QE1Cm7e7nU39-9e8l4WU4KpRMxT6ANFm22_G4-mSA-AgCAvKQFzj2FInKsXDUTGlliNJuAgFxf-9LQxoeAknOQhEqcTXii_yBy9DNcT03pdNcAu5Ru4_qlX62vroInU_eh5mWQyiUdXN9Wj_OfMmfLoYFkJeUFYexBMZnSBgg" + +curl -X 'GET' -v 'http://localhost:6820/slurm/v0.0.39/ping' -H "X-SLURM-USER-NAME:slurm" -H "X-SLURM-USER-TOKEN:$SLURM_JWT" diff --git a/docker-compose.yml b/docker-compose.yml index 70f0a1e..f04fd8b 100755 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -91,6 +91,7 @@ services: volumes: - ${DATADIR}/slurm/home:/home - ${DATADIR}/slurm/secret:/.secret + - ${DATADIR}/slurm/tmp:/tmp:rw - ./slurm/database/slurmdbd.conf:/home/config/slurmdbd.conf - /etc/timezone:/etc/timezone:ro - /etc/localtime:/etc/localtime:ro @@ -109,6 +110,7 @@ services: volumes: - ${DATADIR}/slurm/home:/home - ${DATADIR}/slurm/secret:/.secret + - ${DATADIR}/slurm/tmp:/tmp:rw - ./slurm/worker/cgroup.conf:/home/config/cgroup.conf - ./slurm/controller/slurm.conf:/home/config/slurm.conf - /etc/timezone:/etc/timezone:ro @@ -121,12 +123,16 @@ services: hostname: slurmrestd build: context: ./slurm/rest + args: + uid_u: ${UID_U} + gid_g: ${GID_G} depends_on: - slurmctld privileged: true volumes: - ${DATADIR}/slurm/home:/home - ${DATADIR}/slurm/secret:/.secret + - ${DATADIR}/slurm/tmp:/tmp:rw - ./slurm/controller/slurm.conf:/home/config/slurm.conf - ./slurm/rest/slurmrestd.conf:/home/config/slurmrestd.conf - /etc/timezone:/etc/timezone:ro diff --git a/setupDev.sh b/setupDev.sh index 1141138..3616787 100755 --- a/setupDev.sh +++ b/setupDev.sh @@ -1,21 +1,24 @@ #!/bin/bash echo "" -echo "-----------------------------------------------------------------" -echo "Welcome to cc-docker automatic deployment script." -echo "Make sure you have sudo rights to run docker services" -echo "This script assumes that docker command is added to sudo group" -echo "This means that docker commands do not explicitly require" -echo "'sudo' keyword to run. You can use this following command:" -echo "" -echo "sudo groupadd docker" -echo "sudo usermod -aG docker $USER" -echo "" -echo "This will add docker to the sudo usergroup and all the docker" -echo "command will run as sudo by default without requiring" -echo "'sudo' keyword." -echo "-----------------------------------------------------------------" +echo "|--------------------------------------------------------------------------------------|" +echo "| Welcome to cc-docker automatic deployment script. |" +echo "| Make sure you have sudo rights to run docker services |" +echo "| This script assumes that docker command is added to sudo group |" +echo "| This means that docker commands do not explicitly require |" +echo "| 'sudo' keyword to run. You can use this following command: |" +echo "| |" +echo "| > sudo groupadd docker |" +echo "| > sudo usermod -aG docker $USER |" +echo "| |" +echo "| This will add docker to the sudo usergroup and all the docker |" +echo "| command will run as sudo by default without requiring |" +echo "| 'sudo' keyword. |" +echo "|--------------------------------------------------------------------------------------|" echo "" +export UID_U=$(id -u $USER) +export GID_G=$(id -g $USER) + # Check cc-backend, touch job.db if exists if [ ! -d cc-backend ]; then echo "'cc-backend' not yet prepared! Please clone cc-backend repository before starting this script." @@ -98,6 +101,15 @@ docker-compose build docker-compose up -d echo "" -echo "Setup complete, containers are up by default: Shut down with 'docker-compose down'." -echo "Use './cc-backend/cc-backend -server' to start cc-backend." -echo "Use scripts in /scripts to load data into influx or mariadb." +echo "|--------------------------------------------------------------------------------------|" +echo "| Check logs for each slurm service by using these commands: |" +echo "| docker-compose logs slurmctld |" +echo "| docker-compose logs slurmdbd |" +echo "| docker-compose logs slurmrestd |" +echo "| docker-compose logs node01 |" +echo "|======================================================================================|" +echo "| Setup complete, containers are up by default: Shut down with 'docker-compose down'. |" +echo "| Use './cc-backend/cc-backend -server' to start cc-backend. |" +echo "| Use scripts in /scripts to load data into influx or mariadb. |" +echo "|--------------------------------------------------------------------------------------|" +echo "" diff --git a/slurm/base/Dockerfile b/slurm/base/Dockerfile index fb9ae62..f47588e 100644 --- a/slurm/base/Dockerfile +++ b/slurm/base/Dockerfile @@ -19,7 +19,7 @@ RUN yum install -y munge munge-libs rng-tools \ openssh-server openssh-clients dbus-devel \ pam-devel numactl numactl-devel hwloc sudo \ lua readline-devel ncurses-devel man2html \ - autoconf automake json-c-devel \ + autoconf automake json-c-devel libjwt-devel \ libibmad libibumad rpm-build perl-ExtUtils-MakeMaker.noarch rpm-build make wget RUN dnf --enablerepo=powertools install -y munge-devel rrdtool-devel lua-devel hwloc-devel mariadb-server mariadb-devel diff --git a/slurm/controller/docker-entrypoint.sh b/slurm/controller/docker-entrypoint.sh index 135f6c9..2871c8d 100755 --- a/slurm/controller/docker-entrypoint.sh +++ b/slurm/controller/docker-entrypoint.sh @@ -4,6 +4,8 @@ set -e # Determine the system architecture dynamically ARCH=$(uname -m) SLURM_VERSION="24.05.3" +SLURM_JWT=daemon +SLURMRESTD_SECURITY=disable_user_check _delete_secrets() { if [ -f /.secret/munge.key ]; then @@ -11,6 +13,9 @@ _delete_secrets() { sudo rm -rf /.secret/munge.key sudo rm -rf /.secret/worker-secret.tar.gz sudo rm -rf /.secret/setup-worker-ssh.sh + sudo rm -rf /.secret/jwt.key + sudo rm -rf /.secret/jwt_public.key + sudo rm -rf /.secret/jwt_token.key echo "Done removing secrets" ls /.secret/ @@ -88,6 +93,31 @@ _copy_secrets() { rm -f /home/worker/setup-worker-ssh.sh } +_openssl_jwt_key() { + cd /.secret + openssl rand -base64 32 > jwt.key + # openssl genpkey -algorithm RSA -out jwt.key -pkeyopt rsa_keygen_bits:2048 + # openssl rsa -pubout -in jwt.key -out jwt_public.key + cd .. +} + +_generate_jwt_token() { + PEM=$(cat /etc/config/jwt.key) + USER=\"slurm\" + NOW=$(date +%s) + IAT="${NOW}" + EXP=$((${NOW} + 3600000)) + HEADER_RAW='{"alg":"HS256", "typ":"JWT"}' + HEADER=$(echo -n "${HEADER_RAW}" | openssl base64 | tr -d '=' | tr '/+' '_-' | tr -d '\n') + PAYLOAD_RAW='{"iss":'${USER}'}' + PAYLOAD=$(echo -n "${PAYLOAD_RAW}" | openssl base64 | tr -d '=' | tr '/+' '_-' | tr -d '\n') + HEADER_PAYLOAD="${HEADER}"."${PAYLOAD}" + SIGNATURE=$(openssl dgst -sha256 -sign <(echo -n "${PEM}") <(echo -n "${HEADER_PAYLOAD}") | openssl base64 | tr -d '=' | tr '/+' '_-' | tr -d '\n') + JWT="${HEADER_PAYLOAD}"."${SIGNATURE}" + echo $JWT | cat >/.secret/jwt_token.txt + chmod 777 /.secret/jwt_token.txt +} + # run slurmctld _slurmctld() { cd /root/rpmbuild/RPMS/$ARCH @@ -105,19 +135,22 @@ _slurmctld() { echo "" mkdir -p /var/spool/slurm/ctld /var/spool/slurm/d /var/log/slurm /etc/slurm /var/run/slurm/d /var/run/slurm/ctld /var/lib/slurm/d /var/lib/slurm/ctld chown -R slurm: /var/spool/slurm/ctld /var/spool/slurm/d /var/log/slurm /var/spool /var/lib /var/run/slurm/d /var/run/slurm/ctld /var/lib/slurm/d /var/lib/slurm/ctld + mkdir -p /etc/config + chown -R slurm: /etc/config + touch /var/log/slurmctld.log - chown slurm: /var/log/slurmctld.log + chown -R slurm: /var/log/slurmctld.log touch /var/log/slurmd.log - chown slurm: /var/log/slurmd.log + chown -R slurm: /var/log/slurmd.log touch /var/lib/slurm/d/job_state - chown slurm: /var/lib/slurm/d/job_state + chown -R slurm: /var/lib/slurm/d/job_state touch /var/lib/slurm/d/fed_mgr_state - chown slurm: /var/lib/slurm/d/fed_mgr_state + chown -R slurm: /var/lib/slurm/d/fed_mgr_state touch /var/run/slurm/d/slurmctld.pid - chown slurm: /var/run/slurm/d/slurmctld.pid + chown -R slurm: /var/run/slurm/d/slurmctld.pid touch /var/run/slurm/d/slurmd.pid - chown slurm: /var/run/slurm/d/slurmd.pid + chown -R slurm: /var/run/slurm/d/slurmd.pid if [[ ! -f /home/config/slurm.conf ]]; then echo "### Missing slurm.conf ###" @@ -129,6 +162,19 @@ _slurmctld() { chmod 600 /etc/slurm/slurm.conf fi + _openssl_jwt_key + + if [ ! -f /.secret/jwt.key ]; then + echo "### Missing jwt.key ###" + exit 1 + else + cp /.secret/jwt.key /etc/config/jwt.key + chown slurm: /etc/config/jwt.key + chmod 0400 /etc/config/jwt.key + fi + + _generate_jwt_token + sudo yum install -y nc sudo yum install -y procps sudo yum install -y iputils @@ -149,6 +195,7 @@ _slurmctld() { ### main ### _delete_secrets _sshd_host + _ssh_worker _munge_start _copy_secrets diff --git a/slurm/controller/slurm.conf b/slurm/controller/slurm.conf index f41d0f5..83c9f24 100644 --- a/slurm/controller/slurm.conf +++ b/slurm/controller/slurm.conf @@ -22,6 +22,8 @@ MpiDefault=none SlurmctldPidFile=/var/run/slurm/d/slurmctld.pid SlurmdPidFile=/var/run/slurm/d/slurmd.pid ProctrackType=proctrack/linuxproc +AuthAltTypes=auth/jwt +AuthAltParameters=jwt_key=/etc/config/jwt.key #PluginDir= #CacheGroups=0 #FirstJobId= diff --git a/slurm/database/docker-entrypoint.sh b/slurm/database/docker-entrypoint.sh index 3f74437..2b968fb 100755 --- a/slurm/database/docker-entrypoint.sh +++ b/slurm/database/docker-entrypoint.sh @@ -4,7 +4,7 @@ set -e # Determine the system architecture dynamically ARCH=$(uname -m) SLURM_VERSION="24.05.3" - +SLURM_JWT=daemon SLURM_ACCT_DB_SQL=/slurm_acct_db.sql # start sshd server @@ -52,12 +52,16 @@ _wait_for_worker() { # run slurmdbd _slurmdbd() { - cd /root/rpmbuild/RPMS/$ARCH - yum -y --nogpgcheck localinstall slurm-$SLURM_VERSION*.$ARCH.rpm \ - slurm-perlapi-$SLURM_VERSION*.$ARCH.rpm \ - slurm-slurmdbd-$SLURM_VERSION*.$ARCH.rpm + cd /root/rpmbuild/RPMS/$ARCH + yum -y --nogpgcheck localinstall slurm-$SLURM_VERSION*.$ARCH.rpm \ + slurm-perlapi-$SLURM_VERSION*.$ARCH.rpm \ + slurm-slurmdbd-$SLURM_VERSION*.$ARCH.rpm mkdir -p /var/spool/slurm/d /var/log/slurm /etc/slurm - chown slurm: /var/spool/slurm/d /var/log/slurm + chown -R slurm: /var/spool/slurm/d /var/log/slurm + + mkdir -p /etc/config + chown -R slurm: /etc/config + if [[ ! -f /home/config/slurmdbd.conf ]]; then echo "### Missing slurmdbd.conf ###" exit @@ -67,8 +71,26 @@ _slurmdbd() { chown slurm: /etc/slurm/slurmdbd.conf chmod 600 /etc/slurm/slurmdbd.conf fi - echo "Starting slurmdbd" + + echo -n "checking for jwt.key" + while [ ! -f /.secret/jwt.key ]; do + echo -n "." + sleep 1 + done + + cp /.secret/jwt.key /etc/config/jwt.key + chown slurm: /etc/config/jwt.key + chmod 0400 /etc/config/jwt.key + + echo "" + + sudo yum install -y nc + sudo yum install -y procps + sudo yum install -y iputils + cp /etc/slurm/slurmdbd.conf /.secret/slurmdbd.conf + + echo "Starting slurmdbd" /usr/sbin/slurmdbd -Dvv echo "Started slurmdbd" } diff --git a/slurm/database/slurmdbd.conf b/slurm/database/slurmdbd.conf index d584535..1be920c 100644 --- a/slurm/database/slurmdbd.conf +++ b/slurm/database/slurmdbd.conf @@ -14,7 +14,8 @@ # Authentication info AuthType=auth/munge #AuthInfo=/var/run/munge/munge.socket.2 -# +AuthAltTypes=auth/jwt +AuthAltParameters=jwt_key=/etc/config/jwt.key # slurmDBD info DbdAddr=slurmdbd DbdHost=slurmdbd diff --git a/slurm/rest/Dockerfile b/slurm/rest/Dockerfile index a111d6b..664921d 100644 --- a/slurm/rest/Dockerfile +++ b/slurm/rest/Dockerfile @@ -1,10 +1,15 @@ FROM clustercockpit/slurm.base:24.05.3 LABEL org.opencontainers.image.authors="jan.eitzinger@fau.de" +ARG uid_u +ARG gid_g +ENV uid_u=${uid_u} +ENV gid_g=${gid_g} + # clean up RUN rm -f /root/rpmbuild/RPMS/slurm-*.rpm \ && yum clean all \ && rm -rf /var/cache/yum COPY docker-entrypoint.sh /docker-entrypoint.sh -ENTRYPOINT ["/docker-entrypoint.sh"] +ENTRYPOINT /docker-entrypoint.sh $uid_u $gid_g diff --git a/slurm/rest/docker-entrypoint.sh b/slurm/rest/docker-entrypoint.sh index f5ef0dd..fc0f726 100755 --- a/slurm/rest/docker-entrypoint.sh +++ b/slurm/rest/docker-entrypoint.sh @@ -4,6 +4,18 @@ set -e # Determine the system architecture dynamically ARCH=$(uname -m) SLURM_VERSION="24.05.3" +SLURMRESTD="/tmp/slurmrestd.socket" +# SLURM_JWT=daemon + +uid_u="${1:-}" +gid_g="${2:-}" + +echo Your container args are: "$@" + +# Change the uid +# usermod -u "${uid_u}" slurm +# Change the gid +# groupmod -g "${gid_g}" slurm # start sshd server _sshd_host() { @@ -14,7 +26,6 @@ _sshd_host() { /usr/sbin/sshd } -# start munge and generate key # start munge using existing key _munge_start_using_key() { if [ ! -f /.secret/munge.key ]; then @@ -37,6 +48,48 @@ _munge_start_using_key() { remunge } +_enable_slurmrestd() { + + cd /tmp + mkdir statesave + dd if=/dev/random of=/tmp/statesave/jwt_hs256.key bs=32 count=1 + chown slurm:slurm /tmp/statesave/jwt_hs256.key + chmod 0600 /tmp/statesave/jwt_hs256.key + chown slurm:slurm /tmp/statesave + chmod 0755 /tmp/statesave + + cat >/usr/lib/systemd/system/slurmrestd.service <