From 2d15d513c631d5397822ef2a07c699b9da130643 Mon Sep 17 00:00:00 2001 From: Aditya Ujeniya Date: Tue, 15 Oct 2024 17:02:25 +0200 Subject: [PATCH] Stable docker services --- docker-compose.yml | 1 + slurm/controller/docker-entrypoint.sh | 16 ++++++++++------ slurm/controller/slurm.conf | 4 ++-- slurm/worker/docker-entrypoint.sh | 12 ++++++++---- 4 files changed, 21 insertions(+), 12 deletions(-) diff --git a/docker-compose.yml b/docker-compose.yml index feab346..11db075 100755 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -77,6 +77,7 @@ services: - ./slurm/controller/slurm.conf:/home/config/slurm.conf - /etc/timezone:/etc/timezone:ro - /etc/localtime:/etc/localtime:ro + - ${DATADIR}/slurm/state:/var/lib/slurm/d slurmdbd: container_name: slurmdbd diff --git a/slurm/controller/docker-entrypoint.sh b/slurm/controller/docker-entrypoint.sh index 3fc3d18..279412b 100755 --- a/slurm/controller/docker-entrypoint.sh +++ b/slurm/controller/docker-entrypoint.sh @@ -101,17 +101,21 @@ _slurmctld() { sleep 1 done echo "" - mkdir -p /var/spool/slurm/ctld /var/spool/slurm/d /var/log/slurm /etc/slurm - chown -R slurm: /var/spool/slurm/ctld /var/spool/slurm/d /var/log/slurm /var/spool /var/lib + mkdir -p /var/spool/slurm/ctld /var/spool/slurm/d /var/log/slurm /etc/slurm /var/run/slurm/d /var/run/slurm/ctld /var/lib/slurm/d /var/lib/slurm/ctld + chown -R slurm: /var/spool/slurm/ctld /var/spool/slurm/d /var/log/slurm /var/spool /var/lib /var/run/slurm/d /var/run/slurm/ctld /var/lib/slurm/d /var/lib/slurm/ctld touch /var/log/slurmctld.log chown slurm: /var/log/slurmctld.log touch /var/log/slurmd.log chown slurm: /var/log/slurmd.log - # touch /var/run/slurm/d/slurmctld.pid - # chown slurm: /var/run/slurm/d/slurmctld.pid - # touch /var/run/slurm/d/slurmd.pid - # chown slurm:/var/run/slurm/d/slurmd.pid + touch /var/lib/slurm/d/job_state + chown slurm: /var/lib/slurm/d/job_state + touch /var/lib/slurm/d/fed_mgr_state + chown slurm: /var/lib/slurm/d/fed_mgr_state + touch /var/run/slurm/d/slurmctld.pid + chown slurm: /var/run/slurm/d/slurmctld.pid + touch /var/run/slurm/d/slurmd.pid + chown slurm: /var/run/slurm/d/slurmd.pid if [[ ! -f /home/config/slurm.conf ]]; then echo "### Missing slurm.conf ###" diff --git a/slurm/controller/slurm.conf b/slurm/controller/slurm.conf index 7a55ff6..ab5172a 100644 --- a/slurm/controller/slurm.conf +++ b/slurm/controller/slurm.conf @@ -92,8 +92,8 @@ AccountingStoragePort=6819 # # COMPUTE NODES -PartitionName=DEFAULT Nodes=c[1-2] -PartitionName=debug Nodes=c[1-2] Default=YES MaxTime=INFINITE State=UP +PartitionName=DEFAULT Nodes=node01 +PartitionName=debug Nodes=node01 Default=YES MaxTime=INFINITE State=UP # # COMPUTE NODES # NodeName=c[1-2] RealMemory=1000 State=UNKNOWN diff --git a/slurm/worker/docker-entrypoint.sh b/slurm/worker/docker-entrypoint.sh index db691bc..e8bc2a9 100755 --- a/slurm/worker/docker-entrypoint.sh +++ b/slurm/worker/docker-entrypoint.sh @@ -77,16 +77,20 @@ _slurmd() { fi echo "found slurm.conf" - mkdir -p /var/spool/slurm/d /etc/slurm - chown slurm: /var/spool/slurm/d + mkdir -p /var/spool/slurm/d /etc/slurm /var/run/slurm/d /var/log/slurm + chown slurm: /var/spool/slurm/d /var/run/slurm/d /var/log/slurm cp /home/config/cgroup.conf /etc/slurm/cgroup.conf chown slurm: /etc/slurm/cgroup.conf chmod 600 /etc/slurm/cgroup.conf cp /home/config/slurm.conf /etc/slurm/slurm.conf chown slurm: /etc/slurm/slurm.conf chmod 600 /etc/slurm/slurm.conf - touch /var/log/slurmd.log - chown slurm: /var/log/slurmd.log + touch /var/log/slurm/slurmd.log + chown slurm: /var/log/slurm/slurmd.log + + touch /var/run/slurm/d/slurmd.pid + chmod 600 /var/run/slurm/d/slurmd.pid + chown slurm: /var/run/slurm/d/slurmd.pid echo "Starting slurmd" /usr/sbin/slurmd -Dvv