cc-docker/slurm/worker/docker-entrypoint.sh

114 lines
2.7 KiB
Bash
Raw Normal View History

2023-06-23 08:38:15 +02:00
#!/usr/bin/env bash
#
# Entrypoint for the Slurm worker container. Defines helpers that start sshd,
# munge, dbus and slurmd, and runs them in sequence at the bottom of the file.

# -e: abort on the first failing command.
# -u: treat expansion of an unset variable as an error.
# -o pipefail: a pipeline fails when any of its stages fails.
set -euo pipefail

# Machine hardware name reported by uname; used by _slurmd to pick the RPM
# directory and the RPM file names.
ARCH="$(uname -m)"

# Slurm release whose locally built RPMs are installed by _slurmd.
SLURM_VERSION="24.05.3"
2024-10-10 22:09:01 +02:00
2023-06-23 08:38:15 +02:00
# Start the sshd server, first creating its runtime directory and the RSA
# host key when they are missing.
_sshd_host() {
  # -p: succeed when the directory already exists.
  mkdir -p /var/run/sshd

  # Decide on the key file itself, not on the runtime directory: ssh-keygen
  # asks for confirmation before overwriting an existing key, which cannot be
  # answered from a non-interactive entrypoint.
  if [ ! -f /etc/ssh/ssh_host_rsa_key ]; then
    ssh-keygen -t rsa -f /etc/ssh/ssh_host_rsa_key -N ''
  fi

  /usr/sbin/sshd
}
# Start munged with the key provided at /.secret/munge.key, then run the
# munge credential self-checks.
_munge_start_using_key() {
  # Install the helper packages in a single yum transaction.
  sudo yum install -y nc procps iputils

  # Block until the key file shows up at /.secret/munge.key.
  echo -n "cheking for munge.key"
  until [ -f /.secret/munge.key ]; do
    echo -n "."
    sleep 1
  done
  echo ""

  cp /.secret/munge.key /etc/munge/munge.key
  chown -R munge: /etc/munge /var/lib/munge /var/log/munge /var/run/munge
  # cp does not guarantee restrictive permissions on the copy; make the key
  # private to its owner explicitly.
  chmod 0600 /etc/munge/munge.key
  chmod 0700 /etc/munge
  chmod 0711 /var/lib/munge
  chmod 0700 /var/log/munge
  chmod 0755 /var/run/munge

  sudo -u munge /sbin/munged

  # Self-checks: create a credential, decode one, then run remunge.
  munge -n
  munge -n | unmunge
  remunge
}
# Block until the worker user's public SSH key exists at
# /home/worker/.ssh/id_rsa.pub (the shared /home volume).
_wait_for_worker() {
  local waited=0

  echo "checking for id_rsa.pub"
  # Poll once per second, printing a progress dot for each attempt.
  until [ -f /home/worker/.ssh/id_rsa.pub ]; do
    echo -n "."
    waited=1
    sleep 1
  done
  # Terminate the line of dots only when at least one was printed.
  if [ "$waited" -eq 1 ]; then
    echo ""
  fi
  echo "done checking for id_rsa.pub"
}
2023-08-21 09:57:51 +02:00
# Write the D-Bus machine id, make sure the runtime directory exists, and
# launch dbus-daemon with the system bus configuration.
_start_dbus() {
  local machine_id_file=/var/lib/dbus/machine-id
  local runtime_dir=/var/run/dbus
  local bus_config=/usr/share/dbus-1/system.conf

  # The output of dbus-uuidgen replaces any previous machine id.
  dbus-uuidgen >"$machine_id_file"
  mkdir -p "$runtime_dir"
  dbus-daemon --config-file="$bus_config" --print-address
}
2023-06-23 08:38:15 +02:00
# Install the locally built Slurm RPMs, wait for the cluster configuration,
# prepare the directories and files slurmd uses, then run slurmd.
# Globals: ARCH (read), SLURM_VERSION (read)
_slurmd() {
  local waited=0

  cd "/root/rpmbuild/RPMS/${ARCH}"
  # Variables are quoted; only the bare * is left for the shell to expand.
  yum -y --nogpgcheck localinstall \
    "slurm-${SLURM_VERSION}"*".${ARCH}.rpm" \
    "slurm-perlapi-${SLURM_VERSION}"*".${ARCH}.rpm" \
    "slurm-slurmd-${SLURM_VERSION}"*".${ARCH}.rpm" \
    "slurm-torque-${SLURM_VERSION}"*".${ARCH}.rpm"

  # Poll once per second until /.secret/slurm.conf exists.
  echo "checking for slurm.conf"
  until [ -f /.secret/slurm.conf ]; do
    echo -n "."
    waited=1
    sleep 1
  done
  # Terminate the line of dots only when at least one was printed.
  if [ "$waited" -eq 1 ]; then
    echo ""
  fi
  echo "found slurm.conf"

  mkdir -p /var/spool/slurm/d /etc/slurm /var/run/slurm/d /var/log/slurm
  chown slurm: /var/spool/slurm/d /var/run/slurm/d /var/log/slurm

  # NOTE(review): the wait above watches /.secret/slurm.conf, but the file
  # installed here comes from /home/config - confirm both are provided by the
  # same producer.
  cp /home/config/cgroup.conf /etc/slurm/cgroup.conf
  chown slurm: /etc/slurm/cgroup.conf
  chmod 600 /etc/slurm/cgroup.conf
  cp /home/config/slurm.conf /etc/slurm/slurm.conf
  chown slurm: /etc/slurm/slurm.conf
  chmod 600 /etc/slurm/slurm.conf

  # Pre-create the log and pid files with slurm ownership.
  touch /var/log/slurm/slurmd.log
  chown slurm: /var/log/slurm/slurmd.log
  touch /var/run/slurm/d/slurmd.pid
  chmod 600 /var/run/slurm/d/slurmd.pid
  chown slurm: /var/run/slurm/d/slurmd.pid

  echo "Starting slurmd"
  /usr/sbin/slurmstepd infinity &
  # -D keeps slurmd in the foreground, so the message below is only printed
  # once slurmd has exited successfully.
  /usr/sbin/slurmd -Dvv
  echo "Started slurmd"
}
### main ###
# Run every startup step in order, then block forever on tail.
main() {
  _sshd_host
  _munge_start_using_key
  _wait_for_worker
  _start_dbus
  _slurmd
  tail -f /dev/null
}

main