#!/usr/bin/env bash
#
# Container entrypoint for the Slurm controller node.
#
# Steps, in order:
#   1. start sshd (generating an RSA host key on first start)
#   2. create a passwordless SSH setup for the "worker" user
#   3. create a munge key and start munged
#   4. publish the generated secrets under /.secret for the other nodes
#   5. install the locally built Slurm RPMs and start slurmctld
#   6. block forever so the container stays up
#
# NOTE(review): the copy of this script under review had been collapsed onto
# a single line and two spans of text were lost (the heredoc bodies inside
# _ssh_worker and the head of _munge_start). Those spans are reconstructed
# below and individually marked; confirm them against the original file.
set -e

# Start the sshd server.
# /var/run/sshd is used as the "first start" marker: when it is missing the
# directory is created and an RSA host key is generated before sshd starts.
_sshd_host() {
  if [[ ! -d /var/run/sshd ]]; then
    mkdir /var/run/sshd
    ssh-keygen -t rsa -f /etc/ssh/ssh_host_rsa_key -N ''
  fi
  /usr/sbin/sshd
}

# Set up passwordless SSH for the "worker" user.
# Writes /home/worker/setup-worker-ssh.sh and runs it as worker. The helper
# script is later copied to /.secret by _copy_secrets together with
# /home/worker/worker-secret.tar.gz, which it is expected to produce.
_ssh_worker() {
  if [[ ! -d /home/worker ]]; then
    mkdir -p /home/worker
    chown -R worker:worker /home/worker
  fi

  # Quoted delimiter: nothing is expanded here; "~" and "$(...)" are
  # evaluated when the generated script runs as the worker user.
  # NOTE(review): only the "authorized_keys" redirect target, its chmod and
  # the "cat >> ~/.ssh/config" opener survived in the reviewed copy. The key
  # generation, the ssh config contents, the tarball step and everything
  # after the inner heredoc are reconstructed -- verify against the original.
  cat > /home/worker/setup-worker-ssh.sh <<'EOF2'
mkdir -p ~/.ssh
chmod 0700 ~/.ssh
ssh-keygen -b 2048 -t rsa -f ~/.ssh/id_rsa -q -N "" -C "$(whoami)@$(hostname)-$(date -I)"
cat ~/.ssh/id_rsa.pub > ~/.ssh/authorized_keys
chmod 0640 ~/.ssh/authorized_keys
cat >> ~/.ssh/config <<EOF
Host *
  StrictHostKeyChecking no
  UserKnownHostsFile /dev/null
  LogLevel QUIET
EOF
chmod 0644 ~/.ssh/config
cd ~/
tar -czvf ~/worker-secret.tar.gz .ssh
cd -
EOF2

  chmod +x /home/worker/setup-worker-ssh.sh
  chown worker: /home/worker/setup-worker-ssh.sh
  sudo -u worker /home/worker/setup-worker-ssh.sh
}

# Generate the munge key, start munged and run munge self-checks.
# NOTE(review): the function header and the command that writes the key were
# lost in the reviewed copy; only the tail of the command
# (`... /etc/munge/munge.key"`) survived. The dd invocation below is a
# reconstruction. Any ownership/permission setup of the munge directories
# that may have preceded it is NOT reconstructed -- check the original.
_munge_start() {
  sh -c "dd if=/dev/urandom bs=1 count=1024 > /etc/munge/munge.key"
  chown munge: /etc/munge/munge.key
  chmod 400 /etc/munge/munge.key
  sudo -u munge /sbin/munged
  # Self-checks; under `set -e` a failure here aborts the entrypoint.
  munge -n
  munge -n | unmunge
  remunge
}

# Copy secrets to the /.secret directory for the other nodes, then remove
# the originals from the worker home directory.
_copy_secrets() {
  cp /home/worker/worker-secret.tar.gz /.secret/worker-secret.tar.gz
  # Fixed: the source path previously read "thome/worker/..." (a relative,
  # non-existent path); the rm below shows /home/worker/... was intended.
  cp /home/worker/setup-worker-ssh.sh /.secret/setup-worker-ssh.sh
  cp /etc/munge/munge.key /.secret/munge.key
  rm -f /home/worker/worker-secret.tar.gz
  rm -f /home/worker/setup-worker-ssh.sh
}

# Install the Slurm RPMs, wait for slurmdbd.conf, install slurm.conf and
# start slurmctld.
# Exits with status 1 when /home/config/slurm.conf is not provided.
_slurmctld() {
  cd /root/rpmbuild/RPMS/aarch64
  yum -y --nogpgcheck localinstall \
    slurm-22.05.6-1.el8.aarch64.rpm \
    slurm-perlapi-22.05.6-1.el8.aarch64.rpm \
    slurm-slurmd-22.05.6-1.el8.aarch64.rpm \
    slurm-torque-22.05.6-1.el8.aarch64.rpm \
    slurm-slurmctld-22.05.6-1.el8.aarch64.rpm \
    slurm-slurmrestd-22.05.6-1.el8.aarch64.rpm

  # Block until /.secret/slurmdbd.conf exists. The file is not read here;
  # presumably it signals that the database node is set up -- confirm.
  printf '%s' "checking for slurmdbd.conf"
  while [[ ! -f /.secret/slurmdbd.conf ]]; do
    printf '.'
    sleep 1
  done
  printf '\n'

  mkdir -p /var/spool/slurm/ctld /var/spool/slurm/d /var/log/slurm /etc/slurm
  chown -R slurm: /var/spool/slurm/ctld /var/spool/slurm/d /var/log/slurm
  touch /var/log/slurmctld.log
  chown slurm: /var/log/slurmctld.log

  if [[ ! -f /home/config/slurm.conf ]]; then
    # Fixed: a bare `exit` here returned the status of the preceding echo
    # (0), so a missing config looked like a successful run.
    echo "### Missing slurm.conf ###" >&2
    exit 1
  fi
  echo "### use provided slurm.conf ###"
  cp /home/config/slurm.conf /etc/slurm/slurm.conf

  sacctmgr -i add cluster "snowflake"
  sleep 2s
  /usr/sbin/slurmctld
  # Publish the active slurm.conf for the other nodes.
  cp -f /etc/slurm/slurm.conf /.secret/
}

### main ###
_sshd_host
_ssh_worker
_munge_start
_copy_secrets
_slurmctld

# Keep the container in the foreground after the daemons have started.
tail -f /dev/null