Introduce slurm cluster

This commit is contained in:
2023-06-23 08:38:15 +02:00
parent d6517a2797
commit fa2287c661
17 changed files with 1528 additions and 17 deletions

39
slurm/worker/Dockerfile Normal file
View File

@@ -0,0 +1,39 @@
# Slurm worker image: extends the Slurm base image with OpenMPI and Lmod and
# starts the worker entrypoint script.
FROM scidas/slurm.base:19.05.1

# MAINTAINER is deprecated; the same information is carried as a label.
LABEL maintainer="Michael J. Stealey <stealey@renci.org>"

# install openmpi 3.0.1 (version per the original note; the RPMs are read from
# /packages, presumably shipped by the base image - TODO confirm)
RUN yum -y install \
    gcc-c++ \
    gcc-gfortran \
  && yum -y localinstall \
    /packages/openmpi-*.rpm

# install Lmod 7.7: build prerequisites first, then download and unpack the
# source tarball into the current working directory
RUN yum -y install \
    lua-posix \
    lua \
    lua-filesystem \
    lua-devel \
    wget \
    bzip2 \
    expectk \
    make \
  && wget https://sourceforge.net/projects/lmod/files/Lmod-7.7.tar.bz2 \
  && tar -xjvf Lmod-7.7.tar.bz2

# Build Lmod from the unpacked tree and expose its shell init scripts through
# /etc/profile.d for both sh-style and csh-style shells.
WORKDIR /Lmod-7.7
RUN ./configure --prefix=/opt/apps \
  && make install \
  && ln -s /opt/apps/lmod/lmod/init/profile /etc/profile.d/z00_lmod.sh \
  && ln -s /opt/apps/lmod/lmod/init/cshrc /etc/profile.d/z00_lmod.csh

WORKDIR /home/worker

# clean up
# NOTE(review): this runs in its own layer, so the files removed here still
# exist in the earlier layers of the image.
RUN rm -f /packages/slurm-*.rpm /packages/openmpi-*.rpm \
  && yum clean all \
  && rm -rf /var/cache/yum \
  && rm -f /Lmod-7.7.tar.bz2

# The entrypoint script is run under tini (expected at /usr/local/bin/tini,
# presumably installed by the base image - TODO confirm).
COPY docker-entrypoint.sh /docker-entrypoint.sh
ENTRYPOINT ["/usr/local/bin/tini", "--", "/docker-entrypoint.sh"]

13
slurm/worker/Makefile Normal file
View File

@@ -0,0 +1,13 @@
# Build and remove the Slurm worker Docker image.
# Both values can be overridden on the command line, e.g.
#   make SLURM_VERSION=<tag> IMAGE=<repo/name>
SLURM_VERSION := 19.05.1
IMAGE := scidas/slurm.worker

# `test` is listed here as in the original even though no `test` rule is
# defined in the visible part of this file.
.PHONY: all build clean test

all: build

# Build the image from the Dockerfile in the current directory.
build:
	docker build -t $(IMAGE):$(SLURM_VERSION) .

# Remove the image only when it exists locally.
# `$$(...)` passes a literal `$(...)` to the shell; with a single `$`, Make
# expands it as an (undefined, hence empty) Make variable, the test is always
# true and `docker rmi` never runs. The quotes keep the test well-formed when
# the command substitution yields nothing.
clean:
	@[ -z "$$(docker images -q $(IMAGE):$(SLURM_VERSION))" ] || docker rmi $(IMAGE):$(SLURM_VERSION)

3
slurm/worker/README.md Normal file
View File

@@ -0,0 +1,3 @@
# Slurm Worker
TODO

View File

@@ -0,0 +1,69 @@
#!/usr/bin/env bash
# Container start-up script (presumably the docker-entrypoint.sh copied by the
# worker Dockerfile - TODO confirm): starts sshd and munged, waits for the
# worker user's SSH public key, starts slurmd, then blocks forever.
# Abort the whole script as soon as any command exits with a non-zero status.
set -e
# Start the OpenSSH daemon.
# When /var/run/sshd does not exist yet, create it and generate an RSA host
# key with an empty passphrase before launching the daemon.
_sshd_host() {
    local run_dir=/var/run/sshd
    if [[ ! -d "$run_dir" ]]; then
        mkdir "$run_dir"
        ssh-keygen -t rsa -f /etc/ssh/ssh_host_rsa_key -N ''
    fi
    /usr/sbin/sshd
}
# Start munged using a pre-existing key.
# Polls once per second until /.secret/munge.key exists, installs it as
# /etc/munge/munge.key, fixes ownership and permissions of the munge
# directories and key, starts munged as the munge user, and then runs the
# munge client tools as a smoke test.
_munge_start_using_key() {
    echo -n "cheking for munge.key"
    # Wait for the key to be provided, printing a dot per second of waiting.
    while [ ! -f /.secret/munge.key ]; do
        echo -n "."
        sleep 1
    done
    echo ""
    cp /.secret/munge.key /etc/munge/munge.key
    chown -R munge: /etc/munge /var/lib/munge /var/log/munge /var/run/munge
    # cp keeps whatever mode the source file had; munged refuses a key that is
    # readable by group or other, so pin the key to owner-read-only.
    chmod 0400 /etc/munge/munge.key
    chmod 0700 /etc/munge
    chmod 0711 /var/lib/munge
    chmod 0700 /var/log/munge
    chmod 0755 /var/run/munge
    sudo -u munge /sbin/munged
    # Smoke test: create a credential, round-trip one through unmunge, and run
    # remunge. Under `set -e` a failure of any of these aborts the script.
    munge -n
    munge -n | unmunge
    remunge
}
# Block until the worker user's SSH public key shows up under /home (described
# by the original comment as a shared volume).
# Returns immediately, printing nothing, when the key is already present;
# otherwise prints a progress dot once per second until it appears.
_wait_for_worker() {
    local pubkey=/home/worker/.ssh/id_rsa.pub
    if [ -f "$pubkey" ]; then
        return 0
    fi
    echo -n "cheking for id_rsa.pub"
    until [ -f "$pubkey" ]; do
        echo -n "."
        sleep 1
    done
    echo ""
}
# Start the Slurm worker daemon.
# Waits (one progress dot per second) for /.secret/slurm.conf when it is not
# there yet, prepares the spool directory and log file owned by the slurm
# user, installs the configuration, and launches slurmd.
_slurmd() {
    local conf_src=/.secret/slurm.conf
    local spool_dir=/var/spool/slurm/d
    local log_file=/var/log/slurmd.log

    if [ ! -f "$conf_src" ]; then
        echo -n "cheking for slurm.conf"
        until [ -f "$conf_src" ]; do
            echo -n "."
            sleep 1
        done
        echo ""
    fi

    # Spool directory, owned by slurm.
    mkdir -p "$spool_dir"
    chown slurm: "$spool_dir"

    # Install the provided configuration.
    cp "$conf_src" /etc/slurm/slurm.conf

    # Make sure the log file exists and is owned by slurm before start-up.
    touch "$log_file"
    chown slurm: "$log_file"

    /usr/sbin/slurmd
}
### main ###
# Run the start-up stages strictly in this order: sshd, munged, wait for the
# worker user's key, slurmd.
for stage in _sshd_host _munge_start_using_key _wait_for_worker _slurmd; do
    "$stage"
done
# Block forever so the script (and with it the container's main process)
# does not exit once the daemons have been started.
tail -f /dev/null