Mirror of https://github.com/ClusterCockpit/cc-docker.git (synced 2024-11-10 10:27:25 +01:00)

Commit f0a6652fb0 (parent fa2287c661): Start converting to Rocky Linux base image
@@ -131,34 +131,34 @@ services:
       STORAGE_PASS: password
       STORAGE_USER: slurm

-  slurm-worker01:
-    container_name: slurm-worker01
-    build:
-      context: ./slurm/worker
-    depends_on:
-      - slurm-controller
-    privileged: true
-    volumes:
-      - ./home:/home
-      - ./secret:/.secret
-    restart: always
-    environment:
-      CONTROL_MACHINE: controller
-      ACCOUNTING_STORAGE_HOST: database
-      COMPUTE_NODES: worker01 worker02
+  # slurm-worker01:
+  #   container_name: slurm-worker01
+  #   build:
+  #     context: ./slurm/worker
+  #   depends_on:
+  #     - slurm-controller
+  #   privileged: true
+  #   volumes:
+  #     - ./home:/home
+  #     - ./secret:/.secret
+  #   restart: always
+  #   environment:
+  #     CONTROL_MACHINE: controller
+  #     ACCOUNTING_STORAGE_HOST: database
+  #     COMPUTE_NODES: worker01 worker02

-  slurm-worker02:
-    container_name: slurm-worker02
-    build:
-      context: ./slurm/worker
-    depends_on:
-      - slurm-controller
-    privileged: true
-    volumes:
-      - ./home:/home
-      - ./secret:/.secret
-    restart: always
-    environment:
-      CONTROL_MACHINE: controller
-      ACCOUNTING_STORAGE_HOST: database
-      COMPUTE_NODES: worker01 worker02
+  # slurm-worker02:
+  #   container_name: slurm-worker02
+  #   build:
+  #     context: ./slurm/worker
+  #   depends_on:
+  #     - slurm-controller
+  #   privileged: true
+  #   volumes:
+  #     - ./home:/home
+  #     - ./secret:/.secret
+  #   restart: always
+  #   environment:
+  #     CONTROL_MACHINE: controller
+  #     ACCOUNTING_STORAGE_HOST: database
+  #     COMPUTE_NODES: worker01 worker02
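Review note: the two worker services are commented out rather than deleted, so they can be restored later by uncommenting. After an edit like this it is worth confirming the file still parses; standard Docker Compose commands for that (not part of this commit) are:

    # Validate docker-compose.yml; prints nothing on success, an error otherwise
    docker compose config -q

    # Start the remaining (uncommented) services in the background
    docker compose up -d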
@@ -1,46 +0,0 @@
-FROM krallin/centos-tini:7
-MAINTAINER Michael J. Stealey <stealey@renci.org>
-
-ENV SLURM_VERSION=19.05.1 \
-    MUNGE_UID=981 \
-    SLURM_UID=982 \
-    WORKER_UID=1000
-
-RUN groupadd -g $MUNGE_UID munge \
-    && useradd -m -c "MUNGE Uid 'N' Gid Emporium" -d /var/lib/munge -u $MUNGE_UID -g munge -s /sbin/nologin munge \
-    && groupadd -g $SLURM_UID slurm \
-    && useradd -m -c "Slurm workload manager" -d /var/lib/slurm -u $SLURM_UID -g slurm -s /bin/bash slurm \
-    && groupadd -g $WORKER_UID worker \
-    && useradd -m -c "Workflow user" -d /home/worker -u $WORKER_UID -g worker -s /bin/bash worker
-
-# install packages for general functionality
-RUN yum -y install \
-    epel-release \
-    && yum -y install \
-    sudo \
-    wget \
-    which \
-    tree \
-    mariadb-server \
-    mariadb-devel \
-    munge \
-    munge-libs \
-    munge-devel \
-    openssh-server \
-    openssh-clients
-
-# install slurm 19.05.1
-COPY rpms /packages
-# /usr/bin/mpiexec from slurm-torque conflicts with openmpi install
-WORKDIR /packages
-RUN yum -y localinstall $(ls | grep -v -e 'torque' -e 'openmpi')
-WORKDIR /
-
-VOLUME ["/home", "/.secret"]
-
-# 22: SSH
-# 3306: MariaDB
-# 6817: Slurm Ctl D
-# 6818: Slurm D
-# 6819: Slurm DBD
-EXPOSE 22 3306 6817 6818 6819
@@ -1,17 +0,0 @@
-
-SLURM_VERSION = 19.05.1
-IMAGE = scidas/slurm.base
-
-DIR := $(shell dirname $(realpath $(lastword $(MAKEFILE_LIST))))
-
-.PHONY: all build clean test
-
-all: build
-
-build:
-	cp -r "$(DIR)/../packages/centos-7/rpms" .
-	docker build -t $(IMAGE):$(SLURM_VERSION) .
-
-clean:
-	@[ -z $(docker images -q $(IMAGE):$(SLURM_VERSION)) ] || docker rmi $(IMAGE):$(SLURM_VERSION)
-	rm -rf rpms
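Side note on the deleted clean target: in a Makefile, `$(docker images -q ...)` is expanded by make itself as an undefined variable, not run by the shell, so the `[ -z ... ]` guard always saw an empty string and `docker rmi` never ran. A corrected form would escape the shell substitution with a doubled dollar sign (a generic Makefile fix, shown only for illustration):

    clean:
    	@[ -z "$$(docker images -q $(IMAGE):$(SLURM_VERSION))" ] || docker rmi $(IMAGE):$(SLURM_VERSION)
    	rm -rf rpms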
@@ -1,3 +0,0 @@
-# Slurm Base Image
-
-TODO
@@ -1,31 +1,35 @@
-FROM scidas/slurm.base:19.05.1
-MAINTAINER Michael J. Stealey <stealey@renci.org>
+FROM rockylinux:8
+MAINTAINER Jan Eitzinger <jan.eitzinger@fau.de>

-# install openmpi 3.0.1
-RUN yum -y install \
-    gcc-c++ \
-    gcc-gfortran \
-    && yum -y localinstall \
-    /packages/openmpi-*.rpm
-
-# install Lmod 7.7
-RUN yum -y install \
-    lua-posix \
-    lua \
-    lua-filesystem \
-    lua-devel \
-    wget \
-    bzip2 \
-    expectk \
-    make \
-    && wget https://sourceforge.net/projects/lmod/files/Lmod-7.7.tar.bz2 \
-    && tar -xjvf Lmod-7.7.tar.bz2
-WORKDIR /Lmod-7.7
-RUN ./configure --prefix=/opt/apps \
-    && make install \
-    && ln -s /opt/apps/lmod/lmod/init/profile /etc/profile.d/z00_lmod.sh \
-    && ln -s /opt/apps/lmod/lmod/init/cshrc /etc/profile.d/z00_lmod.csh
-WORKDIR /
+ENV SLURM_VERSION=19.05.1 \
+    MUNGE_UID=981 \
+    SLURM_UID=982 \
+    WORKER_UID=1000
+
+RUN yum install https://dl.fedoraproject.org/pub/epel/epel-release-latest-8.noarch.rpm -y
+
+RUN groupadd -g $MUNGE_UID munge \
+    && useradd -m -c "MUNGE Uid 'N' Gid Emporium" -d /var/lib/munge -u $MUNGE_UID -g munge -s /sbin/nologin munge \
+    && groupadd -g $SLURM_UID slurm \
+    && useradd -m -c "Slurm workload manager" -d /var/lib/slurm -u $SLURM_UID -g slurm -s /bin/bash slurm \
+    && groupadd -g $WORKER_UID worker \
+    && useradd -m -c "Workflow user" -d /home/worker -u $WORKER_UID -g worker -s /bin/bash worker
+
+RUN yum install -y munge munge-libs
+RUN dnf --enablerepo=powertools install munge-devel -y
+RUN yum install rng-tools -y
+
+RUN yum install -y python3 gcc openssl openssl-devel \
+    pam-devel numactl numactl-devel hwloc sudo \
+    lua readline-devel ncurses-devel man2html \
+    libibmad libibumad rpm-build perl-ExtUtils-MakeMaker.noarch rpm-build make wget
+
+RUN dnf --enablerepo=powertools install rrdtool-devel lua-devel hwloc-devel rpm-build -y
+RUN dnf install mariadb-server mariadb-devel -y
+RUN mkdir /usr/local/slurm-tmp
+RUN cd /usr/local/slurm-tmp
+RUN wget https://download.schedmd.com/slurm/slurm-22.05.6.tar.bz2
+RUN rpmbuild -ta slurm-22.05.6.tar.bz2

 ENV USE_SLURMDBD=true \
     CLUSTER_NAME=snowflake \
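One behavioral detail in the new build steps above: every Dockerfile RUN line starts a fresh shell, so `RUN cd /usr/local/slurm-tmp` has no effect on the `wget` and `rpmbuild` steps that follow; they actually execute in `/`. A sketch of an equivalent single step that really uses the temporary directory (a suggestion, not the committed code):

    RUN mkdir /usr/local/slurm-tmp \
        && cd /usr/local/slurm-tmp \
        && wget https://download.schedmd.com/slurm/slurm-22.05.6.tar.bz2 \
        && rpmbuild -ta slurm-22.05.6.tar.bz2

Alternatively, `WORKDIR /usr/local/slurm-tmp` would persist across subsequent RUN steps.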
@@ -36,12 +40,12 @@ ENV USE_SLURMDBD=true \
     ACCOUNTING_STORAGE_PORT=6819 \
     PARTITION_NAME=docker

-# clean up
-RUN rm -f /packages/slurm-*.rpm /packages/openmpi-*.rpm \
-    && yum clean all \
-    && rm -rf /var/cache/yum \
-    && rm -f /Lmod-7.7.tar.bz2
-
 COPY docker-entrypoint.sh /docker-entrypoint.sh
-
-ENTRYPOINT ["/usr/local/bin/tini", "--", "/docker-entrypoint.sh"]
+VOLUME ["/home", "/.secret"]
+
+# 22: SSH
+# 3306: MariaDB
+# 6817: SlurmCtlD
+# 6818: SlurmD
+# 6819: SlurmDBD
+EXPOSE 22 3306 6817 6818 6819
+ENTRYPOINT ["/docker-entrypoint.sh"]
@@ -16,23 +16,23 @@ _ssh_worker() {
     mkdir -p /home/worker
     chown -R worker:worker /home/worker
   fi
-  cat > /home/worker/setup-worker-ssh.sh <<'EOF2'
+  cat > /home/worker/setup-worker-ssh.sh <<EOF2
 mkdir -p ~/.ssh
 chmod 0700 ~/.ssh
 ssh-keygen -b 2048 -t rsa -f ~/.ssh/id_rsa -q -N "" -C "$(whoami)@$(hostname)-$(date -I)"
 cat ~/.ssh/id_rsa.pub > ~/.ssh/authorized_keys
 chmod 0640 ~/.ssh/authorized_keys
 cat >> ~/.ssh/config <<EOF
 Host *
   StrictHostKeyChecking no
   UserKnownHostsFile /dev/null
   LogLevel QUIET
-EOF
-chmod 0644 ~/.ssh/config
-cd ~/
-tar -czvf ~/worker-secret.tar.gz .ssh
-cd -
 EOF2
+chmod 0644 ~/.ssh/config
+cd ~/
+tar -czvf ~/worker-secret.tar.gz .ssh
+cd -
+EOF2
 chmod +x /home/worker/setup-worker-ssh.sh
 chown worker: /home/worker/setup-worker-ssh.sh
 sudo -u worker /home/worker/setup-worker-ssh.sh
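The heredoc delimiter quoting changed here, and that changes when substitutions happen: with <<'EOF2' the script body is written to setup-worker-ssh.sh verbatim, while with <<EOF2 the shell expands $(whoami), $(hostname), and $(date -I) already while generating the file. A minimal generic illustration (not from this repository):

    cat > /tmp/a.sh <<'END'    # quoted: $(hostname) is written literally, expands when a.sh runs
    echo "$(hostname)"
    END

    cat > /tmp/b.sh <<END      # unquoted: $(hostname) expands now, b.sh gets the value baked in
    echo "$(hostname)"
    END

Note also that, with the terminator moved above the chmod/tar block, the first EOF2 already ends the heredoc, so those trailing commands now appear to run in the entrypoint itself rather than being written into the generated script.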
@@ -46,6 +46,11 @@ _munge_start() {
   chmod 0700 /var/log/munge
   chmod 0755 /var/run/munge
   /sbin/create-munge-key -f
+  rngd -r /dev/urandom
+  /usr/sbin/create-munge-key -r -f
+  sh -c "dd if=/dev/urandom bs=1 count=1024 > /etc/munge/munge.key"
+  chown munge: /etc/munge/munge.key
+  chmod 400 /etc/munge/munge.key
   sudo -u munge /sbin/munged
   munge -n
   munge -n | unmunge
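Of the key-generation commands added here, the `dd` line runs last and overwrites whatever create-munge-key produced, so /etc/munge/munge.key ultimately holds 1024 bytes straight from /dev/urandom. A minimal equivalent would be (a sketch for clarity, not the committed code):

    dd if=/dev/urandom bs=1 count=1024 of=/etc/munge/munge.key
    chown munge: /etc/munge/munge.key
    chmod 400 /etc/munge/munge.key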
@@ -76,15 +81,10 @@ _generate_slurm_conf() {
 #
 ClusterName=$CLUSTER_NAME
 SlurmctldHost=$CONTROL_MACHINE
-#SlurmctldHostr=
-#
 SlurmUser=slurm
-#SlurmdUser=root
 SlurmctldPort=$SLURMCTLD_PORT
 SlurmdPort=$SLURMD_PORT
 AuthType=auth/munge
-#JobCredentialPrivateKey=
-#JobCredentialPublicCertificate=
 StateSaveLocation=/var/spool/slurm/ctld
 SlurmdSpoolDir=/var/spool/slurm/d
 SwitchType=switch/none
@@ -92,25 +92,7 @@ MpiDefault=none
 SlurmctldPidFile=/var/run/slurmctld.pid
 SlurmdPidFile=/var/run/slurmd.pid
 ProctrackType=proctrack/pgid
-#PluginDir=
-#FirstJobId=
 ReturnToService=0
-#MaxJobCount=
-#PlugStackConfig=
-#PropagatePrioProcess=
-#PropagateResourceLimits=
-#PropagateResourceLimitsExcept=
-#Prolog=
-#Epilog=
-#SrunProlog=
-#SrunEpilog=
-#TaskProlog=
-#TaskEpilog=
-#TaskPlugin=
-#TrackWCKey=no
-#TreeWidth=50
-#TmpFS=
-#UsePAM=
 #
 # TIMERS
 SlurmctldTimeout=300
@@ -122,17 +104,7 @@ Waittime=0
 #
 # SCHEDULING
 SchedulerType=sched/backfill
-#SchedulerAuth=
-#SelectType=select/linear
 FastSchedule=1
-#PriorityType=priority/multifactor
-#PriorityDecayHalfLife=14-0
-#PriorityUsageResetPeriod=14-0
-#PriorityWeightFairshare=100000
-#PriorityWeightAge=1000
-#PriorityWeightPartition=10000
-#PriorityWeightJobSize=1000
-#PriorityMaxAge=1-0
 #
 # LOGGING
 SlurmctldDebug=3
@@ -161,20 +133,18 @@ EOF

 # run slurmctld
 _slurmctld() {
+  cd /root/rpmbuild/RPMS/aarch64
+  yum -y --nogpgcheck localinstall slurm-22.05.6-1.el8.aarch64.rpm slurm-perlapi-22.05.6-1.el8.aarch64.rpm slurm-slurmctld-22.05.6-1.el8.aarch64.rpm
   if $USE_SLURMDBD; then
-    echo -n "cheking for slurmdbd.conf"
+    echo -n "checking for slurmdbd.conf"
     while [ ! -f /.secret/slurmdbd.conf ]; do
       echo -n "."
       sleep 1
     done
     echo ""
   fi
-  mkdir -p /var/spool/slurm/ctld \
-    /var/spool/slurm/d \
-    /var/log/slurm
-  chown -R slurm: /var/spool/slurm/ctld \
-    /var/spool/slurm/d \
-    /var/log/slurm
+  mkdir -p /var/spool/slurm/ctld /var/spool/slurmd /var/log/slurm /etc/slurm
+  chown -R slurm: /var/spool/slurm/ctld /var/spool/slurmd /var/log/slurm
   touch /var/log/slurmctld.log
   chown slurm: /var/log/slurmctld.log
   if [[ ! -f /home/config/slurm.conf ]]; then
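The RPM paths added above hardcode aarch64, so this entrypoint only works on ARM64 hosts as written. A hedged sketch of an architecture-neutral variant using uname (a suggestion, not part of the commit):

    ARCH=$(uname -m)   # aarch64 on ARM hosts, x86_64 on Intel/AMD
    cd /root/rpmbuild/RPMS/$ARCH
    yum -y --nogpgcheck localinstall \
        slurm-22.05.6-1.el8.$ARCH.rpm \
        slurm-perlapi-22.05.6-1.el8.$ARCH.rpm \
        slurm-slurmctld-22.05.6-1.el8.$ARCH.rpm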
@@ -1,5 +1,36 @@
-FROM scidas/slurm.base:19.05.1
-MAINTAINER Michael J. Stealey <stealey@renci.org>
+FROM rockylinux:8
+MAINTAINER Jan Eitzinger <jan.eitzinger@fau.de>

+ENV SLURM_VERSION=19.05.1 \
+    MUNGE_UID=981 \
+    SLURM_UID=982 \
+    WORKER_UID=1000
+
+RUN yum install https://dl.fedoraproject.org/pub/epel/epel-release-latest-8.noarch.rpm -y
+
+RUN groupadd -g $MUNGE_UID munge \
+    && useradd -m -c "MUNGE Uid 'N' Gid Emporium" -d /var/lib/munge -u $MUNGE_UID -g munge -s /sbin/nologin munge \
+    && groupadd -g $SLURM_UID slurm \
+    && useradd -m -c "Slurm workload manager" -d /var/lib/slurm -u $SLURM_UID -g slurm -s /bin/bash slurm \
+    && groupadd -g $WORKER_UID worker \
+    && useradd -m -c "Workflow user" -d /home/worker -u $WORKER_UID -g worker -s /bin/bash worker
+
+RUN yum install -y munge munge-libs
+RUN dnf --enablerepo=powertools install munge-devel -y
+RUN yum install rng-tools -y
+
+RUN yum install -y python3 gcc openssl openssl-devel \
+    pam-devel numactl numactl-devel hwloc sudo \
+    lua readline-devel ncurses-devel man2html \
+    libibmad libibumad rpm-build perl-ExtUtils-MakeMaker.noarch rpm-build make wget
+
+RUN dnf --enablerepo=powertools install rrdtool-devel lua-devel hwloc-devel rpm-build -y
+RUN dnf install mariadb-server mariadb-devel -y
+RUN mkdir /usr/local/slurm-tmp
+RUN cd /usr/local/slurm-tmp
+RUN wget https://download.schedmd.com/slurm/slurm-22.05.6.tar.bz2
+RUN rpmbuild -ta slurm-22.05.6.tar.bz2
+
+
 ENV DBD_ADDR=database \
     DBD_HOST=database \
@@ -9,11 +40,12 @@ ENV DBD_ADDR=database \
     STORAGE_PASS=password \
     STORAGE_USER=slurm

-# clean up
-RUN rm -f /packages/slurm-*.rpm /packages/openmpi-*.rpm \
-    && yum clean all \
-    && rm -rf /var/cache/yum
-
 COPY docker-entrypoint.sh /docker-entrypoint.sh
-
-ENTRYPOINT ["/usr/local/bin/tini", "--", "/docker-entrypoint.sh"]
+VOLUME ["/home", "/.secret"]
+
+# 22: SSH
+# 3306: MariaDB
+# 6817: Slurm Ctl D
+# 6818: Slurm D
+# 6819: Slurm DBD
+EXPOSE 22 3306 6817 6818 6819
+ENTRYPOINT ["/docker-entrypoint.sh"]
@@ -41,7 +41,7 @@ _mariadb_start() {
 # start munge using existing key
 _munge_start_using_key() {
   if [ ! -f /.secret/munge.key ]; then
-    echo -n "cheking for munge.key"
+    echo -n "checking for munge.key"
     while [ ! -f /.secret/munge.key ]; do
       echo -n "."
       sleep 1
@@ -63,7 +63,7 @@ _munge_start_using_key() {
 # wait for worker user in shared /home volume
 _wait_for_worker() {
   if [ ! -f /home/worker/.ssh/id_rsa.pub ]; then
-    echo -n "cheking for id_rsa.pub"
+    echo -n "checking for id_rsa.pub"
     while [ ! -f /home/worker/.ssh/id_rsa.pub ]; do
       echo -n "."
       sleep 1
slurm/slurm.conf (new file, 48 lines)
@@ -0,0 +1,48 @@
+# slurm.conf file generated by configurator.html.
+# Put this file on all nodes of your cluster.
+# See the slurm.conf man page for more information.
+#
+ClusterName=snowflake
+SlurmctldHost=linux0
+SlurmUser=slurm
+SlurmctldPort=6817
+SlurmdPort=6818
+MpiDefault=none
+ProctrackType=proctrack/cgroup
+ReturnToService=1
+SlurmctldPidFile=/var/run/slurmctld.pid
+SlurmdPidFile=/var/run/slurmd.pid
+SlurmdSpoolDir=/var/spool/slurmd
+StateSaveLocation=/var/spool/slurmctld
+SwitchType=switch/none
+TaskPlugin=task/affinity,task/cgroup
+#
+# TIMERS
+InactiveLimit=0
+KillWait=30
+MinJobAge=300
+SlurmctldTimeout=120
+SlurmdTimeout=300
+Waittime=0
+#
+# SCHEDULING
+SchedulerType=sched/backfill
+SelectType=select/cons_tres
+#
+# LOGGING AND ACCOUNTING
+AccountingStorageHost=slurm-db
+AccountingStoragePort=6818
+AccountingStorageType=accounting_storage/slurmdbd
+AccountingStorageUser=slurm
+AccountingStoreFlags=job_script,job_comment,job_env,job_extra
+JobCompType=jobcomp/none
+JobAcctGatherFrequency=30
+JobAcctGatherType=jobacct_gather/cgroup
+SlurmctldDebug=info
+SlurmctldLogFile=/var/log/slurmctld.log
+SlurmdDebug=info
+SlurmdLogFile=/var/log/slurmd.log
+#
+# COMPUTE NODES
+NodeName=linux[1-32] CPUs=1 State=UNKNOWN
+PartitionName=debug Nodes=ALL Default=YES MaxTime=INFINITE State=UP
@@ -1,39 +1,43 @@
-FROM scidas/slurm.base:19.05.1
-MAINTAINER Michael J. Stealey <stealey@renci.org>
+FROM rockylinux:8
+MAINTAINER Jan Eitzinger <jan.eitzinger@fau.de>

-# install openmpi 3.0.1
-RUN yum -y install \
-    gcc-c++ \
-    gcc-gfortran \
-    && yum -y localinstall \
-    /packages/openmpi-*.rpm
-
-# install Lmod 7.7
-RUN yum -y install \
-    lua-posix \
-    lua \
-    lua-filesystem \
-    lua-devel \
-    wget \
-    bzip2 \
-    expectk \
-    make \
-    && wget https://sourceforge.net/projects/lmod/files/Lmod-7.7.tar.bz2 \
-    && tar -xjvf Lmod-7.7.tar.bz2
-WORKDIR /Lmod-7.7
-RUN ./configure --prefix=/opt/apps \
-    && make install \
-    && ln -s /opt/apps/lmod/lmod/init/profile /etc/profile.d/z00_lmod.sh \
-    && ln -s /opt/apps/lmod/lmod/init/cshrc /etc/profile.d/z00_lmod.csh
+ENV SLURM_VERSION=19.05.1 \
+    MUNGE_UID=981 \
+    SLURM_UID=982 \
+    WORKER_UID=1000
+
+RUN yum install https://dl.fedoraproject.org/pub/epel/epel-release-latest-8.noarch.rpm -y
+
+RUN groupadd -g $MUNGE_UID munge \
+    && useradd -m -c "MUNGE Uid 'N' Gid Emporium" -d /var/lib/munge -u $MUNGE_UID -g munge -s /sbin/nologin munge \
+    && groupadd -g $SLURM_UID slurm \
+    && useradd -m -c "Slurm workload manager" -d /var/lib/slurm -u $SLURM_UID -g slurm -s /bin/bash slurm \
+    && groupadd -g $WORKER_UID worker \
+    && useradd -m -c "Workflow user" -d /home/worker -u $WORKER_UID -g worker -s /bin/bash worker
+
+RUN yum install -y munge munge-libs
+RUN dnf --enablerepo=powertools install munge-devel -y
+RUN yum install rng-tools -y
+
+RUN yum install -y python3 gcc openssl openssl-devel \
+    pam-devel numactl numactl-devel hwloc sudo \
+    lua readline-devel ncurses-devel man2html \
+    libibmad libibumad rpm-build perl-ExtUtils-MakeMaker.noarch rpm-build make wget
+
+RUN dnf --enablerepo=powertools install rrdtool-devel lua-devel hwloc-devel rpm-build -y
+RUN dnf install mariadb-server mariadb-devel -y
+RUN mkdir /usr/local/slurm-tmp
+RUN cd /usr/local/slurm-tmp
+RUN wget https://download.schedmd.com/slurm/slurm-22.05.6.tar.bz2
+RUN rpmbuild -ta slurm-22.05.6.tar.bz2
+
+VOLUME ["/home", "/.secret"]
+# 22: SSH
+# 3306: MariaDB
+# 6817: SlurmCtlD
+# 6818: SlurmD
+# 6819: SlurmDBD
+EXPOSE 22 3306 6817 6818 6819
 WORKDIR /home/worker

-# clean up
-RUN rm -f /packages/slurm-*.rpm /packages/openmpi-*.rpm \
-    && yum clean all \
-    && rm -rf /var/cache/yum \
-    && rm -f /Lmod-7.7.tar.bz2
-
 COPY docker-entrypoint.sh /docker-entrypoint.sh
-ENTRYPOINT ["/usr/local/bin/tini", "--", "/docker-entrypoint.sh"]
+ENTRYPOINT ["/docker-entrypoint.sh"]
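With tini dropped from the ENTRYPOINT, docker-entrypoint.sh now runs as PID 1 and inherits its duties (signal forwarding, zombie reaping). If an init process is still wanted without baking one into the image, Docker can inject its built-in init at run time; both forms below are standard Docker/Compose options, not part of this commit:

    docker run --init <image>

    # or per service in docker-compose.yml:
    #   init: true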
@@ -45,7 +45,7 @@ _wait_for_worker() {
 # run slurmd
 _slurmd() {
   if [ ! -f /.secret/slurm.conf ]; then
-    echo -n "cheking for slurm.conf"
+    echo -n "checking for slurm.conf"
     while [ ! -f /.secret/slurm.conf ]; do
       echo -n "."
       sleep 1