mirror of https://github.com/ClusterCockpit/cc-docker.git
synced 2024-12-26 19:39:05 +01:00

Continue reworking docker setup

commit 235956f92c
parent f0a6652fb0
docker-compose.yml
@@ -1,6 +1,6 @@
 services:
   nats:
-    container_name: cc-nats
+    container_name: nats
     image: nats:alpine
     ports:
       - "4222:4222"
@@ -18,7 +18,7 @@ services:
       - nats

   influxdb:
-    container_name: cc-influxdb
+    container_name: influxdb
     image: influxdb
     command: ["--reporting-disabled"]
     environment:
@@ -36,7 +36,7 @@ services:
       - ${DATADIR}/influxdb/config:/etc/influxdb2

   openldap:
-    container_name: cc-ldap
+    container_name: ldap
     image: osixia/openldap:1.5.0
     command: --copy-service --loglevel debug
     environment:
@@ -47,7 +47,7 @@ services:
       - ${DATADIR}/ldap:/container/service/slapd/assets/config/bootstrap/ldif/custom

   mariadb:
-    container_name: cc-db
+    container_name: mariadb
     image: mariadb:latest
     command: ["--default-authentication-plugin=mysql_native_password"]
     environment:
@@ -63,7 +63,7 @@ services:
       - SYS_NICE

 #  mysql:
-#    container_name: cc-mysql
+#    container_name: mysql
 #    image: mysql:8.0.22
 #    command: ["--default-authentication-plugin=mysql_native_password"]
 #    environment:
@@ -79,20 +79,8 @@ services:
 #    cap_add:
 #      - SYS_NICE

-  phpmyadmin:
-    container_name: cc-phpmyadmin
-    image: phpmyadmin
-    environment:
-      - PMA_HOST=cc-db
-      - PMA_USER=root
-      - PMA_PASSWORD=${MARIADB_ROOT_PASSWORD}
-    ports:
-      - "127.0.0.1:${PHPMYADMIN_PORT}:80"
-    depends_on:
-      - mariadb
-
   slurm-controller:
-    container_name: slurm-controller
+    container_name: slurmctld
     build:
       context: ./slurm/controller
     privileged: true
@@ -112,7 +100,7 @@ services:
       PARTITION_NAME: docker

   slurm-database:
-    container_name: slurm-database
+    container_name: slurmdb
     build:
       context: ./slurm/database
     depends_on:
@@ -131,21 +119,21 @@ services:
       STORAGE_PASS: password
       STORAGE_USER: slurm

-#  slurm-worker01:
-#    container_name: slurm-worker01
-#    build:
-#      context: ./slurm/worker
-#    depends_on:
-#      - slurm-controller
-#    privileged: true
-#    volumes:
-#      - ./home:/home
-#      - ./secret:/.secret
-#    restart: always
-#    environment:
-#      CONTROL_MACHINE: controller
-#      ACCOUNTING_STORAGE_HOST: database
-#      COMPUTE_NODES: worker01 worker02
+  slurm-worker01:
+    container_name: node01
+    build:
+      context: ./slurm/worker
+    depends_on:
+      - slurm-controller
+    privileged: true
+    volumes:
+      - ./home:/home
+      - ./secret:/.secret
+    restart: always
+    environment:
+      CONTROL_MACHINE: controller
+      ACCOUNTING_STORAGE_HOST: database
+      COMPUTE_NODES: worker01 worker02

 #  slurm-worker02:
 #    container_name: slurm-worker02
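The rename drops the cc- prefix, so the containers are now addressed by short names (nats, influxdb, ldap, mariadb, slurmctld, slurmdb, node01), and the phpmyadmin service is gone. A quick smoke test after this change, as a sketch — it assumes a populated .env in the repository root (the compose file references DATADIR and MARIADB_ROOT_PASSWORD) and that the password variable is exported in the host shell:

$ docker compose up -d nats influxdb openldap mariadb
$ docker ps --format '{{.Names}}'    # expect: nats, influxdb, ldap, mariadb
$ docker exec mariadb mysql -uroot -p"$MARIADB_ROOT_PASSWORD" -e 'show databases;'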
env-template.txt (new file)
@@ -0,0 +1,5 @@
+ENV SLURM_VERSION=19.05.1 \
+    MUNGE_UID=981 \
+    SLURM_UID=982 \
+    WORKER_UID=1000
+
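The template mirrors the UID/GID choices that the new slurm/base/Dockerfile below hard-codes (munge 981, slurm 982, worker 1000). Keeping these consistent across images matters because /home and /.secret are shared volumes. A sketch of verifying them in a running worker, using the node01 container name from the compose file:

$ docker exec node01 id slurm     # expect uid=982(slurm) gid=982(slurm)
$ docker exec node01 id worker    # expect uid=1000(worker) gid=1000(worker)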
home/config/slurm.conf
@@ -3,7 +3,7 @@
 # See the slurm.conf man page for more information.
 #
 ClusterName=snowflake
-SlurmctldHost=linux0
+SlurmctldHost=slurmctld
 SlurmUser=slurm
 SlurmctldPort=6817
 SlurmdPort=6818
@@ -12,8 +12,8 @@ ProctrackType=proctrack/cgroup
 ReturnToService=1
 SlurmctldPidFile=/var/run/slurmctld.pid
 SlurmdPidFile=/var/run/slurmd.pid
-SlurmdSpoolDir=/var/spool/slurmd
-StateSaveLocation=/var/spool/slurmctld
+SlurmdSpoolDir=/var/spool/slurm/d
+StateSaveLocation=/var/spool/slurm/ctld
 SwitchType=switch/none
 TaskPlugin=task/affinity,task/cgroup
 #
@@ -30,8 +30,8 @@ SchedulerType=sched/backfill
 SelectType=select/cons_tres
 #
 # LOGGING AND ACCOUNTING
-AccountingStorageHost=slurm-db
-AccountingStoragePort=6818
+AccountingStorageHost=slurmdb
+AccountingStoragePort=6819
 AccountingStorageType=accounting_storage/slurmdbd
 AccountingStorageUser=slurm
 AccountingStoreFlags=job_script,job_comment,job_env,job_extra
@@ -44,5 +44,5 @@ SlurmdDebug=info
 SlurmdLogFile=/var/log/slurmd.log
 #
 # COMPUTE NODES
-NodeName=linux[1-32] CPUs=1 State=UNKNOWN
-PartitionName=debug Nodes=ALL Default=YES MaxTime=INFINITE State=UP
+NodeName=node0[1-2] CPUs=1 State=UNKNOWN
+PartitionName=main Nodes=ALL Default=YES MaxTime=INFINITE State=UP
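node0[1-2] uses Slurm's bracketed hostlist syntax and expands to exactly the two workers wired up in the compose file; the partition is renamed from debug to main to match. Once the controller is up, both can be checked like this (a sketch, using the slurmctld container name):

$ docker exec slurmctld scontrol show hostnames 'node0[1-2]'
node01
node02
$ docker exec slurmctld sinfo     # expect partition "main" listing node0[1-2]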
home/config/slurmdbd.conf (new file)
@@ -0,0 +1,31 @@
+# Archive info
+#ArchiveJobs=yes
+#ArchiveDir="/tmp"
+#ArchiveSteps=yes
+#ArchiveScript=
+#JobPurge=12
+#StepPurge=1
+#
+# Authentication info
+AuthType=auth/munge
+AuthInfo=/var/run/munge/munge.socket.2
+#
+# slurmDBD info
+DbdAddr=slurmdb
+DbdHost=slurmdb
+DbdPort=6819
+SlurmUser=slurm
+DebugLevel=4
+LogFile=/var/log/slurm/slurmdbd.log
+PidFile=/var/run/slurmdbd.pid
+#PluginDir=/usr/lib/slurm
+#PrivateData=accounts,users,usage,jobs
+#TrackWCKey=yes
+#
+# Database info
+StorageType=accounting_storage/mysql
+StorageHost=slurmdb
+StoragePort=3306
+StoragePass=demo
+StorageUser=slurm
+StorageLoc=slurm_acct_db
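This checked-in slurmdbd.conf replaces the file the database entrypoint used to generate (see the removed _generate_slurmdbd_conf further down); host, port, and credentials are now fixed at slurmdb:6819 with the slurm/demo MariaDB account. A sketch of verifying that the accounting database accepts these credentials, assuming MariaDB runs inside the slurmdb container:

$ docker exec slurmdb mysql -u slurm -pdemo -e 'use slurm_acct_db; show tables;'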
slurm/base/Dockerfile (new file)
@@ -0,0 +1,45 @@
+FROM rockylinux:8
+MAINTAINER Jan Eitzinger <jan.eitzinger@fau.de>
+
+ENV SLURM_VERSION=22.05.6
+ENV ARCH=aarch64
+
+RUN yum install https://dl.fedoraproject.org/pub/epel/epel-release-latest-8.noarch.rpm -y
+
+RUN groupadd -g 981 munge \
+    && useradd -m -c "MUNGE Uid 'N' Gid Emporium" -d /var/lib/munge -u 981 -g munge -s /sbin/nologin munge \
+    && groupadd -g 982 slurm \
+    && useradd -m -c "Slurm workload manager" -d /var/lib/slurm -u 982 -g slurm -s /bin/bash slurm \
+    && groupadd -g 1000 worker \
+    && useradd -m -c "Workflow user" -d /home/worker -u 1000 -g worker -s /bin/bash worker
+
+RUN yum install -y munge munge-libs
+RUN dnf --enablerepo=powertools install munge-devel -y
+RUN yum install rng-tools -y
+
+RUN yum install -y python3 gcc openssl openssl-devel \
+    pam-devel numactl numactl-devel hwloc sudo \
+    lua readline-devel ncurses-devel man2html \
+    libibmad libibumad rpm-build perl-ExtUtils-MakeMaker.noarch rpm-build make wget
+
+RUN dnf --enablerepo=powertools install rrdtool-devel lua-devel hwloc-devel rpm-build -y
+RUN dnf install mariadb-server mariadb-devel -y
+RUN mkdir /usr/local/slurm-tmp
+RUN cd /usr/local/slurm-tmp
+RUN wget https://download.schedmd.com/slurm/slurm-${SLURM_VERSION}.tar.bz2
+RUN rpmbuild -ta slurm-${SLURM_VERSION}.tar.bz2
+
+WORKDIR /root/rpmbuild/RPMS/${ARCH}
+RUN yum -y --nogpgcheck localinstall \
+    slurm-${SLURM_VERSION}-1.el8.${ARCH}.rpm \
+    slurm-perlapi-${SLURM_VERSION}-1.el8.${ARCH}.rpm \
+    slurm-slurmctld-${SLURM_VERSION}-1.el8.${ARCH}.rpm
+WORKDIR /
+
+VOLUME ["/home", "/.secret"]
+# 22: SSH
+# 3306: MariaDB
+# 6817: SlurmCtlD
+# 6818: SlurmD
+# 6819: SlurmDBD
+EXPOSE 22 3306 6817 6818 6819
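All three service Dockerfiles below now start FROM clustercockpit/slurm.base:latest, so this base image must exist locally (or in a registry) before docker compose build can succeed. Note that ENV ARCH=aarch64 pins the RPM paths to 64-bit ARM; on an x86_64 host the value would have to be changed before building. A minimal sketch of producing the tag by hand from the repository root:

$ docker build -t clustercockpit/slurm.base:latest slurm/base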
slurm/controller/Dockerfile
@@ -1,51 +1,10 @@
-FROM rockylinux:8
+FROM clustercockpit/slurm.base:latest
 MAINTAINER Jan Eitzinger <jan.eitzinger@fau.de>

-ENV SLURM_VERSION=19.05.1 \
-    MUNGE_UID=981 \
-    SLURM_UID=982 \
-    WORKER_UID=1000
-
-RUN yum install https://dl.fedoraproject.org/pub/epel/epel-release-latest-8.noarch.rpm -y
-
-RUN groupadd -g $MUNGE_UID munge \
-    && useradd -m -c "MUNGE Uid 'N' Gid Emporium" -d /var/lib/munge -u $MUNGE_UID -g munge -s /sbin/nologin munge \
-    && groupadd -g $SLURM_UID slurm \
-    && useradd -m -c "Slurm workload manager" -d /var/lib/slurm -u $SLURM_UID -g slurm -s /bin/bash slurm \
-    && groupadd -g $WORKER_UID worker \
-    && useradd -m -c "Workflow user" -d /home/worker -u $WORKER_UID -g worker -s /bin/bash worker
-
-RUN yum install -y munge munge-libs
-RUN dnf --enablerepo=powertools install munge-devel -y
-RUN yum install rng-tools -y
-
-RUN yum install -y python3 gcc openssl openssl-devel \
-    pam-devel numactl numactl-devel hwloc sudo \
-    lua readline-devel ncurses-devel man2html \
-    libibmad libibumad rpm-build perl-ExtUtils-MakeMaker.noarch rpm-build make wget
-
-RUN dnf --enablerepo=powertools install rrdtool-devel lua-devel hwloc-devel rpm-build -y
-RUN dnf install mariadb-server mariadb-devel -y
-RUN mkdir /usr/local/slurm-tmp
-RUN cd /usr/local/slurm-tmp
-RUN wget https://download.schedmd.com/slurm/slurm-22.05.6.tar.bz2
-RUN rpmbuild -ta slurm-22.05.6.tar.bz2
-
-ENV USE_SLURMDBD=true \
-    CLUSTER_NAME=snowflake \
-    CONTROL_MACHINE=controller \
-    SLURMCTLD_PORT=6817 \
-    SLURMD_PORT=6818 \
-    ACCOUNTING_STORAGE_HOST=database \
-    ACCOUNTING_STORAGE_PORT=6819 \
-    PARTITION_NAME=docker
+# clean up
+RUN rm -f /root/rpmbuild/RPMS/slurm-*.rpm \
+    && yum clean all \
+    && rm -rf /var/cache/yum

 COPY docker-entrypoint.sh /docker-entrypoint.sh
-VOLUME ["/home", "/.secret"]
-# 22: SSH
-# 3306: MariaDB
-# 6817: SlurmCtlD
-# 6818: SlurmD
-# 6819: SlurmDBD
-EXPOSE 22 3306 6817 6818 6819
 ENTRYPOINT ["/docker-entrypoint.sh"]
slurm/controller/docker-entrypoint.sh
@@ -17,22 +17,22 @@ _ssh_worker() {
         chown -R worker:worker /home/worker
     fi
     cat > /home/worker/setup-worker-ssh.sh <<EOF2
 mkdir -p ~/.ssh
 chmod 0700 ~/.ssh
 ssh-keygen -b 2048 -t rsa -f ~/.ssh/id_rsa -q -N "" -C "$(whoami)@$(hostname)-$(date -I)"
 cat ~/.ssh/id_rsa.pub > ~/.ssh/authorized_keys
 chmod 0640 ~/.ssh/authorized_keys
 cat >> ~/.ssh/config <<EOF
 Host *
     StrictHostKeyChecking no
     UserKnownHostsFile /dev/null
     LogLevel QUIET
+EOF
+chmod 0644 ~/.ssh/config
+cd ~/
+tar -czvf ~/worker-secret.tar.gz .ssh
+cd -
 EOF2
-chmod 0644 ~/.ssh/config
-cd ~/
-tar -czvf ~/worker-secret.tar.gz .ssh
-cd -
-EOF2
     chmod +x /home/worker/setup-worker-ssh.sh
     chown worker: /home/worker/setup-worker-ssh.sh
     sudo -u worker /home/worker/setup-worker-ssh.sh
@@ -66,95 +66,28 @@ _copy_secrets() {
     rm -f /home/worker/setup-worker-ssh.sh
 }

-# generate slurm.conf
-_generate_slurm_conf() {
-    cat > /etc/slurm/slurm.conf <<EOF
-#
-# Example slurm.conf file. Please run configurator.html
-# (in doc/html) to build a configuration file customized
-# for your environment.
-#
-#
-# slurm.conf file generated by configurator.html.
-#
-# See the slurm.conf man page for more information.
-#
-ClusterName=$CLUSTER_NAME
-SlurmctldHost=$CONTROL_MACHINE
-SlurmUser=slurm
-SlurmctldPort=$SLURMCTLD_PORT
-SlurmdPort=$SLURMD_PORT
-AuthType=auth/munge
-StateSaveLocation=/var/spool/slurm/ctld
-SlurmdSpoolDir=/var/spool/slurm/d
-SwitchType=switch/none
-MpiDefault=none
-SlurmctldPidFile=/var/run/slurmctld.pid
-SlurmdPidFile=/var/run/slurmd.pid
-ProctrackType=proctrack/pgid
-ReturnToService=0
-#
-# TIMERS
-SlurmctldTimeout=300
-SlurmdTimeout=300
-InactiveLimit=0
-MinJobAge=300
-KillWait=30
-Waittime=0
-#
-# SCHEDULING
-SchedulerType=sched/backfill
-FastSchedule=1
-#
-# LOGGING
-SlurmctldDebug=3
-SlurmctldLogFile=/var/log/slurmctld.log
-SlurmdDebug=3
-SlurmdLogFile=/var/log/slurmd.log
-JobCompType=jobcomp/none
-#JobCompLoc=
-#
-# ACCOUNTING
-JobAcctGatherType=jobacct_gather/linux
-#JobAcctGatherFrequency=30
-#
-AccountingStorageType=accounting_storage/slurmdbd
-AccountingStorageHost=$ACCOUNTING_STORAGE_HOST
-AccountingStoragePort=$ACCOUNTING_STORAGE_PORT
-#AccountingStorageLoc=
-#AccountingStoragePass=
-#AccountingStorageUser=
-#
-# COMPUTE NODES
-NodeName=worker[01-02] RealMemory=1800 CPUs=1 State=UNKNOWN
-PartitionName=$PARTITION_NAME Nodes=ALL Default=YES MaxTime=INFINITE State=UP
-EOF
-}
-
 # run slurmctld
 _slurmctld() {
     cd /root/rpmbuild/RPMS/aarch64
     yum -y --nogpgcheck localinstall slurm-22.05.6-1.el8.aarch64.rpm slurm-perlapi-22.05.6-1.el8.aarch64.rpm slurm-slurmctld-22.05.6-1.el8.aarch64.rpm
-    if $USE_SLURMDBD; then
-        echo -n "checking for slurmdbd.conf"
-        while [ ! -f /.secret/slurmdbd.conf ]; do
-            echo -n "."
-            sleep 1
-        done
-        echo ""
-    fi
-    mkdir -p /var/spool/slurm/ctld /var/spool/slurmd /var/log/slurm /etc/slurm
-    chown -R slurm: /var/spool/slurm/ctld /var/spool/slurmd /var/log/slurm
+    echo -n "checking for slurmdbd.conf"
+    while [ ! -f /.secret/slurmdbd.conf ]; do
+        echo -n "."
+        sleep 1
+    done
+    echo ""
+    mkdir -p /var/spool/slurm/ctld /var/spool/slurm/d /var/log/slurm /etc/slurm
+    chown -R slurm: /var/spool/slurm/ctld /var/spool/slurm/d /var/log/slurm
     touch /var/log/slurmctld.log
     chown slurm: /var/log/slurmctld.log
     if [[ ! -f /home/config/slurm.conf ]]; then
-        echo "### generate slurm.conf ###"
-        _generate_slurm_conf
+        echo "### Missing slurm.conf ###"
+        exit
     else
         echo "### use provided slurm.conf ###"
         cp /home/config/slurm.conf /etc/slurm/slurm.conf
     fi
-    sacctmgr -i add cluster "${CLUSTER_NAME}"
+    sacctmgr -i add cluster "snowflake"
     sleep 2s
     /usr/sbin/slurmctld
     cp -f /etc/slurm/slurm.conf /.secret/
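The first hunk above is a nested-heredoc fix: in the old version the inner <<EOF redirection was never closed, so the outer EOF2 delimiter ended the generated script right after the ssh config block, and the trailing chmod/tar/cd lines ran in the entrypoint shell itself rather than inside setup-worker-ssh.sh. Reduced to a minimal sketch (hypothetical file names), the corrected pattern is:

cat > outer.sh <<EOF2
cat >> inner.conf <<EOF
some config line
EOF
echo "this line still belongs to outer.sh"
EOF2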
slurm/database/Dockerfile
@@ -1,51 +1,10 @@
-FROM rockylinux:8
+FROM clustercockpit/slurm.base:latest
 MAINTAINER Jan Eitzinger <jan.eitzinger@fau.de>

-ENV SLURM_VERSION=19.05.1 \
-    MUNGE_UID=981 \
-    SLURM_UID=982 \
-    WORKER_UID=1000
-
-RUN yum install https://dl.fedoraproject.org/pub/epel/epel-release-latest-8.noarch.rpm -y
-
-RUN groupadd -g $MUNGE_UID munge \
-    && useradd -m -c "MUNGE Uid 'N' Gid Emporium" -d /var/lib/munge -u $MUNGE_UID -g munge -s /sbin/nologin munge \
-    && groupadd -g $SLURM_UID slurm \
-    && useradd -m -c "Slurm workload manager" -d /var/lib/slurm -u $SLURM_UID -g slurm -s /bin/bash slurm \
-    && groupadd -g $WORKER_UID worker \
-    && useradd -m -c "Workflow user" -d /home/worker -u $WORKER_UID -g worker -s /bin/bash worker
-
-RUN yum install -y munge munge-libs
-RUN dnf --enablerepo=powertools install munge-devel -y
-RUN yum install rng-tools -y
-
-RUN yum install -y python3 gcc openssl openssl-devel \
-    pam-devel numactl numactl-devel hwloc sudo \
-    lua readline-devel ncurses-devel man2html \
-    libibmad libibumad rpm-build perl-ExtUtils-MakeMaker.noarch rpm-build make wget
-
-RUN dnf --enablerepo=powertools install rrdtool-devel lua-devel hwloc-devel rpm-build -y
-RUN dnf install mariadb-server mariadb-devel -y
-RUN mkdir /usr/local/slurm-tmp
-RUN cd /usr/local/slurm-tmp
-RUN wget https://download.schedmd.com/slurm/slurm-22.05.6.tar.bz2
-RUN rpmbuild -ta slurm-22.05.6.tar.bz2
-
-ENV DBD_ADDR=database \
-    DBD_HOST=database \
-    DBD_PORT=6819 \
-    STORAGE_HOST=database.local.dev \
-    STORAGE_PORT=3306 \
-    STORAGE_PASS=password \
-    STORAGE_USER=slurm
+# clean up
+RUN rm -f /root/rpmbuild/RPMS/slurm-*.rpm \
+    && yum clean all \
+    && rm -rf /var/cache/yum

 COPY docker-entrypoint.sh /docker-entrypoint.sh
-VOLUME ["/home", "/.secret"]
-# 22: SSH
-# 3306: MariaDB
-# 6817: Slurm Ctl D
-# 6818: Slurm D
-# 6819: Slurm DBD
-EXPOSE 22 3306 6817 6818 6819
 ENTRYPOINT ["/docker-entrypoint.sh"]
slurm/database/docker-entrypoint.sh
@@ -16,10 +16,10 @@ _sshd_host() {
 _slurm_acct_db() {
     {
         echo "create database slurm_acct_db;"
-        echo "create user '${STORAGE_USER}'@'${STORAGE_HOST}';"
-        echo "set password for '${STORAGE_USER}'@'${STORAGE_HOST}' = password('${STORAGE_PASS}');"
-        echo "grant usage on *.* to '${STORAGE_USER}'@'${STORAGE_HOST}';"
-        echo "grant all privileges on slurm_acct_db.* to '${STORAGE_USER}'@'${STORAGE_HOST}';"
+        echo "create user 'slurm'@'slurmdb';"
+        echo "set password for 'slurm'@'slurmdb' = password('demo');"
+        echo "grant usage on *.* to 'slurm'@'slurmdb';"
+        echo "grant all privileges on slurm_acct_db.* to 'slurm'@'slurmdb';"
         echo "flush privileges;"
     } >> $SLURM_ACCT_DB_SQL
 }
@@ -72,50 +72,6 @@ _wait_for_worker() {
     fi
 }

-# generate slurmdbd.conf
-_generate_slurmdbd_conf() {
-    cat > /etc/slurm/slurmdbd.conf <<EOF
-#
-# Example slurmdbd.conf file.
-#
-# See the slurmdbd.conf man page for more information.
-#
-# Archive info
-#ArchiveJobs=yes
-#ArchiveDir="/tmp"
-#ArchiveSteps=yes
-#ArchiveScript=
-#JobPurge=12
-#StepPurge=1
-#
-# Authentication info
-AuthType=auth/munge
-AuthInfo=/var/run/munge/munge.socket.2
-#
-# slurmDBD info
-DbdAddr=$DBD_ADDR
-DbdHost=$DBD_HOST
-DbdPort=$DBD_PORT
-SlurmUser=slurm
-#MessageTimeout=300
-DebugLevel=4
-#DefaultQOS=normal,standby
-LogFile=/var/log/slurm/slurmdbd.log
-PidFile=/var/run/slurmdbd.pid
-#PluginDir=/usr/lib/slurm
-#PrivateData=accounts,users,usage,jobs
-#TrackWCKey=yes
-#
-# Database info
-StorageType=accounting_storage/mysql
-StorageHost=$STORAGE_HOST
-StoragePort=$STORAGE_PORT
-StoragePass=$STORAGE_PASS
-StorageUser=$STORAGE_USER
-StorageLoc=slurm_acct_db
-EOF
-}
-
 # run slurmdbd
 _slurmdbd() {
     mkdir -p /var/spool/slurm/d \
@@ -123,8 +79,8 @@ _slurmdbd() {
     chown slurm: /var/spool/slurm/d \
         /var/log/slurm
     if [[ ! -f /home/config/slurmdbd.conf ]]; then
-        echo "### generate slurmdbd.conf ###"
-        _generate_slurmdbd_conf
+        echo "### Missing slurmdbd.conf ###"
+        exit
     else
         echo "### use provided slurmdbd.conf ###"
         cp /home/config/slurmdbd.conf /etc/slurm/slurmdbd.conf
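With the config generator removed, _slurm_acct_db now seeds MariaDB with fixed credentials that match the new home/config/slurmdbd.conf above. Once the database container has started, the resulting account and grants can be inspected, as a sketch (assumes the mysql root account inside the container needs no password):

$ docker exec slurmdb mysql -e "show grants for 'slurm'@'slurmdb';"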
slurm/worker/Dockerfile
@@ -1,43 +1,11 @@
-FROM rockylinux:8
+FROM clustercockpit/slurm.base:latest
 MAINTAINER Jan Eitzinger <jan.eitzinger@fau.de>

-ENV SLURM_VERSION=19.05.1 \
-    MUNGE_UID=981 \
-    SLURM_UID=982 \
-    WORKER_UID=1000
-
-RUN yum install https://dl.fedoraproject.org/pub/epel/epel-release-latest-8.noarch.rpm -y
-
-RUN groupadd -g $MUNGE_UID munge \
-    && useradd -m -c "MUNGE Uid 'N' Gid Emporium" -d /var/lib/munge -u $MUNGE_UID -g munge -s /sbin/nologin munge \
-    && groupadd -g $SLURM_UID slurm \
-    && useradd -m -c "Slurm workload manager" -d /var/lib/slurm -u $SLURM_UID -g slurm -s /bin/bash slurm \
-    && groupadd -g $WORKER_UID worker \
-    && useradd -m -c "Workflow user" -d /home/worker -u $WORKER_UID -g worker -s /bin/bash worker
-
-RUN yum install -y munge munge-libs
-RUN dnf --enablerepo=powertools install munge-devel -y
-RUN yum install rng-tools -y
-
-RUN yum install -y python3 gcc openssl openssl-devel \
-    pam-devel numactl numactl-devel hwloc sudo \
-    lua readline-devel ncurses-devel man2html \
-    libibmad libibumad rpm-build perl-ExtUtils-MakeMaker.noarch rpm-build make wget
-
-RUN dnf --enablerepo=powertools install rrdtool-devel lua-devel hwloc-devel rpm-build -y
-RUN dnf install mariadb-server mariadb-devel -y
-RUN mkdir /usr/local/slurm-tmp
-RUN cd /usr/local/slurm-tmp
-RUN wget https://download.schedmd.com/slurm/slurm-22.05.6.tar.bz2
-RUN rpmbuild -ta slurm-22.05.6.tar.bz2
-
-VOLUME ["/home", "/.secret"]
-# 22: SSH
-# 3306: MariaDB
-# 6817: SlurmCtlD
-# 6818: SlurmD
-# 6819: SlurmDBD
-EXPOSE 22 3306 6817 6818 6819
+# clean up
+RUN rm -f /root/rpmbuild/RPMS/slurm-*.rpm \
+    && yum clean all \
+    && rm -rf /var/cache/yum

 WORKDIR /home/worker
 COPY docker-entrypoint.sh /docker-entrypoint.sh
 ENTRYPOINT ["/docker-entrypoint.sh"]
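Taken together, the rebased images make the build a two-stage affair: the shared base image first, then the thin service images on top of it. A sketch of a full bring-up from a clean checkout:

$ docker build -t clustercockpit/slurm.base:latest slurm/base
$ docker compose build slurm-controller slurm-database slurm-worker01
$ docker compose up -d
$ docker exec slurmctld sinfo    # node01 should register; node02 stays down until a second worker service is added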