mirror of
https://github.com/ClusterCockpit/cc-docker.git
synced 2025-04-18 02:45:56 +02:00
Added new Docker Compose setup
This commit is contained in:
parent
cf7ddde758
commit
237c0ecd43
107
Dockerfile
Normal file
107
Dockerfile
Normal file
@ -0,0 +1,107 @@
|
||||
# Slurm cluster node image: builds Slurm from source on Rocky Linux 8.
# The same image serves slurmdbd, slurmctld, and slurmd containers; the
# entrypoint script selects the daemon from the container command.
FROM rockylinux:8

LABEL org.opencontainers.image.source="https://github.com/giovtorres/slurm-docker-cluster" \
      org.opencontainers.image.title="slurm-docker-cluster" \
      org.opencontainers.image.description="Slurm Docker cluster on Rocky Linux 8" \
      org.label-schema.docker.cmd="docker-compose up -d" \
      maintainer="Giovanni Torres"

# Slurm release to build (git tag) and gosu release used to drop privileges.
ARG SLURM_TAG=slurm-21-08-6-1
ARG GOSU_VERSION=1.11

# Install build and runtime dependencies in one layer; clean the yum cache in
# the same layer so it never persists into the image. Packages are sorted
# alphabetically for diffability.
RUN set -ex \
    && yum makecache \
    && yum -y update \
    && yum -y install dnf-plugins-core \
    && yum config-manager --set-enabled powertools \
    && yum -y install \
       bash-completion \
       bzip2 \
       cmake \
       gcc \
       gcc-c++ \
       git \
       gnupg \
       http-parser-devel \
       json-c-devel \
       make \
       mariadb-devel \
       mariadb-server \
       munge \
       munge-devel \
       nano \
       perl \
       psmisc \
       python3 \
       python3-devel \
       python3-pip \
       vim-enhanced \
       wget \
    && yum clean all \
    && rm -rf /var/cache/yum

# Make the plain `python` command resolve to python3 (build tooling invokes it).
RUN alternatives --set python /usr/bin/python3

# Python build helpers; --no-cache-dir keeps the pip cache out of the layer.
RUN pip3 install --no-cache-dir Cython nose

# Install gosu and verify its GPG signature before trusting the binary.
# NOTE(review): the amd64 artifact is hardcoded; this image is x86_64-only.
RUN set -ex \
    && wget -O /usr/local/bin/gosu "https://github.com/tianon/gosu/releases/download/$GOSU_VERSION/gosu-amd64" \
    && wget -O /usr/local/bin/gosu.asc "https://github.com/tianon/gosu/releases/download/$GOSU_VERSION/gosu-amd64.asc" \
    && export GNUPGHOME="$(mktemp -d)" \
    && gpg --batch --keyserver hkps://keys.openpgp.org --recv-keys B42F6819007F00F88E364FD4036A9C25BF357DD4 \
    && gpg --batch --verify /usr/local/bin/gosu.asc /usr/local/bin/gosu \
    && rm -rf "${GNUPGHOME}" /usr/local/bin/gosu.asc \
    && chmod +x /usr/local/bin/gosu \
    && gosu nobody true

WORKDIR /home

# Clone sources (PIKA plugin packages, NATS C client, Slurm at SLURM_TAG),
# build and install Slurm, install the example configs, create the slurm
# user/group with a fixed uid/gid, and pre-create state dirs and files.
RUN set -x \
    && git clone https://gitlab.hrz.tu-chemnitz.de/pika/pika-packages.git \
    && git clone https://github.com/nats-io/nats.c.git \
    && git clone -b ${SLURM_TAG} --single-branch --depth=1 https://github.com/SchedMD/slurm.git \
    && pushd slurm \
    && ./configure --enable-debug --prefix=/usr --sysconfdir=/etc/slurm \
       --with-mysql_config=/usr/bin --libdir=/usr/lib64 \
    && make install \
    && install -D -m644 etc/cgroup.conf.example /etc/slurm/cgroup.conf.example \
    && install -D -m644 etc/slurm.conf.example /etc/slurm/slurm.conf.example \
    && install -D -m644 etc/slurmdbd.conf.example /etc/slurm/slurmdbd.conf.example \
    && install -D -m644 contribs/slurm_completion_help/slurm_completion.sh /etc/profile.d/slurm_completion.sh \
    && popd \
    && cp -r slurm /opt \
    && groupadd -r --gid=990 slurm \
    && useradd -r -g slurm --uid=990 slurm \
    && mkdir /etc/sysconfig/slurm \
        /var/spool/slurmd \
        /var/run/slurmd \
        /var/run/slurmdbd \
        /var/lib/slurmd \
        /var/log/slurm \
        /data \
    && touch /var/lib/slurmd/node_state \
        /var/lib/slurmd/front_end_state \
        /var/lib/slurmd/job_state \
        /var/lib/slurmd/resv_state \
        /var/lib/slurmd/trigger_state \
        /var/lib/slurmd/assoc_mgr_state \
        /var/lib/slurmd/assoc_usage \
        /var/lib/slurmd/qos_usage \
        /var/lib/slurmd/fed_mgr_state \
    && chown -R slurm:slurm /var/*/slurm* \
    && /sbin/create-munge-key

# PIKA PrEp plugin source and its build recipe (built inside the container).
COPY slurm-prep-pika_v4.c /home/slurm-prep-pika_v4.c
COPY makefile /home/makefile

COPY slurm.conf /etc/slurm/slurm.conf
COPY slurmdbd.conf /etc/slurm/slurmdbd.conf
# slurmdbd requires its config to be owned by SlurmUser and not world-readable.
RUN set -x \
    && chown slurm:slurm /etc/slurm/slurmdbd.conf \
    && chmod 600 /etc/slurm/slurmdbd.conf

COPY docker-entrypoint.sh /usr/local/bin/docker-entrypoint.sh
ENTRYPOINT ["/usr/local/bin/docker-entrypoint.sh"]

# Default daemon; compose overrides this per service (slurmctld, slurmd, ...).
CMD ["slurmdbd"]
|
2
LICENSE
2
LICENSE
@ -1,6 +1,6 @@
|
||||
MIT License
|
||||
|
||||
Copyright (c) 2021 ClusterCockpit
|
||||
Copyright (c) 2019 Giovanni Torres
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
|
158
README.md
158
README.md
@ -1,74 +1,116 @@
|
||||
# cc-docker
|
||||
# Slurm Docker Cluster
|
||||
|
||||
This is a `docker-compose` setup which provides a quick-start environment for ClusterCockpit development and testing, using `cc-backend`.
|
||||
A number of services are readily available as docker containers (nats, cc-metric-store, InfluxDB, LDAP), or can easily be added by manual configuration (MySQL).
|
||||
This is a multi-container Slurm cluster using docker-compose. The compose file
|
||||
creates named volumes for persistent storage of MySQL data files as well as
|
||||
Slurm state and log directories.
|
||||
|
||||
It includes the following containers:
|
||||
* nats (Default)
|
||||
* cc-metric-store (Default)
|
||||
* influxdb (Default)
|
||||
* openldap (Default)
|
||||
* mysql (Optional)
|
||||
* mariadb (Optional)
|
||||
* phpmyadmin (Optional)
|
||||
## Containers and Volumes
|
||||
|
||||
The setup comes with fixture data for a Job archive, cc-metric-store checkpoints, InfluxDB, MySQL, and a LDAP user directory.
|
||||
The compose file will run the following containers:
|
||||
|
||||
## Known Issues
|
||||
* mysql
|
||||
* slurmdbd
|
||||
* slurmctld
|
||||
* c1 (slurmd)
|
||||
* c2 (slurmd)
|
||||
|
||||
* `docker-compose` installed on Ubuntu (18.04, 20.04) via `apt-get` cannot correctly parse `docker-compose.yml` due to version differences. Install the latest version of `docker-compose` from https://docs.docker.com/compose/install/ instead.
|
||||
* You need to ensure that no other web server is running on ports 8080 (cc-backend), 8081 (phpmyadmin), 8084 (cc-metric-store), 8086 (InfluxDB), 4222 and 8222 (Nats), or 3306 (MySQL). If one or more ports are already in use, you have to adapt the related config accordingly.
|
||||
* Existing VPN connections sometimes cause problems with docker. If `docker-compose` does not start up correctly, try disabling any active VPN connection. Refer to https://stackoverflow.com/questions/45692255/how-make-openvpn-work-with-docker for further information.
|
||||
The compose file will create the following named volumes:
|
||||
|
||||
## Configuration Templates
|
||||
* etc_munge ( -> /etc/munge )
|
||||
* etc_slurm ( -> /etc/slurm )
|
||||
* slurm_jobdir ( -> /data )
|
||||
* var_lib_mysql ( -> /var/lib/mysql )
|
||||
* var_log_slurm ( -> /var/log/slurm )
|
||||
|
||||
Located in `./templates`
|
||||
* `docker-compose.yml.default`: Docker-Compose file to setup cc-metric-store, InfluxDB, MariaDB, PhpMyadmin, and LDAP containers (Default). Used in `setupDev.sh`.
|
||||
* `docker-compose.yml.mysql`: Docker-Compose configuration template if MySQL is desired instead of MariaDB.
|
||||
* `env.default`: Environment variables for setup with cc-metric-store, InfluxDB, MariaDB, PhpMyadmin, and LDAP containers (Default). Used in `setupDev.sh`.
|
||||
* `env.mysql`: Additional environment variables required if MySQL is desired instead of MariaDB.
|
||||
## Building the Docker Image
|
||||
|
||||
## Setup
|
||||
Build the image locally:
|
||||
|
||||
1. Clone `cc-backend` repository in chosen base folder: `$> git clone https://github.com/ClusterCockpit/cc-backend.git`
|
||||
|
||||
2. Run `$ ./setupDev.sh`: **NOTICE** The script will download files of a total size of 338MB (mostly for the InfluxDB data).
|
||||
|
||||
3. The setup-script launches the supporting container stack in the background automatically if everything went well. Run `$> ./cc-backend/cc-backend` to start `cc-backend.`
|
||||
|
||||
4. By default, you can access `cc-backend` in your browser at `http://localhost:8080`. You can shut down the cc-backend server by pressing `CTRL-C`, remember to also shut down all containers via `$> docker-compose down` afterwards.
|
||||
|
||||
5. You can restart the containers with: `$> docker-compose up -d`.
|
||||
|
||||
## Post-Setup Adjustment for using `influxdb`
|
||||
|
||||
When using `influxdb` as a metric database, one must adjust the following files:
|
||||
* `cc-backend/var/job-archive/emmy/cluster.json`
|
||||
* `cc-backend/var/job-archive/woody/cluster.json`
|
||||
|
||||
In the JSON, exchange the content of the `metricDataRepository`-Entry (By default configured for `cc-metric-store`) with:
|
||||
```console
|
||||
docker build -t slurm-docker-cluster:21.08.6 .
|
||||
```
|
||||
"metricDataRepository": {
|
||||
"kind": "influxdb",
|
||||
"url": "http://localhost:8086",
|
||||
"token": "egLfcf7fx0FESqFYU3RpAAbj",
|
||||
"bucket": "ClusterCockpit",
|
||||
"org": "ClusterCockpit",
|
||||
"skiptls": false
|
||||
}
|
||||
|
||||
Build a different version of Slurm using Docker build args and the Slurm Git
|
||||
tag:
|
||||
|
||||
```console
|
||||
docker build --build-arg SLURM_TAG="slurm-19-05-2-1" -t slurm-docker-cluster:19.05.2 .
|
||||
```
|
||||
|
||||
Or equivalently using `docker-compose`:
|
||||
|
||||
```console
|
||||
SLURM_TAG=slurm-19-05-2-1 IMAGE_TAG=19.05.2 docker-compose build
|
||||
```
|
||||
|
||||
|
||||
## Usage
|
||||
## Starting the Cluster
|
||||
|
||||
Credentials for the preconfigured demo user are:
|
||||
* User: `demo`
|
||||
* Password: `AdminDev`
|
||||
Run `docker-compose` to instantiate the cluster:
|
||||
|
||||
You can also login as regular user using any credential in the LDAP user directory at `./data/ldap/users.ldif`.
|
||||
```console
|
||||
IMAGE_TAG=19.05.2 docker-compose up -d
|
||||
```
|
||||
|
||||
TODO: Update job archive and all other metric data.
|
||||
The job archive with 1867 jobs originates from the second half of 2020.
|
||||
Roughly 2700 jobs from the first week of 2021 are loaded with data from InfluxDB.
|
||||
Some views of ClusterCockpit (e.g. the Users view) show the last week or month.
|
||||
To show some data there you have to set the filter to time periods with jobs (August 2020 to January 2021).
|
||||
## Register the Cluster with SlurmDBD
|
||||
|
||||
To register the cluster to the slurmdbd daemon, run the `register_cluster.sh`
|
||||
script:
|
||||
|
||||
```console
|
||||
./register_cluster.sh
|
||||
```
|
||||
|
||||
> Note: You may have to wait a few seconds for the cluster daemons to become
|
||||
> ready before registering the cluster. Otherwise, you may get an error such
|
||||
> as **sacctmgr: error: Problem talking to the database: Connection refused**.
|
||||
>
|
||||
> You can check the status of the cluster by viewing the logs: `docker-compose
|
||||
> logs -f`
|
||||
|
||||
## Accessing the Cluster
|
||||
|
||||
Use `docker exec` to run a bash shell on the controller container:
|
||||
|
||||
```console
|
||||
docker exec -it slurmctld bash
|
||||
```
|
||||
|
||||
From the shell, execute slurm commands, for example:
|
||||
|
||||
```console
|
||||
[root@slurmctld /]# sinfo
|
||||
PARTITION AVAIL TIMELIMIT NODES STATE NODELIST
|
||||
normal* up 5-00:00:00 2 idle c[1-2]
|
||||
```
|
||||
|
||||
## Submitting Jobs
|
||||
|
||||
The `slurm_jobdir` named volume is mounted on each Slurm container as `/data`.
|
||||
Therefore, in order to see job output files while on the controller, change to
|
||||
the `/data` directory when on the **slurmctld** container and then submit a job:
|
||||
|
||||
```console
|
||||
[root@slurmctld /]# cd /data/
|
||||
[root@slurmctld data]# sbatch --wrap="uptime"
|
||||
Submitted batch job 2
|
||||
[root@slurmctld data]# ls
|
||||
slurm-2.out
|
||||
```
|
||||
|
||||
## Stopping and Restarting the Cluster
|
||||
|
||||
```console
|
||||
docker-compose stop
|
||||
docker-compose start
|
||||
```
|
||||
|
||||
## Deleting the Cluster
|
||||
|
||||
To remove all containers and volumes, run:
|
||||
|
||||
```console
|
||||
docker-compose stop
|
||||
docker-compose rm -f
|
||||
docker volume rm slurm-docker-cluster_etc_munge slurm-docker-cluster_etc_slurm slurm-docker-cluster_slurm_jobdir slurm-docker-cluster_var_lib_mysql slurm-docker-cluster_var_log_slurm
|
||||
```
|
||||
|
@ -1,128 +1,84 @@
|
||||
version: "2.2"
|
||||
|
||||
services:
|
||||
nats:
|
||||
container_name: nats
|
||||
image: nats:alpine
|
||||
ports:
|
||||
- "4222:4222"
|
||||
- "8222:8222"
|
||||
mysql:
|
||||
image: mariadb:10.10
|
||||
hostname: mysql
|
||||
container_name: mysql
|
||||
environment:
|
||||
MYSQL_RANDOM_ROOT_PASSWORD: "yes"
|
||||
MYSQL_DATABASE: slurm_acct_db
|
||||
MYSQL_USER: slurm
|
||||
MYSQL_PASSWORD: password
|
||||
volumes:
|
||||
- var_lib_mysql:/var/lib/mysql
|
||||
|
||||
cc-metric-store:
|
||||
container_name: cc-metric-store
|
||||
slurmdbd:
|
||||
image: slurm-docker-cluster:${IMAGE_TAG:-21.08}
|
||||
build:
|
||||
context: ./cc-metric-store
|
||||
ports:
|
||||
- "8084:8084"
|
||||
context: .
|
||||
args:
|
||||
SLURM_TAG: ${SLURM_TAG:-slurm-21-08-6-1}
|
||||
command: ["slurmdbd"]
|
||||
container_name: slurmdbd
|
||||
hostname: slurmdbd
|
||||
volumes:
|
||||
- ${DATADIR}/cc-metric-store:/data
|
||||
- etc_munge:/etc/munge
|
||||
- etc_slurm:/etc/slurm
|
||||
- var_log_slurm:/var/log/slurm
|
||||
expose:
|
||||
- "6819"
|
||||
depends_on:
|
||||
- nats
|
||||
- mysql
|
||||
|
||||
influxdb:
|
||||
container_name: influxdb
|
||||
image: influxdb
|
||||
command: ["--reporting-disabled"]
|
||||
environment:
|
||||
DOCKER_INFLUXDB_INIT_MODE: setup
|
||||
DOCKER_INFLUXDB_INIT_USERNAME: devel
|
||||
DOCKER_INFLUXDB_INIT_PASSWORD: ${INFLUXDB_PASSWORD}
|
||||
DOCKER_INFLUXDB_INIT_ORG: ${INFLUXDB_ORG}
|
||||
DOCKER_INFLUXDB_INIT_BUCKET: ${INFLUXDB_BUCKET}
|
||||
DOCKER_INFLUXDB_INIT_RETENTION: 100w
|
||||
DOCKER_INFLUXDB_INIT_ADMIN_TOKEN: ${INFLUXDB_ADMIN_TOKEN}
|
||||
ports:
|
||||
- "127.0.0.1:${INFLUXDB_PORT}:8086"
|
||||
volumes:
|
||||
- ${DATADIR}/influxdb/data:/var/lib/influxdb2
|
||||
- ${DATADIR}/influxdb/config:/etc/influxdb2
|
||||
|
||||
openldap:
|
||||
container_name: ldap
|
||||
image: osixia/openldap:1.5.0
|
||||
command: --copy-service --loglevel debug
|
||||
environment:
|
||||
- LDAP_ADMIN_PASSWORD=${LDAP_ADMIN_PASSWORD}
|
||||
- LDAP_ORGANISATION=${LDAP_ORGANISATION}
|
||||
- LDAP_DOMAIN=${LDAP_DOMAIN}
|
||||
volumes:
|
||||
- ${DATADIR}/ldap:/container/service/slapd/assets/config/bootstrap/ldif/custom
|
||||
|
||||
mariadb:
|
||||
container_name: mariadb
|
||||
image: mariadb:latest
|
||||
command: ["--default-authentication-plugin=mysql_native_password"]
|
||||
environment:
|
||||
MARIADB_ROOT_PASSWORD: ${MARIADB_ROOT_PASSWORD}
|
||||
MARIADB_DATABASE: slurm_acct_db
|
||||
MARIADB_USER: slurm
|
||||
MARIADB_PASSWORD: demo
|
||||
ports:
|
||||
- "127.0.0.1:${MARIADB_PORT}:3306"
|
||||
volumes:
|
||||
- ${DATADIR}/mariadb:/etc/mysql/conf.d
|
||||
# - ${DATADIR}/sql-init:/docker-entrypoint-initdb.d
|
||||
cap_add:
|
||||
- SYS_NICE
|
||||
|
||||
# mysql:
|
||||
# container_name: mysql
|
||||
# image: mysql:8.0.22
|
||||
# command: ["--default-authentication-plugin=mysql_native_password"]
|
||||
# environment:
|
||||
# MYSQL_ROOT_PASSWORD: ${MYSQL_ROOT_PASSWORD}
|
||||
# MYSQL_DATABASE: ${MYSQL_DATABASE}
|
||||
# MYSQL_USER: ${MYSQL_USER}
|
||||
# MYSQL_PASSWORD: ${MYSQL_PASSWORD}
|
||||
# ports:
|
||||
# - "127.0.0.1:${MYSQL_PORT}:3306"
|
||||
# # volumes:
|
||||
# # - ${DATADIR}/sql-init:/docker-entrypoint-initdb.d
|
||||
# # - ${DATADIR}/sqldata:/var/lib/mysql
|
||||
# cap_add:
|
||||
# - SYS_NICE
|
||||
|
||||
slurm-controller:
|
||||
slurmctld:
|
||||
image: slurm-docker-cluster:${IMAGE_TAG:-21.08}
|
||||
command: ["slurmctld"]
|
||||
container_name: slurmctld
|
||||
hostname: slurmctld
|
||||
build:
|
||||
context: ./slurm/controller
|
||||
privileged: true
|
||||
volumes:
|
||||
- ${DATADIR}/slurm/home:/home
|
||||
- ${DATADIR}/slurm/secret:/.secret
|
||||
|
||||
slurm-database:
|
||||
container_name: slurmdb
|
||||
hostname: slurmdb
|
||||
build:
|
||||
context: ./slurm/database
|
||||
- etc_munge:/etc/munge
|
||||
- etc_slurm:/etc/slurm
|
||||
- slurm_jobdir:/data
|
||||
- var_log_slurm:/var/log/slurm
|
||||
expose:
|
||||
- "6817"
|
||||
depends_on:
|
||||
- mariadb
|
||||
- slurm-controller
|
||||
privileged: true
|
||||
volumes:
|
||||
- ${DATADIR}/slurm/home:/home
|
||||
- ${DATADIR}/slurm/secret:/.secret
|
||||
- "slurmdbd"
|
||||
|
||||
slurm-worker01:
|
||||
container_name: node01
|
||||
hostname: node01
|
||||
build:
|
||||
context: ./slurm/worker
|
||||
c1:
|
||||
image: slurm-docker-cluster:${IMAGE_TAG:-21.08}
|
||||
command: ["slurmd"]
|
||||
hostname: c1
|
||||
container_name: c1
|
||||
volumes:
|
||||
- etc_munge:/etc/munge
|
||||
- etc_slurm:/etc/slurm
|
||||
- slurm_jobdir:/data
|
||||
- var_log_slurm:/var/log/slurm
|
||||
expose:
|
||||
- "6818"
|
||||
depends_on:
|
||||
- slurm-controller
|
||||
privileged: true
|
||||
volumes:
|
||||
- ${DATADIR}/slurm/home:/home
|
||||
- ${DATADIR}/slurm/secret:/.secret
|
||||
- "slurmctld"
|
||||
|
||||
# slurm-worker02:
|
||||
# container_name: node02
|
||||
# hostname: node02
|
||||
# build:
|
||||
# context: ./slurm/worker
|
||||
# depends_on:
|
||||
# - slurm-controller
|
||||
# privileged: true
|
||||
# volumes:
|
||||
# - ${DATADIR}/slurm/home:/home
|
||||
# - ${DATADIR}/slurm/secret:/.secret
|
||||
c2:
|
||||
image: slurm-docker-cluster:${IMAGE_TAG:-21.08}
|
||||
command: ["slurmd"]
|
||||
hostname: c2
|
||||
container_name: c2
|
||||
volumes:
|
||||
- etc_munge:/etc/munge
|
||||
- etc_slurm:/etc/slurm
|
||||
- slurm_jobdir:/data
|
||||
- var_log_slurm:/var/log/slurm
|
||||
expose:
|
||||
- "6818"
|
||||
depends_on:
|
||||
- "slurmctld"
|
||||
|
||||
volumes:
|
||||
etc_munge:
|
||||
etc_slurm:
|
||||
slurm_jobdir:
|
||||
var_lib_mysql:
|
||||
var_log_slurm:
|
||||
|
64
docker-entrypoint.sh
Executable file
64
docker-entrypoint.sh
Executable file
@ -0,0 +1,64 @@
|
||||
#!/bin/bash
# Shared entrypoint for all cluster containers. The first argument selects
# which Slurm daemon to run (slurmdbd | slurmctld | slurmd); any other
# command line is exec'd verbatim.
set -e

if [ "$1" = "slurmdbd" ]
then
    echo "---> Starting the MUNGE Authentication service (munged) ..."
    gosu munge /usr/sbin/munged

    echo "---> Starting the Slurm Database Daemon (slurmdbd) ..."

    {
        # Source StorageHost/StorageUser/StoragePass from slurmdbd.conf and
        # poll MySQL until it accepts connections. Redirect order fixed: the
        # original "2>&1 > /dev/null" still leaked stderr to the console;
        # "> /dev/null 2>&1" silences both streams.
        . /etc/slurm/slurmdbd.conf
        until echo "SELECT 1" | mysql -h "$StorageHost" -u"$StorageUser" -p"$StoragePass" > /dev/null 2>&1
        do
            echo "-- Waiting for database to become active ..."
            sleep 2
        done
    }
    echo "-- Database is now active ..."

    # exec + gosu: slurmdbd becomes PID 1 and receives signals directly.
    exec gosu slurm /usr/sbin/slurmdbd -Dvvv
fi

if [ "$1" = "slurmctld" ]
then
    echo "---> Starting the MUNGE Authentication service (munged) ..."
    gosu munge /usr/sbin/munged

    echo "---> Waiting for slurmdbd to become active before starting slurmctld ..."

    # /dev/tcp is a bash feature: the redirection succeeds once the port
    # accepts a TCP connection.
    until 2>/dev/null >/dev/tcp/slurmdbd/6819
    do
        echo "-- slurmdbd is not available. Sleeping ..."
        sleep 2
    done
    echo "-- slurmdbd is now active ..."

    echo "---> Starting the Slurm Controller Daemon (slurmctld) ..."
    # Older 17.02 builds do not accept the -i flag, so branch on the version.
    if /usr/sbin/slurmctld -V | grep -q '17.02' ; then
        exec gosu slurm /usr/sbin/slurmctld -Dvvv
    else
        exec gosu slurm /usr/sbin/slurmctld -i -Dvvv
    fi
fi

if [ "$1" = "slurmd" ]
then
    echo "---> Starting the MUNGE Authentication service (munged) ..."
    gosu munge /usr/sbin/munged

    echo "---> Waiting for slurmctld to become active before starting slurmd..."

    until 2>/dev/null >/dev/tcp/slurmctld/6817
    do
        echo "-- slurmctld is not available. Sleeping ..."
        sleep 2
    done
    echo "-- slurmctld is now active ..."

    echo "---> Starting the Slurm Node Daemon (slurmd) ..."
    # NOTE(review): slurmd intentionally runs as root (no gosu), matching
    # the original script — confirm this is required by the cgroup setup.
    exec /usr/sbin/slurmd -Dvvv
fi

exec "$@"
|
30
makefile
Normal file
30
makefile
Normal file
@ -0,0 +1,30 @@
|
||||
# Build recipe for the PIKA Slurm PrEp plugin (prep_pika.so).
# Built inside the cluster image against the Slurm headers installed there.

SLURM_ROOT_DIR = /usr
SLURM_INC_DIR = /usr/include/slurm
SLURM_LIB_DIR = /usr/lib64/slurm
SLURM_BUILD = 21.08.6
SLURM_BUILD_DIR = /home/slurm

# Plugin identity: Slurm loads plugins named <type>_<name>.so.
PLUGIN_TYPE = prep
PLUGIN_NAME = pika
PLUGIN_FILE = $(PLUGIN_TYPE)_$(PLUGIN_NAME).so

SRC_FILE = slurm-prep-pika_v4.c

CC = gcc
CFLAGS ?= -Wall -fPIC -g -I$(SLURM_INC_DIR) -I$(SLURM_BUILD_DIR) -I/home/slurm/src/ -I/home/slurm
# -shared (single dash) is the documented GCC driver spelling.
LDFLAGS ?= -shared -L.

# Non-file targets declared phony so stray files named e.g. "clean" or
# "install" cannot shadow them.
.PHONY: all default install clean mrproper

all: $(PLUGIN_FILE)

default: $(PLUGIN_FILE)

$(PLUGIN_FILE): $(SRC_FILE)
	$(CC) $(CFLAGS) $(LDFLAGS) $^ -o $@

install: $(PLUGIN_FILE)
	install -m 755 $(PLUGIN_FILE) $(SLURM_LIB_DIR)

clean:
	rm -f $(PLUGIN_FILE)

mrproper: clean
|
5
register_cluster.sh
Executable file
5
register_cluster.sh
Executable file
@ -0,0 +1,5 @@
|
||||
#!/bin/bash
# Register the "linux" cluster with the accounting database (slurmdbd),
# then restart both Slurm daemons so they pick up the registration.
set -e

docker exec slurmctld bash -c "/usr/bin/sacctmgr --immediate add cluster name=linux" \
  && docker-compose restart slurmdbd slurmctld
|
1012
slurm-prep-pika_v4.c
Normal file
1012
slurm-prep-pika_v4.c
Normal file
File diff suppressed because it is too large
Load Diff
95
slurm.conf
Normal file
95
slurm.conf
Normal file
@ -0,0 +1,95 @@
|
||||
# slurm.conf
|
||||
#
|
||||
# See the slurm.conf man page for more information.
|
||||
#
|
||||
ClusterName=linux
|
||||
ControlMachine=slurmctld
|
||||
ControlAddr=slurmctld
|
||||
#BackupController=
|
||||
#BackupAddr=
|
||||
#
|
||||
SlurmUser=slurm
|
||||
#SlurmdUser=root
|
||||
SlurmctldPort=6817
|
||||
SlurmdPort=6818
|
||||
AuthType=auth/munge
|
||||
#JobCredentialPrivateKey=
|
||||
#JobCredentialPublicCertificate=
|
||||
StateSaveLocation=/var/lib/slurmd
|
||||
SlurmdSpoolDir=/var/spool/slurmd
|
||||
SwitchType=switch/none
|
||||
MpiDefault=none
|
||||
SlurmctldPidFile=/var/run/slurmd/slurmctld.pid
|
||||
SlurmdPidFile=/var/run/slurmd/slurmd.pid
|
||||
ProctrackType=proctrack/linuxproc
|
||||
#PluginDir=
|
||||
#CacheGroups=0
|
||||
#FirstJobId=
|
||||
ReturnToService=0
|
||||
#MaxJobCount=
|
||||
#PlugStackConfig=
|
||||
#PropagatePrioProcess=
|
||||
#PropagateResourceLimits=
|
||||
#PropagateResourceLimitsExcept=
|
||||
#Prolog=
|
||||
#Epilog=
|
||||
#SrunProlog=
|
||||
#SrunEpilog=
|
||||
#TaskProlog=
|
||||
#TaskEpilog=
|
||||
#TaskPlugin=
|
||||
#TrackWCKey=no
|
||||
#TreeWidth=50
|
||||
#TmpFS=
|
||||
#UsePAM=
|
||||
#
|
||||
# TIMERS
|
||||
SlurmctldTimeout=300
|
||||
SlurmdTimeout=300
|
||||
InactiveLimit=0
|
||||
MinJobAge=300
|
||||
KillWait=30
|
||||
Waittime=0
|
||||
#
|
||||
# SCHEDULING
|
||||
SchedulerType=sched/backfill
|
||||
#SchedulerAuth=
|
||||
#SchedulerPort=
|
||||
#SchedulerRootFilter=
|
||||
SelectType=select/cons_res
|
||||
SelectTypeParameters=CR_CPU_Memory
|
||||
FastSchedule=1
|
||||
#PriorityType=priority/multifactor
|
||||
#PriorityDecayHalfLife=14-0
|
||||
#PriorityUsageResetPeriod=14-0
|
||||
#PriorityWeightFairshare=100000
|
||||
#PriorityWeightAge=1000
|
||||
#PriorityWeightPartition=10000
|
||||
#PriorityWeightJobSize=1000
|
||||
#PriorityMaxAge=1-0
|
||||
#
|
||||
# LOGGING
|
||||
SlurmctldDebug=3
|
||||
SlurmctldLogFile=/var/log/slurm/slurmctld.log
|
||||
SlurmdDebug=3
|
||||
SlurmdLogFile=/var/log/slurm/slurmd.log
|
||||
JobCompType=jobcomp/filetxt
|
||||
JobCompLoc=/var/log/slurm/jobcomp.log
|
||||
#
|
||||
# ACCOUNTING
|
||||
JobAcctGatherType=jobacct_gather/linux
|
||||
JobAcctGatherFrequency=30
|
||||
#
|
||||
AccountingStorageType=accounting_storage/slurmdbd
|
||||
AccountingStorageHost=slurmdbd
|
||||
AccountingStoragePort=6819
|
||||
#AccountingStorageLoc=slurm_acct_db
|
||||
#AccountingStoragePass=
|
||||
#AccountingStorageUser=
|
||||
#
|
||||
# COMPUTE NODES
|
||||
NodeName=c[1-2] RealMemory=1000 State=UNKNOWN
|
||||
#
|
||||
# PARTITIONS
|
||||
PartitionName=normal Default=yes Nodes=c[1-2] Priority=50 DefMemPerCPU=500 Shared=NO MaxNodes=2 MaxTime=5-00:00:00 DefaultTime=5-00:00:00 State=UP
|
||||
#PrEpPlugins=pika
|
BIN
slurm/.DS_Store
vendored
Normal file
BIN
slurm/.DS_Store
vendored
Normal file
Binary file not shown.
37
slurmdbd.conf
Normal file
37
slurmdbd.conf
Normal file
@ -0,0 +1,37 @@
|
||||
#
|
||||
# Example slurmdbd.conf file.
|
||||
#
|
||||
# See the slurmdbd.conf man page for more information.
|
||||
#
|
||||
# Archive info
|
||||
#ArchiveJobs=yes
|
||||
#ArchiveDir="/tmp"
|
||||
#ArchiveSteps=yes
|
||||
#ArchiveScript=
|
||||
#JobPurge=12
|
||||
#StepPurge=1
|
||||
#
|
||||
# Authentication info
|
||||
AuthType=auth/munge
|
||||
#AuthInfo=/var/run/munge/munge.socket.2
|
||||
#
|
||||
# slurmDBD info
|
||||
DbdAddr=slurmdbd
|
||||
DbdHost=slurmdbd
|
||||
#DbdPort=6819
|
||||
SlurmUser=slurm
|
||||
#MessageTimeout=300
|
||||
DebugLevel=4
|
||||
#DefaultQOS=normal,standby
|
||||
LogFile=/var/log/slurm/slurmdbd.log
|
||||
PidFile=/var/run/slurmdbd/slurmdbd.pid
|
||||
#PluginDir=/usr/lib/slurm
|
||||
#PrivateData=accounts,users,usage,jobs
|
||||
#TrackWCKey=yes
|
||||
#
|
||||
# Database info
|
||||
StorageType=accounting_storage/mysql
|
||||
StorageHost=mysql
|
||||
StorageUser=slurm
|
||||
StoragePass=password
|
||||
#StorageLoc=slurm_acct_db
|
Loading…
x
Reference in New Issue
Block a user