From fa2287c661123c84a8cea1953a11a729c90ad4de Mon Sep 17 00:00:00 2001
From: Jan Eitzinger <jan@moebiusband.org>
Date: Fri, 23 Jun 2023 08:38:15 +0200
Subject: [PATCH] Introduce slurm cluster

---
 docker-compose.yml                    | 106 +++-
 slurm/README.md                       | 806 ++++++++++++++++++++++++++
 slurm/base/Dockerfile                 |  46 ++
 slurm/base/Makefile                   |  17 +
 slurm/base/README.md                  |   3 +
 slurm/controller/Dockerfile           |  47 ++
 slurm/controller/Makefile             |  14 +
 slurm/controller/README.md            |   3 +
 slurm/controller/docker-entrypoint.sh | 200 +++++++
 slurm/database/Dockerfile             |  19 +
 slurm/database/Makefile               |  14 +
 slurm/database/README.md              |   3 +
 slurm/database/docker-entrypoint.sh   | 143 +++++
 slurm/worker/Dockerfile               |  39 ++
 slurm/worker/Makefile                 |  13 +
 slurm/worker/README.md                |   3 +
 slurm/worker/docker-entrypoint.sh     |  69 +++
 17 files changed, 1528 insertions(+), 17 deletions(-)
 create mode 100644 slurm/README.md
 create mode 100644 slurm/base/Dockerfile
 create mode 100644 slurm/base/Makefile
 create mode 100644 slurm/base/README.md
 create mode 100644 slurm/controller/Dockerfile
 create mode 100644 slurm/controller/Makefile
 create mode 100644 slurm/controller/README.md
 create mode 100755 slurm/controller/docker-entrypoint.sh
 create mode 100644 slurm/database/Dockerfile
 create mode 100644 slurm/database/Makefile
 create mode 100644 slurm/database/README.md
 create mode 100755 slurm/database/docker-entrypoint.sh
 create mode 100644 slurm/worker/Dockerfile
 create mode 100644 slurm/worker/Makefile
 create mode 100644 slurm/worker/README.md
 create mode 100755 slurm/worker/docker-entrypoint.sh

diff --git a/docker-compose.yml b/docker-compose.yml
index 63c7109..e5e811f 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -62,22 +62,22 @@ services:
     cap_add:
       - SYS_NICE
 
-  mysql:
-    container_name: cc-mysql
-    image: mysql:8.0.22
-    command: ["--default-authentication-plugin=mysql_native_password"]
-    environment:
-      MYSQL_ROOT_PASSWORD: ${MYSQL_ROOT_PASSWORD}
-      MYSQL_DATABASE: ${MYSQL_DATABASE}
-      MYSQL_USER: ${MYSQL_USER}
-      MYSQL_PASSWORD: ${MYSQL_PASSWORD}
-    ports:
-      - "127.0.0.1:${MYSQL_PORT}:3306"
-    # volumes:
-      # - ${DATADIR}/sql-init:/docker-entrypoint-initdb.d
-      # - ${DATADIR}/sqldata:/var/lib/mysql
-    cap_add:
-      - SYS_NICE
+  # mysql:
+  #   container_name: cc-mysql
+  #   image: mysql:8.0.22
+  #   command: ["--default-authentication-plugin=mysql_native_password"]
+  #   environment:
+  #     MYSQL_ROOT_PASSWORD: ${MYSQL_ROOT_PASSWORD}
+  #     MYSQL_DATABASE: ${MYSQL_DATABASE}
+  #     MYSQL_USER: ${MYSQL_USER}
+  #     MYSQL_PASSWORD: ${MYSQL_PASSWORD}
+  #   ports:
+  #     - "127.0.0.1:${MYSQL_PORT}:3306"
+  #   # volumes:
+  #     # - ${DATADIR}/sql-init:/docker-entrypoint-initdb.d
+  #     # - ${DATADIR}/sqldata:/var/lib/mysql
+  #   cap_add:
+  #     - SYS_NICE
 
   phpmyadmin:
     container_name: cc-phpmyadmin
@@ -89,4 +89,76 @@ services:
     ports:
       - "127.0.0.1:${PHPMYADMIN_PORT}:80"
     depends_on:
-      - db
+      - mariadb
+
+  slurm-controller:
+    container_name: slurm-controller
+    build:
+      context: ./slurm/controller
+    privileged: true
+    volumes:
+      - ./home:/home
+      - ./secret:/.secret
+    restart: always
+    environment:
+      USE_SLURMDBD: 'true'
+      CLUSTER_NAME: snowflake
+      CONTROL_MACHINE: controller
+      SLURMCTLD_PORT: 6817
+      SLURMD_PORT: 6818
+      ACCOUNTING_STORAGE_HOST: database
+      ACCOUNTING_STORAGE_PORT: 6819
+      COMPUTE_NODES: worker01 worker02
+      PARTITION_NAME: docker
+
+  slurm-database:
+    container_name: slurm-database
+    build:
+      context: ./slurm/database
+    depends_on:
+      - slurm-controller
+    privileged: true
+    volumes:
+      - ./home:/home
+      - ./secret:/.secret
+    restart: always
+    environment:
+      DBD_ADDR: database
+      DBD_HOST: database
+      DBD_PORT: 6819
+      STORAGE_HOST: database.local.dev
+      STORAGE_PORT: 3306
+      STORAGE_PASS: password
+      STORAGE_USER: slurm
+
+  slurm-worker01:
+    container_name: slurm-worker01
+    build:
+      context: ./slurm/worker
+    depends_on:
+      - slurm-controller
+    privileged: true
+    volumes:
+      - ./home:/home
+      - ./secret:/.secret
+    restart: always
+    environment:
+      CONTROL_MACHINE: controller
+      ACCOUNTING_STORAGE_HOST: database
+      COMPUTE_NODES: worker01 worker02
+
+  slurm-worker02:
+    container_name: slurm-worker02
+    build:
+      context: ./slurm/worker
+    depends_on:
+      - slurm-controller
+    privileged: true
+    volumes:
+      - ./home:/home
+      - ./secret:/.secret
+    restart: always
+    environment:
+      CONTROL_MACHINE: controller
+      ACCOUNTING_STORAGE_HOST: database
+      COMPUTE_NODES: worker01 worker02
diff --git a/slurm/README.md b/slurm/README.md
new file mode 100644
index 0000000..6c6d2d9
--- /dev/null
+++ b/slurm/README.md
@@ -0,0 +1,806 @@
+# Slurm in Docker
+
+**WORK IN PROGRESS**
+
+Use [Docker](https://www.docker.com/) to explore the various components of [Slurm](https://www.schedmd.com/index.php)
+
+This work represents a small exploratory Slurm cluster using CentOS 7 based Docker images. The intent was to learn the basics of Slurm prior to extending the concept to a more distributed environment.
+
+Images include:
+
+- [Slurm 19.05.1](https://slurm.schedmd.com) - installed from [rpm packages](packages)
+- [OpenMPI 3.0.1](https://www.open-mpi.org/doc/current/) - installed from [rpm packages](packages)
+- [Lmod 7.7](http://lmod.readthedocs.io/en/latest/index.html) - installed from [distribution files](https://sourceforge.net/projects/lmod/files/)
+  - [Lmod module packages for CentOS 7](https://github.com/scidas/lmod-modules-centos) - Organized for Slurm-in-Docker use
+  - [Using Lmod with Slurm-in-Docker](using-lmod-with-slurm-in-docker.md) documentation
+
+## Contents
+
+1. [packages](packages) - Build the RPM packages for running Slurm and OpenMPI on CentOS 7
+2. [base](base) - Slurm base image from which other components are derived
+3. [controller](controller) - Slurm controller (head-node) definition
+4. [database](database) - Slurm database definition (not necessary, but useful for accounting information)
+5. [worker](worker) - Slurm worker (compute-node) definition
+
+## Container Overview
+
+An example [docker-compose.yml](docker-compose.yml) file is provided that builds and deploys the diagramed topology
+
+<img width="90%" alt="Slurm cluster" src="https://user-images.githubusercontent.com/5332509/38642211-67a7e1a4-3da7-11e8-85a9-3394ad3c8cb6.png">
+
+Listing of participating containers with FQDNs and their function within the cluster.
+
+Container | Function | FQDN
+:-------- | :------- | :---
+controller | Slurm Primary Controller | controller.local.dev
+database | Slurm Primary Database Daemon | database.local.dev
+worker01 | Slurm Worker | worker01.local.dev
+worker02 | Slurm Worker | worker02.local.dev
+
+## Configure slurm.conf/slurmdbd.conf
+
+Users may use the default slurm.conf file generated in [docker-entrypoint.sh](https://github.com/SciDAS/slurm-in-docker/blob/master/controller/docker-entrypoint.sh), or preferably create one to better fit their system.
+
+The [Slurm Configuration Tool](https://slurm.schedmd.com/configurator.html) is a useful resource for creating custom slurm.conf files.
+
+Steps to add a user-provided slurm.conf/slurmdbd.conf:
+
+1. Create ```home/config``` and ```secret``` directories:
+
+```
+mkdir -p home/config secret
+```
+
+2. Copy configuration files to the ```home/config``` directory:
+
+```
+cp <user-provided-slurm.conf> home/config/slurm.conf; cp <user-provided-slurmdbd.conf> home/config/slurmdbd.conf
+```
+
+The user can then proceed as normal.
+
+TODO: Have software check validity of custom configuration files.
+
+## Build
+
+Build the slurm RPM files by following the instructions in the [packages](packages) directory.
+
+**Create the base Slurm image**:
+
+Copy the `packages/centos-7/rpms` directory to the `base` directory
+
+```
+cd base/
+cp -r ../packages/centos-7/rpms .
+```
+
+Build the base image
+
+```
+docker build -t scidas/slurm.base:19.05.1 .
+```
+
+Verify image build
+
+```console
+$ docker images
+REPOSITORY             TAG                 IMAGE ID            CREATED                  SIZE
+scidas/slurm.base   19.05.1             1600621cb483        Less than a second ago   819MB
+...
+```
+
+All images defined in `docker-compose.yml` will be built from the `scidas/slurm.base:19.05.1` base image
+
+## Usage
+
+An example [docker-compose.yml](docker-compose.yml) file is provided that builds and deploys the diagramed topology (`-d` is used to daemonize the call).
+
+```
+docker-compose up -d
+```
+
+Four containers should be observed running when completed
+
+```console
+$ docker ps
+CONTAINER ID        IMAGE                                COMMAND                  CREATED             STATUS              PORTS                                              NAMES
+995183e9391e        scidas/slurm.worker:19.05.1       "/usr/local/bin/tini…"   10 seconds ago      Up 30 seconds       22/tcp, 3306/tcp, 6817-6819/tcp, 60001-63000/tcp   worker01
+bdd7c8daaca2        scidas/slurm.database:19.05.1     "/usr/local/bin/tini…"   10 seconds ago      Up 30 seconds       22/tcp, 3306/tcp, 6817-6819/tcp, 60001-63000/tcp   database
+a8382a486989        scidas/slurm.worker:19.05.1       "/usr/local/bin/tini…"   10 seconds ago      Up 30 seconds       22/tcp, 3306/tcp, 6817-6819/tcp, 60001-63000/tcp   worker02
+24e951854109        scidas/slurm.controller:19.05.1   "/usr/local/bin/tini…"   11 seconds ago      Up 31 seconds       22/tcp, 3306/tcp, 6817-6819/tcp, 60001-63000/tcp   controller
+```
+
+## Examples using Slurm
+
+The examples make use of the following commands.
+
+- `sinfo` - [man page](https://slurm.schedmd.com/sinfo.html)
+- `sacctmgr` - [man page](https://slurm.schedmd.com/sacctmgr.html)
+- `sacct` - [man page](https://slurm.schedmd.com/sacct.html)
+- `srun` - [man page](https://slurm.schedmd.com/srun.html)
+- `sbatch` - [man page](https://slurm.schedmd.com/sbatch.html)
+- `squeue` - [man page](https://slurm.schedmd.com/squeue.html)
+
+### controller
+
+Use the `docker exec` call to gain a shell on the `controller` container.
+
+```console
+$ docker exec -ti controller /bin/bash
+[root@controller /]#
+```
+
+Issue an `sinfo` call
+
+```console
+# sinfo -lN
+Wed Apr 11 21:15:35 2018
+NODELIST   NODES PARTITION       STATE CPUS    S:C:T MEMORY TMP_DISK WEIGHT AVAIL_FE REASON
+worker01       1   docker*        idle    1    1:1:1   1998        0      1   (null) none
+worker02       1   docker*        idle    1    1:1:1   1998        0      1   (null) none
+```
+
+Create a `worker` account and `worker` user in Slurm
+
+```console
+# sacctmgr -i add account worker description="worker account" Organization=Slurm-in-Docker
+ Adding Account(s)
+  worker
+ Settings
+  Description     = worker account
+  Organization    = slurm-in-docker
+ Associations
+  A = worker     C = snowflake
+ Settings
+  Parent        = root
+
+# sacctmgr -i create user worker account=worker adminlevel=None
+ Adding User(s)
+  worker
+ Settings =
+  Admin Level     = None
+ Associations =
+  U = worker    A = worker     C = snowflake
+ Non Default Settings
+```
+
+### database
+
+Use the `docker exec` call to gain a MariaDB/MySQL shell on the `database` container.
+
+```console
+$ docker exec -ti database mysql -uslurm -ppassword -hdatabase.local.dev
+Welcome to the MariaDB monitor.  Commands end with ; or \g.
+Your MariaDB connection id is 9
+Server version: 5.5.56-MariaDB MariaDB Server
+
+Copyright (c) 2000, 2017, Oracle, MariaDB Corporation Ab and others.
+
+Type 'help;' or '\h' for help. Type '\c' to clear the current input statement.
+
+MariaDB [(none)]>
+```
+
+Check out the `slurm_acct_db` database and its tables
+
+```console
+MariaDB [(none)]> use slurm_acct_db;
+Reading table information for completion of table and column names
+You can turn off this feature to get a quicker startup with -A
+
+Database changed
+MariaDB [slurm_acct_db]> show tables;
++-----------------------------------+
+| Tables_in_slurm_acct_db           |
++-----------------------------------+
+| acct_coord_table                  |
+| acct_table                        |
+| clus_res_table                    |
+| cluster_table                     |
+| convert_version_table             |
+| federation_table                  |
+| qos_table                         |
+| res_table                         |
+| snowflake_assoc_table             |
+| snowflake_assoc_usage_day_table   |
+| snowflake_assoc_usage_hour_table  |
+| snowflake_assoc_usage_month_table |
+| snowflake_event_table             |
+| snowflake_job_table               |
+| snowflake_last_ran_table          |
+| snowflake_resv_table              |
+| snowflake_step_table              |
+| snowflake_suspend_table           |
+| snowflake_usage_day_table         |
+| snowflake_usage_hour_table        |
+| snowflake_usage_month_table       |
+| snowflake_wckey_table             |
+| snowflake_wckey_usage_day_table   |
+| snowflake_wckey_usage_hour_table  |
+| snowflake_wckey_usage_month_table |
+| table_defs_table                  |
+| tres_table                        |
+| txn_table                         |
+| user_table                        |
++-----------------------------------+
+29 rows in set (0.00 sec)
+```
+
+Validate that the `worker` user was entered into the database
+
+```console
+MariaDB [slurm_acct_db]> select * from user_table;
++---------------+------------+---------+--------+-------------+
+| creation_time | mod_time   | deleted | name   | admin_level |
++---------------+------------+---------+--------+-------------+
+|    1523481120 | 1523481120 |       0 | root   |           3 |
+|    1523481795 | 1523481795 |       0 | worker |           1 |
++---------------+------------+---------+--------+-------------+
+2 rows in set (0.00 sec)
+```
+
+### worker01 and worker02
+
+Use the `docker exec` call to gain a shell on either the `worker01` or `worker02` container and become the user `worker`.
+
+```console
+$ docker exec -ti -u worker worker01 /bin/bash
+[worker@worker01 /]$ cd ~
+[worker@worker01 ~]$ pwd
+/home/worker
+```
+
+Test password-less `ssh` between containers
+
+```console
+[worker@worker01 ~]$ hostname
+worker01.local.dev
+[worker@worker01 ~]$ ssh worker02
+[worker@worker02 ~]$ hostname
+worker02.local.dev
+[worker@worker02 ~]$ ssh controller
+[worker@controller ~]$ hostname
+controller.local.dev
+```
+
+### Slurm commands
+
+All commands are issued as the user `worker` from the `controller` node
+
+```console
+$ docker exec -ti -u worker controller /bin/bash
+[worker@controller /]$ cd ~
+[worker@controller ~]$ pwd
+/home/worker
+```
+
+- For the rest of this section the `[worker@controller ~]$` prompt will be shortened to simply `$`
+
+Test the `sacct` and `srun` calls
+
+```console
+$ sacct
+       JobID    JobName  Partition    Account  AllocCPUS      State ExitCode
+------------ ---------- ---------- ---------- ---------- ---------- --------
+$ srun -N 2 hostname
+worker01.local.dev
+worker02.local.dev
+$ sacct
+       JobID    JobName  Partition    Account  AllocCPUS      State ExitCode
+------------ ---------- ---------- ---------- ---------- ---------- --------
+2              hostname     docker     worker          2  COMPLETED      0:0
+```
+
+Test the `sbatch` call
+
+Make a job file named: `slurm_test.job`
+
+```bash
+#!/bin/bash
+
+#SBATCH --job-name=SLURM_TEST
+#SBATCH --output=SLURM_TEST.out
+#SBATCH --error=SLURM_TEST.err
+#SBATCH --partition=docker
+
+srun hostname | sort
+```
+
+Run the job using `sbatch`
+
+```console
+$ sbatch -N 2 slurm_test.job
+Submitted batch job 3
+```
+
+Check the `sacct` output
+
+```console
+$ sacct
+       JobID    JobName  Partition    Account  AllocCPUS      State ExitCode
+------------ ---------- ---------- ---------- ---------- ---------- --------
+2              hostname     docker     worker          2  COMPLETED      0:0
+3            SLURM_TEST     docker     worker          2  COMPLETED      0:0
+3.batch           batch                worker          1  COMPLETED      0:0
+3.0            hostname                worker          2  COMPLETED      0:0
+```
+
+Check the output files
+
+```console
+$ ls -1
+SLURM_TEST.err
+SLURM_TEST.out
+slurm_test.job
+$ cat SLURM_TEST.out
+worker01.local.dev
+worker02.local.dev
+```
+
+Test the `sbatch --array` and `squeue` calls
+
+Make a job file named `array_test.job`:
+
+```bash
+#!/bin/bash
+
+#SBATCH -N 1
+#SBATCH -c 1
+#SBATCH -t 24:00:00
+###################
+## %A == SLURM_ARRAY_JOB_ID
+## %a == SLURM_ARRAY_TASK_ID (or index)
+## %N == SLURMD_NODENAME (directories made ahead of time)
+#SBATCH -o %N/%A_%a_out.txt
+#SBATCH -e %N/%A_%a_err.txt
+
+snooze=$(( ( RANDOM % 10 )  + 1 ))
+echo "$(hostname) is snoozing for ${snooze} seconds..."
+
+sleep $snooze
+```
+
+This job writes its output files into a directory named `%N`, which expands to the `SLURMD_NODENAME` variable. In this particular case the output directories must exist ahead of time; they can be determined by finding all available nodes in the `NODELIST` and creating one directory per node.
+
+```console
+$ sinfo -N
+NODELIST   NODES PARTITION STATE
+worker01       1   docker* idle
+worker02       1   docker* idle
+$ mkdir worker01 worker02
+```
+
+The job, when run, will direct its output files to the directory defined by the node on which it is running. Each iteration will sleep from 1 to 10 seconds randomly before moving on to the next run in the array.
+
+We will run an array of 20 jobs, 2 at a time, until the array is completed. The status can be found using the `squeue` command.
+
+```console
+$ sbatch --array=1-20%2 array_test.job
+Submitted batch job 4
+$ squeue
+             JOBID PARTITION     NAME     USER ST       TIME  NODES NODELIST(REASON)
+        4_[3-20%2]    docker array_te   worker PD       0:00      1 (JobArrayTaskLimit)
+               4_1    docker array_te   worker  R       0:01      1 worker01
+               4_2    docker array_te   worker  R       0:01      1 worker02
+...
+$ squeue
+             JOBID PARTITION     NAME     USER ST       TIME  NODES NODELIST(REASON)
+          4_[20%2]    docker array_te   worker PD       0:00      1 (JobArrayTaskLimit)
+              4_19    docker array_te   worker  R       0:04      1 worker02
+              4_18    docker array_te   worker  R       0:10      1 worker01
+$ squeue
+             JOBID PARTITION     NAME     USER ST       TIME  NODES NODELIST(REASON)
+```
+
+Looking into each of the `worker01` and `worker02` directories we can see which jobs were run on each node.
+
+```console
+$ ls
+SLURM_TEST.err  array_test.job  worker01
+SLURM_TEST.out  slurm_test.job  worker02
+$ ls worker01
+4_11_err.txt  4_16_err.txt  4_1_err.txt   4_3_err.txt  4_7_err.txt
+4_11_out.txt  4_16_out.txt  4_1_out.txt   4_3_out.txt  4_7_out.txt
+4_14_err.txt  4_18_err.txt  4_20_err.txt  4_5_err.txt  4_9_err.txt
+4_14_out.txt  4_18_out.txt  4_20_out.txt  4_5_out.txt  4_9_out.txt
+$ ls worker02
+4_10_err.txt  4_13_err.txt  4_17_err.txt  4_2_err.txt  4_6_err.txt
+4_10_out.txt  4_13_out.txt  4_17_out.txt  4_2_out.txt  4_6_out.txt
+4_12_err.txt  4_15_err.txt  4_19_err.txt  4_4_err.txt  4_8_err.txt
+4_12_out.txt  4_15_out.txt  4_19_out.txt  4_4_out.txt  4_8_out.txt
+```
+
+And looking at each `*_out.txt` file view the output
+
+```console
+$ cat worker01/4_14_out.txt
+worker01.local.dev is snoozing for 10 seconds...
+$ cat worker02/4_6_out.txt
+worker02.local.dev is snoozing for 7 seconds...
+```
+
+Using the `sacct` call we can see when each job in the array was executed
+
+```console
+$ sacct
+       JobID    JobName  Partition    Account  AllocCPUS      State ExitCode
+------------ ---------- ---------- ---------- ---------- ---------- --------
+2              hostname     docker     worker          2  COMPLETED      0:0
+3            SLURM_TEST     docker     worker          2  COMPLETED      0:0
+3.batch           batch                worker          1  COMPLETED      0:0
+3.0            hostname                worker          2  COMPLETED      0:0
+4_20         array_tes+     docker     worker          1  COMPLETED      0:0
+4_20.batch        batch                worker          1  COMPLETED      0:0
+4_1          array_tes+     docker     worker          1  COMPLETED      0:0
+4_1.batch         batch                worker          1  COMPLETED      0:0
+4_2          array_tes+     docker     worker          1  COMPLETED      0:0
+4_2.batch         batch                worker          1  COMPLETED      0:0
+4_3          array_tes+     docker     worker          1  COMPLETED      0:0
+4_3.batch         batch                worker          1  COMPLETED      0:0
+4_4          array_tes+     docker     worker          1  COMPLETED      0:0
+4_4.batch         batch                worker          1  COMPLETED      0:0
+4_5          array_tes+     docker     worker          1  COMPLETED      0:0
+4_5.batch         batch                worker          1  COMPLETED      0:0
+4_6          array_tes+     docker     worker          1  COMPLETED      0:0
+4_6.batch         batch                worker          1  COMPLETED      0:0
+4_7          array_tes+     docker     worker          1  COMPLETED      0:0
+4_7.batch         batch                worker          1  COMPLETED      0:0
+4_8          array_tes+     docker     worker          1  COMPLETED      0:0
+4_8.batch         batch                worker          1  COMPLETED      0:0
+4_9          array_tes+     docker     worker          1  COMPLETED      0:0
+4_9.batch         batch                worker          1  COMPLETED      0:0
+4_10         array_tes+     docker     worker          1  COMPLETED      0:0
+4_10.batch        batch                worker          1  COMPLETED      0:0
+4_11         array_tes+     docker     worker          1  COMPLETED      0:0
+4_11.batch        batch                worker          1  COMPLETED      0:0
+4_12         array_tes+     docker     worker          1  COMPLETED      0:0
+4_12.batch        batch                worker          1  COMPLETED      0:0
+4_13         array_tes+     docker     worker          1  COMPLETED      0:0
+4_13.batch        batch                worker          1  COMPLETED      0:0
+4_14         array_tes+     docker     worker          1  COMPLETED      0:0
+4_14.batch        batch                worker          1  COMPLETED      0:0
+4_15         array_tes+     docker     worker          1  COMPLETED      0:0
+4_15.batch        batch                worker          1  COMPLETED      0:0
+4_16         array_tes+     docker     worker          1  COMPLETED      0:0
+4_16.batch        batch                worker          1  COMPLETED      0:0
+4_17         array_tes+     docker     worker          1  COMPLETED      0:0
+4_17.batch        batch                worker          1  COMPLETED      0:0
+4_18         array_tes+     docker     worker          1  COMPLETED      0:0
+4_18.batch        batch                worker          1  COMPLETED      0:0
+4_19         array_tes+     docker     worker          1  COMPLETED      0:0
+4_19.batch        batch                worker          1  COMPLETED      0:0
+```
+## Examples using MPI
+
+The examples make use of the following commands.
+
+- `ompi_info` - [man page](https://www.open-mpi.org/doc/v3.0/man1/ompi_info.1.php)
+- `mpicc` - [man page](https://www.open-mpi.org/doc/v3.0/man1/mpicc.1.php)
+- `srun` - [man page](https://slurm.schedmd.com/srun.html)
+- `sbatch` - [man page](https://slurm.schedmd.com/sbatch.html)
+- `squeue` - [man page](https://slurm.schedmd.com/squeue.html)
+- `sacct` - [man page](https://slurm.schedmd.com/sacct.html)
+
+### controller
+
+All commands are issued as the user `worker` from the `controller` node
+
+```console
+$ docker exec -ti -u worker controller /bin/bash
+[worker@controller /]$ cd ~
+[worker@controller ~]$ pwd
+/home/worker
+```
+
+Available implementations of MPI
+
+```console
+$ srun --mpi=list
+srun: MPI types are...
+srun: none
+srun: pmi2
+srun: openmpi
+```
+
+About Open MPI
+
+```console
+$ ompi_info
+                 Package: Open MPI root@a6fd2549e449 Distribution
+                Open MPI: 3.0.1
+  Open MPI repo revision: v3.0.1
+   Open MPI release date: Mar 29, 2018
+                Open RTE: 3.0.1
+  Open RTE repo revision: v3.0.1
+   Open RTE release date: Mar 29, 2018
+                    OPAL: 3.0.1
+      OPAL repo revision: v3.0.1
+       OPAL release date: Mar 29, 2018
+                 MPI API: 3.1.0
+            Ident string: 3.0.1
+                  Prefix: /usr
+ Configured architecture: x86_64-redhat-linux-gnu
+          Configure host: a6fd2549e449
+           Configured by: root
+           Configured on: Fri Apr 13 02:32:11 UTC 2018
+          Configure host: a6fd2549e449
+  Configure command line: '--build=x86_64-redhat-linux-gnu'
+                          '--host=x86_64-redhat-linux-gnu'
+                          '--program-prefix=' '--disable-dependency-tracking'
+                          '--prefix=/usr' '--exec-prefix=/usr'
+                          '--bindir=/usr/bin' '--sbindir=/usr/sbin'
+                          '--sysconfdir=/etc' '--datadir=/usr/share'
+                          '--includedir=/usr/include' '--libdir=/usr/lib64'
+                          '--libexecdir=/usr/libexec' '--localstatedir=/var'
+                          '--sharedstatedir=/var/lib'
+                          '--mandir=/usr/share/man'
+                          '--infodir=/usr/share/info' '--with-slurm'
+                          '--with-pmi' '--with-libfabric='
+                          'LDFLAGS=-Wl,--build-id -Wl,-rpath -Wl,/lib64
+                          -Wl,--enable-new-dtags'
+...
+```
+
+Hello world using `mpi_hello.out`
+
+Create a new file called `mpi_hello.c` in `/home/worker` and compile it:
+
+```c
+/******************************************************************************
+ * * FILE: mpi_hello.c
+ * * DESCRIPTION:
+ * *   MPI tutorial example code: Simple hello world program
+ * * AUTHOR: Blaise Barney
+ * * LAST REVISED: 03/05/10
+ * ******************************************************************************/
+#include <mpi.h>
+#include <stdio.h>
+#include <stdlib.h>
+#define  MASTER 0
+
+int main (int argc, char *argv[]) {
+   int   numtasks, taskid, len;
+   char hostname[MPI_MAX_PROCESSOR_NAME];
+
+   MPI_Init(&argc, &argv);
+   MPI_Comm_size(MPI_COMM_WORLD, &numtasks);
+   MPI_Comm_rank(MPI_COMM_WORLD,&taskid);
+   MPI_Get_processor_name(hostname, &len);
+
+   printf ("Hello from task %d on %s!\n", taskid, hostname);
+
+   if (taskid == MASTER)
+      printf("MASTER: Number of MPI tasks is: %d\n",numtasks);
+
+   //while(1) {}
+
+   MPI_Finalize();
+}
+```
+
+```console
+$ mpicc mpi_hello.c -o mpi_hello.out
+$ ls | grep mpi
+mpi_hello.c
+mpi_hello.out
+```
+
+Test `mpi_hello.out` using the MPI versions available on the system with `srun`
+
+- single node using **openmpi**
+
+    ```console
+    $ srun --mpi=openmpi mpi_hello.out
+    Hello from task 0 on worker01.local.dev!
+    MASTER: Number of MPI tasks is: 1
+    $ sacct
+           JobID    JobName  Partition    Account  AllocCPUS      State ExitCode
+    ------------ ---------- ---------- ---------- ---------- ---------- --------
+    2            mpi_hello+     docker     worker          1  COMPLETED      0:0
+    ```
+- two nodes using **openmpi**
+
+    ```console
+    $ srun -N 2 --mpi=openmpi mpi_hello.out
+    Hello from task 0 on worker01.local.dev!
+    MASTER: Number of MPI tasks is: 2
+    Hello from task 1 on worker02.local.dev!
+    $ sacct
+           JobID    JobName  Partition    Account  AllocCPUS      State ExitCode
+    ------------ ---------- ---------- ---------- ---------- ---------- --------
+    2            mpi_hello+     docker     worker          1  COMPLETED      0:0
+    3            mpi_hello+     docker     worker          2  COMPLETED      0:0
+    ```
+- two nodes using **pmi2**
+
+    ```console
+    $ srun -N 2 --mpi=pmi2 mpi_hello.out
+    Hello from task 0 on worker01.local.dev!
+    MASTER: Number of MPI tasks is: 2
+    Hello from task 1 on worker02.local.dev!
+    $ sacct
+           JobID    JobName  Partition    Account  AllocCPUS      State ExitCode
+    ------------ ---------- ---------- ---------- ---------- ---------- --------
+    2            mpi_hello+     docker     worker          1  COMPLETED      0:0
+    3            mpi_hello+     docker     worker          2  COMPLETED      0:0
+    4            mpi_hello+     docker     worker          2  COMPLETED      0:0
+    ```
+
+Run a batch array with a sleep to observe the queue
+
+Create a file named `mpi_batch.job` in `/home/worker` (similar to the script used for the `sbatch --array` example from above), and make an output directory named `mpi_out`
+
+file `mpi_batch.job`:
+
+```bash
+#!/bin/bash
+
+#SBATCH -N 1
+#SBATCH -c 1
+#SBATCH -t 24:00:00
+###################
+## %A == SLURM_ARRAY_JOB_ID
+## %a == SLURM_ARRAY_TASK_ID (or index)
+#SBATCH -o mpi_out/%A_%a_out.txt
+#SBATCH -e mpi_out/%A_%a_err.txt
+
+snooze=$(( ( RANDOM % 10 )  + 1 ))
+sleep $snooze
+
+srun -N 2 --mpi=openmpi mpi_hello.out
+```
+
+Make directory `mpi_out`
+
+```console
+$ mkdir mpi_out
+```
+
+Run an `sbatch` array of 5 jobs, one at a time, using both nodes.
+
+```console
+$ sbatch -N 2 --array=1-5%1 mpi_batch.job
+Submitted batch job 10
+$ squeue
+             JOBID PARTITION     NAME     USER ST       TIME  NODES NODELIST(REASON)
+        10_[2-5%1]    docker mpi_batc   worker PD       0:00      2 (JobArrayTaskLimit)
+              10_1    docker mpi_batc   worker  R       0:03      2 worker[01-02]
+$ sacct
+       JobID    JobName  Partition    Account  AllocCPUS      State ExitCode
+------------ ---------- ---------- ---------- ---------- ---------- --------
+...
+10_[2-5%1]   mpi_batch+     docker     worker          2    PENDING      0:0
+10_1         mpi_batch+     docker     worker          2  COMPLETED      0:0
+10_1.batch        batch                worker          1  COMPLETED      0:0
+10_1.0       mpi_hello+                worker          2  COMPLETED      0:0
+```
+...
+
+```console
+$ squeue
+             JOBID PARTITION     NAME     USER ST       TIME  NODES NODELIST(REASON)
+        10_[4-5%1]    docker mpi_batc   worker PD       0:00      2 (JobArrayTaskLimit)
+              10_3    docker mpi_batc   worker  R       0:05      2 worker[01-02]
+$ sacct
+       JobID    JobName  Partition    Account  AllocCPUS      State ExitCode
+------------ ---------- ---------- ---------- ---------- ---------- --------
+...
+10_[4-5%1]   mpi_batch+     docker     worker          2    PENDING      0:0
+10_1         mpi_batch+     docker     worker          2  COMPLETED      0:0
+10_1.batch        batch                worker          1  COMPLETED      0:0
+10_1.0       mpi_hello+                worker          2  COMPLETED      0:0
+10_2         mpi_batch+     docker     worker          2  COMPLETED      0:0
+10_2.batch        batch                worker          1  COMPLETED      0:0
+10_2.0       mpi_hello+                worker          2  COMPLETED      0:0
+10_3         mpi_batch+     docker     worker          2  COMPLETED      0:0
+10_3.batch        batch                worker          1  COMPLETED      0:0
+10_3.0       mpi_hello+                worker          2  COMPLETED      0:0
+```
+...
+
+```console
+$ squeue
+             JOBID PARTITION     NAME     USER ST       TIME  NODES NODELIST(REASON)
+$ sacct
+       JobID    JobName  Partition    Account  AllocCPUS      State ExitCode
+------------ ---------- ---------- ---------- ---------- ---------- --------
+...
+10_5         mpi_batch+     docker     worker          2  COMPLETED      0:0
+10_5.batch        batch                worker          1  COMPLETED      0:0
+10_5.0       mpi_hello+                worker          2  COMPLETED      0:0
+10_1         mpi_batch+     docker     worker          2  COMPLETED      0:0
+10_1.batch        batch                worker          1  COMPLETED      0:0
+10_1.0       mpi_hello+                worker          2  COMPLETED      0:0
+10_2         mpi_batch+     docker     worker          2  COMPLETED      0:0
+10_2.batch        batch                worker          1  COMPLETED      0:0
+10_2.0       mpi_hello+                worker          2  COMPLETED      0:0
+10_3         mpi_batch+     docker     worker          2  COMPLETED      0:0
+10_3.batch        batch                worker          1  COMPLETED      0:0
+10_3.0       mpi_hello+                worker          2  COMPLETED      0:0
+10_4         mpi_batch+     docker     worker          2  COMPLETED      0:0
+10_4.batch        batch                worker          1  COMPLETED      0:0
+10_4.0       mpi_hello+                worker          2  COMPLETED      0:0
+```
+
+Check the `mpi_out` output directory
+
+```console
+$ ls mpi_out/
+10_1_err.txt  10_2_err.txt  10_3_err.txt  10_4_err.txt  10_5_err.txt
+10_1_out.txt  10_2_out.txt  10_3_out.txt  10_4_out.txt  10_5_out.txt
+$ cat mpi_out/10_3_out.txt
+Hello from task 1 on worker02.local.dev!
+Hello from task 0 on worker01.local.dev!
+MASTER: Number of MPI tasks is: 2
+```
+
+
+
+## Tear down
+
+The containers, networks, and volumes associated with the cluster can be torn down by simply running:
+```
+./teardown.sh
+```
+
+Each step of this teardown may also be run individually:
+
+The containers can be stopped and removed using `docker-compose`
+
+```console
+$ docker-compose stop
+Stopping worker01   ... done
+Stopping database   ... done
+Stopping worker02   ... done
+Stopping controller ... done
+$ docker-compose rm -f
+Going to remove worker01, database, worker02, controller
+Removing worker01   ... done
+Removing database   ... done
+Removing worker02   ... done
+Removing controller ... done
+```
+
+The network and volumes can be removed using their respective `docker` commands
+
+- Volumes
+
+    ```console
+    $ docker volume list
+    DRIVER              VOLUME NAME
+    ...
+    local               slurmindocker_home
+    local               slurmindocker_secret
+    $ docker volume rm slurmindocker_home slurmindocker_secret
+    slurmindocker_home
+    slurmindocker_secret
+    ```
+
+- Network
+
+    ```console
+    $ docker network list
+    NETWORK ID          NAME                    DRIVER              SCOPE
+    ...
+    a94c168fb653        slurmindocker_slurm     bridge              local
+    $ docker network rm slurmindocker_slurm
+    slurmindocker_slurm
+    ```
+
+## References
+
+Slurm workload manager: [https://www.schedmd.com/index.php](https://www.schedmd.com/index.php)
+
+- Slurm is a highly configurable open-source workload manager. In its simplest configuration, it can be installed and configured in a few minutes (see [Caos NSA and Perceus: All-in-one Cluster Software Stack](http://www.linux-mag.com/id/7239/1/) by Jeffrey B. Layton). Use of optional plugins provides the functionality needed to satisfy the needs of demanding HPC centers. More complex configurations rely upon a database for archiving accounting records, managing resource limits by user or bank account, and supporting sophisticated scheduling algorithms.
+
+Docker: [https://www.docker.com](https://www.docker.com)
+
+- Docker is the company driving the container movement and the only container platform provider to address every application across the hybrid cloud. Today’s businesses are under pressure to digitally transform but are constrained by existing applications and infrastructure while rationalizing an increasingly diverse portfolio of clouds, datacenters and application architectures. Docker enables true independence between applications and infrastructure and developers and IT ops to unlock their potential and creates a model for better collaboration and innovation.
+
+OpenMPI: [https://www.open-mpi.org](https://www.open-mpi.org)
+
+- The Open MPI Project is an open source [Message Passing Interface](http://www.mpi-forum.org/) implementation that is developed and maintained by a consortium of academic, research, and industry partners. Open MPI is therefore able to combine the expertise, technologies, and resources from all across the High Performance Computing community in order to build the best MPI library available. Open MPI offers advantages for system and software vendors, application developers and computer science researchers.
+
+Lmod: [http://lmod.readthedocs.io/en/latest/index.html](http://lmod.readthedocs.io/en/latest/index.html)
+
+- Lmod is a Lua based module system that easily handles the MODULEPATH Hierarchical problem. Environment Modules provide a convenient way to dynamically change the users’ environment through modulefiles. This includes easily adding or removing directories to the PATH environment variable. Modulefiles for Library packages provide environment variables that specify where the library and header files can be found.
diff --git a/slurm/base/Dockerfile b/slurm/base/Dockerfile
new file mode 100644
index 0000000..6073bdb
--- /dev/null
+++ b/slurm/base/Dockerfile
@@ -0,0 +1,46 @@
+FROM krallin/centos-tini:7
+MAINTAINER Michael J. Stealey <stealey@renci.org>
+
+ENV SLURM_VERSION=19.05.1 \
+  MUNGE_UID=981 \
+  SLURM_UID=982 \
+  WORKER_UID=1000
+
+RUN groupadd -g $MUNGE_UID munge \
+  && useradd  -m -c "MUNGE Uid 'N' Gid Emporium" -d /var/lib/munge -u $MUNGE_UID -g munge  -s /sbin/nologin munge \
+  && groupadd -g $SLURM_UID slurm \
+  && useradd  -m -c "Slurm workload manager" -d /var/lib/slurm -u $SLURM_UID -g slurm  -s /bin/bash slurm \
+  && groupadd -g $WORKER_UID worker \
+  && useradd  -m -c "Workflow user" -d /home/worker -u $WORKER_UID -g worker  -s /bin/bash worker
+
+# install packages for general functionality
+RUN yum -y install \
+  epel-release \
+  && yum -y install \
+  sudo \
+  wget \
+  which \
+  tree \
+  mariadb-server \
+  mariadb-devel \
+  munge \
+  munge-libs \
+  munge-devel \
+  openssh-server \
+  openssh-clients
+
+# install slurm 19.05.1
+COPY rpms /packages
+# the torque and openmpi RPMs are skipped below: /usr/bin/mpiexec from slurm-torque conflicts with the openmpi install
+WORKDIR /packages
+RUN yum -y localinstall $(ls | grep -v -e 'torque' -e 'openmpi')
+WORKDIR /
+
+VOLUME ["/home", "/.secret"]
+
+#   22:         SSH
+# 3306:         MariaDB
+# 6817:         Slurm Ctl D
+# 6818:         Slurm D
+# 6819:         Slurm DBD
+EXPOSE 22 3306 6817 6818 6819
diff --git a/slurm/base/Makefile b/slurm/base/Makefile
new file mode 100644
index 0000000..ff5859c
--- /dev/null
+++ b/slurm/base/Makefile
@@ -0,0 +1,17 @@
+# Build (default target) or remove the scidas/slurm.base image; `build` first copies the RPMs next to the Dockerfile.
+SLURM_VERSION = 19.05.1
+IMAGE = scidas/slurm.base
+
+DIR := $(shell dirname $(realpath $(lastword $(MAKEFILE_LIST))))
+
+.PHONY: all build clean test
+
+all: build
+
+build:
+	cp -r "$(DIR)/../packages/centos-7/rpms" .
+	docker build -t $(IMAGE):$(SLURM_VERSION) .
+# $$(...) reaches the shell as $(...); with a single $ make expands it itself (to nothing) and the image is never removed.
+clean:
+	@[ -z "$$(docker images -q $(IMAGE):$(SLURM_VERSION))" ] || docker rmi $(IMAGE):$(SLURM_VERSION)
+	rm -rf rpms
diff --git a/slurm/base/README.md b/slurm/base/README.md
new file mode 100644
index 0000000..4b21b62
--- /dev/null
+++ b/slurm/base/README.md
@@ -0,0 +1,3 @@
+# Slurm Base Image
+
+TODO
diff --git a/slurm/controller/Dockerfile b/slurm/controller/Dockerfile
new file mode 100644
index 0000000..31cb001
--- /dev/null
+++ b/slurm/controller/Dockerfile
@@ -0,0 +1,47 @@
+FROM scidas/slurm.base:19.05.1
+MAINTAINER Michael J. Stealey <stealey@renci.org>
+
+# install openmpi 3.0.1
+RUN yum -y install \
+  gcc-c++ \
+  gcc-gfortran \
+  && yum -y localinstall \
+  /packages/openmpi-*.rpm
+
+# install Lmod 7.7
+RUN yum -y install \
+  lua-posix \
+  lua \
+  lua-filesystem \
+  lua-devel \
+  wget \
+  bzip2 \
+  expectk \
+  make \
+  && wget https://sourceforge.net/projects/lmod/files/Lmod-7.7.tar.bz2 \
+  && tar -xjvf Lmod-7.7.tar.bz2
+WORKDIR /Lmod-7.7
+RUN ./configure --prefix=/opt/apps \
+  && make install \
+  && ln -s /opt/apps/lmod/lmod/init/profile /etc/profile.d/z00_lmod.sh \
+  && ln -s /opt/apps/lmod/lmod/init/cshrc /etc/profile.d/z00_lmod.csh
+WORKDIR /
+
+ENV USE_SLURMDBD=true \
+  CLUSTER_NAME=snowflake \
+  CONTROL_MACHINE=controller \
+  SLURMCTLD_PORT=6817 \
+  SLURMD_PORT=6818 \
+  ACCOUNTING_STORAGE_HOST=database \
+  ACCOUNTING_STORAGE_PORT=6819 \
+  PARTITION_NAME=docker
+
+# clean up
+RUN rm -f /packages/slurm-*.rpm /packages/openmpi-*.rpm \
+  && yum clean all \
+  && rm -rf /var/cache/yum \
+  && rm -f /Lmod-7.7.tar.bz2
+
+COPY docker-entrypoint.sh /docker-entrypoint.sh
+
+ENTRYPOINT ["/usr/local/bin/tini", "--", "/docker-entrypoint.sh"]
diff --git a/slurm/controller/Makefile b/slurm/controller/Makefile
new file mode 100644
index 0000000..f4b5499
--- /dev/null
+++ b/slurm/controller/Makefile
@@ -0,0 +1,14 @@
+# Build (default target) or remove the scidas/slurm.controller image.
+SLURM_VERSION = 19.05.1
+IMAGE = scidas/slurm.controller
+
+.PHONY: all build clean test
+
+all: build
+
+build:
+	docker build -t $(IMAGE):$(SLURM_VERSION) .
+
+# $$(...) reaches the shell as $(...); with a single $ make expands it itself (to nothing) and the image is never removed.
+clean:
+	@[ -z "$$(docker images -q $(IMAGE):$(SLURM_VERSION))" ] || docker rmi $(IMAGE):$(SLURM_VERSION)
diff --git a/slurm/controller/README.md b/slurm/controller/README.md
new file mode 100644
index 0000000..74c15d3
--- /dev/null
+++ b/slurm/controller/README.md
@@ -0,0 +1,3 @@
+# Slurm Controller
+
+TODO
diff --git a/slurm/controller/docker-entrypoint.sh b/slurm/controller/docker-entrypoint.sh
new file mode 100755
index 0000000..3a58322
--- /dev/null
+++ b/slurm/controller/docker-entrypoint.sh
@@ -0,0 +1,200 @@
+#!/usr/bin/env bash
+set -e
+
+# Start the SSH daemon; /var/run/sshd and the RSA host key are created only when that directory is missing.
+_sshd_host() {
+  if [ ! -d /var/run/sshd ]; then
+    mkdir /var/run/sshd
+    ssh-keygen -t rsa -f /etc/ssh/ssh_host_rsa_key -N ''  # -N '': host key with an empty passphrase
+  fi
+  /usr/sbin/sshd
+}
+
+# Passwordless ssh for worker: write a helper script (key pair, authorized_keys, ssh config, tarball of ~/.ssh) and run it as worker.
+_ssh_worker() {
+  if [[ ! -d /home/worker ]]; then
+    mkdir -p /home/worker
+    chown -R worker:worker /home/worker
+  fi
+  cat > /home/worker/setup-worker-ssh.sh <<'EOF2'
+mkdir -p ~/.ssh
+chmod 0700 ~/.ssh
+ssh-keygen -b 2048 -t rsa -f ~/.ssh/id_rsa -q -N "" -C "$(whoami)@$(hostname)-$(date -I)"
+cat ~/.ssh/id_rsa.pub > ~/.ssh/authorized_keys
+chmod 0640 ~/.ssh/authorized_keys
+cat >> ~/.ssh/config <<EOF
+Host *
+  StrictHostKeyChecking no
+  UserKnownHostsFile /dev/null
+  LogLevel QUIET
+EOF
+chmod 0644 ~/.ssh/config
+cd ~/
+tar -czvf ~/worker-secret.tar.gz .ssh
+cd -
+EOF2
+  chmod +x /home/worker/setup-worker-ssh.sh
+  chown worker: /home/worker/setup-worker-ssh.sh
+  sudo -u worker /home/worker/setup-worker-ssh.sh  # executed as worker; ~ presumably resolves to /home/worker (see _copy_secrets)
+}
+
+# Start munge: fix ownership/permissions of its directories, create a key, launch munged and exercise it.
+_munge_start() {
+  chown -R munge: /etc/munge /var/lib/munge /var/log/munge /var/run/munge
+  chmod 0700 /etc/munge
+  chmod 0711 /var/lib/munge
+  chmod 0700 /var/log/munge
+  chmod 0755 /var/run/munge
+  /sbin/create-munge-key -f  # -f presumably replaces an existing key without prompting - TODO confirm
+  sudo -u munge /sbin/munged
+  munge -n
+  munge -n | unmunge  # encode a credential and decode it again; with set -e a failure aborts the entrypoint
+  remunge
+}
+
+# Copy the ssh tarball, the setup script and the munge key to /.secret for other nodes, then drop the copies in /home/worker.
+_copy_secrets() {
+  cp /home/worker/worker-secret.tar.gz /.secret/worker-secret.tar.gz
+  cp /home/worker/setup-worker-ssh.sh /.secret/setup-worker-ssh.sh
+  cp /etc/munge/munge.key /.secret/munge.key  # the database and worker entrypoints poll for this file
+  rm -f /home/worker/worker-secret.tar.gz
+  rm -f /home/worker/setup-worker-ssh.sh
+}
+
+# generate /etc/slurm/slurm.conf from the environment (the worker node list is hard-coded)
+_generate_slurm_conf() {
+  cat > /etc/slurm/slurm.conf <<EOF
+#
+# Example slurm.conf file. Please run configurator.html
+# (in doc/html) to build a configuration file customized
+# for your environment.
+#
+#
+# slurm.conf file generated by configurator.html.
+#
+# See the slurm.conf man page for more information.
+#
+ClusterName=$CLUSTER_NAME
+SlurmctldHost=$CONTROL_MACHINE
+#SlurmctldHostr=
+#
+SlurmUser=slurm
+#SlurmdUser=root
+SlurmctldPort=$SLURMCTLD_PORT
+SlurmdPort=$SLURMD_PORT
+AuthType=auth/munge
+#JobCredentialPrivateKey=
+#JobCredentialPublicCertificate=
+StateSaveLocation=/var/spool/slurm/ctld
+SlurmdSpoolDir=/var/spool/slurm/d
+SwitchType=switch/none
+MpiDefault=none
+SlurmctldPidFile=/var/run/slurmctld.pid
+SlurmdPidFile=/var/run/slurmd.pid
+ProctrackType=proctrack/pgid
+#PluginDir=
+#FirstJobId=
+ReturnToService=0
+#MaxJobCount=
+#PlugStackConfig=
+#PropagatePrioProcess=
+#PropagateResourceLimits=
+#PropagateResourceLimitsExcept=
+#Prolog=
+#Epilog=
+#SrunProlog=
+#SrunEpilog=
+#TaskProlog=
+#TaskEpilog=
+#TaskPlugin=
+#TrackWCKey=no
+#TreeWidth=50
+#TmpFS=
+#UsePAM=
+#
+# TIMERS
+SlurmctldTimeout=300
+SlurmdTimeout=300
+InactiveLimit=0
+MinJobAge=300
+KillWait=30
+Waittime=0
+#
+# SCHEDULING
+SchedulerType=sched/backfill
+#SchedulerAuth=
+#SelectType=select/linear
+FastSchedule=1
+#PriorityType=priority/multifactor
+#PriorityDecayHalfLife=14-0
+#PriorityUsageResetPeriod=14-0
+#PriorityWeightFairshare=100000
+#PriorityWeightAge=1000
+#PriorityWeightPartition=10000
+#PriorityWeightJobSize=1000
+#PriorityMaxAge=1-0
+#
+# LOGGING
+SlurmctldDebug=3
+SlurmctldLogFile=/var/log/slurmctld.log
+SlurmdDebug=3
+SlurmdLogFile=/var/log/slurmd.log
+JobCompType=jobcomp/none
+#JobCompLoc=
+#
+# ACCOUNTING
+JobAcctGatherType=jobacct_gather/linux
+#JobAcctGatherFrequency=30
+#
+AccountingStorageType=accounting_storage/slurmdbd
+AccountingStorageHost=$ACCOUNTING_STORAGE_HOST
+AccountingStoragePort=$ACCOUNTING_STORAGE_PORT
+#AccountingStorageLoc=
+#AccountingStoragePass=
+#AccountingStorageUser=
+#
+# COMPUTE NODES
+NodeName=worker[01-02] RealMemory=1800 CPUs=1 State=UNKNOWN
+PartitionName=$PARTITION_NAME Nodes=ALL Default=YES MaxTime=INFINITE State=UP
+EOF
+}
+
+# run slurmctld
+_slurmctld() {
+  if $USE_SLURMDBD; then
+    echo -n "cheking for slurmdbd.conf"
+    while [ ! -f /.secret/slurmdbd.conf ]; do
+      echo -n "."
+      sleep 1
+    done
+    echo ""
+  fi
+  mkdir -p /var/spool/slurm/ctld \
+    /var/spool/slurm/d \
+    /var/log/slurm
+  chown -R slurm: /var/spool/slurm/ctld \
+    /var/spool/slurm/d \
+    /var/log/slurm
+  touch /var/log/slurmctld.log
+  chown slurm: /var/log/slurmctld.log
+  if [[ ! -f /home/config/slurm.conf ]]; then
+    echo "### generate slurm.conf ###"
+    _generate_slurm_conf
+  else
+    echo "### use provided slurm.conf ###"
+    cp /home/config/slurm.conf /etc/slurm/slurm.conf
+  fi
+  sacctmgr -i add cluster "${CLUSTER_NAME}"
+  sleep 2s
+  /usr/sbin/slurmctld
+  cp -f /etc/slurm/slurm.conf /.secret/
+}
+
+### main ###
+_sshd_host
+_ssh_worker
+_munge_start
+_copy_secrets  # publishes the ssh tarball and munge key produced by the two steps above
+_slurmctld
+
+tail -f /dev/null  # never returns, so the entrypoint script stays in the foreground
diff --git a/slurm/database/Dockerfile b/slurm/database/Dockerfile
new file mode 100644
index 0000000..a5c20f4
--- /dev/null
+++ b/slurm/database/Dockerfile
@@ -0,0 +1,19 @@
+FROM scidas/slurm.base:19.05.1
+MAINTAINER Michael J. Stealey <stealey@renci.org>
+
+ENV DBD_ADDR=database \
+  DBD_HOST=database \
+  DBD_PORT=6819 \
+  STORAGE_HOST=database.local.dev \
+  STORAGE_PORT=3306 \
+  STORAGE_PASS=password \
+  STORAGE_USER=slurm
+
+# clean up
+RUN rm -f /packages/slurm-*.rpm /packages/openmpi-*.rpm \
+  && yum clean all \
+  && rm -rf /var/cache/yum
+
+COPY docker-entrypoint.sh /docker-entrypoint.sh
+
+ENTRYPOINT ["/usr/local/bin/tini", "--", "/docker-entrypoint.sh"]
diff --git a/slurm/database/Makefile b/slurm/database/Makefile
new file mode 100644
index 0000000..6ae70e3
--- /dev/null
+++ b/slurm/database/Makefile
@@ -0,0 +1,14 @@
+# Build (default target) or remove the scidas/slurm.database image.
+SLURM_VERSION = 19.05.1
+IMAGE = scidas/slurm.database
+
+.PHONY: all build clean test
+
+all: build
+
+build:
+	docker build -t $(IMAGE):$(SLURM_VERSION) .
+
+# $$(...) reaches the shell as $(...); with a single $ make expands it itself (to nothing) and the image is never removed.
+clean:
+	@[ -z "$$(docker images -q $(IMAGE):$(SLURM_VERSION))" ] || docker rmi $(IMAGE):$(SLURM_VERSION)
diff --git a/slurm/database/README.md b/slurm/database/README.md
new file mode 100644
index 0000000..f00dfd9
--- /dev/null
+++ b/slurm/database/README.md
@@ -0,0 +1,3 @@
+# Slurm Database
+
+TODO
diff --git a/slurm/database/docker-entrypoint.sh b/slurm/database/docker-entrypoint.sh
new file mode 100755
index 0000000..eb5c1a1
--- /dev/null
+++ b/slurm/database/docker-entrypoint.sh
@@ -0,0 +1,143 @@
+#!/usr/bin/env bash
+set -e
+
+SLURM_ACCT_DB_SQL=/slurm_acct_db.sql
+
+# Start the SSH daemon; /var/run/sshd and the RSA host key are created only when that directory is missing.
+_sshd_host() {
+  if [ ! -d /var/run/sshd ]; then
+    mkdir /var/run/sshd
+    ssh-keygen -t rsa -f /etc/ssh/ssh_host_rsa_key -N ''  # -N '': host key with an empty passphrase
+  fi
+  /usr/sbin/sshd
+}
+
+# slurm database user settings
+_slurm_acct_db() {
+  {
+    echo "create database slurm_acct_db;"
+    echo "create user '${STORAGE_USER}'@'${STORAGE_HOST}';"
+    echo "set password for '${STORAGE_USER}'@'${STORAGE_HOST}' = password('${STORAGE_PASS}');"
+    echo "grant usage on *.* to '${STORAGE_USER}'@'${STORAGE_HOST}';"
+    echo "grant all privileges on slurm_acct_db.* to '${STORAGE_USER}'@'${STORAGE_HOST}';"
+    echo "flush privileges;"
+  } >> $SLURM_ACCT_DB_SQL
+}
+
+# start database
+_mariadb_start() {
+  # mariadb somehow expects `resolveip` to be found under this path; see https://github.com/SciDAS/slurm-in-docker/issues/26
+  ln -s /usr/bin/resolveip /usr/libexec/resolveip
+  mysql_install_db
+  chown -R mysql: /var/lib/mysql/ /var/log/mariadb/ /var/run/mariadb
+  cd /var/lib/mysql
+  mysqld_safe --user=mysql &
+  cd /
+  _slurm_acct_db
+  sleep 5s
+  mysql -uroot < $SLURM_ACCT_DB_SQL
+}
+
+# Start munge using an existing key: wait for /.secret/munge.key, install it, fix permissions, launch and exercise munged.
+_munge_start_using_key() {
+  if [ ! -f /.secret/munge.key ]; then
+    echo -n "checking for munge.key"
+    while [ ! -f /.secret/munge.key ]; do
+      echo -n "."
+      sleep 1
+    done
+    echo ""
+  fi
+  cp /.secret/munge.key /etc/munge/munge.key
+  chown -R munge: /etc/munge /var/lib/munge /var/log/munge /var/run/munge
+  chmod 0700 /etc/munge
+  chmod 0711 /var/lib/munge
+  chmod 0700 /var/log/munge
+  chmod 0755 /var/run/munge
+  sudo -u munge /sbin/munged
+  munge -n
+  munge -n | unmunge  # encode a credential and decode it again; with set -e a failure aborts the entrypoint
+  remunge
+}
+
+# wait for worker user in shared /home volume
+_wait_for_worker() {
+  if [ ! -f /home/worker/.ssh/id_rsa.pub ]; then
+    echo -n "cheking for id_rsa.pub"
+    while [ ! -f /home/worker/.ssh/id_rsa.pub ]; do
+      echo -n "."
+      sleep 1
+    done
+    echo ""
+  fi
+}
+
+# generate /etc/slurm/slurmdbd.conf from the DBD_* and STORAGE_* environment variables
+_generate_slurmdbd_conf() {
+  cat > /etc/slurm/slurmdbd.conf <<EOF
+#
+# Example slurmdbd.conf file.
+#
+# See the slurmdbd.conf man page for more information.
+#
+# Archive info
+#ArchiveJobs=yes
+#ArchiveDir="/tmp"
+#ArchiveSteps=yes
+#ArchiveScript=
+#JobPurge=12
+#StepPurge=1
+#
+# Authentication info
+AuthType=auth/munge
+AuthInfo=/var/run/munge/munge.socket.2
+#
+# slurmDBD info
+DbdAddr=$DBD_ADDR
+DbdHost=$DBD_HOST
+DbdPort=$DBD_PORT
+SlurmUser=slurm
+#MessageTimeout=300
+DebugLevel=4
+#DefaultQOS=normal,standby
+LogFile=/var/log/slurm/slurmdbd.log
+PidFile=/var/run/slurmdbd.pid
+#PluginDir=/usr/lib/slurm
+#PrivateData=accounts,users,usage,jobs
+#TrackWCKey=yes
+#
+# Database info
+StorageType=accounting_storage/mysql
+StorageHost=$STORAGE_HOST
+StoragePort=$STORAGE_PORT
+StoragePass=$STORAGE_PASS
+StorageUser=$STORAGE_USER
+StorageLoc=slurm_acct_db
+EOF
+}
+
+# run slurmdbd
+_slurmdbd() {
+  mkdir -p /var/spool/slurm/d \
+    /var/log/slurm
+  chown slurm: /var/spool/slurm/d \
+    /var/log/slurm
+  if [[ ! -f /home/config/slurmdbd.conf ]]; then
+    echo "### generate slurmdbd.conf ###"
+    _generate_slurmdbd_conf
+  else
+    echo "### use provided slurmdbd.conf ###"
+    cp /home/config/slurmdbd.conf /etc/slurm/slurmdbd.conf
+  fi
+  /usr/sbin/slurmdbd
+  cp /etc/slurm/slurmdbd.conf /.secret/slurmdbd.conf
+}
+
+### main ###
+_sshd_host
+_mariadb_start
+_munge_start_using_key  # blocks until /.secret/munge.key exists
+_wait_for_worker
+_slurmdbd
+
+tail -f /dev/null  # never returns, so the entrypoint script stays in the foreground
diff --git a/slurm/worker/Dockerfile b/slurm/worker/Dockerfile
new file mode 100644
index 0000000..cd4ca6d
--- /dev/null
+++ b/slurm/worker/Dockerfile
@@ -0,0 +1,39 @@
+FROM scidas/slurm.base:19.05.1
+MAINTAINER Michael J. Stealey <stealey@renci.org>
+
+# install openmpi 3.0.1
+RUN yum -y install \
+  gcc-c++ \
+  gcc-gfortran \
+  && yum -y localinstall \
+  /packages/openmpi-*.rpm
+
+# install Lmod 7.7
+RUN yum -y install \
+  lua-posix \
+  lua \
+  lua-filesystem \
+  lua-devel \
+  wget \
+  bzip2 \
+  expectk \
+  make \
+  && wget https://sourceforge.net/projects/lmod/files/Lmod-7.7.tar.bz2 \
+  && tar -xjvf Lmod-7.7.tar.bz2
+WORKDIR /Lmod-7.7
+RUN ./configure --prefix=/opt/apps \
+  && make install \
+  && ln -s /opt/apps/lmod/lmod/init/profile /etc/profile.d/z00_lmod.sh \
+  && ln -s /opt/apps/lmod/lmod/init/cshrc /etc/profile.d/z00_lmod.csh
+
+WORKDIR /home/worker
+
+# clean up
+RUN rm -f /packages/slurm-*.rpm /packages/openmpi-*.rpm \
+  && yum clean all \
+  && rm -rf /var/cache/yum \
+  && rm -f /Lmod-7.7.tar.bz2
+
+COPY docker-entrypoint.sh /docker-entrypoint.sh
+
+ENTRYPOINT ["/usr/local/bin/tini", "--", "/docker-entrypoint.sh"]
diff --git a/slurm/worker/Makefile b/slurm/worker/Makefile
new file mode 100644
index 0000000..5b03cf7
--- /dev/null
+++ b/slurm/worker/Makefile
@@ -0,0 +1,13 @@
+# Build (default target) or remove the scidas/slurm.worker image.
+SLURM_VERSION = 19.05.1
+IMAGE = scidas/slurm.worker
+
+.PHONY: all build clean test
+
+all: build
+
+build:
+	docker build -t $(IMAGE):$(SLURM_VERSION) .
+# $$(...) reaches the shell as $(...); with a single $ make expands it itself (to nothing) and the image is never removed.
+clean:
+	@[ -z "$$(docker images -q $(IMAGE):$(SLURM_VERSION))" ] || docker rmi $(IMAGE):$(SLURM_VERSION)
diff --git a/slurm/worker/README.md b/slurm/worker/README.md
new file mode 100644
index 0000000..4f897ec
--- /dev/null
+++ b/slurm/worker/README.md
@@ -0,0 +1,3 @@
+# Slurm Worker
+
+TODO
diff --git a/slurm/worker/docker-entrypoint.sh b/slurm/worker/docker-entrypoint.sh
new file mode 100755
index 0000000..f18b9f7
--- /dev/null
+++ b/slurm/worker/docker-entrypoint.sh
@@ -0,0 +1,69 @@
+#!/usr/bin/env bash
+set -e
+
+# Start the SSH daemon; /var/run/sshd and the RSA host key are created only when that directory is missing.
+_sshd_host() {
+  if [ ! -d /var/run/sshd ]; then
+    mkdir /var/run/sshd
+    ssh-keygen -t rsa -f /etc/ssh/ssh_host_rsa_key -N ''  # -N '': host key with an empty passphrase
+  fi
+  /usr/sbin/sshd
+}
+
+# Start munge using an existing key: wait for /.secret/munge.key, install it, fix permissions, launch and exercise munged.
+_munge_start_using_key() {
+  echo -n "checking for munge.key"
+  while [ ! -f /.secret/munge.key ]; do
+    echo -n "."
+    sleep 1
+  done
+  echo ""
+  cp /.secret/munge.key /etc/munge/munge.key
+  chown -R munge: /etc/munge /var/lib/munge /var/log/munge /var/run/munge
+  chmod 0700 /etc/munge
+  chmod 0711 /var/lib/munge
+  chmod 0700 /var/log/munge
+  chmod 0755 /var/run/munge
+  sudo -u munge /sbin/munged
+  munge -n
+  munge -n | unmunge  # encode a credential and decode it again; with set -e a failure aborts the entrypoint
+  remunge
+}
+
+# wait for worker user in shared /home volume
+_wait_for_worker() {
+  if [ ! -f /home/worker/.ssh/id_rsa.pub ]; then
+    echo -n "cheking for id_rsa.pub"
+    while [ ! -f /home/worker/.ssh/id_rsa.pub ]; do
+      echo -n "."
+      sleep 1
+    done
+    echo ""
+  fi
+}
+
+# Run slurmd: wait for /.secret/slurm.conf, install it as /etc/slurm/slurm.conf, prepare spool dir and log file, start the daemon.
+_slurmd() {
+  if [ ! -f /.secret/slurm.conf ]; then
+    echo -n "checking for slurm.conf"
+    while [ ! -f /.secret/slurm.conf ]; do
+      echo -n "."
+      sleep 1
+    done
+    echo ""
+  fi
+  mkdir -p /var/spool/slurm/d
+  chown slurm: /var/spool/slurm/d
+  cp /.secret/slurm.conf /etc/slurm/slurm.conf
+  touch /var/log/slurmd.log
+  chown slurm: /var/log/slurmd.log
+  /usr/sbin/slurmd
+}
+
+### main ###
+_sshd_host
+_munge_start_using_key  # blocks until /.secret/munge.key exists
+_wait_for_worker
+_slurmd
+
+tail -f /dev/null  # never returns, so the entrypoint script stays in the foreground