mirror of
				https://github.com/ClusterCockpit/cc-docker.git
				synced 2025-10-30 00:35:06 +01:00 
			
		
		
		
	Added new Docker Compose setup
This commit is contained in:
		
							
								
								
									
										107
									
								
								Dockerfile
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										107
									
								
								Dockerfile
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,107 @@ | |||||||
|  | FROM rockylinux:8 | ||||||
|  |  | ||||||
|  | LABEL org.opencontainers.image.source="https://github.com/giovtorres/slurm-docker-cluster" \ | ||||||
|  |       org.opencontainers.image.title="slurm-docker-cluster" \ | ||||||
|  |       org.opencontainers.image.description="Slurm Docker cluster on Rocky Linux 8" \ | ||||||
|  |       org.label-schema.docker.cmd="docker-compose up -d" \ | ||||||
|  |       maintainer="Giovanni Torres" | ||||||
|  |  | ||||||
|  | ARG SLURM_TAG=slurm-21-08-6-1 | ||||||
|  | ARG GOSU_VERSION=1.11 | ||||||
|  |  | ||||||
|  | # Install the toolchain and libraries used to build Slurm from source, the | ||||||
|  | # services that run inside the containers (munge, MariaDB) and a few | ||||||
|  | # interactive conveniences (nano, vim, bash-completion). | ||||||
|  | # The powertools repository is enabled before the main install | ||||||
|  | # (NOTE(review): presumably required for some of the -devel packages). | ||||||
|  | # Packages are kept in alphabetical order so that additions diff cleanly. | ||||||
|  | # On Rocky Linux 8 yum is provided by dnf and caches under /var/cache/dnf, so | ||||||
|  | # that directory is removed in the same layer (the legacy /var/cache/yum path | ||||||
|  | # is removed as well for safety). | ||||||
|  | RUN set -ex \ | ||||||
|  |     && yum makecache \ | ||||||
|  |     && yum -y update \ | ||||||
|  |     && yum -y install dnf-plugins-core \ | ||||||
|  |     && yum config-manager --set-enabled powertools \ | ||||||
|  |     && yum -y install \ | ||||||
|  |        bash-completion \ | ||||||
|  |        bzip2 \ | ||||||
|  |        cmake \ | ||||||
|  |        gcc \ | ||||||
|  |        gcc-c++ \ | ||||||
|  |        git \ | ||||||
|  |        gnupg \ | ||||||
|  |        http-parser-devel \ | ||||||
|  |        json-c-devel \ | ||||||
|  |        make \ | ||||||
|  |        mariadb-devel \ | ||||||
|  |        mariadb-server \ | ||||||
|  |        munge \ | ||||||
|  |        munge-devel \ | ||||||
|  |        nano \ | ||||||
|  |        perl \ | ||||||
|  |        psmisc \ | ||||||
|  |        python3 \ | ||||||
|  |        python3-devel \ | ||||||
|  |        python3-pip \ | ||||||
|  |        vim-enhanced \ | ||||||
|  |        wget \ | ||||||
|  |     && yum clean all \ | ||||||
|  |     && rm -rf /var/cache/dnf /var/cache/yum | ||||||
|  |  | ||||||
|  | # Make the unversioned `python` command resolve to Python 3. | ||||||
|  | RUN alternatives --set python /usr/bin/python3 | ||||||
|  |  | ||||||
|  | # --no-cache-dir keeps pip's download cache out of the image layer. | ||||||
|  | RUN pip3 install --no-cache-dir Cython nose | ||||||
|  |  | ||||||
|  | RUN set -ex \ | ||||||
|  |     && wget -O /usr/local/bin/gosu "https://github.com/tianon/gosu/releases/download/$GOSU_VERSION/gosu-amd64" \ | ||||||
|  |     && wget -O /usr/local/bin/gosu.asc "https://github.com/tianon/gosu/releases/download/$GOSU_VERSION/gosu-amd64.asc" \ | ||||||
|  |     && export GNUPGHOME="$(mktemp -d)" \ | ||||||
|  |     && gpg --batch --keyserver hkps://keys.openpgp.org --recv-keys B42F6819007F00F88E364FD4036A9C25BF357DD4 \ | ||||||
|  |     && gpg --batch --verify /usr/local/bin/gosu.asc /usr/local/bin/gosu \ | ||||||
|  |     && rm -rf "${GNUPGHOME}" /usr/local/bin/gosu.asc \ | ||||||
|  |     && chmod +x /usr/local/bin/gosu \ | ||||||
|  |     && gosu nobody true | ||||||
|  |  | ||||||
|  | WORKDIR /home | ||||||
|  |  | ||||||
|  | RUN set -x \ | ||||||
|  |     && git clone https://gitlab.hrz.tu-chemnitz.de/pika/pika-packages.git \ | ||||||
|  |     && git clone https://github.com/nats-io/nats.c.git \ | ||||||
|  |     && git clone -b ${SLURM_TAG} --single-branch --depth=1 https://github.com/SchedMD/slurm.git \ | ||||||
|  |     && pushd slurm \ | ||||||
|  |     && ./configure --enable-debug --prefix=/usr --sysconfdir=/etc/slurm \ | ||||||
|  |         --with-mysql_config=/usr/bin  --libdir=/usr/lib64 \ | ||||||
|  |     && make install \ | ||||||
|  |     && install -D -m644 etc/cgroup.conf.example /etc/slurm/cgroup.conf.example \ | ||||||
|  |     && install -D -m644 etc/slurm.conf.example /etc/slurm/slurm.conf.example \ | ||||||
|  |     && install -D -m644 etc/slurmdbd.conf.example /etc/slurm/slurmdbd.conf.example \ | ||||||
|  |     && install -D -m644 contribs/slurm_completion_help/slurm_completion.sh /etc/profile.d/slurm_completion.sh \ | ||||||
|  |     && popd \ | ||||||
|  |     && cp -r slurm /opt \ | ||||||
|  |     && groupadd -r --gid=990 slurm \ | ||||||
|  |     && useradd -r -g slurm --uid=990 slurm \ | ||||||
|  |     && mkdir /etc/sysconfig/slurm \ | ||||||
|  |         /var/spool/slurmd \ | ||||||
|  |         /var/run/slurmd \ | ||||||
|  |         /var/run/slurmdbd \ | ||||||
|  |         /var/lib/slurmd \ | ||||||
|  |         /var/log/slurm \ | ||||||
|  |         /data \ | ||||||
|  |     && touch /var/lib/slurmd/node_state \ | ||||||
|  |         /var/lib/slurmd/front_end_state \ | ||||||
|  |         /var/lib/slurmd/job_state \ | ||||||
|  |         /var/lib/slurmd/resv_state \ | ||||||
|  |         /var/lib/slurmd/trigger_state \ | ||||||
|  |         /var/lib/slurmd/assoc_mgr_state \ | ||||||
|  |         /var/lib/slurmd/assoc_usage \ | ||||||
|  |         /var/lib/slurmd/qos_usage \ | ||||||
|  |         /var/lib/slurmd/fed_mgr_state \ | ||||||
|  |     && chown -R slurm:slurm /var/*/slurm* \ | ||||||
|  |     && /sbin/create-munge-key | ||||||
|  |  | ||||||
|  | COPY slurm-prep-pika_v4.c /home/slurm-prep-pika_v4.c | ||||||
|  | COPY makefile /home/makefile | ||||||
|  |  | ||||||
|  | COPY slurm.conf /etc/slurm/slurm.conf | ||||||
|  | COPY slurmdbd.conf /etc/slurm/slurmdbd.conf | ||||||
|  | RUN set -x \ | ||||||
|  |     && chown slurm:slurm /etc/slurm/slurmdbd.conf \ | ||||||
|  |     && chmod 600 /etc/slurm/slurmdbd.conf | ||||||
|  |  | ||||||
|  |  | ||||||
|  | COPY docker-entrypoint.sh /usr/local/bin/docker-entrypoint.sh | ||||||
|  | ENTRYPOINT ["/usr/local/bin/docker-entrypoint.sh"] | ||||||
|  |  | ||||||
|  | CMD ["slurmdbd"] | ||||||
							
								
								
									
										2
									
								
								LICENSE
									
									
									
									
									
								
							
							
						
						
									
										2
									
								
								LICENSE
									
									
									
									
									
								
							| @@ -1,6 +1,6 @@ | |||||||
| MIT License | MIT License | ||||||
|  |  | ||||||
| Copyright (c) 2021 ClusterCockpit | Copyright (c) 2019 Giovanni Torres | ||||||
|  |  | ||||||
| Permission is hereby granted, free of charge, to any person obtaining a copy | Permission is hereby granted, free of charge, to any person obtaining a copy | ||||||
| of this software and associated documentation files (the "Software"), to deal | of this software and associated documentation files (the "Software"), to deal | ||||||
|   | |||||||
							
								
								
									
										158
									
								
								README.md
									
									
									
									
									
								
							
							
						
						
									
										158
									
								
								README.md
									
									
									
									
									
								
							| @@ -1,74 +1,116 @@ | |||||||
| # cc-docker | # Slurm Docker Cluster | ||||||
|  |  | ||||||
| This is a `docker-compose` setup which provides a quickly started environment for ClusterCockpit development and testing, using `cc-backend`. | This is a multi-container Slurm cluster using docker-compose.  The compose file | ||||||
| A number of services are readily available as Docker containers (nats, cc-metric-store, InfluxDB, LDAP), or can easily be added by manual configuration (MySQL). | creates named volumes for persistent storage of MySQL data files as well as | ||||||
|  | Slurm state and log directories. | ||||||
|  |  | ||||||
| It includes the following containers: | ## Containers and Volumes | ||||||
| * nats (Default) |  | ||||||
| * cc-metric-store (Default) |  | ||||||
| * influxdb (Default) |  | ||||||
| * openldap (Default) |  | ||||||
| * mysql (Optional) |  | ||||||
| * mariadb (Optional) |  | ||||||
| * phpmyadmin (Optional) |  | ||||||
|  |  | ||||||
| The setup comes with fixture data for a Job archive, cc-metric-store checkpoints, InfluxDB, MySQL, and a LDAP user directory. | The compose file will run the following containers: | ||||||
|  |  | ||||||
| ## Known Issues | * mysql | ||||||
|  | * slurmdbd | ||||||
|  | * slurmctld | ||||||
|  | * c1 (slurmd) | ||||||
|  | * c2 (slurmd) | ||||||
|  |  | ||||||
| * `docker-compose` installed on Ubuntu (18.04, 20.04) via `apt-get` can not correctly parse `docker-compose.yml` due to version differences. Install latest version of `docker-compose` from https://docs.docker.com/compose/install/ instead. | The compose file will create the following named volumes: | ||||||
| * You need to ensure that no other web server is running on ports 8080 (cc-backend), 8081 (phpmyadmin), 8084 (cc-metric-store), 8086 (InfluxDB), 4222 and 8222 (Nats), or 3306 (MySQL). If one or more ports are already in use, you have to adapt the related config accordingly. |  | ||||||
| * Existing VPN connections sometimes cause problems with docker. If `docker-compose` does not start up correctly, try disabling any active VPN connection. Refer to https://stackoverflow.com/questions/45692255/how-make-openvpn-work-with-docker for further information. |  | ||||||
|  |  | ||||||
| ## Configuration Templates | * etc_munge         ( -> /etc/munge     ) | ||||||
|  | * etc_slurm         ( -> /etc/slurm     ) | ||||||
|  | * slurm_jobdir      ( -> /data          ) | ||||||
|  | * var_lib_mysql     ( -> /var/lib/mysql ) | ||||||
|  | * var_log_slurm     ( -> /var/log/slurm ) | ||||||
|  |  | ||||||
| Located in `./templates` | ## Building the Docker Image | ||||||
| * `docker-compose.yml.default`: Docker-Compose file to setup cc-metric-store, InfluxDB, MariaDB, PhpMyadmin, and LDAP containers (Default). Used in `setupDev.sh`. |  | ||||||
| * `docker-compose.yml.mysql`: Docker-Compose configuration template if MySQL is desired instead of MariaDB. |  | ||||||
| * `env.default`: Environment variables for setup with cc-metric-store, InfluxDB, MariaDB, PhpMyadmin, and LDAP containers (Default). Used in `setupDev.sh`. |  | ||||||
| * `env.mysql`: Additional environment variables required if MySQL is desired instead of MariaDB. |  | ||||||
|  |  | ||||||
| ## Setup | Build the image locally: | ||||||
|  |  | ||||||
| 1. Clone `cc-backend` repository in chosen base folder: `$> git clone https://github.com/ClusterCockpit/cc-backend.git` | ```console | ||||||
|  | docker build -t slurm-docker-cluster:21.08.6 . | ||||||
| 2. Run `$ ./setupDev.sh`:  **NOTICE** The script will download files of a total size of 338MB (mostly for the InfluxDB data). |  | ||||||
|  |  | ||||||
| 3. The setup-script launches the supporting container stack in the background automatically if everything went well. Run `$> ./cc-backend/cc-backend` to start `cc-backend`. |  | ||||||
|  |  | ||||||
| 4. By default, you can access `cc-backend` in your browser at `http://localhost:8080`. You can shut down the cc-backend server by pressing `CTRL-C`, remember to also shut down all containers via `$> docker-compose down` afterwards. |  | ||||||
|  |  | ||||||
| 5. You can restart the containers with: `$> docker-compose up -d`. |  | ||||||
|  |  | ||||||
| ## Post-Setup Adjustment for using `influxdb` |  | ||||||
|  |  | ||||||
| When using `influxdb` as a metric database, one must adjust the following files: |  | ||||||
| * `cc-backend/var/job-archive/emmy/cluster.json` |  | ||||||
| * `cc-backend/var/job-archive/woody/cluster.json` |  | ||||||
|  |  | ||||||
| In the JSON, exchange the content of the `metricDataRepository`-Entry (By default configured for `cc-metric-store`) with: |  | ||||||
| ``` | ``` | ||||||
| "metricDataRepository": { |  | ||||||
|     "kind": "influxdb", | Build a different version of Slurm using Docker build args and the Slurm Git | ||||||
|     "url": "http://localhost:8086", | tag: | ||||||
|     "token": "egLfcf7fx0FESqFYU3RpAAbj", |  | ||||||
|     "bucket": "ClusterCockpit", | ```console | ||||||
|     "org": "ClusterCockpit", | docker build --build-arg SLURM_TAG="slurm-19-05-2-1" -t slurm-docker-cluster:19.05.2 . | ||||||
|     "skiptls": false | ``` | ||||||
| } |  | ||||||
|  | Or equivalently using `docker-compose`: | ||||||
|  |  | ||||||
|  | ```console | ||||||
|  | SLURM_TAG=slurm-19-05-2-1 IMAGE_TAG=19.05.2 docker-compose build | ||||||
| ``` | ``` | ||||||
|  |  | ||||||
|  |  | ||||||
| ## Usage | ## Starting the Cluster | ||||||
|  |  | ||||||
| Credentials for the preconfigured demo user are: | Run `docker-compose` to instantiate the cluster: | ||||||
| * User: `demo` |  | ||||||
| * Password: `AdminDev` |  | ||||||
|  |  | ||||||
| You can also login as regular user using any credential in the LDAP user directory at `./data/ldap/users.ldif`. | ```console | ||||||
|  | IMAGE_TAG=19.05.2 docker-compose up -d | ||||||
|  | ``` | ||||||
|  |  | ||||||
| TODO: Update job archive and all other metric data. | ## Register the Cluster with SlurmDBD | ||||||
| The job archive with 1867 jobs originates from the second half of 2020. |  | ||||||
| Roughly 2700 jobs from the first week of 2021 are loaded with data from InfluxDB. | To register the cluster to the slurmdbd daemon, run the `register_cluster.sh` | ||||||
| Some views of ClusterCockpit (e.g. the Users view) show the last week or month. | script: | ||||||
| To show some data there you have to set the filter to time periods with jobs (August 2020 to January 2021). |  | ||||||
|  | ```console | ||||||
|  | ./register_cluster.sh | ||||||
|  | ``` | ||||||
|  |  | ||||||
|  | > Note: You may have to wait a few seconds for the cluster daemons to become | ||||||
|  | > ready before registering the cluster.  Otherwise, you may get an error such | ||||||
|  | > as **sacctmgr: error: Problem talking to the database: Connection refused**. | ||||||
|  | > | ||||||
|  | > You can check the status of the cluster by viewing the logs: `docker-compose | ||||||
|  | > logs -f` | ||||||
|  |  | ||||||
|  | ## Accessing the Cluster | ||||||
|  |  | ||||||
|  | Use `docker exec` to run a bash shell on the controller container: | ||||||
|  |  | ||||||
|  | ```console | ||||||
|  | docker exec -it slurmctld bash | ||||||
|  | ``` | ||||||
|  |  | ||||||
|  | From the shell, execute slurm commands, for example: | ||||||
|  |  | ||||||
|  | ```console | ||||||
|  | [root@slurmctld /]# sinfo | ||||||
|  | PARTITION AVAIL  TIMELIMIT  NODES  STATE NODELIST | ||||||
|  | normal*      up 5-00:00:00      2   idle c[1-2] | ||||||
|  | ``` | ||||||
|  |  | ||||||
|  | ## Submitting Jobs | ||||||
|  |  | ||||||
|  | The `slurm_jobdir` named volume is mounted on each Slurm container as `/data`. | ||||||
|  | Therefore, in order to see job output files while on the controller, change to | ||||||
|  | the `/data` directory when on the **slurmctld** container and then submit a job: | ||||||
|  |  | ||||||
|  | ```console | ||||||
|  | [root@slurmctld /]# cd /data/ | ||||||
|  | [root@slurmctld data]# sbatch --wrap="uptime" | ||||||
|  | Submitted batch job 2 | ||||||
|  | [root@slurmctld data]# ls | ||||||
|  | slurm-2.out | ||||||
|  | ``` | ||||||
|  |  | ||||||
|  | ## Stopping and Restarting the Cluster | ||||||
|  |  | ||||||
|  | ```console | ||||||
|  | docker-compose stop | ||||||
|  | docker-compose start | ||||||
|  | ``` | ||||||
|  |  | ||||||
|  | ## Deleting the Cluster | ||||||
|  |  | ||||||
|  | To remove all containers and volumes, run: | ||||||
|  |  | ||||||
|  | ```console | ||||||
|  | docker-compose stop | ||||||
|  | docker-compose rm -f | ||||||
|  | docker volume rm slurm-docker-cluster_etc_munge slurm-docker-cluster_etc_slurm slurm-docker-cluster_slurm_jobdir slurm-docker-cluster_var_lib_mysql slurm-docker-cluster_var_log_slurm | ||||||
|  | ``` | ||||||
|   | |||||||
| @@ -1,128 +1,84 @@ | |||||||
|  | version: "2.2" | ||||||
|  |  | ||||||
| services: | services: | ||||||
|   nats: |   mysql: | ||||||
|     container_name: nats |     image: mariadb:10.10 | ||||||
|     image: nats:alpine |     hostname: mysql | ||||||
|     ports: |     container_name: mysql | ||||||
|       - "4222:4222" |     environment: | ||||||
|       - "8222:8222" |       MYSQL_RANDOM_ROOT_PASSWORD: "yes" | ||||||
|  |       MYSQL_DATABASE: slurm_acct_db | ||||||
|  |       MYSQL_USER: slurm | ||||||
|  |       MYSQL_PASSWORD: password | ||||||
|  |     volumes: | ||||||
|  |       - var_lib_mysql:/var/lib/mysql | ||||||
|  |  | ||||||
|   cc-metric-store: |   slurmdbd: | ||||||
|     container_name: cc-metric-store |     image: slurm-docker-cluster:${IMAGE_TAG:-21.08} | ||||||
|     build: |     build: | ||||||
|       context: ./cc-metric-store |       context: . | ||||||
|     ports: |       args: | ||||||
|       - "8084:8084" |         SLURM_TAG: ${SLURM_TAG:-slurm-21-08-6-1} | ||||||
|  |     command: ["slurmdbd"] | ||||||
|  |     container_name: slurmdbd | ||||||
|  |     hostname: slurmdbd | ||||||
|     volumes: |     volumes: | ||||||
|       - ${DATADIR}/cc-metric-store:/data |       - etc_munge:/etc/munge | ||||||
|  |       - etc_slurm:/etc/slurm | ||||||
|  |       - var_log_slurm:/var/log/slurm | ||||||
|  |     expose: | ||||||
|  |       - "6819" | ||||||
|     depends_on: |     depends_on: | ||||||
|       - nats |       - mysql | ||||||
|  |  | ||||||
|   influxdb: |   slurmctld: | ||||||
|     container_name: influxdb |     image: slurm-docker-cluster:${IMAGE_TAG:-21.08} | ||||||
|     image: influxdb |     command: ["slurmctld"] | ||||||
|     command: ["--reporting-disabled"] |  | ||||||
|     environment: |  | ||||||
|       DOCKER_INFLUXDB_INIT_MODE: setup |  | ||||||
|       DOCKER_INFLUXDB_INIT_USERNAME: devel |  | ||||||
|       DOCKER_INFLUXDB_INIT_PASSWORD: ${INFLUXDB_PASSWORD} |  | ||||||
|       DOCKER_INFLUXDB_INIT_ORG: ${INFLUXDB_ORG} |  | ||||||
|       DOCKER_INFLUXDB_INIT_BUCKET: ${INFLUXDB_BUCKET} |  | ||||||
|       DOCKER_INFLUXDB_INIT_RETENTION: 100w |  | ||||||
|       DOCKER_INFLUXDB_INIT_ADMIN_TOKEN: ${INFLUXDB_ADMIN_TOKEN} |  | ||||||
|     ports: |  | ||||||
|       - "127.0.0.1:${INFLUXDB_PORT}:8086" |  | ||||||
|     volumes: |  | ||||||
|       - ${DATADIR}/influxdb/data:/var/lib/influxdb2 |  | ||||||
|       - ${DATADIR}/influxdb/config:/etc/influxdb2 |  | ||||||
|  |  | ||||||
|   openldap: |  | ||||||
|     container_name: ldap |  | ||||||
|     image: osixia/openldap:1.5.0 |  | ||||||
|     command: --copy-service --loglevel debug |  | ||||||
|     environment: |  | ||||||
|       - LDAP_ADMIN_PASSWORD=${LDAP_ADMIN_PASSWORD} |  | ||||||
|       - LDAP_ORGANISATION=${LDAP_ORGANISATION} |  | ||||||
|       - LDAP_DOMAIN=${LDAP_DOMAIN} |  | ||||||
|     volumes: |  | ||||||
|       - ${DATADIR}/ldap:/container/service/slapd/assets/config/bootstrap/ldif/custom |  | ||||||
|  |  | ||||||
|   mariadb: |  | ||||||
|     container_name: mariadb |  | ||||||
|     image: mariadb:latest |  | ||||||
|     command: ["--default-authentication-plugin=mysql_native_password"] |  | ||||||
|     environment: |  | ||||||
|       MARIADB_ROOT_PASSWORD: ${MARIADB_ROOT_PASSWORD} |  | ||||||
|       MARIADB_DATABASE: slurm_acct_db |  | ||||||
|       MARIADB_USER: slurm |  | ||||||
|       MARIADB_PASSWORD: demo |  | ||||||
|     ports: |  | ||||||
|       - "127.0.0.1:${MARIADB_PORT}:3306" |  | ||||||
|     volumes: |  | ||||||
|       - ${DATADIR}/mariadb:/etc/mysql/conf.d |  | ||||||
|       # - ${DATADIR}/sql-init:/docker-entrypoint-initdb.d |  | ||||||
|     cap_add: |  | ||||||
|       - SYS_NICE |  | ||||||
|  |  | ||||||
|   # mysql: |  | ||||||
|   #   container_name: mysql |  | ||||||
|   #   image: mysql:8.0.22 |  | ||||||
|   #   command: ["--default-authentication-plugin=mysql_native_password"] |  | ||||||
|   #   environment: |  | ||||||
|   #     MYSQL_ROOT_PASSWORD: ${MYSQL_ROOT_PASSWORD} |  | ||||||
|   #     MYSQL_DATABASE: ${MYSQL_DATABASE} |  | ||||||
|   #     MYSQL_USER: ${MYSQL_USER} |  | ||||||
|   #     MYSQL_PASSWORD: ${MYSQL_PASSWORD} |  | ||||||
|   #   ports: |  | ||||||
|   #     - "127.0.0.1:${MYSQL_PORT}:3306" |  | ||||||
|   #   # volumes: |  | ||||||
|   #     # - ${DATADIR}/sql-init:/docker-entrypoint-initdb.d |  | ||||||
|   #     # - ${DATADIR}/sqldata:/var/lib/mysql |  | ||||||
|   #   cap_add: |  | ||||||
|   #     - SYS_NICE |  | ||||||
|  |  | ||||||
|   slurm-controller: |  | ||||||
|     container_name: slurmctld |     container_name: slurmctld | ||||||
|     hostname: slurmctld |     hostname: slurmctld | ||||||
|     build: |  | ||||||
|       context: ./slurm/controller |  | ||||||
|     privileged: true |  | ||||||
|     volumes: |     volumes: | ||||||
|       - ${DATADIR}/slurm/home:/home |       - etc_munge:/etc/munge | ||||||
|       - ${DATADIR}/slurm/secret:/.secret |       - etc_slurm:/etc/slurm | ||||||
|  |       - slurm_jobdir:/data | ||||||
|   slurm-database: |       - var_log_slurm:/var/log/slurm | ||||||
|     container_name: slurmdb |     expose: | ||||||
|     hostname: slurmdb |       - "6817" | ||||||
|     build: |  | ||||||
|       context: ./slurm/database |  | ||||||
|     depends_on: |     depends_on: | ||||||
|       - mariadb |       - "slurmdbd" | ||||||
|       - slurm-controller |  | ||||||
|     privileged: true |  | ||||||
|     volumes: |  | ||||||
|       - ${DATADIR}/slurm/home:/home |  | ||||||
|       - ${DATADIR}/slurm/secret:/.secret |  | ||||||
|  |  | ||||||
|   slurm-worker01: |   c1: | ||||||
|     container_name: node01 |     image: slurm-docker-cluster:${IMAGE_TAG:-21.08} | ||||||
|     hostname: node01 |     command: ["slurmd"] | ||||||
|     build: |     hostname: c1 | ||||||
|       context: ./slurm/worker |     container_name: c1 | ||||||
|  |     volumes: | ||||||
|  |       - etc_munge:/etc/munge | ||||||
|  |       - etc_slurm:/etc/slurm | ||||||
|  |       - slurm_jobdir:/data | ||||||
|  |       - var_log_slurm:/var/log/slurm | ||||||
|  |     expose: | ||||||
|  |       - "6818" | ||||||
|     depends_on: |     depends_on: | ||||||
|       - slurm-controller |       - "slurmctld" | ||||||
|     privileged: true |  | ||||||
|     volumes: |  | ||||||
|       - ${DATADIR}/slurm/home:/home |  | ||||||
|       - ${DATADIR}/slurm/secret:/.secret |  | ||||||
|  |  | ||||||
|   # slurm-worker02: |   c2: | ||||||
|   #   container_name: node02 |     image: slurm-docker-cluster:${IMAGE_TAG:-21.08} | ||||||
|     # hostname: node02 |     command: ["slurmd"] | ||||||
|   #   build: |     hostname: c2 | ||||||
|   #     context: ./slurm/worker |     container_name: c2 | ||||||
|   #   depends_on: |     volumes: | ||||||
|   #     - slurm-controller |       - etc_munge:/etc/munge | ||||||
|   #   privileged: true |       - etc_slurm:/etc/slurm | ||||||
|   #   volumes: |       - slurm_jobdir:/data | ||||||
|       # - ${DATADIR}/slurm/home:/home |       - var_log_slurm:/var/log/slurm | ||||||
|       # - ${DATADIR}/slurm/secret:/.secret |     expose: | ||||||
|  |       - "6818" | ||||||
|  |     depends_on: | ||||||
|  |       - "slurmctld" | ||||||
|  |  | ||||||
|  | volumes: | ||||||
|  |   etc_munge: | ||||||
|  |   etc_slurm: | ||||||
|  |   slurm_jobdir: | ||||||
|  |   var_lib_mysql: | ||||||
|  |   var_log_slurm: | ||||||
|   | |||||||
							
								
								
									
										64
									
								
								docker-entrypoint.sh
									
									
									
									
									
										Executable file
									
								
							
							
						
						
									
										64
									
								
								docker-entrypoint.sh
									
									
									
									
									
										Executable file
									
								
							| @@ -0,0 +1,64 @@ | |||||||
|  | #!/bin/bash | ||||||
|  | set -e | ||||||
|  |  | ||||||
|  | # slurmdbd: start munged, wait until the accounting database accepts | ||||||
|  | # connections, then replace this shell with the slurmdbd daemon. | ||||||
|  | if [ "$1" = "slurmdbd" ] | ||||||
|  | then | ||||||
|  |     echo "---> Starting the MUNGE Authentication service (munged) ..." | ||||||
|  |     gosu munge /usr/sbin/munged | ||||||
|  |  | ||||||
|  |     echo "---> Starting the Slurm Database Daemon (slurmdbd) ..." | ||||||
|  |  | ||||||
|  |     { | ||||||
|  |         # slurmdbd.conf consists of Key=Value lines, so sourcing it makes | ||||||
|  |         # StorageHost, StorageUser and StoragePass available as shell variables. | ||||||
|  |         . /etc/slurm/slurmdbd.conf | ||||||
|  |         # Redirect stdout first and then stderr onto it so that both streams | ||||||
|  |         # are discarded; only the exit status of mysql drives the loop. | ||||||
|  |         # The expansions are quoted so values with spaces or globs stay intact. | ||||||
|  |         until echo "SELECT 1" | mysql -h "$StorageHost" -u"$StorageUser" -p"$StoragePass" > /dev/null 2>&1 | ||||||
|  |         do | ||||||
|  |             echo "-- Waiting for database to become active ..." | ||||||
|  |             sleep 2 | ||||||
|  |         done | ||||||
|  |     } | ||||||
|  |     echo "-- Database is now active ..." | ||||||
|  |  | ||||||
|  |     # exec so that slurmdbd replaces the shell and receives signals directly. | ||||||
|  |     exec gosu slurm /usr/sbin/slurmdbd -Dvvv | ||||||
|  | fi | ||||||
|  |  | ||||||
|  | if [ "$1" = "slurmctld" ] | ||||||
|  | then | ||||||
|  |     echo "---> Starting the MUNGE Authentication service (munged) ..." | ||||||
|  |     gosu munge /usr/sbin/munged | ||||||
|  |  | ||||||
|  |     echo "---> Waiting for slurmdbd to become active before starting slurmctld ..." | ||||||
|  |  | ||||||
|  |     until 2>/dev/null >/dev/tcp/slurmdbd/6819 | ||||||
|  |     do | ||||||
|  |         echo "-- slurmdbd is not available.  Sleeping ..." | ||||||
|  |         sleep 2 | ||||||
|  |     done | ||||||
|  |     echo "-- slurmdbd is now active ..." | ||||||
|  |  | ||||||
|  |     echo "---> Starting the Slurm Controller Daemon (slurmctld) ..." | ||||||
|  |     if /usr/sbin/slurmctld -V | grep -q '17.02' ; then | ||||||
|  |         exec gosu slurm /usr/sbin/slurmctld -Dvvv | ||||||
|  |     else | ||||||
|  |         exec gosu slurm /usr/sbin/slurmctld -i -Dvvv | ||||||
|  |     fi | ||||||
|  | fi | ||||||
|  |  | ||||||
|  | if [ "$1" = "slurmd" ] | ||||||
|  | then | ||||||
|  |     echo "---> Starting the MUNGE Authentication service (munged) ..." | ||||||
|  |     gosu munge /usr/sbin/munged | ||||||
|  |  | ||||||
|  |     echo "---> Waiting for slurmctld to become active before starting slurmd..." | ||||||
|  |  | ||||||
|  |     until 2>/dev/null >/dev/tcp/slurmctld/6817 | ||||||
|  |     do | ||||||
|  |         echo "-- slurmctld is not available.  Sleeping ..." | ||||||
|  |         sleep 2 | ||||||
|  |     done | ||||||
|  |     echo "-- slurmctld is now active ..." | ||||||
|  |  | ||||||
|  |     echo "---> Starting the Slurm Node Daemon (slurmd) ..." | ||||||
|  |     exec /usr/sbin/slurmd -Dvvv | ||||||
|  | fi | ||||||
|  |  | ||||||
|  | exec "$@" | ||||||
							
								
								
									
										30
									
								
								makefile
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										30
									
								
								makefile
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,30 @@ | |||||||
|  | # Builds $(SRC_FILE) into the shared object $(PLUGIN_FILE) using headers from | ||||||
|  | # the installed Slurm include directory and the Slurm source tree, and | ||||||
|  | # installs it into $(SLURM_LIB_DIR). | ||||||
|  | SLURM_ROOT_DIR = /usr | ||||||
|  | SLURM_INC_DIR = /usr/include/slurm | ||||||
|  | SLURM_LIB_DIR = /usr/lib64/slurm | ||||||
|  | SLURM_BUILD = 21.08.6 | ||||||
|  | SLURM_BUILD_DIR = /home/slurm | ||||||
|  |  | ||||||
|  | PLUGIN_TYPE = prep | ||||||
|  | PLUGIN_NAME = pika | ||||||
|  | PLUGIN_FILE = $(PLUGIN_TYPE)_$(PLUGIN_NAME).so | ||||||
|  |  | ||||||
|  | SRC_FILE = slurm-prep-pika_v4.c | ||||||
|  |  | ||||||
|  | CC      = gcc | ||||||
|  | # Include paths are derived from SLURM_BUILD_DIR so that overriding the | ||||||
|  | # variable relocates every source-tree include at once. | ||||||
|  | CFLAGS  ?= -Wall -fPIC -g -I$(SLURM_INC_DIR) -I$(SLURM_BUILD_DIR) -I$(SLURM_BUILD_DIR)/src | ||||||
|  | LDFLAGS ?= --shared -L. | ||||||
|  |  | ||||||
|  | # None of these targets produce a file of the same name; declaring them | ||||||
|  | # phony keeps a stray file called e.g. "clean" or "install" from masking them. | ||||||
|  | .PHONY: all default install clean mrproper | ||||||
|  |  | ||||||
|  | all: $(PLUGIN_FILE) | ||||||
|  |  | ||||||
|  | default: $(PLUGIN_FILE) | ||||||
|  |  | ||||||
|  | $(PLUGIN_FILE): $(SRC_FILE) | ||||||
|  | 	$(CC) $(CFLAGS) $(LDFLAGS) $^ -o $@ | ||||||
|  |  | ||||||
|  | install: $(PLUGIN_FILE) | ||||||
|  | 	install -m 755 $(PLUGIN_FILE) $(SLURM_LIB_DIR) | ||||||
|  |  | ||||||
|  | clean: | ||||||
|  | 	rm -f $(PLUGIN_FILE) | ||||||
|  |  | ||||||
|  | mrproper: clean | ||||||
							
								
								
									
										5
									
								
								register_cluster.sh
									
									
									
									
									
										Executable file
									
								
							
							
						
						
									
										5
									
								
								register_cluster.sh
									
									
									
									
									
										Executable file
									
								
							| @@ -0,0 +1,5 @@ | |||||||
|  | #!/bin/bash | ||||||
|  | # Adds the cluster "linux" to the accounting database via sacctmgr inside the | ||||||
|  | # slurmctld container, then restarts slurmdbd and slurmctld. | ||||||
|  | # With `set -e` the restart only runs when the registration succeeded. | ||||||
|  | set -e | ||||||
|  |  | ||||||
|  | docker exec slurmctld bash -c "/usr/bin/sacctmgr --immediate add cluster name=linux" | ||||||
|  | docker-compose restart slurmdbd slurmctld | ||||||
							
								
								
									
										1012
									
								
								slurm-prep-pika_v4.c
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										1012
									
								
								slurm-prep-pika_v4.c
									
									
									
									
									
										Normal file
									
								
							
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							
							
								
								
									
										95
									
								
								slurm.conf
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										95
									
								
								slurm.conf
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,95 @@ | |||||||
|  | # slurm.conf | ||||||
|  | # | ||||||
|  | # See the slurm.conf man page for more information. | ||||||
|  | # | ||||||
|  | ClusterName=linux | ||||||
|  | ControlMachine=slurmctld | ||||||
|  | ControlAddr=slurmctld | ||||||
|  | #BackupController= | ||||||
|  | #BackupAddr= | ||||||
|  | # | ||||||
|  | SlurmUser=slurm | ||||||
|  | #SlurmdUser=root | ||||||
|  | SlurmctldPort=6817 | ||||||
|  | SlurmdPort=6818 | ||||||
|  | AuthType=auth/munge | ||||||
|  | #JobCredentialPrivateKey= | ||||||
|  | #JobCredentialPublicCertificate= | ||||||
|  | StateSaveLocation=/var/lib/slurmd | ||||||
|  | SlurmdSpoolDir=/var/spool/slurmd | ||||||
|  | SwitchType=switch/none | ||||||
|  | MpiDefault=none | ||||||
|  | SlurmctldPidFile=/var/run/slurmd/slurmctld.pid | ||||||
|  | SlurmdPidFile=/var/run/slurmd/slurmd.pid | ||||||
|  | ProctrackType=proctrack/linuxproc | ||||||
|  | #PluginDir= | ||||||
|  | #CacheGroups=0 | ||||||
|  | #FirstJobId= | ||||||
|  | ReturnToService=0 | ||||||
|  | #MaxJobCount= | ||||||
|  | #PlugStackConfig= | ||||||
|  | #PropagatePrioProcess= | ||||||
|  | #PropagateResourceLimits= | ||||||
|  | #PropagateResourceLimitsExcept= | ||||||
|  | #Prolog= | ||||||
|  | #Epilog= | ||||||
|  | #SrunProlog= | ||||||
|  | #SrunEpilog= | ||||||
|  | #TaskProlog= | ||||||
|  | #TaskEpilog= | ||||||
|  | #TaskPlugin= | ||||||
|  | #TrackWCKey=no | ||||||
|  | #TreeWidth=50 | ||||||
|  | #TmpFS= | ||||||
|  | #UsePAM= | ||||||
|  | # | ||||||
|  | # TIMERS | ||||||
|  | SlurmctldTimeout=300 | ||||||
|  | SlurmdTimeout=300 | ||||||
|  | InactiveLimit=0 | ||||||
|  | MinJobAge=300 | ||||||
|  | KillWait=30 | ||||||
|  | Waittime=0 | ||||||
|  | # | ||||||
|  | # SCHEDULING | ||||||
|  | SchedulerType=sched/backfill | ||||||
|  | #SchedulerAuth= | ||||||
|  | #SchedulerPort= | ||||||
|  | #SchedulerRootFilter= | ||||||
|  | SelectType=select/cons_res | ||||||
|  | SelectTypeParameters=CR_CPU_Memory | ||||||
|  | FastSchedule=1 | ||||||
|  | #PriorityType=priority/multifactor | ||||||
|  | #PriorityDecayHalfLife=14-0 | ||||||
|  | #PriorityUsageResetPeriod=14-0 | ||||||
|  | #PriorityWeightFairshare=100000 | ||||||
|  | #PriorityWeightAge=1000 | ||||||
|  | #PriorityWeightPartition=10000 | ||||||
|  | #PriorityWeightJobSize=1000 | ||||||
|  | #PriorityMaxAge=1-0 | ||||||
|  | # | ||||||
|  | # LOGGING | ||||||
|  | SlurmctldDebug=3 | ||||||
|  | SlurmctldLogFile=/var/log/slurm/slurmctld.log | ||||||
|  | SlurmdDebug=3 | ||||||
|  | SlurmdLogFile=/var/log/slurm/slurmd.log | ||||||
|  | JobCompType=jobcomp/filetxt | ||||||
|  | JobCompLoc=/var/log/slurm/jobcomp.log | ||||||
|  | # | ||||||
|  | # ACCOUNTING | ||||||
|  | JobAcctGatherType=jobacct_gather/linux | ||||||
|  | JobAcctGatherFrequency=30 | ||||||
|  | # | ||||||
|  | AccountingStorageType=accounting_storage/slurmdbd | ||||||
|  | AccountingStorageHost=slurmdbd | ||||||
|  | AccountingStoragePort=6819 | ||||||
|  | #AccountingStorageLoc=slurm_acct_db | ||||||
|  | #AccountingStoragePass= | ||||||
|  | #AccountingStorageUser= | ||||||
|  | # | ||||||
|  | # COMPUTE NODES | ||||||
|  | NodeName=c[1-2] RealMemory=1000 State=UNKNOWN | ||||||
|  | # | ||||||
|  | # PARTITIONS | ||||||
|  | PartitionName=normal Default=yes Nodes=c[1-2] Priority=50 DefMemPerCPU=500 Shared=NO MaxNodes=2 MaxTime=5-00:00:00 DefaultTime=5-00:00:00 State=UP | ||||||
|  | #PrEpPlugins=pika | ||||||
							
								
								
									
										
											BIN
										
									
								
								slurm/.DS_Store
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										
											BIN
										
									
								
								slurm/.DS_Store
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
										
											Binary file not shown.
										
									
								
							
							
								
								
									
										37
									
								
								slurmdbd.conf
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										37
									
								
								slurmdbd.conf
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,37 @@ | |||||||
|  | # | ||||||
|  | # Example slurmdbd.conf file. | ||||||
|  | # | ||||||
|  | # See the slurmdbd.conf man page for more information. | ||||||
|  | # | ||||||
|  | # Archive info | ||||||
|  | #ArchiveJobs=yes | ||||||
|  | #ArchiveDir="/tmp" | ||||||
|  | #ArchiveSteps=yes | ||||||
|  | #ArchiveScript= | ||||||
|  | #JobPurge=12 | ||||||
|  | #StepPurge=1 | ||||||
|  | # | ||||||
|  | # Authentication info | ||||||
|  | AuthType=auth/munge | ||||||
|  | #AuthInfo=/var/run/munge/munge.socket.2 | ||||||
|  | # | ||||||
|  | # slurmDBD info | ||||||
|  | DbdAddr=slurmdbd | ||||||
|  | DbdHost=slurmdbd | ||||||
|  | #DbdPort=6819 | ||||||
|  | SlurmUser=slurm | ||||||
|  | #MessageTimeout=300 | ||||||
|  | DebugLevel=4 | ||||||
|  | #DefaultQOS=normal,standby | ||||||
|  | LogFile=/var/log/slurm/slurmdbd.log | ||||||
|  | PidFile=/var/run/slurmdbd/slurmdbd.pid | ||||||
|  | #PluginDir=/usr/lib/slurm | ||||||
|  | #PrivateData=accounts,users,usage,jobs | ||||||
|  | #TrackWCKey=yes | ||||||
|  | # | ||||||
|  | # Database info | ||||||
|  | StorageType=accounting_storage/mysql | ||||||
|  | StorageHost=mysql | ||||||
|  | StorageUser=slurm | ||||||
|  | StoragePass=password | ||||||
|  | #StorageLoc=slurm_acct_db | ||||||
		Reference in New Issue
	
	Block a user