# -----------------------------------------------------------------------------
# Commentary only: everything below, starting at the first "diff --git" header,
# is the unmodified patch text. These "#" lines sit ahead of the first header.
#
# What the patch changes, per file:
#   .gitignore
#       Adds ignore entries: data/cc-metric-store-source, data/ldap,
#       data/mariadb, data/slurm.
#   README.md, env-template.txt
#       File mode 100644 -> 100755 only; no content hunks.
#   docker-compose.yml
#       File mode 100644 -> 100755, plus two extra volume entries mapping
#       ./slurm/worker/cgroup.conf and ./slurm/controller/slurm.conf to
#       /home/config/cgroup.conf and /home/config/slurm.conf.
#   setupDev.sh
#       - Adds a 16-line echo banner right after the shebang.
#       - Removes `make` from directly after `cd cc-backend` and adds it back
#         after the `cp ./configs/config-demo.json config.json` step.
#       - Adds `mkdir -m777 data` before the checkpoint download section.
#       - Final hint now reads "./cc-backend/cc-backend -server"; the trailing
#         commented-out `# ./cc-backend/cc-backend` line is removed.
#   slurm/controller/docker-entrypoint.sh
#       - munge.key permissions: chmod 400 -> chmod 600.
#       - _copy_secrets now polls once per second until
#         /home/worker/worker-secret.tar.gz exists, before the cp commands.
#   slurm/controller/slurm.conf
#       Comments out ProctrackType=proctrack/linuxproc and FastSchedule=1.
#   slurm/database/docker-entrypoint.sh
#       Starts the daemon as `/usr/sbin/slurmdbd -Dvv` instead of bare slurmdbd.
#   slurm/database/slurmdbd.conf
#       Comments out PidFile=/var/run/slurmdbd/slurmdbd.pid.
#   slurm/worker/cgroup.conf
#       New file with five settings (CgroupPlugin=cgroup/v1 and four
#       Constrain* options).
#
# NOTE(review): in this stored form the hunks have their line breaks collapsed,
#   so in the "+ make + ./cc-backend -migrate-db" hunk it cannot be told from
#   here whether `./cc-backend -migrate-db` is an added line or context
#   following an added blank line - confirm against the real setupDev.sh.
# NOTE(review): `mkdir -m777 data` has no -p, so it reports an error when
#   ./data already exists (e.g. on a second run), and mode 777 is
#   world-writable. Consider `mkdir -p data` with a narrower mode - confirm
#   what the containers actually need.
# NOTE(review): the banner says the commands "add docker to the sudo
#   usergroup", but the commands it prints (`groupadd docker`,
#   `usermod -aG docker $USER`) add the user to a group named docker. The
#   wording looks inaccurate - confirm and reword.
# NOTE(review): the new wait loop in _copy_secrets has no timeout or retry
#   limit; if worker-secret.tar.gz never appears the entrypoint presumably
#   blocks forever - confirm this is acceptable.
# NOTE(review): presumably -D keeps slurmdbd in the foreground (check the
#   slurmdbd man page); if so the following `echo "Started slurmdbd"` only
#   runs after the daemon exits - confirm this is intended.
# -----------------------------------------------------------------------------
diff --git a/.gitignore b/.gitignore index 23c5b49..147c94d 100644 --- a/.gitignore +++ b/.gitignore @@ -3,6 +3,10 @@ data/job-archive/** data/influxdb data/sqldata data/cc-metric-store +data/cc-metric-store-source +data/ldap +data/mariadb +data/slurm cc-backend cc-backend/** .vscode diff --git a/README.md b/README.md old mode 100644 new mode 100755 diff --git a/docker-compose.yml b/docker-compose.yml old mode 100644 new mode 100755 index 7620c04..e45067c --- a/docker-compose.yml +++ b/docker-compose.yml @@ -104,6 +104,8 @@ services: volumes: - ${DATADIR}/slurm/home:/home - ${DATADIR}/slurm/secret:/.secret + - ./slurm/worker/cgroup.conf:/home/config/cgroup.conf + - ./slurm/controller/slurm.conf:/home/config/slurm.conf ports: - "6818:6818" diff --git a/env-template.txt b/env-template.txt old mode 100644 new mode 100755 diff --git a/setupDev.sh b/setupDev.sh index 14e9d81..81eeee6 100755 --- a/setupDev.sh +++ b/setupDev.sh @@ -1,4 +1,20 @@ #!/bin/bash +echo "" +echo "-----------------------------------------------------------------" +echo "Welcome to cc-docker automatic deployment script." +echo "Make sure you have sudo rights to run docker services" +echo "This script assumes that docker command is added to sudo group" +echo "This means that docker commands do not explicitly require" +echo "'sudo' keyword to run. You can use this following command:" +echo "" +echo "sudo groupadd docker" +echo "sudo usermod -aG docker $USER" +echo "" +echo "This will add docker to the sudo usergroup and all the docker" +echo "command will run as sudo by default without requiring" +echo "'sudo' keyword." +echo "-----------------------------------------------------------------" +echo "" # Check cc-backend, touch job.db if exists if [ ! -d cc-backend ]; then @@ -7,8 +23,6 @@ if [ ! -d cc-backend ]; then exit else cd cc-backend - make - if [ ! 
-d var ]; then wget https://hpc-mover.rrze.uni-erlangen.de/HPC-Data/0x7b58aefb/eig7ahyo6fo2bais0ephuf2aitohv1ai/job-archive-demo.tar tar xf job-archive-demo.tar @@ -17,6 +31,8 @@ else cp ./configs/env-template.txt .env cp ./configs/config-demo.json config.json + make + ./cc-backend -migrate-db ./cc-backend --init-db --add-user demo:admin:AdminDev cd .. @@ -28,6 +44,8 @@ else fi fi +mkdir -m777 data + # Download unedited checkpoint files to ./data/cc-metric-store-source/checkpoints if [ ! -d data/cc-metric-store-source ]; then mkdir -p data/cc-metric-store-source/checkpoints @@ -80,6 +98,5 @@ docker-compose up -d echo "" echo "Setup complete, containers are up by default: Shut down with 'docker-compose down'." -echo "Use './cc-backend/cc-backend' to start cc-backend." +echo "Use './cc-backend/cc-backend -server' to start cc-backend." echo "Use scripts in /scripts to load data into influx or mariadb." -# ./cc-backend/cc-backend diff --git a/slurm/controller/docker-entrypoint.sh b/slurm/controller/docker-entrypoint.sh index ce398a7..2faa507 100755 --- a/slurm/controller/docker-entrypoint.sh +++ b/slurm/controller/docker-entrypoint.sh @@ -55,7 +55,7 @@ _munge_start() { /usr/sbin/create-munge-key -r -f sh -c "dd if=/dev/urandom bs=1 count=1024 > /etc/munge/munge.key" chown munge: /etc/munge/munge.key - chmod 400 /etc/munge/munge.key + chmod 600 /etc/munge/munge.key sudo -u munge /sbin/munged munge -n munge -n | unmunge @@ -64,6 +64,10 @@ _munge_start() { # copy secrets to /.secret directory for other nodes _copy_secrets() { + while [ ! -f /home/worker/worker-secret.tar.gz ]; do + echo -n "." 
+ sleep 1 + done cp /home/worker/worker-secret.tar.gz /.secret/worker-secret.tar.gz cp /home/worker/setup-worker-ssh.sh /.secret/setup-worker-ssh.sh cp /etc/munge/munge.key /.secret/munge.key diff --git a/slurm/controller/slurm.conf b/slurm/controller/slurm.conf index 63f48f8..6a9a393 100644 --- a/slurm/controller/slurm.conf +++ b/slurm/controller/slurm.conf @@ -21,7 +21,7 @@ SwitchType=switch/none MpiDefault=none SlurmctldPidFile=/var/run/slurmd/slurmctld.pid SlurmdPidFile=/var/run/slurmd/slurmd.pid -ProctrackType=proctrack/linuxproc +# ProctrackType=proctrack/linuxproc #PluginDir= #CacheGroups=0 #FirstJobId= @@ -58,7 +58,7 @@ SchedulerType=sched/backfill #SchedulerRootFilter= SelectType=select/cons_res SelectTypeParameters=CR_CPU_Memory -FastSchedule=1 +# FastSchedule=1 #PriorityType=priority/multifactor #PriorityDecayHalfLife=14-0 #PriorityUsageResetPeriod=14-0 diff --git a/slurm/database/docker-entrypoint.sh b/slurm/database/docker-entrypoint.sh index c314e94..504ecd1 100755 --- a/slurm/database/docker-entrypoint.sh +++ b/slurm/database/docker-entrypoint.sh @@ -68,7 +68,7 @@ _slurmdbd() { fi echo "Starting slurmdbd" cp /etc/slurm/slurmdbd.conf /.secret/slurmdbd.conf - /usr/sbin/slurmdbd + /usr/sbin/slurmdbd -Dvv echo "Started slurmdbd" } diff --git a/slurm/database/slurmdbd.conf b/slurm/database/slurmdbd.conf index f6d5a81..d584535 100644 --- a/slurm/database/slurmdbd.conf +++ b/slurm/database/slurmdbd.conf @@ -24,7 +24,7 @@ SlurmUser=slurm DebugLevel=4 #DefaultQOS=normal,standby LogFile=/var/log/slurm/slurmdbd.log -PidFile=/var/run/slurmdbd/slurmdbd.pid +# PidFile=/var/run/slurmdbd/slurmdbd.pid #PluginDir=/usr/lib/slurm #PrivateData=accounts,users,usage,jobs #TrackWCKey=yes diff --git a/slurm/worker/cgroup.conf b/slurm/worker/cgroup.conf new file mode 100644 index 0000000..f24d9d7 --- /dev/null +++ b/slurm/worker/cgroup.conf @@ -0,0 +1,5 @@ +CgroupPlugin=cgroup/v1 +ConstrainCores=yes +ConstrainDevices=no +ConstrainRAMSpace=yes +ConstrainSwapSpace=yes