Generalizing setup

This commit is contained in:
Aditya Ujeniya 2024-10-11 16:39:38 +02:00
parent 4be4456428
commit cf13ee5e7e
6 changed files with 182 additions and 44 deletions

View File

@ -63,58 +63,49 @@ services:
cap_add: cap_add:
- SYS_NICE - SYS_NICE
# mysql: slurmctld:
# container_name: mysql
# image: mysql:8.0.22
# command: ["--default-authentication-plugin=mysql_native_password"]
# environment:
# MYSQL_ROOT_PASSWORD: ${MYSQL_ROOT_PASSWORD}
# MYSQL_DATABASE: ${MYSQL_DATABASE}
# MYSQL_USER: ${MYSQL_USER}
# MYSQL_PASSWORD: ${MYSQL_PASSWORD}
# ports:
# - "127.0.0.1:${MYSQL_PORT}:3306"
# # volumes:
# # - ${DATADIR}/sql-init:/docker-entrypoint-initdb.d
# # - ${DATADIR}/sqldata:/var/lib/mysql
# cap_add:
# - SYS_NICE
slurm-controller:
image: clustercockpit:22.05.6
container_name: slurmctld container_name: slurmctld
hostname: slurmctld hostname: slurmctld
build: build:
context: ./slurm/controller context: ./slurm/controller
depends_on:
- slurmdbd
privileged: true privileged: true
ports:
- "6817:6817"
volumes: volumes:
- ${DATADIR}/slurm/home:/home - ${DATADIR}/slurm/home:/home
- ${DATADIR}/slurm/secret:/.secret - ${DATADIR}/slurm/secret:/.secret
- ./slurm/controller/slurm.conf:/home/config/slurm.conf
slurm-database: slurmdbd:
container_name: slurmdb container_name: slurmdbd
hostname: slurmdb hostname: slurmdbd
build: build:
context: ./slurm/database context: ./slurm/database
depends_on: depends_on:
- mariadb - mariadb
- slurm-controller
privileged: true privileged: true
ports:
- "6819:6819"
volumes: volumes:
- ${DATADIR}/slurm/home:/home - ${DATADIR}/slurm/home:/home
- ${DATADIR}/slurm/secret:/.secret - ${DATADIR}/slurm/secret:/.secret
- ./slurm/database/slurmdbd.conf:/home/config/slurmdbd.conf
slurm-worker01: node01:
container_name: node01 container_name: node01
hostname: node01 hostname: node01
build: build:
context: ./slurm/worker context: ./slurm/worker
depends_on: depends_on:
- slurm-controller - slurmctld
privileged: true privileged: true
volumes: volumes:
- ${DATADIR}/slurm/home:/home - ${DATADIR}/slurm/home:/home
- ${DATADIR}/slurm/secret:/.secret - ${DATADIR}/slurm/secret:/.secret
ports:
- "6818:6818"
# slurm-worker02: # slurm-worker02:
# container_name: node02 # container_name: node02

View File

@ -20,7 +20,7 @@ _ssh_worker() {
mkdir -p /home/worker mkdir -p /home/worker
chown -R worker:worker /home/worker chown -R worker:worker /home/worker
fi fi
cat > /home/worker/setup-worker-ssh.sh <<EOF2 cat >/home/worker/setup-worker-ssh.sh <<EOF2
mkdir -p ~/.ssh mkdir -p ~/.ssh
chmod 0700 ~/.ssh chmod 0700 ~/.ssh
ssh-keygen -b 2048 -t rsa -f ~/.ssh/id_rsa -q -N "" -C "$(whoami)@$(hostname)-$(date -I)" ssh-keygen -b 2048 -t rsa -f ~/.ssh/id_rsa -q -N "" -C "$(whoami)@$(hostname)-$(date -I)"
@ -98,11 +98,13 @@ _slurmctld() {
chown slurm: /etc/slurm/slurm.conf chown slurm: /etc/slurm/slurm.conf
chmod 600 /etc/slurm/slurm.conf chmod 600 /etc/slurm/slurm.conf
fi fi
sacctmgr -i add cluster "snowflake" sacctmgr -i add cluster "snowflake"
sleep 2s sleep 2s
echo "Starting slurmctld" echo "Starting slurmctld"
cp -f /etc/slurm/slurm.conf /.secret/ cp -f /etc/slurm/slurm.conf /.secret/
/usr/sbin/slurmctld /usr/sbin/slurmctld
echo "Started slurmctld"
} }
### main ### ### main ###

106
slurm/controller/slurm.conf Normal file
View File

@ -0,0 +1,106 @@
# slurm.conf
#
# Configuration for the containerised Slurm test cluster.
# See the slurm.conf man page for more information.
#
# The cluster name must be the same one the controller entrypoint registers
# in the accounting database (sacctmgr -i add cluster "snowflake"); with a
# different name the controller's cluster is not the one known to slurmdbd.
ClusterName=snowflake
# Controller is addressed by the hostname of the slurmctld container.
ControlMachine=slurmctld
ControlAddr=slurmctld
#BackupController=
#BackupAddr=
#
SlurmUser=slurm
#SlurmdUser=root
# Ports match the ones published for the controller (6817) and the worker
# (6818) in docker-compose.
SlurmctldPort=6817
SlurmdPort=6818
AuthType=auth/munge
#JobCredentialPrivateKey=
#JobCredentialPublicCertificate=
StateSaveLocation=/var/lib/slurmd
SlurmdSpoolDir=/var/spool/slurmd
SwitchType=switch/none
MpiDefault=none
SlurmctldPidFile=/var/run/slurmd/slurmctld.pid
SlurmdPidFile=/var/run/slurmd/slurmd.pid
# ProctrackType is configured in the ACCOUNTING section further down
# (proctrack/cgroup). The entry that used to live here was a duplicate key
# whose value was superseded by that later assignment, so it is disabled.
#ProctrackType=proctrack/linuxproc
#PluginDir=
#CacheGroups=0
#FirstJobId=
ReturnToService=0
#MaxJobCount=
#PlugStackConfig=
#PropagatePrioProcess=
#PropagateResourceLimits=
#PropagateResourceLimitsExcept=
#Prolog=
#Epilog=
#SrunProlog=
#SrunEpilog=
#TaskProlog=
#TaskEpilog=
TaskPlugin=task/none
#TrackWCKey=no
#TreeWidth=50
#TmpFS=
#UsePAM=
#
# TIMERS
# NOTE(review): values are presumably in seconds (see slurm.conf(5)) — confirm.
SlurmctldTimeout=300
SlurmdTimeout=300
InactiveLimit=0
MinJobAge=300
KillWait=30
Waittime=0
#
# SCHEDULING
# Backfill scheduler with the cons_res select plugin; CR_CPU_Memory selects
# CPUs and memory as the resources that are allocated to jobs.
SchedulerType=sched/backfill
#SchedulerAuth=
#SchedulerPort=
#SchedulerRootFilter=
SelectType=select/cons_res
SelectTypeParameters=CR_CPU_Memory
# NOTE(review): FastSchedule is reported as deprecated by newer Slurm
# releases — confirm it is still accepted by the Slurm version in the image.
FastSchedule=1
# Multifactor priority is left disabled; the settings below are kept as a
# template.
#PriorityType=priority/multifactor
#PriorityDecayHalfLife=14-0
#PriorityUsageResetPeriod=14-0
#PriorityWeightFairshare=100000
#PriorityWeightAge=1000
#PriorityWeightPartition=10000
#PriorityWeightJobSize=1000
#PriorityMaxAge=1-0
#
# LOGGING
# Controller log, worker log and the plain-text job completion log are all
# written below /var/log/slurm.
SlurmctldDebug=3
SlurmctldLogFile=/var/log/slurm/slurmctld.log
SlurmdDebug=3
SlurmdLogFile=/var/log/slurm/slurmd.log
JobCompType=jobcomp/filetxt
JobCompLoc=/var/log/slurm/jobcomp.log
#
# ACCOUNTING
# Job accounting data is gathered with the cgroup plugins; the linux
# gather plugin is kept below as a commented-out alternative.
#JobAcctGatherType=jobacct_gather/linux
JobAcctGatherType=jobacct_gather/cgroup
ProctrackType=proctrack/cgroup
JobAcctGatherFrequency=30
#
# Accounting records go to slurmdbd. Host and port correspond to the slurmdbd
# service (hostname slurmdbd, published port 6819) in docker-compose and to
# DbdHost/DbdPort in slurmdbd.conf.
AccountingStorageType=accounting_storage/slurmdbd
AccountingStorageHost=slurmdbd
AccountingStoragePort=6819
#AccountingStorageLoc=slurm_acct_db
#AccountingStoragePass=
#AccountingStorageUser=
#
# COMPUTE NODES
# Node names have to match the hostnames of the worker containers, because
# the worker entrypoint starts slurmd without an explicit node name. The
# compose file currently runs a single worker with hostname node01; the
# second worker (node02) is still commented out there. Once it is enabled,
# extend the node list here and in the partitions to node[01-02].
NodeName=node01 CPUs=12 Boards=1 SocketsPerBoard=2 CoresPerSocket=3 ThreadsPerCore=2
# Alternative minimal node definition:
# NodeName=node01 RealMemory=1000 State=UNKNOWN
#
# PARTITIONS
PartitionName=DEFAULT Nodes=node01
PartitionName=debug Nodes=node01 Default=YES MaxTime=INFINITE State=UP
# Alternative production-like partition (needs both workers for MaxNodes=2):
# PartitionName=normal Default=yes Nodes=node[01-02] Priority=50 DefMemPerCPU=500 Shared=NO MaxNodes=2 MaxTime=5-00:00:00 DefaultTime=5-00:00:00 State=UP
#PrEpPlugins=pika

View File

@ -69,6 +69,7 @@ _slurmdbd() {
echo "Starting slurmdbd" echo "Starting slurmdbd"
cp /etc/slurm/slurmdbd.conf /.secret/slurmdbd.conf cp /etc/slurm/slurmdbd.conf /.secret/slurmdbd.conf
/usr/sbin/slurmdbd /usr/sbin/slurmdbd
echo "Started slurmdbd"
} }
### main ### ### main ###

View File

@ -0,0 +1,37 @@
#
# slurmdbd.conf for the slurmdbd container of this setup (mounted by
# docker-compose to /home/config/slurmdbd.conf).
#
# See the slurmdbd.conf man page for more information.
#
# Archive info
# Archiving and purging are left disabled; the entries are kept as a template.
#ArchiveJobs=yes
#ArchiveDir="/tmp"
#ArchiveSteps=yes
#ArchiveScript=
#JobPurge=12
#StepPurge=1
#
# Authentication info
# Same authentication plugin as configured in slurm.conf (auth/munge).
AuthType=auth/munge
#AuthInfo=/var/run/munge/munge.socket.2
#
# slurmDBD info
# Address, host name and port of this daemon; they match the slurmdbd service
# (hostname slurmdbd, published port 6819) in docker-compose and
# AccountingStorageHost/AccountingStoragePort in slurm.conf.
DbdAddr=slurmdbd
DbdHost=slurmdbd
DbdPort=6819
SlurmUser=slurm
#MessageTimeout=300
DebugLevel=4
#DefaultQOS=normal,standby
LogFile=/var/log/slurm/slurmdbd.log
PidFile=/var/run/slurmdbd/slurmdbd.pid
#PluginDir=/usr/lib/slurm
#PrivateData=accounts,users,usage,jobs
#TrackWCKey=yes
#
# Database info
# Accounting data is stored through the mysql storage plugin in the database
# slurm_acct_db on host mariadb (the compose service slurmdbd depends on).
# NOTE(review): StoragePass is a plain-text demo credential — presumably it
# has to match the database user created for mariadb; do not reuse it outside
# this local test setup.
StorageType=accounting_storage/mysql
StorageHost=mariadb
StorageUser=slurm
StoragePass=demo
StorageLoc=slurm_acct_db

View File

@ -78,6 +78,7 @@ _slurmd() {
chown slurm: /var/log/slurmd.log chown slurm: /var/log/slurmd.log
echo -n "Starting slurmd" echo -n "Starting slurmd"
/usr/sbin/slurmd /usr/sbin/slurmd
echo -n "Started slurmd"
} }
### main ### ### main ###