Update for cc-slurm-adapter

This commit is contained in:
Aditya Ujeniya 2025-06-25 20:20:20 +02:00
parent 0590e84565
commit 60a346dec1
4 changed files with 84 additions and 49 deletions

View File

@ -28,7 +28,7 @@ fi
# This file is automatically picked up by mariadb after the docker service starts.
if [ ! -d data/mariadb ]; then
mkdir -p data/mariadb
cat > data/mariadb/01.databases.sql <<EOF
cat >data/mariadb/01.databases.sql <<EOF
CREATE DATABASE IF NOT EXISTS \`ccbackend\`;
EOF
else
@ -40,7 +40,7 @@ fi
# This file is automatically picked up by openldap after the docker service starts.
if [ ! -d data/ldap ]; then
mkdir -p data/ldap
cat > data/ldap/add_users.ldif <<EOF
cat >data/ldap/add_users.ldif <<EOF
dn: ou=users,dc=example,dc=com
objectClass: organizationalUnit
ou: users
@ -69,7 +69,7 @@ fi
# to subject 'hpc-nats' every 1 minute. Random data is generated only for node level metrics, not hardware level metrics.
if [ ! -d data/nats ]; then
mkdir -p data/nats
cat > data/nats/docker-entrypoint.sh <<EOF
cat >data/nats/docker-entrypoint.sh <<EOF
#!/bin/sh
set -e
@ -125,12 +125,24 @@ else
echo "'data/nats' already exists!"
fi
# prepare the cc-slurm-adapter (CCSA) config folder under data/slurm (replaces the former influxdb3 folder setup)
if [ ! -d data/influxdb ]; then
mkdir -p data/influxdb/data
mkdir -p data/influxdb/config
if [ ! -d data/slurm/home/worker/CCSA ]; then
mkdir -p data/slurm/home/worker/CCSA
cat >data/slurm/home/worker/CCSA/config.json <<EOF
{
"pidFilePath": "/home/worker/CCSA/daemon.pid",
"ipcSockPath": "/home/worker/CCSA/daemon.sock",
"lastRunPath": "/home/worker/CCSA/last_run",
"slurmPollInterval": 10,
"ccRestUrl": "http://host.docker.internal:8080",
"ccRestJwt": ""
}
EOF
chmod 777 data/slurm/home/worker/CCSA/config.json
else
echo "'data/influxdb' already exists!"
echo "'data/slurm/worker' already exists!"
fi
echo ""

View File

@ -69,6 +69,8 @@ services:
- ${DATADIR}/slurm/state:/var/lib/slurm/d
ports:
- "6817:6817"
extra_hosts:
- "host.docker.internal:host-gateway"
slurmdbd:
container_name: slurmdbd
@ -126,21 +128,3 @@ services:
- /etc/localtime:/etc/localtime:ro
ports:
- "6820:6820"
# influxdb:
# container_name: influxdb
# image: influxdb:latest
# command: ["--reporting-disabled", "--log-level=debug"]
# environment:
# DOCKER_INFLUXDB_INIT_MODE: setup
# DOCKER_INFLUXDB_INIT_USERNAME: devel
# DOCKER_INFLUXDB_INIT_PASSWORD: ${INFLUXDB_PASSWORD}
# DOCKER_INFLUXDB_INIT_ORG: ${INFLUXDB_ORG}
# DOCKER_INFLUXDB_INIT_BUCKET: ${INFLUXDB_BUCKET}
# DOCKER_INFLUXDB_INIT_RETENTION: 100w
# DOCKER_INFLUXDB_INIT_ADMIN_TOKEN: ${INFLUXDB_ADMIN_TOKEN}
# ports:
# - "0.0.0.0:8086:8086"
# volumes:
# - ${DATADIR}/influxdb/data:/var/lib/influxdb2
# - ${DATADIR}/influxdb/config:/etc/influxdb2

View File

@ -39,6 +39,33 @@ fi
chmod u+x dataGenerationScript.sh
./dataGenerationScript.sh
# Set up cc-backend from scratch (demo job archive, config, build, database,
# users) and copy a freshly generated JWT into the cc-slurm-adapter config.
cd cc-backend
# Discard any 'var' directory left over from a previous run.
rm -rf var
# NOTE(review): 'var' was removed on the line above, so unless that rm failed
# this condition is always true and the else branch is never taken.
if [ ! -d var ]; then
# Download the demo job archive, unpack it here and delete the tarball.
wget https://hpc-mover.rrze.uni-erlangen.de/HPC-Data/0x7b58aefb/eig7ahyo6fo2bais0ephuf2aitohv1ai/job-archive-demo.tar
tar xf job-archive-demo.tar
rm ./job-archive-demo.tar
# Start from the template env file and the demo config shipped in ./configs.
cp ./configs/env-template.txt .env
cp ./configs/config-demo.json config.json
# Rewrite the listen address from 127.0.0.1:8080 to 0.0.0.0:8080.
# Presumably needed so containers can reach the backend through
# host.docker.internal:8080 - confirm.
sed -i 's/"addr": *"127\.0\.0\.1:8080"/"addr": "0.0.0.0:8080"/' config.json
make
./cc-backend -migrate-db
# NOTE(review): the user spec looks like name:roles:password - confirm against
# the cc-backend documentation.
./cc-backend --init-db --add-user demo:admin,api:demo
# Ask cc-backend for a JWT for user 'demo' and keep the third ': '-separated
# field of the output line containing "Successfully generated JWT".
JWT=$(./cc-backend -jwt demo | awk -F': ' '/Successfully generated JWT/ {print $3}')
cd ..
# Fill the empty "ccRestJwt" value in the adapter config with that token.
# The substitution only matches while the value is still the empty string.
sed -i "s/\"ccRestJwt\": \"\"/\"ccRestJwt\": \"$JWT\"/" data/slurm/home/worker/CCSA/config.json
else
cd ..
fi
# Update timestamps for all the checkpoints in data/cc-metric-store-source
# and dump the new files into data/cc-metric-store.
perl ./migrateTimestamps.pl
@ -68,24 +95,6 @@ cd ../..
docker-compose build
docker-compose up -d
cd cc-backend
if [ ! -d var ]; then
wget https://hpc-mover.rrze.uni-erlangen.de/HPC-Data/0x7b58aefb/eig7ahyo6fo2bais0ephuf2aitohv1ai/job-archive-demo.tar
tar xf job-archive-demo.tar
rm ./job-archive-demo.tar
cp ./configs/env-template.txt .env
cp -f ../misc/config.json config.json
make
./cc-backend -migrate-db
./cc-backend --init-db --add-user demo:admin:demo
cd ..
else
cd ..
echo "'cc-backend/var' exists. Cautiously exiting."
fi
echo ""
echo "|--------------------------------------------------------------------------------------|"

View File

@ -35,8 +35,8 @@ _sshd_host() {
_ssh_worker() {
if [[ ! -d /home/worker ]]; then
mkdir -p /home/worker
chown -R worker:worker /home/worker
fi
chown -R worker:worker /home/worker
cat >/home/worker/setup-worker-ssh.sh <<EOF2
mkdir -p ~/.ssh
chmod 0700 ~/.ssh
@ -187,6 +187,8 @@ _slurmctld() {
sudo yum install -y iputils
sudo yum install -y lsof
sudo yum install -y jq
sudo yum install -y git
sudo yum install -y go
_openssl_jwt_key
@ -210,8 +212,36 @@ _slurmctld() {
sleep 2s
echo "Starting slurmctld"
cp -f /etc/slurm/slurm.conf /.secret/
/usr/sbin/slurmctld -Dvv
/usr/sbin/slurmctld -Dvv &
echo "Started slurmctld"
# Fetch and build cc-slurm-adapter, wait until the cc-backend REST API answers,
# then start the adapter as a background daemon.
echo "Cloning cc-slurm-adapter"
git clone https://github.com/ClusterCockpit/cc-slurm-adapter.git
echo "Cloned cc-slurm-adapter"
cd cc-slurm-adapter
echo "Building cc-slurm-adapter"
go build
echo "Completed building cc-slurm-adapter"

# -p keeps this step from failing when the directory already exists.
mkdir -p /run/cc-slurm-adapter/
chmod 777 /run/cc-slurm-adapter/

cp -f /home/worker/CCSA/config.json .
# Read the token with jq (installed earlier in this function) instead of
# scraping the JSON text; '// empty' yields an empty string if the key is missing.
JWT=$(jq -r '.ccRestJwt // empty' config.json)

# Poll the cluster endpoint until it returns HTTP 200.
# The numeric status code is compared instead of grepping the status line for
# "200 OK", which depends on the reason phrase, and the conflicting
# '-X GET --head' combination is replaced by a plain GET whose body is discarded.
while true; do
  # '|| true' keeps a refused connection (curl prints 000) from aborting the
  # script if errexit is active.
  http_status=$(curl -s -o /dev/null -w '%{http_code}' \
    -H 'accept: application/json' \
    -H "X-Auth-Token: $JWT" \
    'http://host.docker.internal:8080/api/clusters/' || true)
  if [ "$http_status" = "200" ]; then
    echo "Service is UP at $(date)"
    break
  else
    echo "Service is still DOWN at $(date)"
  fi
  sleep 2
done

./cc-slurm-adapter -daemon -config config.json -debug 1 &
}
### main ###