Update for cc-slurm-adapter

This commit is contained in:
Aditya Ujeniya 2025-06-25 20:20:20 +02:00
parent 0590e84565
commit 60a346dec1
4 changed files with 84 additions and 49 deletions

View File

@ -125,12 +125,24 @@ else
echo "'data/nats' already exists!" echo "'data/nats' already exists!"
fi fi
# prepare folders for influxdb3 if [ ! -d data/slurm/home/worker/CCSA ]; then
if [ ! -d data/influxdb ]; then mkdir -p data/slurm/home/worker/CCSA
mkdir -p data/influxdb/data
mkdir -p data/influxdb/config cat >data/slurm/home/worker/CCSA/config.json <<EOF
{
"pidFilePath": "/home/worker/CCSA/daemon.pid",
"ipcSockPath": "/home/worker/CCSA/daemon.sock",
"lastRunPath": "/home/worker/CCSA/last_run",
"slurmPollInterval": 10,
"ccRestUrl": "http://host.docker.internal:8080",
"ccRestJwt": ""
}
EOF
chmod 777 data/slurm/home/worker/CCSA/config.json
else else
echo "'data/influxdb' already exists!" echo "'data/slurm/worker' already exists!"
fi fi
echo "" echo ""

View File

@ -69,6 +69,8 @@ services:
- ${DATADIR}/slurm/state:/var/lib/slurm/d - ${DATADIR}/slurm/state:/var/lib/slurm/d
ports: ports:
- "6817:6817" - "6817:6817"
extra_hosts:
- "host.docker.internal:host-gateway"
slurmdbd: slurmdbd:
container_name: slurmdbd container_name: slurmdbd
@ -126,21 +128,3 @@ services:
- /etc/localtime:/etc/localtime:ro - /etc/localtime:/etc/localtime:ro
ports: ports:
- "6820:6820" - "6820:6820"
# influxdb:
# container_name: influxdb
# image: influxdb:latest
# command: ["--reporting-disabled", "--log-level=debug"]
# environment:
# DOCKER_INFLUXDB_INIT_MODE: setup
# DOCKER_INFLUXDB_INIT_USERNAME: devel
# DOCKER_INFLUXDB_INIT_PASSWORD: ${INFLUXDB_PASSWORD}
# DOCKER_INFLUXDB_INIT_ORG: ${INFLUXDB_ORG}
# DOCKER_INFLUXDB_INIT_BUCKET: ${INFLUXDB_BUCKET}
# DOCKER_INFLUXDB_INIT_RETENTION: 100w
# DOCKER_INFLUXDB_INIT_ADMIN_TOKEN: ${INFLUXDB_ADMIN_TOKEN}
# ports:
# - "0.0.0.0:8086:8086"
# volumes:
# - ${DATADIR}/influxdb/data:/var/lib/influxdb2
# - ${DATADIR}/influxdb/config:/etc/influxdb2

View File

@ -39,6 +39,33 @@ fi
chmod u+x dataGenerationScript.sh chmod u+x dataGenerationScript.sh
./dataGenerationScript.sh ./dataGenerationScript.sh
cd cc-backend
rm -rf var
if [ ! -d var ]; then
wget https://hpc-mover.rrze.uni-erlangen.de/HPC-Data/0x7b58aefb/eig7ahyo6fo2bais0ephuf2aitohv1ai/job-archive-demo.tar
tar xf job-archive-demo.tar
rm ./job-archive-demo.tar
cp ./configs/env-template.txt .env
cp ./configs/config-demo.json config.json
sed -i 's/"addr": *"127\.0\.0\.1:8080"/"addr": "0.0.0.0:8080"/' config.json
make
./cc-backend -migrate-db
./cc-backend --init-db --add-user demo:admin,api:demo
JWT=$(./cc-backend -jwt demo | awk -F': ' '/Successfully generated JWT/ {print $3}')
cd ..
sed -i "s/\"ccRestJwt\": \"\"/\"ccRestJwt\": \"$JWT\"/" data/slurm/home/worker/CCSA/config.json
else
cd ..
fi
# Update timestamps for all the checkpoints in data/cc-metric-store-source # Update timestamps for all the checkpoints in data/cc-metric-store-source
# and dumps new files in data/cc-metric-store. # and dumps new files in data/cc-metric-store.
perl ./migrateTimestamps.pl perl ./migrateTimestamps.pl
@ -68,24 +95,6 @@ cd ../..
docker-compose build docker-compose build
docker-compose up -d docker-compose up -d
cd cc-backend
if [ ! -d var ]; then
wget https://hpc-mover.rrze.uni-erlangen.de/HPC-Data/0x7b58aefb/eig7ahyo6fo2bais0ephuf2aitohv1ai/job-archive-demo.tar
tar xf job-archive-demo.tar
rm ./job-archive-demo.tar
cp ./configs/env-template.txt .env
cp -f ../misc/config.json config.json
make
./cc-backend -migrate-db
./cc-backend --init-db --add-user demo:admin:demo
cd ..
else
cd ..
echo "'cc-backend/var' exists. Cautiously exiting."
fi
echo "" echo ""
echo "|--------------------------------------------------------------------------------------|" echo "|--------------------------------------------------------------------------------------|"

View File

@ -35,8 +35,8 @@ _sshd_host() {
_ssh_worker() { _ssh_worker() {
if [[ ! -d /home/worker ]]; then if [[ ! -d /home/worker ]]; then
mkdir -p /home/worker mkdir -p /home/worker
chown -R worker:worker /home/worker
fi fi
chown -R worker:worker /home/worker
cat >/home/worker/setup-worker-ssh.sh <<EOF2 cat >/home/worker/setup-worker-ssh.sh <<EOF2
mkdir -p ~/.ssh mkdir -p ~/.ssh
chmod 0700 ~/.ssh chmod 0700 ~/.ssh
@ -187,6 +187,8 @@ _slurmctld() {
sudo yum install -y iputils sudo yum install -y iputils
sudo yum install -y lsof sudo yum install -y lsof
sudo yum install -y jq sudo yum install -y jq
sudo yum install -y git
sudo yum install -y go
_openssl_jwt_key _openssl_jwt_key
@ -210,8 +212,36 @@ _slurmctld() {
sleep 2s sleep 2s
echo "Starting slurmctld" echo "Starting slurmctld"
cp -f /etc/slurm/slurm.conf /.secret/ cp -f /etc/slurm/slurm.conf /.secret/
/usr/sbin/slurmctld -Dvv /usr/sbin/slurmctld -Dvv &
echo "Started slurmctld" echo "Started slurmctld"
echo "Cloning cc-slurm-adapter"
git clone https://github.com/ClusterCockpit/cc-slurm-adapter.git
echo "Cloned cc-slurm-adapter"
cd cc-slurm-adapter
echo "Building cc-slurm-adapter"
go build
echo "Completed building cc-slurm-adapter"
mkdir /run/cc-slurm-adapter/
chmod 777 /run/cc-slurm-adapter/
cp -f /home/worker/CCSA/config.json .
JWT=$(cat config.json | grep "ccRestJwt" | awk -F': ' '/"ccRestJwt"/ {print $2}' | tr -d '"')
while true; do
if curl -X 'GET' 'http://host.docker.internal:8080/api/clusters/' -H 'accept: application/json' --head -H "X-Auth-Token: $JWT" | grep -q "200 OK"; then
echo "Service is UP at $(date)"
break
else
echo "Service is still DOWN at $(date)"
fi
sleep 2
done
./cc-slurm-adapter -daemon -config config.json -debug 1 &
} }
### main ### ### main ###