Update to CgroupPlugin

This commit is contained in:
Aditya 2024-11-13 13:14:41 +00:00
parent 7654ea0291
commit 325ab9b27d
6 changed files with 11 additions and 58 deletions

View File

@ -1,20 +0,0 @@
# Image that builds cc-metric-store from the upstream repository and runs it
# with the config.json shipped next to this Dockerfile.
FROM golang:1.22.4

# Refresh the package index and install in ONE layer so a cached
# `apt-get update` can never be combined with a newer install step, skip
# recommended extras, and drop the package lists to keep the layer small.
RUN apt-get update \
    && apt-get install -y --no-install-recommends git \
    && rm -rf /var/lib/apt/lists/*

# NOTE(review): this clones the default branch head, so two builds of the same
# Dockerfile may produce different binaries; pin a tag or commit if
# reproducibility matters.
RUN git clone https://github.com/ClusterCockpit/cc-metric-store.git /cc-metric-store

# Build from inside the checkout and write the binary straight to /go/bin
# (no separate `mv` layer, no `cd` inside RUN).
WORKDIR /cc-metric-store
RUN go build -o /go/bin/cc-metric-store ./cmd/cc-metric-store

# Reactivate when latest commit is available
#RUN go get -d -v github.com/ClusterCockpit/cc-metric-store
#RUN go install -v github.com/ClusterCockpit/cc-metric-store@latest

# The service is started from /go/bin, so its configuration is placed there.
COPY config.json /go/bin/

# Mount point for data kept outside the container; nothing is written to it
# during the build.
VOLUME /data

WORKDIR /go/bin
CMD ["./cc-metric-store"]

View File

@ -1,28 +0,0 @@
{
"metrics": {
"clock": { "frequency": 60, "aggregation": null, "scope": "node" },
"cpi": { "frequency": 60, "aggregation": null, "scope": "node" },
"cpu_load": { "frequency": 60, "aggregation": null, "scope": "node" },
"flops_any": { "frequency": 60, "aggregation": null, "scope": "node" },
"flops_dp": { "frequency": 60, "aggregation": null, "scope": "node" },
"flops_sp": { "frequency": 60, "aggregation": null, "scope": "node" },
"ib_bw": { "frequency": 60, "aggregation": null, "scope": "node" },
"lustre_bw": { "frequency": 60, "aggregation": null, "scope": "node" },
"mem_bw": { "frequency": 60, "aggregation": null, "scope": "node" },
"mem_used": { "frequency": 60, "aggregation": null, "scope": "node" },
"rapl_power": { "frequency": 60, "aggregation": null, "scope": "node" }
},
"checkpoints": {
"interval": 100000000000,
"directory": "/data/checkpoints",
"restore": 100000000000
},
"archive": {
"interval": 100000000000,
"directory": "/data/archive"
},
"retention-in-memory": 100000000000,
"http-api-address": "0.0.0.0:8081",
"nats": "nats://cc-nats:4222",
"jwt-public-key": "kzfYrYy+TzpanWZHJ5qSdMj5uKUWgq74BWhQG6copP0="
}

View File

@ -1,3 +1,3 @@
# Load the Slurm JWT from the token file (path is relative to the current working directory).
SLURM_JWT=$(cat data/slurm/secret/jwt_token.txt)
# GET the v0.0.39 ping endpoint on localhost:6820, sending the user name "root" and the token as X-SLURM-* headers.
curl -X 'GET' -v 'http://localhost:6820/slurm/v0.0.39/ping' --location --silent --show-error -H "X-SLURM-USER-NAME: root" -H "X-SLURM-USER-TOKEN: $SLURM_JWT"
# GET the v0.0.39 node endpoint for node01 with the same headers.
curl -X 'GET' -v 'http://localhost:6820/slurm/v0.0.39/node/node01' --location --silent --show-error -H "X-SLURM-USER-NAME: root" -H "X-SLURM-USER-TOKEN: $SLURM_JWT"
# Disabled alternative: the same ping request sent through the slurmrestd unix socket, without the auth headers.
# curl -v --unix-socket data/slurm/tmp/slurmrestd.socket 'http://localhost:6820/slurm/v0.0.39/ping'

View File

@ -39,7 +39,7 @@ ReturnToService=0
#SrunEpilog=
#TaskProlog=
#TaskEpilog=
TaskPlugin=task/none
TaskPlugin=task/affinity
#TrackWCKey=no
#TreeWidth=50
#TmpFS=
@ -79,8 +79,8 @@ JobCompType=jobcomp/filetxt
JobCompLoc=/var/log/slurm/jobcomp.log
#
# ACCOUNTING
#JobAcctGatherType=jobacct_gather/linux
JobAcctGatherType=jobacct_gather/cgroup
JobAcctGatherType=jobacct_gather/linux
#JobAcctGatherType=jobacct_gather/cgroup
#ProctrackType=proctrack/cgroup
JobAcctGatherFrequency=30
@ -99,7 +99,7 @@ PartitionName=debug Nodes=node01 Default=YES MaxTime=INFINITE State=UP
# # COMPUTE NODES
# NodeName=c[1-2] RealMemory=1000 State=UNKNOWN
NodeName=node01 CPUs=2 Boards=1 SocketsPerBoard=2 CoresPerSocket=1 ThreadsPerCore=1
NodeName=node01 CPUs=1 Boards=1 SocketsPerBoard=1 CoresPerSocket=1 ThreadsPerCore=1
# #
# # PARTITIONS

View File

@ -1,4 +1,4 @@
CgroupPlugin=cgroup/v1
CgroupPlugin=disabled
ConstrainCores=yes
ConstrainDevices=no
ConstrainRAMSpace=yes

View File

@ -78,9 +78,9 @@ _slurmd() {
fi
echo "found slurm.conf"
sudo yum install -y nc
sudo yum install -y procps
sudo yum install -y iputils
# sudo yum install -y nc
# sudo yum install -y procps
# sudo yum install -y iputils
mkdir -p /var/spool/slurm/d /etc/slurm /var/run/slurm/d /var/log/slurm
chown slurm: /var/spool/slurm/d /var/run/slurm/d /var/log/slurm
@ -98,6 +98,7 @@ _slurmd() {
chown slurm: /var/run/slurm/d/slurmd.pid
echo "Starting slurmd"
/usr/sbin/slurmstepd infinity &
/usr/sbin/slurmd -Dvv
echo "Started slurmd"
}