From 861036e864e3be7794a831f10e699aca8debf2ea Mon Sep 17 00:00:00 2001 From: Aditya Date: Wed, 13 Nov 2024 16:57:26 +0000 Subject: [PATCH] Update to CCMS config --- .gitignore | 1 + cc-metric-store/Dockerfile | 1 + cc-metric-store/config.json | 197 ++++++++++++++++++++++++++++++++---- 3 files changed, 179 insertions(+), 20 deletions(-) diff --git a/.gitignore b/.gitignore index 147c94d..28989ba 100644 --- a/.gitignore +++ b/.gitignore @@ -7,6 +7,7 @@ data/cc-metric-store-source data/ldap data/mariadb data/slurm +data cc-backend cc-backend/** .vscode diff --git a/cc-metric-store/Dockerfile b/cc-metric-store/Dockerfile index eb7aa48..a06d075 100644 --- a/cc-metric-store/Dockerfile +++ b/cc-metric-store/Dockerfile @@ -17,4 +17,5 @@ COPY config.json /go/bin VOLUME /data WORKDIR /go/bin +RUN mkdir -p ./var/checkpoints CMD ["./cc-metric-store"] diff --git a/cc-metric-store/config.json b/cc-metric-store/config.json index 674c67c..29d4d28 100644 --- a/cc-metric-store/config.json +++ b/cc-metric-store/config.json @@ -1,28 +1,185 @@ { "metrics": { - "clock": { "frequency": 60, "aggregation": null, "scope": "node" }, - "cpi": { "frequency": 60, "aggregation": null, "scope": "node" }, - "cpu_load": { "frequency": 60, "aggregation": null, "scope": "node" }, - "flops_any": { "frequency": 60, "aggregation": null, "scope": "node" }, - "flops_dp": { "frequency": 60, "aggregation": null, "scope": "node" }, - "flops_sp": { "frequency": 60, "aggregation": null, "scope": "node" }, - "ib_bw": { "frequency": 60, "aggregation": null, "scope": "node" }, - "lustre_bw": { "frequency": 60, "aggregation": null, "scope": "node" }, - "mem_bw": { "frequency": 60, "aggregation": null, "scope": "node" }, - "mem_used": { "frequency": 60, "aggregation": null, "scope": "node" }, - "rapl_power": { "frequency": 60, "aggregation": null, "scope": "node" } + "debug_metric": { + "frequency": 60, + "aggregation": "avg" + }, + "clock": { + "frequency": 60, + "aggregation": "avg" + }, + "cpu_idle": { + "frequency": 60, + "aggregation": "avg" + }, + "cpu_iowait": { + "frequency": 60, + "aggregation": "avg" + }, + "cpu_irq": { + "frequency": 60, + "aggregation": "avg" + }, + "cpu_system": { + "frequency": 60, + "aggregation": "avg" + }, + "cpu_user": { + "frequency": 60, + "aggregation": "avg" + }, + "nv_mem_util": { + "frequency": 60, + "aggregation": "avg" + }, + "nv_temp": { + "frequency": 60, + "aggregation": "avg" + }, + "nv_sm_clock": { + "frequency": 60, + "aggregation": "avg" + }, + "acc_utilization": { + "frequency": 60, + "aggregation": "avg" + }, + "acc_mem_used": { + "frequency": 60, + "aggregation": "sum" + }, + "acc_power": { + "frequency": 60, + "aggregation": "sum" + }, + "flops_any": { + "frequency": 60, + "aggregation": "sum" + }, + "flops_dp": { + "frequency": 60, + "aggregation": "sum" + }, + "flops_sp": { + "frequency": 60, + "aggregation": "sum" + }, + "ib_recv": { + "frequency": 60, + "aggregation": "sum" + }, + "ib_xmit": { + "frequency": 60, + "aggregation": "sum" + }, + "ib_recv_pkts": { + "frequency": 60, + "aggregation": "sum" + }, + "ib_xmit_pkts": { + "frequency": 60, + "aggregation": "sum" + }, + "cpu_power": { + "frequency": 60, + "aggregation": "sum" + }, + "core_power": { + "frequency": 60, + "aggregation": "sum" + }, + "mem_power": { + "frequency": 60, + "aggregation": "sum" + }, + "ipc": { + "frequency": 60, + "aggregation": "avg" + }, + "cpu_load": { + "frequency": 60, + "aggregation": null + }, + "lustre_close": { + "frequency": 60, + "aggregation": null + }, + "lustre_open": { + "frequency": 60, + "aggregation": null + }, + "lustre_statfs": { + "frequency": 60, + "aggregation": null + }, + "lustre_read_bytes": { + "frequency": 60, + "aggregation": null + }, + "lustre_write_bytes": { + "frequency": 60, + "aggregation": null + }, + "net_bw": { + "frequency": 60, + "aggregation": null + }, + "file_bw": { + "frequency": 60, + "aggregation": null + }, + "mem_bw": { + "frequency": 60, + "aggregation": "sum" + }, + "mem_cached": { + "frequency": 60, + "aggregation": null + }, + "mem_used": { + "frequency": 60, + "aggregation": null + }, + "net_bytes_in": { + "frequency": 60, + "aggregation": null + }, + "net_bytes_out": { + "frequency": 60, + "aggregation": null + }, + "nfs4_read": { + "frequency": 60, + "aggregation": null + }, + "nfs4_total": { + "frequency": 60, + "aggregation": null + }, + "nfs4_write": { + "frequency": 60, + "aggregation": null + }, + "vectorization_ratio": { + "frequency": 60, + "aggregation": "avg" + } }, "checkpoints": { - "interval": 100000000000, - "directory": "/data/checkpoints", - "restore": 100000000000 + "interval": "12h", + "directory": "./var/checkpoints", + "restore": "48h" }, "archive": { - "interval": 100000000000, - "directory": "/data/archive" + "interval": "50h", + "directory": "./var/archive" }, - "retention-in-memory": 100000000000, - "http-api-address": "0.0.0.0:8081", - "nats": "nats://cc-nats:4222", + "http-api": { + "address": "localhost:8082", + "https-cert-file": null, + "https-key-file": null + }, + "retention-in-memory": "48h", + "nats": null, "jwt-public-key": "kzfYrYy+TzpanWZHJ5qSdMj5uKUWgq74BWhQG6copP0=" -} +} \ No newline at end of file