mirror of
https://github.com/ClusterCockpit/cc-metric-collector.git
synced 2025-10-24 06:45:07 +02:00
Compare commits
49 Commits
cc-docs
...
fix-nvidia
Author | SHA1 | Date | |
---|---|---|---|
|
b3eb0679b9 | ||
|
6a4ad067ac | ||
|
ed2378f794 | ||
|
99e066ff5f | ||
|
67cdbefb02 | ||
|
b522aca693 | ||
|
ea7c4f4ec7 | ||
|
09cf89a951 | ||
|
d6499935a4 | ||
|
3e19c47ae4 | ||
|
97e09f13f4 | ||
|
e08bd3d926 | ||
|
fc525b7430 | ||
|
69d4567ecf | ||
|
c5183feafc | ||
|
a45366646e | ||
|
a551616566 | ||
|
a9fa168117 | ||
|
39d37597ab | ||
|
aeaba0021b | ||
|
5ceffb44b4 | ||
|
e29942a4be | ||
|
0b9b9a6e68 | ||
|
b47cb3a0c4 | ||
|
b49ae7b612 | ||
|
1fc5cc8483 | ||
|
e81099af8d | ||
|
eaca327d73 | ||
|
2e48996d87 | ||
|
7cdbada522 | ||
|
babe1e020d | ||
|
776af72231 | ||
|
2d4894b8f7 | ||
|
35295b0b3a | ||
|
1e734baa35 | ||
|
aa6181a018 | ||
|
0a2a85f2ce | ||
|
48f5afe2be | ||
|
979192af4e | ||
|
c1032ff329 | ||
|
6b03d3aee8 | ||
|
b9665d0d68 | ||
|
4c7a0e064f | ||
|
d8f10384a1 | ||
|
f74d856e69 | ||
|
fabb37ea70 | ||
|
3a0f148728 | ||
|
ec34b40295 | ||
|
03cd965099 |
11
.github/dependabot.yml
vendored
Normal file
11
.github/dependabot.yml
vendored
Normal file
@@ -0,0 +1,11 @@
|
|||||||
|
# To get started with Dependabot version updates, you'll need to specify which
|
||||||
|
# package ecosystems to update and where the package manifests are located.
|
||||||
|
# Please see the documentation for all configuration options:
|
||||||
|
# https://docs.github.com/code-security/dependabot/dependabot-version-updates/configuration-options-for-the-dependabot.yml-file
|
||||||
|
|
||||||
|
version: 2
|
||||||
|
updates:
|
||||||
|
- package-ecosystem: "gomod"
|
||||||
|
directory: "/"
|
||||||
|
schedule:
|
||||||
|
interval: "weekly"
|
36
.github/workflows/Release.yml
vendored
36
.github/workflows/Release.yml
vendored
@@ -48,10 +48,10 @@ jobs:
|
|||||||
- name: Setup Golang
|
- name: Setup Golang
|
||||||
run: |
|
run: |
|
||||||
dnf --assumeyes --disableplugin=subscription-manager install \
|
dnf --assumeyes --disableplugin=subscription-manager install \
|
||||||
https://repo.almalinux.org/almalinux/8/AppStream/x86_64/os/Packages/go-toolset-1.22.9-1.module_el8.10.0+3938+8c723e16.x86_64.rpm \
|
https://repo.almalinux.org/almalinux/8/AppStream/x86_64/os/Packages/go-toolset-1.23.9-1.module_el8.10.0+4000+1ad1b2cc.x86_64.rpm \
|
||||||
https://repo.almalinux.org/almalinux/8/AppStream/x86_64/os/Packages/golang-1.22.9-1.module_el8.10.0+3938+8c723e16.x86_64.rpm \
|
https://repo.almalinux.org/almalinux/8/AppStream/x86_64/os/Packages/golang-1.23.9-1.module_el8.10.0+4000+1ad1b2cc.x86_64.rpm \
|
||||||
https://repo.almalinux.org/almalinux/8/AppStream/x86_64/os/Packages/golang-bin-1.22.9-1.module_el8.10.0+3938+8c723e16.x86_64.rpm \
|
https://repo.almalinux.org/almalinux/8/AppStream/x86_64/os/Packages/golang-bin-1.23.9-1.module_el8.10.0+4000+1ad1b2cc.x86_64.rpm \
|
||||||
https://repo.almalinux.org/almalinux/8/AppStream/x86_64/os/Packages/golang-src-1.22.9-1.module_el8.10.0+3938+8c723e16.noarch.rpm
|
https://repo.almalinux.org/almalinux/8/AppStream/x86_64/os/Packages/golang-src-1.23.9-1.module_el8.10.0+4000+1ad1b2cc.noarch.rpm
|
||||||
|
|
||||||
- name: RPM build MetricCollector
|
- name: RPM build MetricCollector
|
||||||
id: rpmbuild
|
id: rpmbuild
|
||||||
@@ -126,11 +126,11 @@ jobs:
|
|||||||
- name: Setup Golang
|
- name: Setup Golang
|
||||||
run: |
|
run: |
|
||||||
dnf --assumeyes --disableplugin=subscription-manager install \
|
dnf --assumeyes --disableplugin=subscription-manager install \
|
||||||
https://repo.almalinux.org/almalinux/9/AppStream/x86_64/os/Packages/go-toolset-1.22.7-2.el9_5.x86_64.rpm \
|
https://repo.almalinux.org/almalinux/9/AppStream/x86_64/os/Packages/go-toolset-1.23.9-1.el9_6.x86_64.rpm \
|
||||||
https://repo.almalinux.org/almalinux/9/AppStream/x86_64/os/Packages/golang-1.22.7-2.el9_5.x86_64.rpm \
|
https://repo.almalinux.org/almalinux/9/AppStream/x86_64/os/Packages/golang-1.23.9-1.el9_6.x86_64.rpm \
|
||||||
https://repo.almalinux.org/almalinux/9/AppStream/x86_64/os/Packages/golang-bin-1.22.7-2.el9_5.x86_64.rpm \
|
https://repo.almalinux.org/almalinux/9/AppStream/x86_64/os/Packages/golang-bin-1.23.9-1.el9_6.x86_64.rpm \
|
||||||
https://repo.almalinux.org/almalinux/9/AppStream/x86_64/os/Packages/golang-src-1.22.7-2.el9_5.noarch.rpm \
|
https://repo.almalinux.org/almalinux/9/AppStream/x86_64/os/Packages/golang-src-1.23.9-1.el9_6.noarch.rpm \
|
||||||
https://repo.almalinux.org/almalinux/9/AppStream/x86_64/os/Packages/golang-race-1.22.7-2.el9_5.x86_64.rpm
|
https://repo.almalinux.org/almalinux/9/AppStream/x86_64/os/Packages/golang-race-1.23.9-1.el9_6.x86_64.rpm
|
||||||
|
|
||||||
- name: RPM build MetricCollector
|
- name: RPM build MetricCollector
|
||||||
id: rpmbuild
|
id: rpmbuild
|
||||||
@@ -202,10 +202,10 @@ jobs:
|
|||||||
- name: Setup Golang
|
- name: Setup Golang
|
||||||
run: |
|
run: |
|
||||||
dnf --assumeyes --disableplugin=subscription-manager install \
|
dnf --assumeyes --disableplugin=subscription-manager install \
|
||||||
https://repo.almalinux.org/almalinux/8/AppStream/x86_64/os/Packages/go-toolset-1.22.9-1.module_el8.10.0+3938+8c723e16.x86_64.rpm \
|
https://repo.almalinux.org/almalinux/8/AppStream/x86_64/os/Packages/go-toolset-1.23.9-1.module_el8.10.0+4000+1ad1b2cc.x86_64.rpm \
|
||||||
https://repo.almalinux.org/almalinux/8/AppStream/x86_64/os/Packages/golang-1.22.9-1.module_el8.10.0+3938+8c723e16.x86_64.rpm \
|
https://repo.almalinux.org/almalinux/8/AppStream/x86_64/os/Packages/golang-1.23.9-1.module_el8.10.0+4000+1ad1b2cc.x86_64.rpm \
|
||||||
https://repo.almalinux.org/almalinux/8/AppStream/x86_64/os/Packages/golang-bin-1.22.9-1.module_el8.10.0+3938+8c723e16.x86_64.rpm \
|
https://repo.almalinux.org/almalinux/8/AppStream/x86_64/os/Packages/golang-bin-1.23.9-1.module_el8.10.0+4000+1ad1b2cc.x86_64.rpm \
|
||||||
https://repo.almalinux.org/almalinux/8/AppStream/x86_64/os/Packages/golang-src-1.22.9-1.module_el8.10.0+3938+8c723e16.noarch.rpm
|
https://repo.almalinux.org/almalinux/8/AppStream/x86_64/os/Packages/golang-src-1.23.9-1.module_el8.10.0+4000+1ad1b2cc.noarch.rpm
|
||||||
|
|
||||||
- name: RPM build MetricCollector
|
- name: RPM build MetricCollector
|
||||||
id: rpmbuild
|
id: rpmbuild
|
||||||
@@ -262,11 +262,11 @@ jobs:
|
|||||||
- name: Setup Golang
|
- name: Setup Golang
|
||||||
run: |
|
run: |
|
||||||
dnf --assumeyes --disableplugin=subscription-manager install \
|
dnf --assumeyes --disableplugin=subscription-manager install \
|
||||||
https://repo.almalinux.org/almalinux/9/AppStream/x86_64/os/Packages/go-toolset-1.22.7-2.el9_5.x86_64.rpm \
|
https://repo.almalinux.org/almalinux/9/AppStream/x86_64/os/Packages/go-toolset-1.23.9-1.el9_6.x86_64.rpm \
|
||||||
https://repo.almalinux.org/almalinux/9/AppStream/x86_64/os/Packages/golang-1.22.7-2.el9_5.x86_64.rpm \
|
https://repo.almalinux.org/almalinux/9/AppStream/x86_64/os/Packages/golang-1.23.9-1.el9_6.x86_64.rpm \
|
||||||
https://repo.almalinux.org/almalinux/9/AppStream/x86_64/os/Packages/golang-bin-1.22.7-2.el9_5.x86_64.rpm \
|
https://repo.almalinux.org/almalinux/9/AppStream/x86_64/os/Packages/golang-bin-1.23.9-1.el9_6.x86_64.rpm \
|
||||||
https://repo.almalinux.org/almalinux/9/AppStream/x86_64/os/Packages/golang-src-1.22.7-2.el9_5.noarch.rpm \
|
https://repo.almalinux.org/almalinux/9/AppStream/x86_64/os/Packages/golang-src-1.23.9-1.el9_6.noarch.rpm \
|
||||||
https://repo.almalinux.org/almalinux/9/AppStream/x86_64/os/Packages/golang-race-1.22.7-2.el9_5.x86_64.rpm
|
https://repo.almalinux.org/almalinux/9/AppStream/x86_64/os/Packages/golang-race-1.23.9-1.el9_6.x86_64.rpm
|
||||||
|
|
||||||
- name: RPM build MetricCollector
|
- name: RPM build MetricCollector
|
||||||
id: rpmbuild
|
id: rpmbuild
|
||||||
|
36
.github/workflows/runonce.yml
vendored
36
.github/workflows/runonce.yml
vendored
@@ -71,10 +71,10 @@ jobs:
|
|||||||
- name: Setup Golang
|
- name: Setup Golang
|
||||||
run: |
|
run: |
|
||||||
dnf --assumeyes --disableplugin=subscription-manager install \
|
dnf --assumeyes --disableplugin=subscription-manager install \
|
||||||
https://repo.almalinux.org/almalinux/8/AppStream/x86_64/os/Packages/go-toolset-1.22.9-1.module_el8.10.0+3938+8c723e16.x86_64.rpm \
|
https://repo.almalinux.org/almalinux/8/AppStream/x86_64/os/Packages/go-toolset-1.23.9-1.module_el8.10.0+4000+1ad1b2cc.x86_64.rpm \
|
||||||
https://repo.almalinux.org/almalinux/8/AppStream/x86_64/os/Packages/golang-1.22.9-1.module_el8.10.0+3938+8c723e16.x86_64.rpm \
|
https://repo.almalinux.org/almalinux/8/AppStream/x86_64/os/Packages/golang-1.23.9-1.module_el8.10.0+4000+1ad1b2cc.x86_64.rpm \
|
||||||
https://repo.almalinux.org/almalinux/8/AppStream/x86_64/os/Packages/golang-bin-1.22.9-1.module_el8.10.0+3938+8c723e16.x86_64.rpm \
|
https://repo.almalinux.org/almalinux/8/AppStream/x86_64/os/Packages/golang-bin-1.23.9-1.module_el8.10.0+4000+1ad1b2cc.x86_64.rpm \
|
||||||
https://repo.almalinux.org/almalinux/8/AppStream/x86_64/os/Packages/golang-src-1.22.9-1.module_el8.10.0+3938+8c723e16.noarch.rpm
|
https://repo.almalinux.org/almalinux/8/AppStream/x86_64/os/Packages/golang-src-1.23.9-1.module_el8.10.0+4000+1ad1b2cc.noarch.rpm
|
||||||
|
|
||||||
- name: RPM build MetricCollector
|
- name: RPM build MetricCollector
|
||||||
id: rpmbuild
|
id: rpmbuild
|
||||||
@@ -116,11 +116,11 @@ jobs:
|
|||||||
- name: Setup Golang
|
- name: Setup Golang
|
||||||
run: |
|
run: |
|
||||||
dnf --assumeyes --disableplugin=subscription-manager install \
|
dnf --assumeyes --disableplugin=subscription-manager install \
|
||||||
https://repo.almalinux.org/almalinux/9/AppStream/x86_64/os/Packages/go-toolset-1.22.7-2.el9_5.x86_64.rpm \
|
https://repo.almalinux.org/almalinux/9/AppStream/x86_64/os/Packages/go-toolset-1.23.9-1.el9_6.x86_64.rpm \
|
||||||
https://repo.almalinux.org/almalinux/9/AppStream/x86_64/os/Packages/golang-1.22.7-2.el9_5.x86_64.rpm \
|
https://repo.almalinux.org/almalinux/9/AppStream/x86_64/os/Packages/golang-1.23.9-1.el9_6.x86_64.rpm \
|
||||||
https://repo.almalinux.org/almalinux/9/AppStream/x86_64/os/Packages/golang-bin-1.22.7-2.el9_5.x86_64.rpm \
|
https://repo.almalinux.org/almalinux/9/AppStream/x86_64/os/Packages/golang-bin-1.23.9-1.el9_6.x86_64.rpm \
|
||||||
https://repo.almalinux.org/almalinux/9/AppStream/x86_64/os/Packages/golang-src-1.22.7-2.el9_5.noarch.rpm \
|
https://repo.almalinux.org/almalinux/9/AppStream/x86_64/os/Packages/golang-src-1.23.9-1.el9_6.noarch.rpm \
|
||||||
https://repo.almalinux.org/almalinux/9/AppStream/x86_64/os/Packages/golang-race-1.22.7-2.el9_5.x86_64.rpm
|
https://repo.almalinux.org/almalinux/9/AppStream/x86_64/os/Packages/golang-race-1.23.9-1.el9_6.x86_64.rpm
|
||||||
|
|
||||||
- name: RPM build MetricCollector
|
- name: RPM build MetricCollector
|
||||||
id: rpmbuild
|
id: rpmbuild
|
||||||
@@ -160,10 +160,10 @@ jobs:
|
|||||||
- name: Setup Golang
|
- name: Setup Golang
|
||||||
run: |
|
run: |
|
||||||
dnf --assumeyes --disableplugin=subscription-manager install \
|
dnf --assumeyes --disableplugin=subscription-manager install \
|
||||||
https://repo.almalinux.org/almalinux/8/AppStream/x86_64/os/Packages/go-toolset-1.22.9-1.module_el8.10.0+3938+8c723e16.x86_64.rpm \
|
https://repo.almalinux.org/almalinux/8/AppStream/x86_64/os/Packages/go-toolset-1.23.9-1.module_el8.10.0+4000+1ad1b2cc.x86_64.rpm \
|
||||||
https://repo.almalinux.org/almalinux/8/AppStream/x86_64/os/Packages/golang-1.22.9-1.module_el8.10.0+3938+8c723e16.x86_64.rpm \
|
https://repo.almalinux.org/almalinux/8/AppStream/x86_64/os/Packages/golang-1.23.9-1.module_el8.10.0+4000+1ad1b2cc.x86_64.rpm \
|
||||||
https://repo.almalinux.org/almalinux/8/AppStream/x86_64/os/Packages/golang-bin-1.22.9-1.module_el8.10.0+3938+8c723e16.x86_64.rpm \
|
https://repo.almalinux.org/almalinux/8/AppStream/x86_64/os/Packages/golang-bin-1.23.9-1.module_el8.10.0+4000+1ad1b2cc.x86_64.rpm \
|
||||||
https://repo.almalinux.org/almalinux/8/AppStream/x86_64/os/Packages/golang-src-1.22.9-1.module_el8.10.0+3938+8c723e16.noarch.rpm
|
https://repo.almalinux.org/almalinux/8/AppStream/x86_64/os/Packages/golang-src-1.23.9-1.module_el8.10.0+4000+1ad1b2cc.noarch.rpm
|
||||||
|
|
||||||
- name: RPM build MetricCollector
|
- name: RPM build MetricCollector
|
||||||
id: rpmbuild
|
id: rpmbuild
|
||||||
@@ -202,11 +202,11 @@ jobs:
|
|||||||
- name: Setup Golang
|
- name: Setup Golang
|
||||||
run: |
|
run: |
|
||||||
dnf --assumeyes --disableplugin=subscription-manager install \
|
dnf --assumeyes --disableplugin=subscription-manager install \
|
||||||
https://repo.almalinux.org/almalinux/9/AppStream/x86_64/os/Packages/go-toolset-1.22.7-2.el9_5.x86_64.rpm \
|
https://repo.almalinux.org/almalinux/9/AppStream/x86_64/os/Packages/go-toolset-1.23.9-1.el9_6.x86_64.rpm \
|
||||||
https://repo.almalinux.org/almalinux/9/AppStream/x86_64/os/Packages/golang-1.22.7-2.el9_5.x86_64.rpm \
|
https://repo.almalinux.org/almalinux/9/AppStream/x86_64/os/Packages/golang-1.23.9-1.el9_6.x86_64.rpm \
|
||||||
https://repo.almalinux.org/almalinux/9/AppStream/x86_64/os/Packages/golang-bin-1.22.7-2.el9_5.x86_64.rpm \
|
https://repo.almalinux.org/almalinux/9/AppStream/x86_64/os/Packages/golang-bin-1.23.9-1.el9_6.x86_64.rpm \
|
||||||
https://repo.almalinux.org/almalinux/9/AppStream/x86_64/os/Packages/golang-src-1.22.7-2.el9_5.noarch.rpm \
|
https://repo.almalinux.org/almalinux/9/AppStream/x86_64/os/Packages/golang-src-1.23.9-1.el9_6.noarch.rpm \
|
||||||
https://repo.almalinux.org/almalinux/9/AppStream/x86_64/os/Packages/golang-race-1.22.7-2.el9_5.x86_64.rpm
|
https://repo.almalinux.org/almalinux/9/AppStream/x86_64/os/Packages/golang-race-1.23.9-1.el9_6.x86_64.rpm
|
||||||
|
|
||||||
- name: RPM build MetricCollector
|
- name: RPM build MetricCollector
|
||||||
id: rpmbuild
|
id: rpmbuild
|
||||||
|
27
README.md
27
README.md
@@ -1,6 +1,17 @@
|
|||||||
|
<!--
|
||||||
|
---
|
||||||
|
title: cc-metric-collector
|
||||||
|
description: Metric collecting node agent
|
||||||
|
categories: [cc-metric-collector]
|
||||||
|
tags: ['Admin']
|
||||||
|
weight: 2
|
||||||
|
hugo_path: docs/reference/cc-metric-collector/_index.md
|
||||||
|
---
|
||||||
|
-->
|
||||||
|
|
||||||
# cc-metric-collector
|
# cc-metric-collector
|
||||||
|
|
||||||
A node agent for measuring, processing and forwarding node level metrics. It is part of the [ClusterCockpit ecosystem](./docs/introduction.md).
|
A node agent for measuring, processing and forwarding node level metrics. It is part of the [ClusterCockpit ecosystem](https://clustercockpit.org/docs/overview/).
|
||||||
|
|
||||||
The metric collector sends (and receives) metrics in the [InfluxDB line protocol](https://docs.influxdata.com/influxdb/cloud/reference/syntax/line-protocol/) as it provides flexibility while providing a separation between tags (like index columns in relational databases) and fields (like data columns).
|
The metric collector sends (and receives) metrics in the [InfluxDB line protocol](https://docs.influxdata.com/influxdb/cloud/reference/syntax/line-protocol/) as it provides flexibility while providing a separation between tags (like index columns in relational databases) and fields (like data columns).
|
||||||
|
|
||||||
@@ -21,12 +32,14 @@ There is a main configuration file with basic settings that point to the other c
|
|||||||
|
|
||||||
``` json
|
``` json
|
||||||
{
|
{
|
||||||
"sinks": "sinks.json",
|
"sinks-file": "sinks.json",
|
||||||
"collectors" : "collectors.json",
|
"collectors-file" : "collectors.json",
|
||||||
"receivers" : "receivers.json",
|
"receivers-file" : "receivers.json",
|
||||||
"router" : "router.json",
|
"router-file" : "router.json",
|
||||||
|
"main": {
|
||||||
"interval": "10s",
|
"interval": "10s",
|
||||||
"duration": "1s"
|
"duration": "1s"
|
||||||
|
}
|
||||||
}
|
}
|
||||||
```
|
```
|
||||||
|
|
||||||
@@ -35,8 +48,8 @@ The `interval` defines how often the metrics should be read and send to the sink
|
|||||||
See the component READMEs for their configuration:
|
See the component READMEs for their configuration:
|
||||||
|
|
||||||
* [`collectors`](./collectors/README.md)
|
* [`collectors`](./collectors/README.md)
|
||||||
* [`sinks`](./sinks/README.md)
|
* [`sinks`](https://github.com/ClusterCockpit/cc-lib/blob/main/sinks/README.md)
|
||||||
* [`receivers`](./receivers/README.md)
|
* [`receivers`](https://github.com/ClusterCockpit/cc-lib/blob/main/receivers/README.md)
|
||||||
* [`router`](./internal/metricRouter/README.md)
|
* [`router`](./internal/metricRouter/README.md)
|
||||||
|
|
||||||
# Installation
|
# Installation
|
||||||
|
@@ -1,3 +1,10 @@
|
|||||||
|
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||||
|
// All rights reserved. This file is part of cc-lib.
|
||||||
|
// Use of this source code is governed by a MIT-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
// additional authors:
|
||||||
|
// Holger Obermaier (NHR@KIT)
|
||||||
|
|
||||||
package main
|
package main
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
@@ -1,3 +1,14 @@
|
|||||||
|
<!--
|
||||||
|
---
|
||||||
|
title: Metric Collectors
|
||||||
|
description: Metric collectors for cc-metric-collector
|
||||||
|
categories: [cc-metric-collector]
|
||||||
|
tags: ['Admin']
|
||||||
|
weight: 2
|
||||||
|
hugo_path: docs/reference/cc-metric-collector/collectors/_index.md
|
||||||
|
---
|
||||||
|
-->
|
||||||
|
|
||||||
# CCMetric collectors
|
# CCMetric collectors
|
||||||
|
|
||||||
This folder contains the collectors for the cc-metric-collector.
|
This folder contains the collectors for the cc-metric-collector.
|
||||||
@@ -23,7 +34,6 @@ In contrast to the configuration files for sinks and receivers, the collectors c
|
|||||||
* [`loadavg`](./loadavgMetric.md)
|
* [`loadavg`](./loadavgMetric.md)
|
||||||
* [`netstat`](./netstatMetric.md)
|
* [`netstat`](./netstatMetric.md)
|
||||||
* [`ibstat`](./infinibandMetric.md)
|
* [`ibstat`](./infinibandMetric.md)
|
||||||
* [`ibstat_perfquery`](./infinibandPerfQueryMetric.md)
|
|
||||||
* [`tempstat`](./tempMetric.md)
|
* [`tempstat`](./tempMetric.md)
|
||||||
* [`lustrestat`](./lustreMetric.md)
|
* [`lustrestat`](./lustreMetric.md)
|
||||||
* [`likwid`](./likwidMetric.md)
|
* [`likwid`](./likwidMetric.md)
|
||||||
@@ -42,6 +52,7 @@ In contrast to the configuration files for sinks and receivers, the collectors c
|
|||||||
* [`beegfs_meta`](./beegfsmetaMetric.md)
|
* [`beegfs_meta`](./beegfsmetaMetric.md)
|
||||||
* [`beegfs_storage`](./beegfsstorageMetric.md)
|
* [`beegfs_storage`](./beegfsstorageMetric.md)
|
||||||
* [`rocm_smi`](./rocmsmiMetric.md)
|
* [`rocm_smi`](./rocmsmiMetric.md)
|
||||||
|
* [`slurm_cgroup`](./slurmCgroupMetric.md)
|
||||||
|
|
||||||
## Todos
|
## Todos
|
||||||
|
|
||||||
@@ -53,7 +64,7 @@ A collector reads data from any source, parses it to metrics and submits these m
|
|||||||
* `Name() string`: Return the name of the collector
|
* `Name() string`: Return the name of the collector
|
||||||
* `Init(config json.RawMessage) error`: Initializes the collector using the given collector-specific config in JSON. Checks if needed files/commands exist, ...
|
* `Init(config json.RawMessage) error`: Initializes the collector using the given collector-specific config in JSON. Checks if needed files/commands exist, ...
|
||||||
* `Initialized() bool`: Check if a collector is successfully initialized
|
* `Initialized() bool`: Check if a collector is successfully initialized
|
||||||
* `Read(duration time.Duration, output chan ccMetric.CCMetric)`: Read, parse and submit data to the `output` channel as [`CCMetric`](../internal/ccMetric/README.md). If the collector has to measure anything for some duration, use the provided function argument `duration`.
|
* `Read(duration time.Duration, output chan ccMessage.CCMessage)`: Read, parse and submit data to the `output` channel as [`CCMessage`](https://github.com/ClusterCockpit/cc-lib/blob/main/ccMessage/README.md). If the collector has to measure anything for some duration, use the provided function argument `duration`.
|
||||||
* `Close()`: Closes down the collector.
|
* `Close()`: Closes down the collector.
|
||||||
|
|
||||||
It is recommended to call `setup()` in the `Init()` function.
|
It is recommended to call `setup()` in the `Init()` function.
|
||||||
|
@@ -1,3 +1,10 @@
|
|||||||
|
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||||
|
// All rights reserved. This file is part of cc-lib.
|
||||||
|
// Use of this source code is governed by a MIT-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
// additional authors:
|
||||||
|
// Holger Obermaier (NHR@KIT)
|
||||||
|
|
||||||
package collectors
|
package collectors
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
@@ -1,5 +1,17 @@
|
|||||||
|
<!--
|
||||||
|
---
|
||||||
|
title: BeeGFS metadata metric collector
|
||||||
|
description: Collect metadata clientstats for `BeeGFS on Demand`
|
||||||
|
categories: [cc-metric-collector]
|
||||||
|
tags: ['Admin']
|
||||||
|
weight: 2
|
||||||
|
hugo_path: docs/reference/cc-metric-collector/collectors/beegfsmeta.md
|
||||||
|
---
|
||||||
|
-->
|
||||||
|
|
||||||
|
|
||||||
## `BeeGFS on Demand` collector
|
## `BeeGFS on Demand` collector
|
||||||
This Collector is to collect BeeGFS on Demand (BeeOND) metadata clientstats.
|
This Collector is to collect `BeeGFS on Demand` (BeeOND) metadata clientstats.
|
||||||
|
|
||||||
```json
|
```json
|
||||||
"beegfs_meta": {
|
"beegfs_meta": {
|
||||||
|
@@ -1,3 +1,10 @@
|
|||||||
|
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||||
|
// All rights reserved. This file is part of cc-lib.
|
||||||
|
// Use of this source code is governed by a MIT-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
// additional authors:
|
||||||
|
// Holger Obermaier (NHR@KIT)
|
||||||
|
|
||||||
package collectors
|
package collectors
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
@@ -1,3 +1,14 @@
|
|||||||
|
<!--
|
||||||
|
---
|
||||||
|
title: "BeeGFS on Demand metric collector"
|
||||||
|
description: Collect performance metrics for BeeGFS filesystems
|
||||||
|
categories: [cc-metric-collector]
|
||||||
|
tags: ['Admin']
|
||||||
|
weight: 2
|
||||||
|
hugo_path: docs/reference/cc-metric-collector/collectors/beegfsstorage.md
|
||||||
|
---
|
||||||
|
-->
|
||||||
|
|
||||||
## `BeeGFS on Demand` collector
|
## `BeeGFS on Demand` collector
|
||||||
This Collector is to collect BeeGFS on Demand (BeeOND) storage stats.
|
This Collector is to collect BeeGFS on Demand (BeeOND) storage stats.
|
||||||
|
|
||||||
|
@@ -1,3 +1,10 @@
|
|||||||
|
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||||
|
// All rights reserved. This file is part of cc-lib.
|
||||||
|
// Use of this source code is governed by a MIT-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
// additional authors:
|
||||||
|
// Holger Obermaier (NHR@KIT)
|
||||||
|
|
||||||
package collectors
|
package collectors
|
||||||
|
|
||||||
import (
|
import (
|
||||||
@@ -40,6 +47,7 @@ var AvailableCollectors = map[string]MetricCollector{
|
|||||||
"self": new(SelfCollector),
|
"self": new(SelfCollector),
|
||||||
"schedstat": new(SchedstatCollector),
|
"schedstat": new(SchedstatCollector),
|
||||||
"nfsiostat": new(NfsIOStatCollector),
|
"nfsiostat": new(NfsIOStatCollector),
|
||||||
|
"slurm_cgroup": new(SlurmCgroupCollector),
|
||||||
}
|
}
|
||||||
|
|
||||||
// Metric collector manager data structure
|
// Metric collector manager data structure
|
||||||
|
@@ -1,3 +1,10 @@
|
|||||||
|
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||||
|
// All rights reserved. This file is part of cc-lib.
|
||||||
|
// Use of this source code is governed by a MIT-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
// additional authors:
|
||||||
|
// Holger Obermaier (NHR@KIT)
|
||||||
|
|
||||||
package collectors
|
package collectors
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
@@ -1,3 +1,14 @@
|
|||||||
|
<!--
|
||||||
|
---
|
||||||
|
title: CPU frequency metric collector through cpuinfo
|
||||||
|
description: Collect the CPU frequency from `/proc/cpuinfo`
|
||||||
|
categories: [cc-metric-collector]
|
||||||
|
tags: ['Admin']
|
||||||
|
weight: 2
|
||||||
|
hugo_path: docs/reference/cc-metric-collector/collectors/cpufreq_cpuinfo.md
|
||||||
|
---
|
||||||
|
-->
|
||||||
|
|
||||||
## `cpufreq_cpuinfo` collector
|
## `cpufreq_cpuinfo` collector
|
||||||
|
|
||||||
```json
|
```json
|
||||||
|
@@ -1,3 +1,10 @@
|
|||||||
|
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||||
|
// All rights reserved. This file is part of cc-lib.
|
||||||
|
// Use of this source code is governed by a MIT-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
// additional authors:
|
||||||
|
// Holger Obermaier (NHR@KIT)
|
||||||
|
|
||||||
package collectors
|
package collectors
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
@@ -1,3 +1,14 @@
|
|||||||
|
<!--
|
||||||
|
---
|
||||||
|
title: CPU frequency metric collector through sysfs
|
||||||
|
description: Collect the CPU frequency metrics from `/sys/.../cpu/.../cpufreq`
|
||||||
|
categories: [cc-metric-collector]
|
||||||
|
tags: ['Admin']
|
||||||
|
weight: 2
|
||||||
|
hugo_path: docs/reference/cc-metric-collector/collectors/cpufreq.md
|
||||||
|
---
|
||||||
|
-->
|
||||||
|
|
||||||
## `cpufreq` collector
|
## `cpufreq` collector
|
||||||
|
|
||||||
```json
|
```json
|
||||||
|
@@ -1,3 +1,10 @@
|
|||||||
|
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||||
|
// All rights reserved. This file is part of cc-lib.
|
||||||
|
// Use of this source code is governed by a MIT-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
// additional authors:
|
||||||
|
// Holger Obermaier (NHR@KIT)
|
||||||
|
|
||||||
package collectors
|
package collectors
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
@@ -1,3 +1,14 @@
|
|||||||
|
<!--
|
||||||
|
---
|
||||||
|
title: CPU usage metric collector
|
||||||
|
description: Collect CPU metrics from `/proc/stat`
|
||||||
|
categories: [cc-metric-collector]
|
||||||
|
tags: ['Admin']
|
||||||
|
weight: 2
|
||||||
|
hugo_path: docs/reference/cc-metric-collector/collectors/cpustat.md
|
||||||
|
---
|
||||||
|
-->
|
||||||
|
|
||||||
|
|
||||||
## `cpustat` collector
|
## `cpustat` collector
|
||||||
|
|
||||||
|
@@ -1,3 +1,10 @@
|
|||||||
|
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||||
|
// All rights reserved. This file is part of cc-lib.
|
||||||
|
// Use of this source code is governed by a MIT-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
// additional authors:
|
||||||
|
// Holger Obermaier (NHR@KIT)
|
||||||
|
|
||||||
package collectors
|
package collectors
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
@@ -1,3 +1,13 @@
|
|||||||
|
<!--
|
||||||
|
---
|
||||||
|
title: CustomCommand metric collector
|
||||||
|
description: Collect messages from custom command or files
|
||||||
|
categories: [cc-metric-collector]
|
||||||
|
tags: ['Admin']
|
||||||
|
weight: 2
|
||||||
|
hugo_path: docs/reference/cc-metric-collector/collectors/customcmd.md
|
||||||
|
---
|
||||||
|
-->
|
||||||
|
|
||||||
## `customcmd` collector
|
## `customcmd` collector
|
||||||
|
|
||||||
|
@@ -1,3 +1,10 @@
|
|||||||
|
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||||
|
// All rights reserved. This file is part of cc-lib.
|
||||||
|
// Use of this source code is governed by a MIT-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
// additional authors:
|
||||||
|
// Holger Obermaier (NHR@KIT)
|
||||||
|
|
||||||
package collectors
|
package collectors
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
@@ -1,3 +1,13 @@
|
|||||||
|
<!--
|
||||||
|
---
|
||||||
|
title: Disk usage statistics metric collector
|
||||||
|
description: Collect metrics for various filesystems from `/proc/self/mounts`
|
||||||
|
categories: [cc-metric-collector]
|
||||||
|
tags: ['Admin']
|
||||||
|
weight: 2
|
||||||
|
hugo_path: docs/reference/cc-metric-collector/collectors/diskstat.md
|
||||||
|
---
|
||||||
|
-->
|
||||||
|
|
||||||
## `diskstat` collector
|
## `diskstat` collector
|
||||||
|
|
||||||
|
@@ -1,9 +1,17 @@
|
|||||||
|
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||||
|
// All rights reserved. This file is part of cc-lib.
|
||||||
|
// Use of this source code is governed by a MIT-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
// additional authors:
|
||||||
|
// Holger Obermaier (NHR@KIT)
|
||||||
|
|
||||||
package collectors
|
package collectors
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"bufio"
|
"bufio"
|
||||||
"bytes"
|
"bytes"
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
|
"errors"
|
||||||
"fmt"
|
"fmt"
|
||||||
"io"
|
"io"
|
||||||
"log"
|
"log"
|
||||||
@@ -11,6 +19,7 @@ import (
|
|||||||
"os/user"
|
"os/user"
|
||||||
"strconv"
|
"strconv"
|
||||||
"strings"
|
"strings"
|
||||||
|
"syscall"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
|
cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
|
||||||
@@ -20,8 +29,17 @@ import (
|
|||||||
const DEFAULT_GPFS_CMD = "mmpmon"
|
const DEFAULT_GPFS_CMD = "mmpmon"
|
||||||
|
|
||||||
type GpfsCollectorLastState struct {
|
type GpfsCollectorLastState struct {
|
||||||
|
numOpens int64
|
||||||
|
numCloses int64
|
||||||
|
numReads int64
|
||||||
|
numWrites int64
|
||||||
|
numReaddirs int64
|
||||||
|
numInodeUpdates int64
|
||||||
bytesRead int64
|
bytesRead int64
|
||||||
bytesWritten int64
|
bytesWritten int64
|
||||||
|
bytesTotal int64
|
||||||
|
iops int64
|
||||||
|
metaops int64
|
||||||
}
|
}
|
||||||
|
|
||||||
type GpfsCollector struct {
|
type GpfsCollector struct {
|
||||||
@@ -30,9 +48,12 @@ type GpfsCollector struct {
|
|||||||
config struct {
|
config struct {
|
||||||
Mmpmon string `json:"mmpmon_path,omitempty"`
|
Mmpmon string `json:"mmpmon_path,omitempty"`
|
||||||
ExcludeFilesystem []string `json:"exclude_filesystem,omitempty"`
|
ExcludeFilesystem []string `json:"exclude_filesystem,omitempty"`
|
||||||
|
Sudo bool `json:"use_sudo,omitempty"`
|
||||||
SendBandwidths bool `json:"send_bandwidths"`
|
SendBandwidths bool `json:"send_bandwidths"`
|
||||||
SendTotalValues bool `json:"send_total_values"`
|
SendTotalValues bool `json:"send_total_values"`
|
||||||
|
SendDerivedValues bool `json:"send_derived_values"`
|
||||||
}
|
}
|
||||||
|
sudoCmd string
|
||||||
skipFS map[string]struct{}
|
skipFS map[string]struct{}
|
||||||
lastTimestamp time.Time // Store time stamp of last tick to derive bandwidths
|
lastTimestamp time.Time // Store time stamp of last tick to derive bandwidths
|
||||||
lastState map[string]GpfsCollectorLastState
|
lastState map[string]GpfsCollectorLastState
|
||||||
@@ -75,19 +96,44 @@ func (m *GpfsCollector) Init(config json.RawMessage) error {
|
|||||||
m.lastState = make(map[string]GpfsCollectorLastState)
|
m.lastState = make(map[string]GpfsCollectorLastState)
|
||||||
|
|
||||||
// GPFS / IBM Spectrum Scale file system statistics can only be queried by user root
|
// GPFS / IBM Spectrum Scale file system statistics can only be queried by user root
|
||||||
|
if !m.config.Sudo {
|
||||||
user, err := user.Current()
|
user, err := user.Current()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("failed to get current user: %v", err)
|
cclog.ComponentError(m.name, "Failed to get current user:", err.Error())
|
||||||
|
return err
|
||||||
}
|
}
|
||||||
if user.Uid != "0" {
|
if user.Uid != "0" {
|
||||||
return fmt.Errorf("GPFS file system statistics can only be queried by user root")
|
cclog.ComponentError(m.name, "GPFS file system statistics can only be queried by user root")
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
p, err := exec.LookPath("sudo")
|
||||||
|
if err != nil {
|
||||||
|
cclog.ComponentError(m.name, "Cannot find 'sudo'")
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
m.sudoCmd = p
|
||||||
|
}
|
||||||
|
|
||||||
|
// when using sudo, the full path of mmpmon must be specified because
|
||||||
|
// exec.LookPath will not work as mmpmon is not executable as user
|
||||||
|
if m.config.Sudo && !strings.HasPrefix(m.config.Mmpmon, "/") {
|
||||||
|
return fmt.Errorf("when using sudo, mmpmon_path must be provided and an absolute path: %s", m.config.Mmpmon)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Check if mmpmon is in executable search path
|
// Check if mmpmon is in executable search path
|
||||||
p, err := exec.LookPath(m.config.Mmpmon)
|
p, err := exec.LookPath(m.config.Mmpmon)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
// when using sudo, exec.LookPath returns EACCES (file mode r-x------); this can be ignored
|
||||||
|
if m.config.Sudo && errors.Is(err, syscall.EACCES) {
|
||||||
|
cclog.ComponentWarn(m.name, fmt.Sprintf("got error looking for mmpmon binary '%s': %v . This is expected when using sudo, continuing.", m.config.Mmpmon, err))
|
||||||
|
// the file was given in the config, use it
|
||||||
|
p = m.config.Mmpmon
|
||||||
|
} else {
|
||||||
|
cclog.ComponentError(m.name, fmt.Sprintf("failed to find mmpmon binary '%s': %v", m.config.Mmpmon, err))
|
||||||
return fmt.Errorf("failed to find mmpmon binary '%s': %v", m.config.Mmpmon, err)
|
return fmt.Errorf("failed to find mmpmon binary '%s': %v", m.config.Mmpmon, err)
|
||||||
}
|
}
|
||||||
|
}
|
||||||
m.config.Mmpmon = p
|
m.config.Mmpmon = p
|
||||||
|
|
||||||
m.init = true
|
m.init = true
|
||||||
@@ -111,7 +157,13 @@ func (m *GpfsCollector) Read(interval time.Duration, output chan lp.CCMessage) {
|
|||||||
// -p: generate output that can be parsed
|
// -p: generate output that can be parsed
|
||||||
// -s: suppress the prompt on input
|
// -s: suppress the prompt on input
|
||||||
// fs_io_s: Displays I/O statistics per mounted file system
|
// fs_io_s: Displays I/O statistics per mounted file system
|
||||||
cmd := exec.Command(m.config.Mmpmon, "-p", "-s")
|
var cmd *exec.Cmd
|
||||||
|
if m.config.Sudo {
|
||||||
|
cmd = exec.Command(m.sudoCmd, m.config.Mmpmon, "-p", "-s")
|
||||||
|
} else {
|
||||||
|
cmd = exec.Command(m.config.Mmpmon, "-p", "-s")
|
||||||
|
}
|
||||||
|
|
||||||
cmd.Stdin = strings.NewReader("once fs_io_s\n")
|
cmd.Stdin = strings.NewReader("once fs_io_s\n")
|
||||||
cmdStdout := new(bytes.Buffer)
|
cmdStdout := new(bytes.Buffer)
|
||||||
cmdStderr := new(bytes.Buffer)
|
cmdStderr := new(bytes.Buffer)
|
||||||
@@ -178,6 +230,22 @@ func (m *GpfsCollector) Read(interval time.Duration, output chan lp.CCMessage) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if m.config.SendDerivedValues {
|
||||||
|
if _, ok := m.lastState[filesystem]; !ok {
|
||||||
|
m.lastState[filesystem] = GpfsCollectorLastState{
|
||||||
|
numReads: -1,
|
||||||
|
numWrites: -1,
|
||||||
|
numOpens: -1,
|
||||||
|
numCloses: -1,
|
||||||
|
numReaddirs: -1,
|
||||||
|
numInodeUpdates: -1,
|
||||||
|
bytesTotal: -1,
|
||||||
|
iops: -1,
|
||||||
|
metaops: -1,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// return code
|
// return code
|
||||||
rc, err := strconv.Atoi(key_value["_rc_"])
|
rc, err := strconv.Atoi(key_value["_rc_"])
|
||||||
if err != nil {
|
if err != nil {
|
||||||
@@ -271,7 +339,7 @@ func (m *GpfsCollector) Read(interval time.Duration, output chan lp.CCMessage) {
|
|||||||
output <- y
|
output <- y
|
||||||
}
|
}
|
||||||
if m.config.SendBandwidths {
|
if m.config.SendBandwidths {
|
||||||
if lastBytesWritten := m.lastState[filesystem].bytesRead; lastBytesWritten >= 0 {
|
if lastBytesWritten := m.lastState[filesystem].bytesWritten; lastBytesWritten >= 0 {
|
||||||
bwWrite := float64(bytesWritten-lastBytesWritten) / timeDiff
|
bwWrite := float64(bytesWritten-lastBytesWritten) / timeDiff
|
||||||
if y, err :=
|
if y, err :=
|
||||||
lp.NewMessage(
|
lp.NewMessage(
|
||||||
@@ -289,13 +357,6 @@ func (m *GpfsCollector) Read(interval time.Duration, output chan lp.CCMessage) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if m.config.SendBandwidths {
|
|
||||||
m.lastState[filesystem] = GpfsCollectorLastState{
|
|
||||||
bytesRead: bytesRead,
|
|
||||||
bytesWritten: bytesWritten,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// number of opens
|
// number of opens
|
||||||
numOpens, err := strconv.ParseInt(key_value["_oc_"], 10, 64)
|
numOpens, err := strconv.ParseInt(key_value["_oc_"], 10, 64)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
@@ -307,6 +368,24 @@ func (m *GpfsCollector) Read(interval time.Duration, output chan lp.CCMessage) {
|
|||||||
if y, err := lp.NewMessage("gpfs_num_opens", m.tags, m.meta, map[string]interface{}{"value": numOpens}, timestamp); err == nil {
|
if y, err := lp.NewMessage("gpfs_num_opens", m.tags, m.meta, map[string]interface{}{"value": numOpens}, timestamp); err == nil {
|
||||||
output <- y
|
output <- y
|
||||||
}
|
}
|
||||||
|
if m.config.SendDerivedValues {
|
||||||
|
if lastNumOpens := m.lastState[filesystem].numOpens; lastNumOpens >= 0 {
|
||||||
|
opensRate := float64(numOpens-lastNumOpens) / timeDiff
|
||||||
|
if y, err :=
|
||||||
|
lp.NewMessage(
|
||||||
|
"gpfs_opens_rate",
|
||||||
|
m.tags,
|
||||||
|
m.meta,
|
||||||
|
map[string]interface{}{
|
||||||
|
"value": opensRate,
|
||||||
|
},
|
||||||
|
timestamp,
|
||||||
|
); err == nil {
|
||||||
|
y.AddMeta("unit", "requests/sec")
|
||||||
|
output <- y
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// number of closes
|
// number of closes
|
||||||
numCloses, err := strconv.ParseInt(key_value["_cc_"], 10, 64)
|
numCloses, err := strconv.ParseInt(key_value["_cc_"], 10, 64)
|
||||||
@@ -319,6 +398,24 @@ func (m *GpfsCollector) Read(interval time.Duration, output chan lp.CCMessage) {
|
|||||||
if y, err := lp.NewMessage("gpfs_num_closes", m.tags, m.meta, map[string]interface{}{"value": numCloses}, timestamp); err == nil {
|
if y, err := lp.NewMessage("gpfs_num_closes", m.tags, m.meta, map[string]interface{}{"value": numCloses}, timestamp); err == nil {
|
||||||
output <- y
|
output <- y
|
||||||
}
|
}
|
||||||
|
if m.config.SendDerivedValues {
|
||||||
|
if lastNumCloses := m.lastState[filesystem].numCloses; lastNumCloses >= 0 {
|
||||||
|
closesRate := float64(numCloses-lastNumCloses) / timeDiff
|
||||||
|
if y, err :=
|
||||||
|
lp.NewMessage(
|
||||||
|
"gpfs_closes_rate",
|
||||||
|
m.tags,
|
||||||
|
m.meta,
|
||||||
|
map[string]interface{}{
|
||||||
|
"value": closesRate,
|
||||||
|
},
|
||||||
|
timestamp,
|
||||||
|
); err == nil {
|
||||||
|
y.AddMeta("unit", "requests/sec")
|
||||||
|
output <- y
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// number of reads
|
// number of reads
|
||||||
numReads, err := strconv.ParseInt(key_value["_rdc_"], 10, 64)
|
numReads, err := strconv.ParseInt(key_value["_rdc_"], 10, 64)
|
||||||
@@ -331,6 +428,24 @@ func (m *GpfsCollector) Read(interval time.Duration, output chan lp.CCMessage) {
|
|||||||
if y, err := lp.NewMessage("gpfs_num_reads", m.tags, m.meta, map[string]interface{}{"value": numReads}, timestamp); err == nil {
|
if y, err := lp.NewMessage("gpfs_num_reads", m.tags, m.meta, map[string]interface{}{"value": numReads}, timestamp); err == nil {
|
||||||
output <- y
|
output <- y
|
||||||
}
|
}
|
||||||
|
if m.config.SendDerivedValues {
|
||||||
|
if lastNumReads := m.lastState[filesystem].numReads; lastNumReads >= 0 {
|
||||||
|
readsRate := float64(numReads-lastNumReads) / timeDiff
|
||||||
|
if y, err :=
|
||||||
|
lp.NewMessage(
|
||||||
|
"gpfs_reads_rate",
|
||||||
|
m.tags,
|
||||||
|
m.meta,
|
||||||
|
map[string]interface{}{
|
||||||
|
"value": readsRate,
|
||||||
|
},
|
||||||
|
timestamp,
|
||||||
|
); err == nil {
|
||||||
|
y.AddMeta("unit", "requests/sec")
|
||||||
|
output <- y
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// number of writes
|
// number of writes
|
||||||
numWrites, err := strconv.ParseInt(key_value["_wc_"], 10, 64)
|
numWrites, err := strconv.ParseInt(key_value["_wc_"], 10, 64)
|
||||||
@@ -343,6 +458,24 @@ func (m *GpfsCollector) Read(interval time.Duration, output chan lp.CCMessage) {
|
|||||||
if y, err := lp.NewMessage("gpfs_num_writes", m.tags, m.meta, map[string]interface{}{"value": numWrites}, timestamp); err == nil {
|
if y, err := lp.NewMessage("gpfs_num_writes", m.tags, m.meta, map[string]interface{}{"value": numWrites}, timestamp); err == nil {
|
||||||
output <- y
|
output <- y
|
||||||
}
|
}
|
||||||
|
if m.config.SendDerivedValues {
|
||||||
|
if lastNumWrites := m.lastState[filesystem].numWrites; lastNumWrites >= 0 {
|
||||||
|
writesRate := float64(numWrites-lastNumWrites) / timeDiff
|
||||||
|
if y, err :=
|
||||||
|
lp.NewMessage(
|
||||||
|
"gpfs_writes_rate",
|
||||||
|
m.tags,
|
||||||
|
m.meta,
|
||||||
|
map[string]interface{}{
|
||||||
|
"value": writesRate,
|
||||||
|
},
|
||||||
|
timestamp,
|
||||||
|
); err == nil {
|
||||||
|
y.AddMeta("unit", "requests/sec")
|
||||||
|
output <- y
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// number of read directories
|
// number of read directories
|
||||||
numReaddirs, err := strconv.ParseInt(key_value["_dir_"], 10, 64)
|
numReaddirs, err := strconv.ParseInt(key_value["_dir_"], 10, 64)
|
||||||
@@ -355,6 +488,24 @@ func (m *GpfsCollector) Read(interval time.Duration, output chan lp.CCMessage) {
|
|||||||
if y, err := lp.NewMessage("gpfs_num_readdirs", m.tags, m.meta, map[string]interface{}{"value": numReaddirs}, timestamp); err == nil {
|
if y, err := lp.NewMessage("gpfs_num_readdirs", m.tags, m.meta, map[string]interface{}{"value": numReaddirs}, timestamp); err == nil {
|
||||||
output <- y
|
output <- y
|
||||||
}
|
}
|
||||||
|
if m.config.SendDerivedValues {
|
||||||
|
if lastNumReaddirs := m.lastState[filesystem].numReaddirs; lastNumReaddirs >= 0 {
|
||||||
|
readdirsRate := float64(numReaddirs-lastNumReaddirs) / timeDiff
|
||||||
|
if y, err :=
|
||||||
|
lp.NewMessage(
|
||||||
|
"gpfs_readdirs_rate",
|
||||||
|
m.tags,
|
||||||
|
m.meta,
|
||||||
|
map[string]interface{}{
|
||||||
|
"value": readdirsRate,
|
||||||
|
},
|
||||||
|
timestamp,
|
||||||
|
); err == nil {
|
||||||
|
y.AddMeta("unit", "requests/sec")
|
||||||
|
output <- y
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Number of inode updates
|
// Number of inode updates
|
||||||
numInodeUpdates, err := strconv.ParseInt(key_value["_iu_"], 10, 64)
|
numInodeUpdates, err := strconv.ParseInt(key_value["_iu_"], 10, 64)
|
||||||
@@ -367,10 +518,31 @@ func (m *GpfsCollector) Read(interval time.Duration, output chan lp.CCMessage) {
|
|||||||
if y, err := lp.NewMessage("gpfs_num_inode_updates", m.tags, m.meta, map[string]interface{}{"value": numInodeUpdates}, timestamp); err == nil {
|
if y, err := lp.NewMessage("gpfs_num_inode_updates", m.tags, m.meta, map[string]interface{}{"value": numInodeUpdates}, timestamp); err == nil {
|
||||||
output <- y
|
output <- y
|
||||||
}
|
}
|
||||||
|
if m.config.SendDerivedValues {
|
||||||
|
if lastNumInodeUpdates := m.lastState[filesystem].numInodeUpdates; lastNumInodeUpdates >= 0 {
|
||||||
|
inodeUpdatesRate := float64(numInodeUpdates-lastNumInodeUpdates) / timeDiff
|
||||||
|
if y, err :=
|
||||||
|
lp.NewMessage(
|
||||||
|
"gpfs_inode_updates_rate",
|
||||||
|
m.tags,
|
||||||
|
m.meta,
|
||||||
|
map[string]interface{}{
|
||||||
|
"value": inodeUpdatesRate,
|
||||||
|
},
|
||||||
|
timestamp,
|
||||||
|
); err == nil {
|
||||||
|
y.AddMeta("unit", "requests/sec")
|
||||||
|
output <- y
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Total values
|
// Total values
|
||||||
|
bytesTotal := int64(-1);
|
||||||
|
iops := int64(-1);
|
||||||
|
metaops := int64(-1);
|
||||||
if m.config.SendTotalValues {
|
if m.config.SendTotalValues {
|
||||||
bytesTotal := bytesRead + bytesWritten
|
bytesTotal = bytesRead + bytesWritten
|
||||||
if y, err :=
|
if y, err :=
|
||||||
lp.NewMessage("gpfs_bytes_total",
|
lp.NewMessage("gpfs_bytes_total",
|
||||||
m.tags,
|
m.tags,
|
||||||
@@ -383,7 +555,26 @@ func (m *GpfsCollector) Read(interval time.Duration, output chan lp.CCMessage) {
|
|||||||
y.AddMeta("unit", "bytes")
|
y.AddMeta("unit", "bytes")
|
||||||
output <- y
|
output <- y
|
||||||
}
|
}
|
||||||
iops := numReads + numWrites
|
if m.config.SendBandwidths {
|
||||||
|
if lastBytesTotal := m.lastState[filesystem].bytesTotal; lastBytesTotal >= 0 {
|
||||||
|
bwTotal := float64(bytesTotal-lastBytesTotal) / timeDiff
|
||||||
|
if y, err :=
|
||||||
|
lp.NewMessage(
|
||||||
|
"gpfs_bw_total",
|
||||||
|
m.tags,
|
||||||
|
m.meta,
|
||||||
|
map[string]interface{}{
|
||||||
|
"value": bwTotal,
|
||||||
|
},
|
||||||
|
timestamp,
|
||||||
|
); err == nil {
|
||||||
|
y.AddMeta("unit", "bytes/sec")
|
||||||
|
output <- y
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
iops = numReads + numWrites
|
||||||
if y, err :=
|
if y, err :=
|
||||||
lp.NewMessage("gpfs_iops",
|
lp.NewMessage("gpfs_iops",
|
||||||
m.tags,
|
m.tags,
|
||||||
@@ -395,7 +586,26 @@ func (m *GpfsCollector) Read(interval time.Duration, output chan lp.CCMessage) {
|
|||||||
); err == nil {
|
); err == nil {
|
||||||
output <- y
|
output <- y
|
||||||
}
|
}
|
||||||
metaops := numInodeUpdates + numCloses + numOpens + numReaddirs
|
if m.config.SendDerivedValues {
|
||||||
|
if lastIops := m.lastState[filesystem].iops; lastIops >= 0 {
|
||||||
|
iopsRate := float64(iops-lastIops) / timeDiff
|
||||||
|
if y, err :=
|
||||||
|
lp.NewMessage(
|
||||||
|
"gpfs_iops_rate",
|
||||||
|
m.tags,
|
||||||
|
m.meta,
|
||||||
|
map[string]interface{}{
|
||||||
|
"value": iopsRate,
|
||||||
|
},
|
||||||
|
timestamp,
|
||||||
|
); err == nil {
|
||||||
|
y.AddMeta("unit", "requests/sec")
|
||||||
|
output <- y
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
metaops = numInodeUpdates + numCloses + numOpens + numReaddirs
|
||||||
if y, err :=
|
if y, err :=
|
||||||
lp.NewMessage("gpfs_metaops",
|
lp.NewMessage("gpfs_metaops",
|
||||||
m.tags,
|
m.tags,
|
||||||
@@ -407,8 +617,42 @@ func (m *GpfsCollector) Read(interval time.Duration, output chan lp.CCMessage) {
|
|||||||
); err == nil {
|
); err == nil {
|
||||||
output <- y
|
output <- y
|
||||||
}
|
}
|
||||||
|
if m.config.SendDerivedValues {
|
||||||
|
if lastMetaops := m.lastState[filesystem].metaops; lastMetaops >= 0 {
|
||||||
|
metaopsRate := float64(metaops-lastMetaops) / timeDiff
|
||||||
|
if y, err :=
|
||||||
|
lp.NewMessage(
|
||||||
|
"gpfs_metaops_rate",
|
||||||
|
m.tags,
|
||||||
|
m.meta,
|
||||||
|
map[string]interface{}{
|
||||||
|
"value": metaopsRate,
|
||||||
|
},
|
||||||
|
timestamp,
|
||||||
|
); err == nil {
|
||||||
|
y.AddMeta("unit", "requests/sec")
|
||||||
|
output <- y
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Save last state
|
||||||
|
m.lastState[filesystem] = GpfsCollectorLastState{
|
||||||
|
bytesRead: bytesRead,
|
||||||
|
bytesWritten: bytesWritten,
|
||||||
|
numOpens: numOpens,
|
||||||
|
numCloses: numCloses,
|
||||||
|
numReads: numReads,
|
||||||
|
numWrites: numWrites,
|
||||||
|
numReaddirs: numReaddirs,
|
||||||
|
numInodeUpdates: numInodeUpdates,
|
||||||
|
bytesTotal: bytesTotal,
|
||||||
|
iops: iops,
|
||||||
|
metaops: metaops,
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (m *GpfsCollector) Close() {
|
func (m *GpfsCollector) Close() {
|
||||||
|
@@ -1,13 +1,26 @@
|
|||||||
|
<!--
|
||||||
|
---
|
||||||
|
title: GPFS collector
|
||||||
|
description: Collect information about GPFS filesystems
|
||||||
|
categories: [cc-metric-collector]
|
||||||
|
tags: ['Admin']
|
||||||
|
weight: 2
|
||||||
|
hugo_path: docs/reference/cc-metric-collector/collectors/gpfs.md
|
||||||
|
---
|
||||||
|
-->
|
||||||
|
|
||||||
## `gpfs` collector
|
## `gpfs` collector
|
||||||
|
|
||||||
```json
|
```json
|
||||||
"ibstat": {
|
"gpfs": {
|
||||||
"mmpmon_path": "/path/to/mmpmon",
|
"mmpmon_path": "/path/to/mmpmon",
|
||||||
|
"use_sudo": true,
|
||||||
"exclude_filesystem": [
|
"exclude_filesystem": [
|
||||||
"fs1"
|
"fs1"
|
||||||
],
|
],
|
||||||
"send_bandwidths": true,
|
"send_bandwidths": true,
|
||||||
"send_total_values": true
|
"send_total_values": true,
|
||||||
|
"send_derived_values": true
|
||||||
}
|
}
|
||||||
```
|
```
|
||||||
|
|
||||||
@@ -20,6 +33,11 @@ in the configuration.
|
|||||||
The path to the `mmpmon` command can be configured with the `mmpmon_path` option
|
The path to the `mmpmon` command can be configured with the `mmpmon_path` option
|
||||||
in the configuration. If nothing is set, the collector searches in `$PATH` for `mmpmon`.
|
in the configuration. If nothing is set, the collector searches in `$PATH` for `mmpmon`.
|
||||||
|
|
||||||
|
If cc-metric-collector is run as non-root, `sudo` can be enabled with `use_sudo`.
|
||||||
|
Because `mmpmon` is by default only executable as root, the Go procedure to
|
||||||
|
search for it in `$PATH` will fail. If you use `sudo`, you must specify the
|
||||||
|
absolute path for `mmpmon` using the parameter `mmpmon_path`.
|
||||||
|
|
||||||
|
|
||||||
Metrics:
|
Metrics:
|
||||||
* `gpfs_bytes_read`
|
* `gpfs_bytes_read`
|
||||||
@@ -30,10 +48,19 @@ Metrics:
|
|||||||
* `gpfs_num_writes`
|
* `gpfs_num_writes`
|
||||||
* `gpfs_num_readdirs`
|
* `gpfs_num_readdirs`
|
||||||
* `gpfs_num_inode_updates`
|
* `gpfs_num_inode_updates`
|
||||||
|
* `gpfs_opens_rate` (if `send_derived_values == true`)
|
||||||
|
* `gpfs_closes_rate` (if `send_derived_values == true`)
|
||||||
|
* `gpfs_reads_rate` (if `send_derived_values == true`)
|
||||||
|
* `gpfs_writes_rate` (if `send_derived_values == true`)
|
||||||
|
* `gpfs_readdirs_rate` (if `send_derived_values == true`)
|
||||||
|
* `gpfs_inode_updates_rate` (if `send_derived_values == true`)
|
||||||
* `gpfs_bytes_total = gpfs_bytes_read + gpfs_bytes_written` (if `send_total_values == true`)
|
* `gpfs_bytes_total = gpfs_bytes_read + gpfs_bytes_written` (if `send_total_values == true`)
|
||||||
* `gpfs_iops = gpfs_num_reads + gpfs_num_writes` (if `send_total_values == true`)
|
* `gpfs_iops = gpfs_num_reads + gpfs_num_writes` (if `send_total_values == true`)
|
||||||
|
* `gpfs_iops_rate` (if `send_total_values == true` and `send_derived_values == true`)
|
||||||
* `gpfs_metaops = gpfs_num_inode_updates + gpfs_num_closes + gpfs_num_opens + gpfs_num_readdirs` (if `send_total_values == true`)
|
* `gpfs_metaops = gpfs_num_inode_updates + gpfs_num_closes + gpfs_num_opens + gpfs_num_readdirs` (if `send_total_values == true`)
|
||||||
|
* `gpfs_metaops_rate` (if `send_total_values == true` and `send_derived_values == true`)
|
||||||
* `gpfs_bw_read` (if `send_bandwidths == true`)
|
* `gpfs_bw_read` (if `send_bandwidths == true`)
|
||||||
* `gpfs_bw_write` (if `send_bandwidths == true`)
|
* `gpfs_bw_write` (if `send_bandwidths == true`)
|
||||||
|
* `gpfs_bw_total` (if `send_bandwidths == true` and `send_total_values == true`)
|
||||||
|
|
||||||
The collector adds a `filesystem` tag to all metrics
|
The collector adds a `filesystem` tag to all metrics
|
||||||
|
@@ -1,3 +1,10 @@
|
|||||||
|
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||||
|
// All rights reserved. This file is part of cc-lib.
|
||||||
|
// Use of this source code is governed by a MIT-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
// additional authors:
|
||||||
|
// Holger Obermaier (NHR@KIT)
|
||||||
|
|
||||||
package collectors
|
package collectors
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
@@ -1,3 +1,13 @@
|
|||||||
|
<!--
|
||||||
|
---
|
||||||
|
title: InfiniBand Metric collector
|
||||||
|
description: Collect metrics for InfiniBand devices
|
||||||
|
categories: [cc-metric-collector]
|
||||||
|
tags: ['Admin']
|
||||||
|
weight: 2
|
||||||
|
hugo_path: docs/reference/cc-metric-collector/collectors/infiniband.md
|
||||||
|
---
|
||||||
|
-->
|
||||||
|
|
||||||
## `ibstat` collector
|
## `ibstat` collector
|
||||||
|
|
||||||
|
@@ -1,3 +1,10 @@
|
|||||||
|
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||||
|
// All rights reserved. This file is part of cc-lib.
|
||||||
|
// Use of this source code is governed by a MIT-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
// additional authors:
|
||||||
|
// Holger Obermaier (NHR@KIT)
|
||||||
|
|
||||||
package collectors
|
package collectors
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
@@ -1,3 +1,13 @@
|
|||||||
|
<!--
|
||||||
|
---
|
||||||
|
title: IOStat Metric collector
|
||||||
|
description: Collect metrics from `/proc/diskstats`
|
||||||
|
categories: [cc-metric-collector]
|
||||||
|
tags: ['Admin']
|
||||||
|
weight: 2
|
||||||
|
hugo_path: docs/reference/cc-metric-collector/collectors/iostat.md
|
||||||
|
---
|
||||||
|
-->
|
||||||
|
|
||||||
## `iostat` collector
|
## `iostat` collector
|
||||||
|
|
||||||
|
@@ -1,3 +1,10 @@
|
|||||||
|
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||||
|
// All rights reserved. This file is part of cc-lib.
|
||||||
|
// Use of this source code is governed by a MIT-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
// additional authors:
|
||||||
|
// Holger Obermaier (NHR@KIT)
|
||||||
|
|
||||||
package collectors
|
package collectors
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
@@ -1,3 +1,13 @@
|
|||||||
|
<!--
|
||||||
|
---
|
||||||
|
title: IPMI Metric collector
|
||||||
|
description: Collect metrics using ipmitool or ipmi-sensors
|
||||||
|
categories: [cc-metric-collector]
|
||||||
|
tags: ['Admin']
|
||||||
|
weight: 2
|
||||||
|
hugo_path: docs/reference/cc-metric-collector/collectors/ipmi.md
|
||||||
|
---
|
||||||
|
-->
|
||||||
|
|
||||||
## `ipmistat` collector
|
## `ipmistat` collector
|
||||||
|
|
||||||
|
@@ -1,3 +1,10 @@
|
|||||||
|
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||||
|
// All rights reserved. This file is part of cc-lib.
|
||||||
|
// Use of this source code is governed by a MIT-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
// additional authors:
|
||||||
|
// Holger Obermaier (NHR@KIT)
|
||||||
|
|
||||||
package collectors
|
package collectors
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@@ -1,3 +1,13 @@
|
|||||||
|
<!--
|
||||||
|
---
|
||||||
|
title: LIKWID collector
|
||||||
|
description: Collect hardware performance events and metrics using LIKWID
|
||||||
|
categories: [cc-metric-collector]
|
||||||
|
tags: ['Admin']
|
||||||
|
weight: 2
|
||||||
|
hugo_path: docs/reference/cc-metric-collector/collectors/likwid.md
|
||||||
|
---
|
||||||
|
-->
|
||||||
|
|
||||||
## `likwid` collector
|
## `likwid` collector
|
||||||
|
|
||||||
|
@@ -1,3 +1,10 @@
|
|||||||
|
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||||
|
// All rights reserved. This file is part of cc-lib.
|
||||||
|
// Use of this source code is governed by a MIT-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
// additional authors:
|
||||||
|
// Holger Obermaier (NHR@KIT)
|
||||||
|
|
||||||
package collectors
|
package collectors
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
@@ -1,3 +1,14 @@
|
|||||||
|
<!--
|
||||||
|
---
|
||||||
|
title: Load average metric collector
|
||||||
|
description: Collect metrics from `/proc/loadavg`
|
||||||
|
categories: [cc-metric-collector]
|
||||||
|
tags: ['Admin']
|
||||||
|
weight: 2
|
||||||
|
hugo_path: docs/reference/cc-metric-collector/collectors/loadavg.md
|
||||||
|
---
|
||||||
|
-->
|
||||||
|
|
||||||
|
|
||||||
## `loadavg` collector
|
## `loadavg` collector
|
||||||
|
|
||||||
|
@@ -1,3 +1,10 @@
|
|||||||
|
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||||
|
// All rights reserved. This file is part of cc-lib.
|
||||||
|
// Use of this source code is governed by a MIT-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
// additional authors:
|
||||||
|
// Holger Obermaier (NHR@KIT)
|
||||||
|
|
||||||
package collectors
|
package collectors
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
@@ -1,3 +1,14 @@
|
|||||||
|
<!--
|
||||||
|
---
|
||||||
|
title: Lustre filesystem metric collector
|
||||||
|
description: Collect metrics for Lustre filesystems
|
||||||
|
categories: [cc-metric-collector]
|
||||||
|
tags: ['Admin']
|
||||||
|
weight: 2
|
||||||
|
hugo_path: docs/reference/cc-metric-collector/collectors/lustre.md
|
||||||
|
---
|
||||||
|
-->
|
||||||
|
|
||||||
|
|
||||||
## `lustrestat` collector
|
## `lustrestat` collector
|
||||||
|
|
||||||
|
@@ -1,3 +1,10 @@
|
|||||||
|
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||||
|
// All rights reserved. This file is part of cc-lib.
|
||||||
|
// Use of this source code is governed by a MIT-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
// additional authors:
|
||||||
|
// Holger Obermaier (NHR@KIT)
|
||||||
|
|
||||||
package collectors
|
package collectors
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
@@ -1,3 +1,14 @@
|
|||||||
|
<!--
|
||||||
|
---
|
||||||
|
title: Memory statistics metric collector
|
||||||
|
description: Collect metrics from `/proc/meminfo`
|
||||||
|
categories: [cc-metric-collector]
|
||||||
|
tags: ['Admin']
|
||||||
|
weight: 2
|
||||||
|
hugo_path: docs/reference/cc-metric-collector/collectors/memstat.md
|
||||||
|
---
|
||||||
|
-->
|
||||||
|
|
||||||
|
|
||||||
## `memstat` collector
|
## `memstat` collector
|
||||||
|
|
||||||
|
@@ -1,3 +1,10 @@
|
|||||||
|
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||||
|
// All rights reserved. This file is part of cc-lib.
|
||||||
|
// Use of this source code is governed by a MIT-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
// additional authors:
|
||||||
|
// Holger Obermaier (NHR@KIT)
|
||||||
|
|
||||||
package collectors
|
package collectors
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
@@ -1,3 +1,10 @@
|
|||||||
|
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||||
|
// All rights reserved. This file is part of cc-lib.
|
||||||
|
// Use of this source code is governed by a MIT-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
// additional authors:
|
||||||
|
// Holger Obermaier (NHR@KIT)
|
||||||
|
|
||||||
package collectors
|
package collectors
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
@@ -1,3 +1,13 @@
|
|||||||
|
<!--
|
||||||
|
---
|
||||||
|
title: Network device metric collector
|
||||||
|
description: Collect metrics for network devices through procfs
|
||||||
|
categories: [cc-metric-collector]
|
||||||
|
tags: ['Admin']
|
||||||
|
weight: 2
|
||||||
|
hugo_path: docs/reference/cc-metric-collector/collectors/netstat.md
|
||||||
|
---
|
||||||
|
-->
|
||||||
|
|
||||||
## `netstat` collector
|
## `netstat` collector
|
||||||
|
|
||||||
|
@@ -1,3 +1,14 @@
|
|||||||
|
<!--
|
||||||
|
---
|
||||||
|
title: NFS network filesystem (v3) metric collector
|
||||||
|
description: Collect metrics for NFS network filesystems in version 3
|
||||||
|
categories: [cc-metric-collector]
|
||||||
|
tags: ['Admin']
|
||||||
|
weight: 2
|
||||||
|
hugo_path: docs/reference/cc-metric-collector/collectors/nfs3.md
|
||||||
|
---
|
||||||
|
-->
|
||||||
|
|
||||||
|
|
||||||
## `nfs3stat` collector
|
## `nfs3stat` collector
|
||||||
|
|
||||||
|
@@ -1,3 +1,14 @@
|
|||||||
|
<!--
|
||||||
|
---
|
||||||
|
title: NFS network filesystem (v4) metric collector
|
||||||
|
description: Collect metrics for NFS network filesystems in version 4
|
||||||
|
categories: [cc-metric-collector]
|
||||||
|
tags: ['Admin']
|
||||||
|
weight: 2
|
||||||
|
hugo_path: docs/reference/cc-metric-collector/collectors/nfs4.md
|
||||||
|
---
|
||||||
|
-->
|
||||||
|
|
||||||
|
|
||||||
## `nfs4stat` collector
|
## `nfs4stat` collector
|
||||||
|
|
||||||
|
@@ -1,3 +1,10 @@
|
|||||||
|
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||||
|
// All rights reserved. This file is part of cc-lib.
|
||||||
|
// Use of this source code is governed by a MIT-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
// additional authors:
|
||||||
|
// Holger Obermaier (NHR@KIT)
|
||||||
|
|
||||||
package collectors
|
package collectors
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
@@ -1,3 +1,10 @@
|
|||||||
|
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||||
|
// All rights reserved. This file is part of cc-lib.
|
||||||
|
// Use of this source code is governed by a MIT-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
// additional authors:
|
||||||
|
// Holger Obermaier (NHR@KIT)
|
||||||
|
|
||||||
package collectors
|
package collectors
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
@@ -1,3 +1,14 @@
|
|||||||
|
<!--
|
||||||
|
---
|
||||||
|
title: NFS network filesystem metrics from procfs
|
||||||
|
description: Collect NFS network filesystem metrics for mounts from `/proc/self/mountstats`
|
||||||
|
categories: [cc-metric-collector]
|
||||||
|
tags: ['Admin']
|
||||||
|
weight: 2
|
||||||
|
hugo_path: docs/reference/cc-metric-collector/collectors/nfsio.md
|
||||||
|
---
|
||||||
|
-->
|
||||||
|
|
||||||
## `nfsiostat` collector
|
## `nfsiostat` collector
|
||||||
|
|
||||||
```json
|
```json
|
||||||
|
@@ -78,6 +78,14 @@ func (m *NUMAStatsCollector) Init(config json.RawMessage) error {
|
|||||||
"group": "NUMA",
|
"group": "NUMA",
|
||||||
}
|
}
|
||||||
|
|
||||||
|
m.config.SendAbsoluteValues = true
|
||||||
|
if len(config) > 0 {
|
||||||
|
err := json.Unmarshal(config, &m.config)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("unable to unmarshal numastat configuration: %s", err.Error())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Loop for all NUMA node directories
|
// Loop for all NUMA node directories
|
||||||
base := "/sys/devices/system/node/node"
|
base := "/sys/devices/system/node/node"
|
||||||
globPattern := base + "[0-9]*"
|
globPattern := base + "[0-9]*"
|
||||||
@@ -95,7 +103,10 @@ func (m *NUMAStatsCollector) Init(config json.RawMessage) error {
|
|||||||
m.topology = append(m.topology,
|
m.topology = append(m.topology,
|
||||||
NUMAStatsCollectorTopolgy{
|
NUMAStatsCollectorTopolgy{
|
||||||
file: file,
|
file: file,
|
||||||
tagSet: map[string]string{"memoryDomain": node},
|
tagSet: map[string]string{
|
||||||
|
"type": "memoryDomain",
|
||||||
|
"type-id": node,
|
||||||
|
},
|
||||||
previousValues: make(map[string]int64),
|
previousValues: make(map[string]int64),
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
@@ -145,11 +156,11 @@ func (m *NUMAStatsCollector) Read(interval time.Duration, output chan lp.CCMessa
|
|||||||
}
|
}
|
||||||
|
|
||||||
if m.config.SendAbsoluteValues {
|
if m.config.SendAbsoluteValues {
|
||||||
msg, err := lp.NewMessage(
|
msg, err := lp.NewMetric(
|
||||||
"numastats_"+key,
|
"numastats_"+key,
|
||||||
t.tagSet,
|
t.tagSet,
|
||||||
m.meta,
|
m.meta,
|
||||||
map[string]interface{}{"value": value},
|
value,
|
||||||
now,
|
now,
|
||||||
)
|
)
|
||||||
if err == nil {
|
if err == nil {
|
||||||
@@ -161,11 +172,11 @@ func (m *NUMAStatsCollector) Read(interval time.Duration, output chan lp.CCMessa
|
|||||||
prev, ok := t.previousValues[key]
|
prev, ok := t.previousValues[key]
|
||||||
if ok {
|
if ok {
|
||||||
rate := float64(value-prev) / timeDiff
|
rate := float64(value-prev) / timeDiff
|
||||||
msg, err := lp.NewMessage(
|
msg, err := lp.NewMetric(
|
||||||
"numastats_"+key+"_rate",
|
"numastats_"+key+"_rate",
|
||||||
t.tagSet,
|
t.tagSet,
|
||||||
m.meta,
|
m.meta,
|
||||||
map[string]interface{}{"value": rate},
|
rate,
|
||||||
now,
|
now,
|
||||||
)
|
)
|
||||||
if err == nil {
|
if err == nil {
|
||||||
|
@@ -1,3 +1,13 @@
|
|||||||
|
<!--
|
||||||
|
---
|
||||||
|
title: NUMAStat collector
|
||||||
|
description: Collect information about NUMA domains
|
||||||
|
categories: [cc-metric-collector]
|
||||||
|
tags: ['Admin']
|
||||||
|
weight: 2
|
||||||
|
hugo_path: docs/reference/cc-metric-collector/collectors/numastat.md
|
||||||
|
---
|
||||||
|
-->
|
||||||
|
|
||||||
## `numastat` collector
|
## `numastat` collector
|
||||||
|
|
||||||
|
@@ -1,3 +1,10 @@
|
|||||||
|
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||||
|
// All rights reserved. This file is part of cc-lib.
|
||||||
|
// Use of this source code is governed by a MIT-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
// additional authors:
|
||||||
|
// Holger Obermaier (NHR@KIT)
|
||||||
|
|
||||||
package collectors
|
package collectors
|
||||||
|
|
||||||
import (
|
import (
|
||||||
@@ -31,6 +38,8 @@ type NvidiaCollectorDevice struct {
|
|||||||
excludeMetrics map[string]bool
|
excludeMetrics map[string]bool
|
||||||
tags map[string]string
|
tags map[string]string
|
||||||
meta map[string]string
|
meta map[string]string
|
||||||
|
lastEnergyReading uint64
|
||||||
|
lastEnergyTimestamp time.Time
|
||||||
}
|
}
|
||||||
|
|
||||||
type NvidiaCollector struct {
|
type NvidiaCollector struct {
|
||||||
@@ -149,6 +158,8 @@ func (m *NvidiaCollector) Init(config json.RawMessage) error {
|
|||||||
|
|
||||||
// Add device handle
|
// Add device handle
|
||||||
g.device = device
|
g.device = device
|
||||||
|
g.lastEnergyReading = 0
|
||||||
|
g.lastEnergyTimestamp = time.Now()
|
||||||
|
|
||||||
// Add tags
|
// Add tags
|
||||||
g.tags = map[string]string{
|
g.tags = map[string]string{
|
||||||
@@ -206,7 +217,7 @@ func (m *NvidiaCollector) Init(config json.RawMessage) error {
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func readMemoryInfo(device NvidiaCollectorDevice, output chan lp.CCMessage) error {
|
func readMemoryInfo(device *NvidiaCollectorDevice, output chan lp.CCMessage) error {
|
||||||
if !device.excludeMetrics["nv_fb_mem_total"] || !device.excludeMetrics["nv_fb_mem_used"] || !device.excludeMetrics["nv_fb_mem_reserved"] {
|
if !device.excludeMetrics["nv_fb_mem_total"] || !device.excludeMetrics["nv_fb_mem_used"] || !device.excludeMetrics["nv_fb_mem_reserved"] {
|
||||||
var total uint64
|
var total uint64
|
||||||
var used uint64
|
var used uint64
|
||||||
@@ -250,7 +261,7 @@ func readMemoryInfo(device NvidiaCollectorDevice, output chan lp.CCMessage) erro
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func readBarMemoryInfo(device NvidiaCollectorDevice, output chan lp.CCMessage) error {
|
func readBarMemoryInfo(device *NvidiaCollectorDevice, output chan lp.CCMessage) error {
|
||||||
if !device.excludeMetrics["nv_bar1_mem_total"] || !device.excludeMetrics["nv_bar1_mem_used"] {
|
if !device.excludeMetrics["nv_bar1_mem_total"] || !device.excludeMetrics["nv_bar1_mem_used"] {
|
||||||
meminfo, ret := nvml.DeviceGetBAR1MemoryInfo(device.device)
|
meminfo, ret := nvml.DeviceGetBAR1MemoryInfo(device.device)
|
||||||
if ret != nvml.SUCCESS {
|
if ret != nvml.SUCCESS {
|
||||||
@@ -277,7 +288,7 @@ func readBarMemoryInfo(device NvidiaCollectorDevice, output chan lp.CCMessage) e
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func readUtilization(device NvidiaCollectorDevice, output chan lp.CCMessage) error {
|
func readUtilization(device *NvidiaCollectorDevice, output chan lp.CCMessage) error {
|
||||||
isMig, ret := nvml.DeviceIsMigDeviceHandle(device.device)
|
isMig, ret := nvml.DeviceIsMigDeviceHandle(device.device)
|
||||||
if ret != nvml.SUCCESS {
|
if ret != nvml.SUCCESS {
|
||||||
err := errors.New(nvml.ErrorString(ret))
|
err := errors.New(nvml.ErrorString(ret))
|
||||||
@@ -319,7 +330,7 @@ func readUtilization(device NvidiaCollectorDevice, output chan lp.CCMessage) err
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func readTemp(device NvidiaCollectorDevice, output chan lp.CCMessage) error {
|
func readTemp(device *NvidiaCollectorDevice, output chan lp.CCMessage) error {
|
||||||
if !device.excludeMetrics["nv_temp"] {
|
if !device.excludeMetrics["nv_temp"] {
|
||||||
// Retrieves the current temperature readings for the device, in degrees C.
|
// Retrieves the current temperature readings for the device, in degrees C.
|
||||||
//
|
//
|
||||||
@@ -338,7 +349,7 @@ func readTemp(device NvidiaCollectorDevice, output chan lp.CCMessage) error {
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func readFan(device NvidiaCollectorDevice, output chan lp.CCMessage) error {
|
func readFan(device *NvidiaCollectorDevice, output chan lp.CCMessage) error {
|
||||||
if !device.excludeMetrics["nv_fan"] {
|
if !device.excludeMetrics["nv_fan"] {
|
||||||
// Retrieves the intended operating speed of the device's fan.
|
// Retrieves the intended operating speed of the device's fan.
|
||||||
//
|
//
|
||||||
@@ -361,7 +372,7 @@ func readFan(device NvidiaCollectorDevice, output chan lp.CCMessage) error {
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// func readFans(device NvidiaCollectorDevice, output chan lp.CCMessage) error {
|
// func readFans(device *NvidiaCollectorDevice, output chan lp.CCMessage) error {
|
||||||
// if !device.excludeMetrics["nv_fan"] {
|
// if !device.excludeMetrics["nv_fan"] {
|
||||||
// numFans, ret := nvml.DeviceGetNumFans(device.device)
|
// numFans, ret := nvml.DeviceGetNumFans(device.device)
|
||||||
// if ret == nvml.SUCCESS {
|
// if ret == nvml.SUCCESS {
|
||||||
@@ -382,7 +393,7 @@ func readFan(device NvidiaCollectorDevice, output chan lp.CCMessage) error {
|
|||||||
// return nil
|
// return nil
|
||||||
// }
|
// }
|
||||||
|
|
||||||
func readEccMode(device NvidiaCollectorDevice, output chan lp.CCMessage) error {
|
func readEccMode(device *NvidiaCollectorDevice, output chan lp.CCMessage) error {
|
||||||
if !device.excludeMetrics["nv_ecc_mode"] {
|
if !device.excludeMetrics["nv_ecc_mode"] {
|
||||||
// Retrieves the current and pending ECC modes for the device.
|
// Retrieves the current and pending ECC modes for the device.
|
||||||
//
|
//
|
||||||
@@ -416,7 +427,7 @@ func readEccMode(device NvidiaCollectorDevice, output chan lp.CCMessage) error {
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func readPerfState(device NvidiaCollectorDevice, output chan lp.CCMessage) error {
|
func readPerfState(device *NvidiaCollectorDevice, output chan lp.CCMessage) error {
|
||||||
if !device.excludeMetrics["nv_perf_state"] {
|
if !device.excludeMetrics["nv_perf_state"] {
|
||||||
// Retrieves the current performance state for the device.
|
// Retrieves the current performance state for the device.
|
||||||
//
|
//
|
||||||
@@ -436,13 +447,16 @@ func readPerfState(device NvidiaCollectorDevice, output chan lp.CCMessage) error
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func readPowerUsage(device NvidiaCollectorDevice, output chan lp.CCMessage) error {
|
func readPowerUsage(device *NvidiaCollectorDevice, output chan lp.CCMessage) error {
|
||||||
if !device.excludeMetrics["nv_power_usage"] {
|
if !device.excludeMetrics["nv_power_usage"] {
|
||||||
// Retrieves power usage for this GPU in milliwatts and its associated circuitry (e.g. memory)
|
// Retrieves power usage for this GPU in milliwatts and its associated circuitry (e.g. memory)
|
||||||
//
|
//
|
||||||
// On Fermi and Kepler GPUs the reading is accurate to within +/- 5% of current power draw.
|
// On Fermi and Kepler GPUs the reading is accurate to within +/- 5% of current power draw.
|
||||||
|
// On Ampere (except GA100) or newer GPUs, the API returns power averaged over 1 sec interval.
|
||||||
|
// On GA100 and older architectures, instantaneous power is returned.
|
||||||
//
|
//
|
||||||
// It is only available if power management mode is supported
|
// It is only available if power management mode is supported.
|
||||||
|
|
||||||
mode, ret := nvml.DeviceGetPowerManagementMode(device.device)
|
mode, ret := nvml.DeviceGetPowerManagementMode(device.device)
|
||||||
if ret != nvml.SUCCESS {
|
if ret != nvml.SUCCESS {
|
||||||
return nil
|
return nil
|
||||||
@@ -461,7 +475,54 @@ func readPowerUsage(device NvidiaCollectorDevice, output chan lp.CCMessage) erro
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func readClocks(device NvidiaCollectorDevice, output chan lp.CCMessage) error {
|
func readEnergyConsumption(device *NvidiaCollectorDevice, output chan lp.CCMessage) error {
|
||||||
|
// Retrieves total energy consumption for this GPU in millijoules (mJ) since the driver was last reloaded
|
||||||
|
|
||||||
|
// For Volta or newer fully supported devices.
|
||||||
|
if (!device.excludeMetrics["nv_energy"]) && (!device.excludeMetrics["nv_energy_abs"]) && (!device.excludeMetrics["nv_average_power"]) {
|
||||||
|
now := time.Now()
|
||||||
|
mode, ret := nvml.DeviceGetPowerManagementMode(device.device)
|
||||||
|
if ret != nvml.SUCCESS {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
if mode == nvml.FEATURE_ENABLED {
|
||||||
|
energy, ret := nvml.DeviceGetTotalEnergyConsumption(device.device)
|
||||||
|
if ret == nvml.SUCCESS {
|
||||||
|
if device.lastEnergyReading != 0 {
|
||||||
|
if !device.excludeMetrics["nv_energy"] {
|
||||||
|
y, err := lp.NewMetric("nv_energy", device.tags, device.meta, (energy-device.lastEnergyReading)/1000, now)
|
||||||
|
if err == nil {
|
||||||
|
y.AddMeta("unit", "Joules")
|
||||||
|
output <- y
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if !device.excludeMetrics["nv_average_power"] {
|
||||||
|
|
||||||
|
energyDiff := (energy - device.lastEnergyReading) / 1000
|
||||||
|
timeDiff := now.Sub(device.lastEnergyTimestamp)
|
||||||
|
y, err := lp.NewMetric("nv_average_power", device.tags, device.meta, energyDiff/uint64(timeDiff.Seconds()), now)
|
||||||
|
if err == nil {
|
||||||
|
y.AddMeta("unit", "watts")
|
||||||
|
output <- y
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if !device.excludeMetrics["nv_energy_abs"] {
|
||||||
|
y, err := lp.NewMetric("nv_energy_abs", device.tags, device.meta, energy/1000, now)
|
||||||
|
if err == nil {
|
||||||
|
y.AddMeta("unit", "Joules")
|
||||||
|
output <- y
|
||||||
|
}
|
||||||
|
}
|
||||||
|
device.lastEnergyReading = energy
|
||||||
|
device.lastEnergyTimestamp = time.Now()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func readClocks(device *NvidiaCollectorDevice, output chan lp.CCMessage) error {
|
||||||
// Retrieves the current clock speeds for the device.
|
// Retrieves the current clock speeds for the device.
|
||||||
//
|
//
|
||||||
// Available clock information:
|
// Available clock information:
|
||||||
@@ -513,7 +574,7 @@ func readClocks(device NvidiaCollectorDevice, output chan lp.CCMessage) error {
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func readMaxClocks(device NvidiaCollectorDevice, output chan lp.CCMessage) error {
|
func readMaxClocks(device *NvidiaCollectorDevice, output chan lp.CCMessage) error {
|
||||||
// Retrieves the maximum clock speeds for the device.
|
// Retrieves the maximum clock speeds for the device.
|
||||||
//
|
//
|
||||||
// Available clock information:
|
// Available clock information:
|
||||||
@@ -528,7 +589,7 @@ func readMaxClocks(device NvidiaCollectorDevice, output chan lp.CCMessage) error
|
|||||||
if !device.excludeMetrics["nv_max_graphics_clock"] {
|
if !device.excludeMetrics["nv_max_graphics_clock"] {
|
||||||
max_gclk, ret := nvml.DeviceGetMaxClockInfo(device.device, nvml.CLOCK_GRAPHICS)
|
max_gclk, ret := nvml.DeviceGetMaxClockInfo(device.device, nvml.CLOCK_GRAPHICS)
|
||||||
if ret == nvml.SUCCESS {
|
if ret == nvml.SUCCESS {
|
||||||
y, err := lp.NewMessage("nv_max_graphics_clock", device.tags, device.meta, map[string]interface{}{"value": float64(max_gclk)}, time.Now())
|
y, err := lp.NewMetric("nv_max_graphics_clock", device.tags, device.meta, float64(max_gclk), time.Now())
|
||||||
if err == nil {
|
if err == nil {
|
||||||
y.AddMeta("unit", "MHz")
|
y.AddMeta("unit", "MHz")
|
||||||
output <- y
|
output <- y
|
||||||
@@ -537,9 +598,9 @@ func readMaxClocks(device NvidiaCollectorDevice, output chan lp.CCMessage) error
|
|||||||
}
|
}
|
||||||
|
|
||||||
if !device.excludeMetrics["nv_max_sm_clock"] {
|
if !device.excludeMetrics["nv_max_sm_clock"] {
|
||||||
maxSmClock, ret := nvml.DeviceGetClockInfo(device.device, nvml.CLOCK_SM)
|
maxSmClock, ret := nvml.DeviceGetMaxClockInfo(device.device, nvml.CLOCK_SM)
|
||||||
if ret == nvml.SUCCESS {
|
if ret == nvml.SUCCESS {
|
||||||
y, err := lp.NewMessage("nv_max_sm_clock", device.tags, device.meta, map[string]interface{}{"value": float64(maxSmClock)}, time.Now())
|
y, err := lp.NewMetric("nv_max_sm_clock", device.tags, device.meta, float64(maxSmClock), time.Now())
|
||||||
if err == nil {
|
if err == nil {
|
||||||
y.AddMeta("unit", "MHz")
|
y.AddMeta("unit", "MHz")
|
||||||
output <- y
|
output <- y
|
||||||
@@ -548,9 +609,9 @@ func readMaxClocks(device NvidiaCollectorDevice, output chan lp.CCMessage) error
|
|||||||
}
|
}
|
||||||
|
|
||||||
if !device.excludeMetrics["nv_max_mem_clock"] {
|
if !device.excludeMetrics["nv_max_mem_clock"] {
|
||||||
maxMemClock, ret := nvml.DeviceGetClockInfo(device.device, nvml.CLOCK_MEM)
|
maxMemClock, ret := nvml.DeviceGetMaxClockInfo(device.device, nvml.CLOCK_MEM)
|
||||||
if ret == nvml.SUCCESS {
|
if ret == nvml.SUCCESS {
|
||||||
y, err := lp.NewMessage("nv_max_mem_clock", device.tags, device.meta, map[string]interface{}{"value": float64(maxMemClock)}, time.Now())
|
y, err := lp.NewMetric("nv_max_mem_clock", device.tags, device.meta, float64(maxMemClock), time.Now())
|
||||||
if err == nil {
|
if err == nil {
|
||||||
y.AddMeta("unit", "MHz")
|
y.AddMeta("unit", "MHz")
|
||||||
output <- y
|
output <- y
|
||||||
@@ -559,9 +620,9 @@ func readMaxClocks(device NvidiaCollectorDevice, output chan lp.CCMessage) error
|
|||||||
}
|
}
|
||||||
|
|
||||||
if !device.excludeMetrics["nv_max_video_clock"] {
|
if !device.excludeMetrics["nv_max_video_clock"] {
|
||||||
maxMemClock, ret := nvml.DeviceGetClockInfo(device.device, nvml.CLOCK_VIDEO)
|
maxVideoClock, ret := nvml.DeviceGetMaxClockInfo(device.device, nvml.CLOCK_VIDEO)
|
||||||
if ret == nvml.SUCCESS {
|
if ret == nvml.SUCCESS {
|
||||||
y, err := lp.NewMessage("nv_max_video_clock", device.tags, device.meta, map[string]interface{}{"value": float64(maxMemClock)}, time.Now())
|
y, err := lp.NewMetric("nv_max_video_clock", device.tags, device.meta, float64(maxVideoClock), time.Now())
|
||||||
if err == nil {
|
if err == nil {
|
||||||
y.AddMeta("unit", "MHz")
|
y.AddMeta("unit", "MHz")
|
||||||
output <- y
|
output <- y
|
||||||
@@ -571,7 +632,7 @@ func readMaxClocks(device NvidiaCollectorDevice, output chan lp.CCMessage) error
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func readEccErrors(device NvidiaCollectorDevice, output chan lp.CCMessage) error {
|
func readEccErrors(device *NvidiaCollectorDevice, output chan lp.CCMessage) error {
|
||||||
if !device.excludeMetrics["nv_ecc_uncorrected_error"] {
|
if !device.excludeMetrics["nv_ecc_uncorrected_error"] {
|
||||||
// Retrieves the total ECC error counts for the device.
|
// Retrieves the total ECC error counts for the device.
|
||||||
//
|
//
|
||||||
@@ -602,7 +663,7 @@ func readEccErrors(device NvidiaCollectorDevice, output chan lp.CCMessage) error
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func readPowerLimit(device NvidiaCollectorDevice, output chan lp.CCMessage) error {
|
func readPowerLimit(device *NvidiaCollectorDevice, output chan lp.CCMessage) error {
|
||||||
if !device.excludeMetrics["nv_power_max_limit"] {
|
if !device.excludeMetrics["nv_power_max_limit"] {
|
||||||
// Retrieves the power management limit associated with this device.
|
// Retrieves the power management limit associated with this device.
|
||||||
//
|
//
|
||||||
@@ -622,7 +683,7 @@ func readPowerLimit(device NvidiaCollectorDevice, output chan lp.CCMessage) erro
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func readEncUtilization(device NvidiaCollectorDevice, output chan lp.CCMessage) error {
|
func readEncUtilization(device *NvidiaCollectorDevice, output chan lp.CCMessage) error {
|
||||||
isMig, ret := nvml.DeviceIsMigDeviceHandle(device.device)
|
isMig, ret := nvml.DeviceIsMigDeviceHandle(device.device)
|
||||||
if ret != nvml.SUCCESS {
|
if ret != nvml.SUCCESS {
|
||||||
err := errors.New(nvml.ErrorString(ret))
|
err := errors.New(nvml.ErrorString(ret))
|
||||||
@@ -649,7 +710,7 @@ func readEncUtilization(device NvidiaCollectorDevice, output chan lp.CCMessage)
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func readDecUtilization(device NvidiaCollectorDevice, output chan lp.CCMessage) error {
|
func readDecUtilization(device *NvidiaCollectorDevice, output chan lp.CCMessage) error {
|
||||||
isMig, ret := nvml.DeviceIsMigDeviceHandle(device.device)
|
isMig, ret := nvml.DeviceIsMigDeviceHandle(device.device)
|
||||||
if ret != nvml.SUCCESS {
|
if ret != nvml.SUCCESS {
|
||||||
err := errors.New(nvml.ErrorString(ret))
|
err := errors.New(nvml.ErrorString(ret))
|
||||||
@@ -676,7 +737,7 @@ func readDecUtilization(device NvidiaCollectorDevice, output chan lp.CCMessage)
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func readRemappedRows(device NvidiaCollectorDevice, output chan lp.CCMessage) error {
|
func readRemappedRows(device *NvidiaCollectorDevice, output chan lp.CCMessage) error {
|
||||||
if !device.excludeMetrics["nv_remapped_rows_corrected"] ||
|
if !device.excludeMetrics["nv_remapped_rows_corrected"] ||
|
||||||
!device.excludeMetrics["nv_remapped_rows_uncorrected"] ||
|
!device.excludeMetrics["nv_remapped_rows_uncorrected"] ||
|
||||||
!device.excludeMetrics["nv_remapped_rows_pending"] ||
|
!device.excludeMetrics["nv_remapped_rows_pending"] ||
|
||||||
@@ -729,7 +790,7 @@ func readRemappedRows(device NvidiaCollectorDevice, output chan lp.CCMessage) er
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func readProcessCounts(device NvidiaCollectorDevice, output chan lp.CCMessage) error {
|
func readProcessCounts(device *NvidiaCollectorDevice, output chan lp.CCMessage) error {
|
||||||
if !device.excludeMetrics["nv_compute_processes"] {
|
if !device.excludeMetrics["nv_compute_processes"] {
|
||||||
// Get information about processes with a compute context on a device
|
// Get information about processes with a compute context on a device
|
||||||
//
|
//
|
||||||
@@ -821,7 +882,7 @@ func readProcessCounts(device NvidiaCollectorDevice, output chan lp.CCMessage) e
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func readViolationStats(device NvidiaCollectorDevice, output chan lp.CCMessage) error {
|
func readViolationStats(device *NvidiaCollectorDevice, output chan lp.CCMessage) error {
|
||||||
var violTime nvml.ViolationTime
|
var violTime nvml.ViolationTime
|
||||||
var ret nvml.Return
|
var ret nvml.Return
|
||||||
|
|
||||||
@@ -935,7 +996,7 @@ func readViolationStats(device NvidiaCollectorDevice, output chan lp.CCMessage)
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func readNVLinkStats(device NvidiaCollectorDevice, output chan lp.CCMessage) error {
|
func readNVLinkStats(device *NvidiaCollectorDevice, output chan lp.CCMessage) error {
|
||||||
// Retrieves the specified error counter value
|
// Retrieves the specified error counter value
|
||||||
// Please refer to \a nvmlNvLinkErrorCounter_t for error counters that are available
|
// Please refer to \a nvmlNvLinkErrorCounter_t for error counters that are available
|
||||||
//
|
//
|
||||||
@@ -1070,7 +1131,7 @@ func (m *NvidiaCollector) Read(interval time.Duration, output chan lp.CCMessage)
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
readAll := func(device NvidiaCollectorDevice, output chan lp.CCMessage) {
|
readAll := func(device *NvidiaCollectorDevice, output chan lp.CCMessage) {
|
||||||
name, ret := nvml.DeviceGetName(device.device)
|
name, ret := nvml.DeviceGetName(device.device)
|
||||||
if ret != nvml.SUCCESS {
|
if ret != nvml.SUCCESS {
|
||||||
name = "NoName"
|
name = "NoName"
|
||||||
@@ -1110,6 +1171,11 @@ func (m *NvidiaCollector) Read(interval time.Duration, output chan lp.CCMessage)
|
|||||||
cclog.ComponentDebug(m.name, "readPowerUsage for device", name, "failed")
|
cclog.ComponentDebug(m.name, "readPowerUsage for device", name, "failed")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
err = readEnergyConsumption(device, output)
|
||||||
|
if err != nil {
|
||||||
|
cclog.ComponentDebug(m.name, "readEnergyConsumption for device", name, "failed")
|
||||||
|
}
|
||||||
|
|
||||||
err = readClocks(device, output)
|
err = readClocks(device, output)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
cclog.ComponentDebug(m.name, "readClocks for device", name, "failed")
|
cclog.ComponentDebug(m.name, "readClocks for device", name, "failed")
|
||||||
@@ -1169,7 +1235,7 @@ func (m *NvidiaCollector) Read(interval time.Duration, output chan lp.CCMessage)
|
|||||||
// Actual read loop over all attached Nvidia GPUs
|
// Actual read loop over all attached Nvidia GPUs
|
||||||
for i := 0; i < m.num_gpus; i++ {
|
for i := 0; i < m.num_gpus; i++ {
|
||||||
|
|
||||||
readAll(m.gpus[i], output)
|
readAll(&m.gpus[i], output)
|
||||||
|
|
||||||
// Iterate over all MIG devices if any
|
// Iterate over all MIG devices if any
|
||||||
if m.config.ProcessMigDevices {
|
if m.config.ProcessMigDevices {
|
||||||
@@ -1243,7 +1309,7 @@ func (m *NvidiaCollector) Read(interval time.Duration, output chan lp.CCMessage)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
readAll(migDevice, output)
|
readAll(&migDevice, output)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@@ -1,3 +1,13 @@
|
|||||||
|
<!--
|
||||||
|
---
|
||||||
|
title: "Nvidia NVML metric collector"
|
||||||
|
description: Collect metrics for Nvidia GPUs using the NVML
|
||||||
|
categories: [cc-metric-collector]
|
||||||
|
tags: ['Admin']
|
||||||
|
weight: 2
|
||||||
|
hugo_path: docs/reference/cc-metric-collector/collectors/nvidia.md
|
||||||
|
---
|
||||||
|
-->
|
||||||
|
|
||||||
## `nvidia` collector
|
## `nvidia` collector
|
||||||
|
|
||||||
@@ -72,5 +82,8 @@ Metrics:
|
|||||||
* `nv_nvlink_ecc_errors`
|
* `nv_nvlink_ecc_errors`
|
||||||
* `nv_nvlink_replay_errors`
|
* `nv_nvlink_replay_errors`
|
||||||
* `nv_nvlink_recovery_errors`
|
* `nv_nvlink_recovery_errors`
|
||||||
|
* `nv_energy`
|
||||||
|
* `nv_energy_abs`
|
||||||
|
* `nv_average_power`
|
||||||
|
|
||||||
Some metrics add the additional sub type tag (`stype`) like the `nv_nvlink_*` metrics set `stype=nvlink,stype-id=<link_number>`.
|
Some metrics add the additional sub type tag (`stype`) like the `nv_nvlink_*` metrics set `stype=nvlink,stype-id=<link_number>`.
|
@@ -1,3 +1,10 @@
|
|||||||
|
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||||
|
// All rights reserved. This file is part of cc-lib.
|
||||||
|
// Use of this source code is governed by a MIT-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
// additional authors:
|
||||||
|
// Holger Obermaier (NHR@KIT)
|
||||||
|
|
||||||
package collectors
|
package collectors
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
@@ -1,3 +1,14 @@
|
|||||||
|
<!--
|
||||||
|
---
|
||||||
|
title: RAPL metric collector
|
||||||
|
description: Collect energy data through the RAPL sysfs interface
|
||||||
|
categories: [cc-metric-collector]
|
||||||
|
tags: ['Admin']
|
||||||
|
weight: 2
|
||||||
|
hugo_path: docs/reference/cc-metric-collector/collectors/rapl.md
|
||||||
|
---
|
||||||
|
-->
|
||||||
|
|
||||||
## `rapl` collector
|
## `rapl` collector
|
||||||
|
|
||||||
This collector reads running average power limit (RAPL) monitoring attributes to compute average power consumption metrics. See <https://www.kernel.org/doc/html/latest/power/powercap/powercap.html#monitoring-attributes>.
|
This collector reads running average power limit (RAPL) monitoring attributes to compute average power consumption metrics. See <https://www.kernel.org/doc/html/latest/power/powercap/powercap.html#monitoring-attributes>.
|
||||||
|
@@ -1,3 +1,10 @@
|
|||||||
|
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||||
|
// All rights reserved. This file is part of cc-lib.
|
||||||
|
// Use of this source code is governed by a MIT-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
// additional authors:
|
||||||
|
// Holger Obermaier (NHR@KIT)
|
||||||
|
|
||||||
package collectors
|
package collectors
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
@@ -1,3 +1,14 @@
|
|||||||
|
<!--
|
||||||
|
---
|
||||||
|
title: "ROCm SMI metric collector"
|
||||||
|
description: Collect metrics for AMD GPUs using the SMI library
|
||||||
|
categories: [cc-metric-collector]
|
||||||
|
tags: ['Admin']
|
||||||
|
weight: 2
|
||||||
|
hugo_path: docs/reference/cc-metric-collector/collectors/rocmsmi.md
|
||||||
|
---
|
||||||
|
-->
|
||||||
|
|
||||||
|
|
||||||
## `rocm_smi` collector
|
## `rocm_smi` collector
|
||||||
|
|
||||||
|
@@ -1,3 +1,10 @@
|
|||||||
|
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||||
|
// All rights reserved. This file is part of cc-lib.
|
||||||
|
// Use of this source code is governed by a MIT-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
// additional authors:
|
||||||
|
// Holger Obermaier (NHR@KIT)
|
||||||
|
|
||||||
package collectors
|
package collectors
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
@@ -1,3 +1,10 @@
|
|||||||
|
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||||
|
// All rights reserved. This file is part of cc-lib.
|
||||||
|
// Use of this source code is governed by a MIT-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
// additional authors:
|
||||||
|
// Holger Obermaier (NHR@KIT)
|
||||||
|
|
||||||
package collectors
|
package collectors
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
@@ -1,3 +1,10 @@
|
|||||||
|
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||||
|
// All rights reserved. This file is part of cc-lib.
|
||||||
|
// Use of this source code is governed by a MIT-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
// additional authors:
|
||||||
|
// Holger Obermaier (NHR@KIT)
|
||||||
|
|
||||||
package collectors
|
package collectors
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
@@ -1,3 +1,13 @@
|
|||||||
|
<!--
|
||||||
|
---
|
||||||
|
title: SchedStat Metric collector
|
||||||
|
description: Collect metrics from `/proc/schedstat`
|
||||||
|
categories: [cc-metric-collector]
|
||||||
|
tags: ['Admin']
|
||||||
|
weight: 2
|
||||||
|
hugo_path: docs/reference/cc-metric-collector/collectors/schedstat.md
|
||||||
|
---
|
||||||
|
-->
|
||||||
|
|
||||||
## `schedstat` collector
|
## `schedstat` collector
|
||||||
```json
|
```json
|
||||||
|
@@ -1,3 +1,10 @@
|
|||||||
|
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||||
|
// All rights reserved. This file is part of cc-lib.
|
||||||
|
// Use of this source code is governed by a MIT-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
// additional authors:
|
||||||
|
// Holger Obermaier (NHR@KIT)
|
||||||
|
|
||||||
package collectors
|
package collectors
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
@@ -1,3 +1,14 @@
|
|||||||
|
<!--
|
||||||
|
---
|
||||||
|
title: Self-monitoring metric collector
|
||||||
|
description: Collect metrics from the execution of cc-metric-collector itself
|
||||||
|
categories: [cc-metric-collector]
|
||||||
|
tags: ['Admin']
|
||||||
|
weight: 2
|
||||||
|
hugo_path: docs/reference/cc-metric-collector/collectors/self.md
|
||||||
|
---
|
||||||
|
-->
|
||||||
|
|
||||||
## `self` collector
|
## `self` collector
|
||||||
|
|
||||||
```json
|
```json
|
||||||
|
349
collectors/slurmCgroupMetric.go
Normal file
349
collectors/slurmCgroupMetric.go
Normal file
@@ -0,0 +1,349 @@
|
|||||||
|
package collectors
|
||||||
|
|
||||||
|
import (
|
||||||
|
"encoding/json"
|
||||||
|
"fmt"
|
||||||
|
"os"
|
||||||
|
"os/exec"
|
||||||
|
"os/user"
|
||||||
|
"path/filepath"
|
||||||
|
"strconv"
|
||||||
|
"strings"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
|
||||||
|
lp "github.com/ClusterCockpit/cc-lib/ccMessage"
|
||||||
|
)
|
||||||
|
|
||||||
|
// SlurmJobData holds the values collected for a single Slurm job.
//
// NOTE(review): the units of the numeric fields are decided by the code that
// fills this struct, which is not documented here — confirm before relying
// on them.
type SlurmJobData struct {
	// Memory figures of the job: current usage, peak usage and limit.
	MemoryUsage, MaxMemoryUsage, LimitMemoryUsage float64
	// CPU usage of the job, split into user and system share.
	CpuUsageUser, CpuUsageSys float64
	// CPU IDs belonging to the job.
	CpuSet []int
}
|
||||||
|
|
||||||
|
// SlurmCgroupsConfig is the JSON configuration of the Slurm cgroup collector.
type SlurmCgroupsConfig struct {
	// CgroupBase overrides the default cgroup base directory when non-empty.
	CgroupBase string `json:"cgroup_base"`

	// ExcludeMetrics lists the names of metrics that should be excluded.
	ExcludeMetrics []string `json:"exclude_metrics,omitempty"`

	// UseSudo makes the collector read files through "sudo cat" instead of
	// reading them directly.
	UseSudo bool `json:"use_sudo,omitempty"`
}
|
||||||
|
|
||||||
|
type SlurmCgroupCollector struct {
|
||||||
|
metricCollector
|
||||||
|
config SlurmCgroupsConfig
|
||||||
|
meta map[string]string
|
||||||
|
tags map[string]string
|
||||||
|
allCPUs []int
|
||||||
|
cpuUsed map[int]bool
|
||||||
|
cgroupBase string
|
||||||
|
excludeMetrics map[string]struct{}
|
||||||
|
useSudo bool
|
||||||
|
}
|
||||||
|
|
||||||
|
// defaultCgroupBase is the cgroup base directory used when the configuration
// does not provide a "cgroup_base" value.
const defaultCgroupBase = "/sys/fs/cgroup/system.slice/slurmstepd.scope"
|
||||||
|
|
||||||
|
// ParseCPUs expands a CPU list such as "0-3,8,10-11" into the individual CPU
// IDs it names, in the order they appear in the list.
//
// The list is a comma-separated sequence of single IDs and inclusive
// "start-end" ranges. Whitespace around the whole list and around each number
// is ignored, so the content of a file can be passed including its trailing
// newline. An empty or whitespace-only list yields an empty result and a nil
// error.
//
// An error is returned for tokens that are not decimal integers, for ranges
// containing more than one "-", and for ranges whose end is smaller than
// their start.
func ParseCPUs(cpuset string) ([]int, error) {
	var result []int

	cpuset = strings.TrimSpace(cpuset)
	if cpuset == "" {
		return result, nil
	}

	for _, r := range strings.Split(cpuset, ",") {
		// Single CPU ID.
		if !strings.Contains(r, "-") {
			cpu, err := strconv.Atoi(strings.TrimSpace(r))
			if err != nil {
				return nil, fmt.Errorf("invalid CPU ID: %s", r)
			}
			result = append(result, cpu)
			continue
		}

		// Inclusive range "start-end".
		parts := strings.Split(r, "-")
		if len(parts) != 2 {
			return nil, fmt.Errorf("invalid CPU range: %s", r)
		}
		start, err := strconv.Atoi(strings.TrimSpace(parts[0]))
		if err != nil {
			return nil, fmt.Errorf("invalid CPU range start: %s", parts[0])
		}
		end, err := strconv.Atoi(strings.TrimSpace(parts[1]))
		if err != nil {
			return nil, fmt.Errorf("invalid CPU range end: %s", parts[1])
		}
		// A reversed range would otherwise silently contribute no CPUs.
		if end < start {
			return nil, fmt.Errorf("invalid CPU range: %s", r)
		}
		for cpu := start; cpu <= end; cpu++ {
			result = append(result, cpu)
		}
	}
	return result, nil
}
|
||||||
|
|
||||||
|
func GetAllCPUs() ([]int, error) {
|
||||||
|
data, err := os.ReadFile("/sys/devices/system/cpu/online")
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("failed to read /sys/devices/system/cpu/online: %v", err)
|
||||||
|
}
|
||||||
|
return ParseCPUs(strings.TrimSpace(string(data)))
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m *SlurmCgroupCollector) isExcluded(metric string) bool {
|
||||||
|
_, found := m.excludeMetrics[metric]
|
||||||
|
return found
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m *SlurmCgroupCollector) readFile(path string) ([]byte, error) {
|
||||||
|
if m.useSudo {
|
||||||
|
cmd := exec.Command("sudo", "cat", path)
|
||||||
|
return cmd.Output()
|
||||||
|
}
|
||||||
|
return os.ReadFile(path)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m *SlurmCgroupCollector) Init(config json.RawMessage) error {
|
||||||
|
var err error
|
||||||
|
m.name = "SlurmCgroupCollector"
|
||||||
|
m.setup()
|
||||||
|
m.parallel = true
|
||||||
|
m.meta = map[string]string{"source": m.name, "group": "SLURM"}
|
||||||
|
m.tags = map[string]string{"type": "hwthread"}
|
||||||
|
m.cpuUsed = make(map[int]bool)
|
||||||
|
m.cgroupBase = defaultCgroupBase
|
||||||
|
|
||||||
|
if len(config) > 0 {
|
||||||
|
err = json.Unmarshal(config, &m.config)
|
||||||
|
if err != nil {
|
||||||
|
cclog.ComponentError(m.name, "Error reading config:", err.Error())
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
m.excludeMetrics = make(map[string]struct{})
|
||||||
|
for _, metric := range m.config.ExcludeMetrics {
|
||||||
|
m.excludeMetrics[metric] = struct{}{}
|
||||||
|
}
|
||||||
|
if m.config.CgroupBase != "" {
|
||||||
|
m.cgroupBase = m.config.CgroupBase
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
m.useSudo = m.config.UseSudo
|
||||||
|
if !m.useSudo {
|
||||||
|
user, err := user.Current()
|
||||||
|
if err != nil {
|
||||||
|
cclog.ComponentError(m.name, "Failed to get current user:", err.Error())
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
if user.Uid != "0" {
|
||||||
|
cclog.ComponentError(m.name, "Reading cgroup files requires root privileges (or enable use_sudo in config)")
|
||||||
|
return fmt.Errorf("not root")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
m.allCPUs, err = GetAllCPUs()
|
||||||
|
if err != nil {
|
||||||
|
cclog.ComponentError(m.name, "Error reading online CPUs:", err.Error())
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
m.init = true
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m *SlurmCgroupCollector) ReadJobData(jobdir string) (SlurmJobData, error) {
|
||||||
|
jobdata := SlurmJobData{
|
||||||
|
MemoryUsage: 0,
|
||||||
|
MaxMemoryUsage: 0,
|
||||||
|
LimitMemoryUsage: 0,
|
||||||
|
CpuUsageUser: 0,
|
||||||
|
CpuUsageSys: 0,
|
||||||
|
CpuSet: []int{},
|
||||||
|
}
|
||||||
|
|
||||||
|
cg := func(f string) string { return filepath.Join(m.cgroupBase, jobdir, f) }
|
||||||
|
|
||||||
|
memUsage, err := m.readFile(cg("memory.current"))
|
||||||
|
if err == nil {
|
||||||
|
x, err := strconv.ParseFloat(strings.TrimSpace(string(memUsage)), 64)
|
||||||
|
if err == nil {
|
||||||
|
jobdata.MemoryUsage = x
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
maxMem, err := m.readFile(cg("memory.peak"))
|
||||||
|
if err == nil {
|
||||||
|
x, err := strconv.ParseFloat(strings.TrimSpace(string(maxMem)), 64)
|
||||||
|
if err == nil {
|
||||||
|
jobdata.MaxMemoryUsage = x
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
limitMem, err := m.readFile(cg("memory.max"))
|
||||||
|
if err == nil {
|
||||||
|
x, err := strconv.ParseFloat(strings.TrimSpace(string(limitMem)), 64)
|
||||||
|
if err == nil {
|
||||||
|
jobdata.LimitMemoryUsage = x
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
cpuStat, err := m.readFile(cg("cpu.stat"))
|
||||||
|
if err == nil {
|
||||||
|
lines := strings.Split(strings.TrimSpace(string(cpuStat)), "\n")
|
||||||
|
var usageUsec, userUsec, systemUsec float64
|
||||||
|
for _, line := range lines {
|
||||||
|
fields := strings.Fields(line)
|
||||||
|
if len(fields) < 2 {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
value, err := strconv.ParseFloat(fields[1], 64)
|
||||||
|
if err != nil {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
switch fields[0] {
|
||||||
|
case "usage_usec":
|
||||||
|
usageUsec = value
|
||||||
|
case "user_usec":
|
||||||
|
userUsec = value
|
||||||
|
case "system_usec":
|
||||||
|
systemUsec = value
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if usageUsec > 0 {
|
||||||
|
jobdata.CpuUsageUser = (userUsec * 100 / usageUsec)
|
||||||
|
jobdata.CpuUsageSys = (systemUsec * 100 / usageUsec)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
cpuSet, err := m.readFile(cg("cpuset.cpus"))
|
||||||
|
if err == nil {
|
||||||
|
cpus, err := ParseCPUs(strings.TrimSpace(string(cpuSet)))
|
||||||
|
if err == nil {
|
||||||
|
jobdata.CpuSet = cpus
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return jobdata, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m *SlurmCgroupCollector) Read(interval time.Duration, output chan lp.CCMessage) {
|
||||||
|
timestamp := time.Now()
|
||||||
|
|
||||||
|
for k := range m.cpuUsed {
|
||||||
|
delete(m.cpuUsed, k)
|
||||||
|
}
|
||||||
|
|
||||||
|
globPattern := filepath.Join(m.cgroupBase, "job_*")
|
||||||
|
jobDirs, err := filepath.Glob(globPattern)
|
||||||
|
if err != nil {
|
||||||
|
cclog.ComponentError(m.name, "Error globbing job directories:", err.Error())
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, jdir := range jobDirs {
|
||||||
|
jKey := filepath.Base(jdir)
|
||||||
|
|
||||||
|
jobdata, err := m.ReadJobData(jKey)
|
||||||
|
if err != nil {
|
||||||
|
cclog.ComponentError(m.name, "Error reading job data for", jKey, ":", err.Error())
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(jobdata.CpuSet) > 0 {
|
||||||
|
coreCount := float64(len(jobdata.CpuSet))
|
||||||
|
for _, cpu := range jobdata.CpuSet {
|
||||||
|
coreTags := map[string]string{
|
||||||
|
"type": "hwthread",
|
||||||
|
"type-id": fmt.Sprintf("%d", cpu),
|
||||||
|
}
|
||||||
|
|
||||||
|
if coreCount > 0 && !m.isExcluded("job_mem_used") {
|
||||||
|
memPerCore := jobdata.MemoryUsage / coreCount
|
||||||
|
if y, err := lp.NewMessage("job_mem_used", coreTags, m.meta, map[string]interface{}{"value": memPerCore}, timestamp); err == nil {
|
||||||
|
y.AddMeta("unit", "Bytes")
|
||||||
|
output <- y
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if coreCount > 0 && !m.isExcluded("job_max_mem_used") {
|
||||||
|
maxMemPerCore := jobdata.MaxMemoryUsage / coreCount
|
||||||
|
if y, err := lp.NewMessage("job_max_mem_used", coreTags, m.meta, map[string]interface{}{"value": maxMemPerCore}, timestamp); err == nil {
|
||||||
|
y.AddMeta("unit", "Bytes")
|
||||||
|
output <- y
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if coreCount > 0 && !m.isExcluded("job_mem_limit") {
|
||||||
|
limitPerCore := jobdata.LimitMemoryUsage / coreCount
|
||||||
|
if y, err := lp.NewMessage("job_mem_limit", coreTags, m.meta, map[string]interface{}{"value": limitPerCore}, timestamp); err == nil {
|
||||||
|
y.AddMeta("unit", "Bytes")
|
||||||
|
output <- y
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if coreCount > 0 && !m.isExcluded("job_user_cpu") {
|
||||||
|
cpuUserPerCore := jobdata.CpuUsageUser / coreCount
|
||||||
|
if y, err := lp.NewMessage("job_user_cpu", coreTags, m.meta, map[string]interface{}{"value": cpuUserPerCore}, timestamp); err == nil {
|
||||||
|
y.AddMeta("unit", "%")
|
||||||
|
output <- y
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if coreCount > 0 && !m.isExcluded("job_sys_cpu") {
|
||||||
|
cpuSysPerCore := jobdata.CpuUsageSys / coreCount
|
||||||
|
if y, err := lp.NewMessage("job_sys_cpu", coreTags, m.meta, map[string]interface{}{"value": cpuSysPerCore}, timestamp); err == nil {
|
||||||
|
y.AddMeta("unit", "%")
|
||||||
|
output <- y
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
m.cpuUsed[cpu] = true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, cpu := range m.allCPUs {
|
||||||
|
if !m.cpuUsed[cpu] {
|
||||||
|
coreTags := map[string]string{
|
||||||
|
"type": "hwthread",
|
||||||
|
"type-id": fmt.Sprintf("%d", cpu),
|
||||||
|
}
|
||||||
|
|
||||||
|
if !m.isExcluded("job_mem_used") {
|
||||||
|
if y, err := lp.NewMessage("job_mem_used", coreTags, m.meta, map[string]interface{}{"value": 0}, timestamp); err == nil {
|
||||||
|
y.AddMeta("unit", "Bytes")
|
||||||
|
output <- y
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if !m.isExcluded("job_max_mem_used") {
|
||||||
|
if y, err := lp.NewMessage("job_max_mem_used", coreTags, m.meta, map[string]interface{}{"value": 0}, timestamp); err == nil {
|
||||||
|
y.AddMeta("unit", "Bytes")
|
||||||
|
output <- y
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if !m.isExcluded("job_mem_limit") {
|
||||||
|
if y, err := lp.NewMessage("job_mem_limit", coreTags, m.meta, map[string]interface{}{"value": 0}, timestamp); err == nil {
|
||||||
|
y.AddMeta("unit", "Bytes")
|
||||||
|
output <- y
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if !m.isExcluded("job_user_cpu") {
|
||||||
|
if y, err := lp.NewMessage("job_user_cpu", coreTags, m.meta, map[string]interface{}{"value": 0}, timestamp); err == nil {
|
||||||
|
y.AddMeta("unit", "%")
|
||||||
|
output <- y
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if !m.isExcluded("job_sys_cpu") {
|
||||||
|
if y, err := lp.NewMessage("job_sys_cpu", coreTags, m.meta, map[string]interface{}{"value": 0}, timestamp); err == nil {
|
||||||
|
y.AddMeta("unit", "%")
|
||||||
|
output <- y
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m *SlurmCgroupCollector) Close() {
|
||||||
|
m.init = false
|
||||||
|
}
|
50
collectors/slurmCgroupMetric.md
Normal file
50
collectors/slurmCgroupMetric.md
Normal file
@@ -0,0 +1,50 @@
|
|||||||
|
<!--
|
||||||
|
---
|
||||||
|
title: Slurm cgroup metric collector
|
||||||
|
description: Collect per-core memory and CPU usage for SLURM jobs from cgroup v2
|
||||||
|
categories: [cc-metric-collector]
|
||||||
|
tags: ['Admin']
|
||||||
|
weight: 3
|
||||||
|
hugo_path: docs/reference/cc-metric-collector/collectors/slurm_cgroup.md
|
||||||
|
---
|
||||||
|
-->
|
||||||
|
|
||||||
|
## `slurm_cgroup` collector
|
||||||
|
|
||||||
|
The `slurm_cgroup` collector reads job-specific resource metrics from the cgroup v2 filesystem and provides **hwthread** metrics for memory and CPU usage of running SLURM jobs.
|
||||||
|
|
||||||
|
### Example configuration
|
||||||
|
|
||||||
|
```json
|
||||||
|
"slurm_cgroup": {
|
||||||
|
"cgroup_base": "/sys/fs/cgroup/system.slice/slurmstepd.scope",
|
||||||
|
"exclude_metrics": [
|
||||||
|
"job_sys_cpu",
|
||||||
|
"job_mem_limit"
|
||||||
|
],
|
||||||
|
"use_sudo": false
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
* The `cgroup_base` parameter (optional) can be set to specify the root path to SLURM job cgroups. The default is `/sys/fs/cgroup/system.slice/slurmstepd.scope`.
|
||||||
|
* The `exclude_metrics` array can be used to suppress individual metrics from being sent to the sink.
|
||||||
|
* The cgroup files are only readable by root. If the collector does not run as root and password-less sudo is configured, set `use_sudo` to `true`; the files are then read through `sudo cat`.
|
||||||
|
|
||||||
|
### Reported metrics
|
||||||
|
|
||||||
|
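All metrics are reported **per hardware thread**. Job-wide values are divided evenly across the hardware threads in the job's cpuset; hardware threads that belong to no job report `0`: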
|
||||||
|
|
||||||
|
* `job_mem_used` (`unit=Bytes`): Current memory usage of the job
|
||||||
|
* `job_max_mem_used` (`unit=Bytes`): Peak memory usage
|
||||||
|
* `job_mem_limit` (`unit=Bytes`): Cgroup memory limit
|
||||||
|
* `job_user_cpu` (`unit=%`): Percentage of the job's total CPU time (`usage_usec`) spent in user mode, divided by the number of hardware threads of the job
|
||||||
|
* `job_sys_cpu` (`unit=%`): Percentage of the job's total CPU time (`usage_usec`) spent in system mode, divided by the number of hardware threads of the job
|
||||||
|
|
||||||
|
Each metric has tags:
|
||||||
|
|
||||||
|
* `type=hwthread`
|
||||||
|
* `type-id=<core_id>`
|
||||||
|
|
||||||
|
### Limitations
|
||||||
|
|
||||||
|
* **cgroups v2 required:** This collector only supports systems running with cgroups v2 (unified hierarchy).
|
@@ -1,3 +1,10 @@
|
|||||||
|
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||||
|
// All rights reserved. This file is part of cc-lib.
|
||||||
|
// Use of this source code is governed by a MIT-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
// additional authors:
|
||||||
|
// Holger Obermaier (NHR@KIT)
|
||||||
|
|
||||||
package collectors
|
package collectors
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
@@ -1,3 +1,14 @@
|
|||||||
|
<!--
|
||||||
|
---
|
||||||
|
title: Temperature metric collector
|
||||||
|
description: Collect thermal metrics from `/sys/class/hwmon/*`
|
||||||
|
categories: [cc-metric-collector]
|
||||||
|
tags: ['Admin']
|
||||||
|
weight: 2
|
||||||
|
hugo_path: docs/reference/cc-metric-collector/collectors/temp.md
|
||||||
|
---
|
||||||
|
-->
|
||||||
|
|
||||||
|
|
||||||
## `tempstat` collector
|
## `tempstat` collector
|
||||||
|
|
||||||
|
@@ -1,3 +1,10 @@
|
|||||||
|
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||||
|
// All rights reserved. This file is part of cc-lib.
|
||||||
|
// Use of this source code is governed by a MIT-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
// additional authors:
|
||||||
|
// Holger Obermaier (NHR@KIT)
|
||||||
|
|
||||||
package collectors
|
package collectors
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
@@ -1,3 +1,15 @@
|
|||||||
|
<!--
|
||||||
|
---
|
||||||
|
title: TopProcs collector
|
||||||
|
description: Collect infos about most CPU-consuming processes
|
||||||
|
categories: [cc-metric-collector]
|
||||||
|
tags: ['Admin']
|
||||||
|
weight: 2
|
||||||
|
hugo_path: docs/reference/cc-metric-collector/collectors/topprocs.md
|
||||||
|
---
|
||||||
|
-->
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
## `topprocs` collector
|
## `topprocs` collector
|
||||||
|
|
||||||
|
53
go.mod
53
go.mod
@@ -1,48 +1,45 @@
|
|||||||
module github.com/ClusterCockpit/cc-metric-collector
|
module github.com/ClusterCockpit/cc-metric-collector
|
||||||
|
|
||||||
go 1.23.4
|
go 1.24.0
|
||||||
|
|
||||||
toolchain go1.23.7
|
|
||||||
|
|
||||||
require (
|
require (
|
||||||
github.com/ClusterCockpit/cc-lib v0.1.1
|
github.com/ClusterCockpit/cc-lib v0.10.1
|
||||||
github.com/ClusterCockpit/go-rocm-smi v0.3.0
|
github.com/ClusterCockpit/go-rocm-smi v0.3.0
|
||||||
github.com/NVIDIA/go-nvml v0.12.0-2
|
github.com/NVIDIA/go-nvml v0.13.0-1
|
||||||
github.com/PaesslerAG/gval v1.2.2
|
github.com/PaesslerAG/gval v1.2.4
|
||||||
github.com/fsnotify/fsnotify v1.7.0
|
github.com/fsnotify/fsnotify v1.9.0
|
||||||
github.com/gorilla/mux v1.8.1
|
|
||||||
github.com/influxdata/influxdb-client-go/v2 v2.14.0
|
|
||||||
github.com/influxdata/line-protocol v0.0.0-20210922203350-b1ad95c89adf
|
github.com/influxdata/line-protocol v0.0.0-20210922203350-b1ad95c89adf
|
||||||
github.com/influxdata/line-protocol/v2 v2.2.1
|
github.com/tklauser/go-sysconf v0.3.15
|
||||||
github.com/nats-io/nats.go v1.39.0
|
|
||||||
github.com/prometheus/client_golang v1.20.5
|
|
||||||
github.com/stmcginnis/gofish v0.15.0
|
|
||||||
github.com/tklauser/go-sysconf v0.3.13
|
|
||||||
golang.design/x/thread v0.0.0-20210122121316-335e9adffdf1
|
golang.design/x/thread v0.0.0-20210122121316-335e9adffdf1
|
||||||
golang.org/x/exp v0.0.0-20250215185904-eff6e970281f
|
golang.org/x/exp v0.0.0-20250620022241-b7579e27df2b
|
||||||
golang.org/x/sys v0.30.0
|
golang.org/x/sys v0.37.0
|
||||||
)
|
)
|
||||||
|
|
||||||
require (
|
require (
|
||||||
github.com/ClusterCockpit/cc-backend v1.4.2 // indirect
|
|
||||||
github.com/ClusterCockpit/cc-units v0.4.0 // indirect
|
|
||||||
github.com/apapsch/go-jsonmerge/v2 v2.0.0 // indirect
|
github.com/apapsch/go-jsonmerge/v2 v2.0.0 // indirect
|
||||||
github.com/beorn7/perks v1.0.1 // indirect
|
github.com/beorn7/perks v1.0.1 // indirect
|
||||||
github.com/cespare/xxhash/v2 v2.3.0 // indirect
|
github.com/cespare/xxhash/v2 v2.3.0 // indirect
|
||||||
github.com/expr-lang/expr v1.17.0 // indirect
|
github.com/expr-lang/expr v1.17.6 // indirect
|
||||||
github.com/google/uuid v1.6.0 // indirect
|
github.com/google/uuid v1.6.0 // indirect
|
||||||
github.com/klauspost/compress v1.17.9 // indirect
|
github.com/gorilla/mux v1.8.1 // indirect
|
||||||
|
github.com/influxdata/influxdb-client-go/v2 v2.14.0 // indirect
|
||||||
|
github.com/influxdata/line-protocol/v2 v2.2.1 // indirect
|
||||||
|
github.com/klauspost/compress v1.18.0 // indirect
|
||||||
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect
|
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect
|
||||||
github.com/nats-io/nkeys v0.4.9 // indirect
|
github.com/nats-io/nats.go v1.46.1 // indirect
|
||||||
|
github.com/nats-io/nkeys v0.4.11 // indirect
|
||||||
github.com/nats-io/nuid v1.0.1 // indirect
|
github.com/nats-io/nuid v1.0.1 // indirect
|
||||||
github.com/oapi-codegen/runtime v1.1.1 // indirect
|
github.com/oapi-codegen/runtime v1.1.1 // indirect
|
||||||
github.com/prometheus/client_model v0.6.1 // indirect
|
github.com/prometheus/client_golang v1.23.2 // indirect
|
||||||
github.com/prometheus/common v0.55.0 // indirect
|
github.com/prometheus/client_model v0.6.2 // indirect
|
||||||
github.com/prometheus/procfs v0.15.1 // indirect
|
github.com/prometheus/common v0.66.1 // indirect
|
||||||
|
github.com/prometheus/procfs v0.16.1 // indirect
|
||||||
github.com/santhosh-tekuri/jsonschema/v5 v5.3.1 // indirect
|
github.com/santhosh-tekuri/jsonschema/v5 v5.3.1 // indirect
|
||||||
github.com/shopspring/decimal v1.3.1 // indirect
|
github.com/shopspring/decimal v1.3.1 // indirect
|
||||||
github.com/tklauser/numcpus v0.7.0 // indirect
|
github.com/stmcginnis/gofish v0.20.0 // indirect
|
||||||
golang.org/x/crypto v0.35.0 // indirect
|
github.com/tklauser/numcpus v0.10.0 // indirect
|
||||||
golang.org/x/net v0.36.0 // indirect
|
go.yaml.in/yaml/v2 v2.4.2 // indirect
|
||||||
google.golang.org/protobuf v1.35.2 // indirect
|
golang.org/x/crypto v0.42.0 // indirect
|
||||||
|
golang.org/x/net v0.43.0 // indirect
|
||||||
|
google.golang.org/protobuf v1.36.8 // indirect
|
||||||
)
|
)
|
||||||
|
126
go.sum
126
go.sum
@@ -1,21 +1,17 @@
|
|||||||
github.com/ClusterCockpit/cc-backend v1.4.2 h1:kTOzqkh9N0564N9nqQThnSs7TAfg8RLgvSm00e5HtIc=
|
github.com/ClusterCockpit/cc-lib v0.10.1 h1:tjGEH8mFGgznYxO8BKLiiar0eZR1Oytk8x5iIQHZR5s=
|
||||||
github.com/ClusterCockpit/cc-backend v1.4.2/go.mod h1:g8TNHXe4AXej26snu2//jO3mUF980elT93iV/k11O/c=
|
github.com/ClusterCockpit/cc-lib v0.10.1/go.mod h1:nvTZuxFCTwlos8I1rL5O1RPab7vRtkU8E/PGiaF6pQA=
|
||||||
github.com/ClusterCockpit/cc-lib v0.1.0-beta.1 h1:dz9j0g2cod8+SMDjuoIY6ISpiHHeekhX6yQaeiwiwJw=
|
|
||||||
github.com/ClusterCockpit/cc-lib v0.1.0-beta.1/go.mod h1:kXMskla1i5ZSfXW0vVRIHgGeXMU5zu2PzYOYnUaOr80=
|
|
||||||
github.com/ClusterCockpit/cc-lib v0.1.1 h1:AXZWYUzgTaE/WdxLNSWPR7FJoA5WlzvYZxw4gIw3gNw=
|
|
||||||
github.com/ClusterCockpit/cc-lib v0.1.1/go.mod h1:SHKcWW/+kN+pcofAtHJFxvmx1FV0VIJuQv5PuT0HDcc=
|
|
||||||
github.com/ClusterCockpit/cc-units v0.4.0 h1:zP5DOu99GmErW0tCDf0gcLrlWt42RQ9dpoONEOh4cI0=
|
|
||||||
github.com/ClusterCockpit/cc-units v0.4.0/go.mod h1:3S3PAhAayS3pbgcT4q9Vn9VJw22Op51X0YimtG77zBw=
|
|
||||||
github.com/ClusterCockpit/go-rocm-smi v0.3.0 h1:1qZnSpG7/NyLtc7AjqnUL9Jb8xtqG1nMVgp69rJfaR8=
|
github.com/ClusterCockpit/go-rocm-smi v0.3.0 h1:1qZnSpG7/NyLtc7AjqnUL9Jb8xtqG1nMVgp69rJfaR8=
|
||||||
github.com/ClusterCockpit/go-rocm-smi v0.3.0/go.mod h1:+I3UMeX3OlizXDf1WpGD43W4KGZZGVSGmny6rTeOnWA=
|
github.com/ClusterCockpit/go-rocm-smi v0.3.0/go.mod h1:+I3UMeX3OlizXDf1WpGD43W4KGZZGVSGmny6rTeOnWA=
|
||||||
github.com/NVIDIA/go-nvml v0.11.6-0/go.mod h1:hy7HYeQy335x6nEss0Ne3PYqleRa6Ct+VKD9RQ4nyFs=
|
github.com/NVIDIA/go-nvml v0.11.6-0/go.mod h1:hy7HYeQy335x6nEss0Ne3PYqleRa6Ct+VKD9RQ4nyFs=
|
||||||
github.com/NVIDIA/go-nvml v0.12.0-2 h1:Sg239yy7jmopu/cuvYauoMj9fOpcGMngxVxxS1EBXeY=
|
github.com/NVIDIA/go-nvml v0.13.0-1 h1:OLX8Jq3dONuPOQPC7rndB6+iDmDakw0XTYgzMxObkEw=
|
||||||
github.com/NVIDIA/go-nvml v0.12.0-2/go.mod h1:7ruy85eOM73muOc/I37euONSwEyFqZsv5ED9AogD4G0=
|
github.com/NVIDIA/go-nvml v0.13.0-1/go.mod h1:+KNA7c7gIBH7SKSJ1ntlwkfN80zdx8ovl4hrK3LmPt4=
|
||||||
github.com/PaesslerAG/gval v1.2.2 h1:Y7iBzhgE09IGTt5QgGQ2IdaYYYOU134YGHBThD+wm9E=
|
github.com/PaesslerAG/gval v1.2.4 h1:rhX7MpjJlcxYwL2eTTYIOBUyEKZ+A96T9vQySWkVUiU=
|
||||||
github.com/PaesslerAG/gval v1.2.2/go.mod h1:XRFLwvmkTEdYziLdaCeCa5ImcGVrfQbeNUbVR+C6xac=
|
github.com/PaesslerAG/gval v1.2.4/go.mod h1:XRFLwvmkTEdYziLdaCeCa5ImcGVrfQbeNUbVR+C6xac=
|
||||||
github.com/PaesslerAG/jsonpath v0.1.0 h1:gADYeifvlqK3R3i2cR5B4DGgxLXIPb3TRTH1mGi0jPI=
|
github.com/PaesslerAG/jsonpath v0.1.0 h1:gADYeifvlqK3R3i2cR5B4DGgxLXIPb3TRTH1mGi0jPI=
|
||||||
github.com/PaesslerAG/jsonpath v0.1.0/go.mod h1:4BzmtoM/PI8fPO4aQGIusjGxGir2BzcV0grWtFzq1Y8=
|
github.com/PaesslerAG/jsonpath v0.1.0/go.mod h1:4BzmtoM/PI8fPO4aQGIusjGxGir2BzcV0grWtFzq1Y8=
|
||||||
github.com/RaveNoX/go-jsoncommentstrip v1.0.0/go.mod h1:78ihd09MekBnJnxpICcwzCMzGrKSKYe4AqU6PDYYpjk=
|
github.com/RaveNoX/go-jsoncommentstrip v1.0.0/go.mod h1:78ihd09MekBnJnxpICcwzCMzGrKSKYe4AqU6PDYYpjk=
|
||||||
|
github.com/antithesishq/antithesis-sdk-go v0.4.3-default-no-op h1:+OSa/t11TFhqfrX0EOSqQBDJ0YlpmK0rDSiB19dg9M0=
|
||||||
|
github.com/antithesishq/antithesis-sdk-go v0.4.3-default-no-op/go.mod h1:IUpT2DPAKh6i/YhSbt6Gl3v2yvUZjmKncl7U91fup7E=
|
||||||
github.com/apapsch/go-jsonmerge/v2 v2.0.0 h1:axGnT1gRIfimI7gJifB699GoE/oq+F2MU7Dml6nw9rQ=
|
github.com/apapsch/go-jsonmerge/v2 v2.0.0 h1:axGnT1gRIfimI7gJifB699GoE/oq+F2MU7Dml6nw9rQ=
|
||||||
github.com/apapsch/go-jsonmerge/v2 v2.0.0/go.mod h1:lvDnEdqiQrp0O42VQGgmlKpxL1AP2+08jFMw88y4klk=
|
github.com/apapsch/go-jsonmerge/v2 v2.0.0/go.mod h1:lvDnEdqiQrp0O42VQGgmlKpxL1AP2+08jFMw88y4klk=
|
||||||
github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM=
|
github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM=
|
||||||
@@ -27,20 +23,20 @@ github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ3
|
|||||||
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||||
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
|
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
|
||||||
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||||
github.com/expr-lang/expr v1.16.9 h1:WUAzmR0JNI9JCiF0/ewwHB1gmcGw5wW7nWt8gc6PpCI=
|
github.com/expr-lang/expr v1.17.6 h1:1h6i8ONk9cexhDmowO/A64VPxHScu7qfSl2k8OlINec=
|
||||||
github.com/expr-lang/expr v1.16.9/go.mod h1:8/vRC7+7HBzESEqt5kKpYXxrxkr31SaO8r40VO/1IT4=
|
github.com/expr-lang/expr v1.17.6/go.mod h1:8/vRC7+7HBzESEqt5kKpYXxrxkr31SaO8r40VO/1IT4=
|
||||||
github.com/expr-lang/expr v1.17.0 h1:+vpszOyzKLQXC9VF+wA8cVA0tlA984/Wabc/1hF9Whg=
|
|
||||||
github.com/expr-lang/expr v1.17.0/go.mod h1:8/vRC7+7HBzESEqt5kKpYXxrxkr31SaO8r40VO/1IT4=
|
|
||||||
github.com/frankban/quicktest v1.11.0/go.mod h1:K+q6oSqb0W0Ininfk863uOk1lMy69l/P6txr3mVT54s=
|
github.com/frankban/quicktest v1.11.0/go.mod h1:K+q6oSqb0W0Ininfk863uOk1lMy69l/P6txr3mVT54s=
|
||||||
github.com/frankban/quicktest v1.11.2/go.mod h1:K+q6oSqb0W0Ininfk863uOk1lMy69l/P6txr3mVT54s=
|
github.com/frankban/quicktest v1.11.2/go.mod h1:K+q6oSqb0W0Ininfk863uOk1lMy69l/P6txr3mVT54s=
|
||||||
github.com/frankban/quicktest v1.13.0 h1:yNZif1OkDfNoDfb9zZa9aXIpejNR4F23Wely0c+Qdqk=
|
github.com/frankban/quicktest v1.13.0 h1:yNZif1OkDfNoDfb9zZa9aXIpejNR4F23Wely0c+Qdqk=
|
||||||
github.com/frankban/quicktest v1.13.0/go.mod h1:qLE0fzW0VuyUAJgPU19zByoIr0HtCHN/r/VLSOOIySU=
|
github.com/frankban/quicktest v1.13.0/go.mod h1:qLE0fzW0VuyUAJgPU19zByoIr0HtCHN/r/VLSOOIySU=
|
||||||
github.com/fsnotify/fsnotify v1.7.0 h1:8JEhPFa5W2WU7YfeZzPNqzMP6Lwt7L2715Ggo0nosvA=
|
github.com/fsnotify/fsnotify v1.9.0 h1:2Ml+OJNzbYCTzsxtv8vKSFD9PbJjmhYF14k/jKC7S9k=
|
||||||
github.com/fsnotify/fsnotify v1.7.0/go.mod h1:40Bi/Hjc2AVfZrqy+aj+yEI+/bRxZnMJyTJwOpGvigM=
|
github.com/fsnotify/fsnotify v1.9.0/go.mod h1:8jBTzvmWwFyi3Pb8djgCCO5IBqzKJ/Jwo8TRcHyHii0=
|
||||||
github.com/google/go-cmp v0.5.2/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
|
github.com/google/go-cmp v0.5.2/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
|
||||||
github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
|
github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
|
||||||
github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI=
|
github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8=
|
||||||
github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
|
github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU=
|
||||||
|
github.com/google/go-tpm v0.9.5 h1:ocUmnDebX54dnW+MQWGQRbdaAcJELsa6PqZhJ48KwVU=
|
||||||
|
github.com/google/go-tpm v0.9.5/go.mod h1:h9jEsEECg7gtLis0upRBQU+GhYVH6jMjrFxI8u6bVUY=
|
||||||
github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
|
github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
|
||||||
github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
|
github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
|
||||||
github.com/gorilla/mux v1.8.1 h1:TuBL49tXwgrFYWhqrNgrUNEY92u81SPhu7sTdzQEiWY=
|
github.com/gorilla/mux v1.8.1 h1:TuBL49tXwgrFYWhqrNgrUNEY92u81SPhu7sTdzQEiWY=
|
||||||
@@ -57,8 +53,8 @@ github.com/influxdata/line-protocol/v2 v2.1.0/go.mod h1:QKw43hdUBg3GTk2iC3iyCxks
|
|||||||
github.com/influxdata/line-protocol/v2 v2.2.1 h1:EAPkqJ9Km4uAxtMRgUubJyqAr6zgWM0dznKMLRauQRE=
|
github.com/influxdata/line-protocol/v2 v2.2.1 h1:EAPkqJ9Km4uAxtMRgUubJyqAr6zgWM0dznKMLRauQRE=
|
||||||
github.com/influxdata/line-protocol/v2 v2.2.1/go.mod h1:DmB3Cnh+3oxmG6LOBIxce4oaL4CPj3OmMPgvauXh+tM=
|
github.com/influxdata/line-protocol/v2 v2.2.1/go.mod h1:DmB3Cnh+3oxmG6LOBIxce4oaL4CPj3OmMPgvauXh+tM=
|
||||||
github.com/juju/gnuflag v0.0.0-20171113085948-2ce1bb71843d/go.mod h1:2PavIy+JPciBPrBUjwbNvtwB6RQlve+hkpll6QSNmOE=
|
github.com/juju/gnuflag v0.0.0-20171113085948-2ce1bb71843d/go.mod h1:2PavIy+JPciBPrBUjwbNvtwB6RQlve+hkpll6QSNmOE=
|
||||||
github.com/klauspost/compress v1.17.9 h1:6KIumPrER1LHsvBVuDa0r5xaG0Es51mhhB9BQB2qeMA=
|
github.com/klauspost/compress v1.18.0 h1:c/Cqfb0r+Yi+JtIEq73FWXVkRonBlf0CRNYc8Zttxdo=
|
||||||
github.com/klauspost/compress v1.17.9/go.mod h1:Di0epgTjJY877eYKx5yC51cX2A2Vl2ibi7bDH9ttBbw=
|
github.com/klauspost/compress v1.18.0/go.mod h1:2Pp+KzxcywXVXMr50+X0Q/Lsb43OQHYWRCY2AiWywWQ=
|
||||||
github.com/kr/pretty v0.2.1/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI=
|
github.com/kr/pretty v0.2.1/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI=
|
||||||
github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE=
|
github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE=
|
||||||
github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk=
|
github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk=
|
||||||
@@ -68,12 +64,18 @@ github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
|
|||||||
github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
|
github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
|
||||||
github.com/kylelemons/godebug v1.1.0 h1:RPNrshWIDI6G2gRW9EHilWtl7Z6Sb1BR0xunSBf0SNc=
|
github.com/kylelemons/godebug v1.1.0 h1:RPNrshWIDI6G2gRW9EHilWtl7Z6Sb1BR0xunSBf0SNc=
|
||||||
github.com/kylelemons/godebug v1.1.0/go.mod h1:9/0rRGxNHcop5bhtWyNeEfOS8JIWk580+fNqagV/RAw=
|
github.com/kylelemons/godebug v1.1.0/go.mod h1:9/0rRGxNHcop5bhtWyNeEfOS8JIWk580+fNqagV/RAw=
|
||||||
|
github.com/minio/highwayhash v1.0.3 h1:kbnuUMoHYyVl7szWjSxJnxw11k2U709jqFPPmIUyD6Q=
|
||||||
|
github.com/minio/highwayhash v1.0.3/go.mod h1:GGYsuwP/fPD6Y9hMiXuapVvlIUEhFhMTh0rxU3ik1LQ=
|
||||||
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA=
|
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA=
|
||||||
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ=
|
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ=
|
||||||
github.com/nats-io/nats.go v1.39.0 h1:2/yg2JQjiYYKLwDuBzV0FbB2sIV+eFNkEevlRi4n9lI=
|
github.com/nats-io/jwt/v2 v2.8.0 h1:K7uzyz50+yGZDO5o772eRE7atlcSEENpL7P+b74JV1g=
|
||||||
github.com/nats-io/nats.go v1.39.0/go.mod h1:MgRb8oOdigA6cYpEPhXJuRVH6UE/V4jblJ2jQ27IXYM=
|
github.com/nats-io/jwt/v2 v2.8.0/go.mod h1:me11pOkwObtcBNR8AiMrUbtVOUGkqYjMQZ6jnSdVUIA=
|
||||||
github.com/nats-io/nkeys v0.4.9 h1:qe9Faq2Gxwi6RZnZMXfmGMZkg3afLLOtrU+gDZJ35b0=
|
github.com/nats-io/nats-server/v2 v2.12.0 h1:OIwe8jZUqJFrh+hhiyKu8snNib66qsx806OslqJuo74=
|
||||||
github.com/nats-io/nkeys v0.4.9/go.mod h1:jcMqs+FLG+W5YO36OX6wFIFcmpdAns+w1Wm6D3I/evE=
|
github.com/nats-io/nats-server/v2 v2.12.0/go.mod h1:nr8dhzqkP5E/lDwmn+A2CvQPMd1yDKXQI7iGg3lAvww=
|
||||||
|
github.com/nats-io/nats.go v1.46.1 h1:bqQ2ZcxVd2lpYI97xYASeRTY3I5boe/IVmuUDPitHfo=
|
||||||
|
github.com/nats-io/nats.go v1.46.1/go.mod h1:iRWIPokVIFbVijxuMQq4y9ttaBTMe0SFdlZfMDd+33g=
|
||||||
|
github.com/nats-io/nkeys v0.4.11 h1:q44qGV008kYd9W1b1nEBkNzvnWxtRSQ7A8BoqRrcfa0=
|
||||||
|
github.com/nats-io/nkeys v0.4.11/go.mod h1:szDimtgmfOi9n25JpfIdGw12tZFYXqhGxjhVxsatHVE=
|
||||||
github.com/nats-io/nuid v1.0.1 h1:5iA8DT8V7q8WK2EScv2padNa/rTESc1KdnPw4TC2paw=
|
github.com/nats-io/nuid v1.0.1 h1:5iA8DT8V7q8WK2EScv2padNa/rTESc1KdnPw4TC2paw=
|
||||||
github.com/nats-io/nuid v1.0.1/go.mod h1:19wcPz3Ph3q0Jbyiqsd0kePYG7A95tJPxeL+1OSON2c=
|
github.com/nats-io/nuid v1.0.1/go.mod h1:19wcPz3Ph3q0Jbyiqsd0kePYG7A95tJPxeL+1OSON2c=
|
||||||
github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e/go.mod h1:zD1mROLANZcx1PVRCS0qkT7pwLkGfwJo4zjcN/Tysno=
|
github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e/go.mod h1:zD1mROLANZcx1PVRCS0qkT7pwLkGfwJo4zjcN/Tysno=
|
||||||
@@ -81,14 +83,14 @@ github.com/oapi-codegen/runtime v1.1.1 h1:EXLHh0DXIJnWhdRPN2w4MXAzFyE4CskzhNLUmt
|
|||||||
github.com/oapi-codegen/runtime v1.1.1/go.mod h1:SK9X900oXmPWilYR5/WKPzt3Kqxn/uS/+lbpREv+eCg=
|
github.com/oapi-codegen/runtime v1.1.1/go.mod h1:SK9X900oXmPWilYR5/WKPzt3Kqxn/uS/+lbpREv+eCg=
|
||||||
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
|
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
|
||||||
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
|
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
|
||||||
github.com/prometheus/client_golang v1.20.5 h1:cxppBPuYhUnsO6yo/aoRol4L7q7UFfdm+bR9r+8l63Y=
|
github.com/prometheus/client_golang v1.23.2 h1:Je96obch5RDVy3FDMndoUsjAhG5Edi49h0RJWRi/o0o=
|
||||||
github.com/prometheus/client_golang v1.20.5/go.mod h1:PIEt8X02hGcP8JWbeHyeZ53Y/jReSnHgO035n//V5WE=
|
github.com/prometheus/client_golang v1.23.2/go.mod h1:Tb1a6LWHB3/SPIzCoaDXI4I8UHKeFTEQ1YCr+0Gyqmg=
|
||||||
github.com/prometheus/client_model v0.6.1 h1:ZKSh/rekM+n3CeS952MLRAdFwIKqeY8b62p8ais2e9E=
|
github.com/prometheus/client_model v0.6.2 h1:oBsgwpGs7iVziMvrGhE53c/GrLUsZdHnqNwqPLxwZyk=
|
||||||
github.com/prometheus/client_model v0.6.1/go.mod h1:OrxVMOVHjw3lKMa8+x6HeMGkHMQyHDk9E3jmP2AmGiY=
|
github.com/prometheus/client_model v0.6.2/go.mod h1:y3m2F6Gdpfy6Ut/GBsUqTWZqCUvMVzSfMLjcu6wAwpE=
|
||||||
github.com/prometheus/common v0.55.0 h1:KEi6DK7lXW/m7Ig5i47x0vRzuBsHuvJdi5ee6Y3G1dc=
|
github.com/prometheus/common v0.66.1 h1:h5E0h5/Y8niHc5DlaLlWLArTQI7tMrsfQjHV+d9ZoGs=
|
||||||
github.com/prometheus/common v0.55.0/go.mod h1:2SECS4xJG1kd8XF9IcM1gMX6510RAEL65zxzNImwdc8=
|
github.com/prometheus/common v0.66.1/go.mod h1:gcaUsgf3KfRSwHY4dIMXLPV0K/Wg1oZ8+SbZk/HH/dA=
|
||||||
github.com/prometheus/procfs v0.15.1 h1:YagwOFzUgYfKKHX6Dr+sHT7km/hxC76UB0learggepc=
|
github.com/prometheus/procfs v0.16.1 h1:hZ15bTNuirocR6u0JZ6BAHHmwS1p8B4P6MRqxtzMyRg=
|
||||||
github.com/prometheus/procfs v0.15.1/go.mod h1:fB45yRUv8NstnjriLhBQLuOUt+WW4BsoGhij/e3PBqk=
|
github.com/prometheus/procfs v0.16.1/go.mod h1:teAbpZRB1iIAJYREa1LsoWUXykVXA1KlTmWl8x/U+Is=
|
||||||
github.com/rogpeppe/go-internal v1.10.0 h1:TMyTOH3F/DB16zRVcYyreMH6GnZZrwQVAoYjRBZyWFQ=
|
github.com/rogpeppe/go-internal v1.10.0 h1:TMyTOH3F/DB16zRVcYyreMH6GnZZrwQVAoYjRBZyWFQ=
|
||||||
github.com/rogpeppe/go-internal v1.10.0/go.mod h1:UQnix2H7Ngw/k4C5ijL5+65zddjncjaFoBhdsK/akog=
|
github.com/rogpeppe/go-internal v1.10.0/go.mod h1:UQnix2H7Ngw/k4C5ijL5+65zddjncjaFoBhdsK/akog=
|
||||||
github.com/santhosh-tekuri/jsonschema/v5 v5.3.1 h1:lZUw3E0/J3roVtGQ+SCrUrg3ON6NgVqpn3+iol9aGu4=
|
github.com/santhosh-tekuri/jsonschema/v5 v5.3.1 h1:lZUw3E0/J3roVtGQ+SCrUrg3ON6NgVqpn3+iol9aGu4=
|
||||||
@@ -96,44 +98,40 @@ github.com/santhosh-tekuri/jsonschema/v5 v5.3.1/go.mod h1:uToXkOrWAZ6/Oc07xWQrPO
|
|||||||
github.com/shopspring/decimal v1.3.1 h1:2Usl1nmF/WZucqkFZhnfFYxxxu8LG21F6nPQBE5gKV8=
|
github.com/shopspring/decimal v1.3.1 h1:2Usl1nmF/WZucqkFZhnfFYxxxu8LG21F6nPQBE5gKV8=
|
||||||
github.com/shopspring/decimal v1.3.1/go.mod h1:DKyhrW/HYNuLGql+MJL6WCR6knT2jwCFRcu2hWCYk4o=
|
github.com/shopspring/decimal v1.3.1/go.mod h1:DKyhrW/HYNuLGql+MJL6WCR6knT2jwCFRcu2hWCYk4o=
|
||||||
github.com/spkg/bom v0.0.0-20160624110644-59b7046e48ad/go.mod h1:qLr4V1qq6nMqFKkMo8ZTx3f+BZEkzsRUY10Xsm2mwU0=
|
github.com/spkg/bom v0.0.0-20160624110644-59b7046e48ad/go.mod h1:qLr4V1qq6nMqFKkMo8ZTx3f+BZEkzsRUY10Xsm2mwU0=
|
||||||
github.com/stmcginnis/gofish v0.15.0 h1:8TG41+lvJk/0Nf8CIIYErxbMlQUy80W0JFRZP3Ld82A=
|
github.com/stmcginnis/gofish v0.20.0 h1:hH2V2Qe898F2wWT1loApnkDUrXXiLKqbSlMaH3Y1n08=
|
||||||
github.com/stmcginnis/gofish v0.15.0/go.mod h1:BLDSFTp8pDlf/xDbLZa+F7f7eW0E/CHCboggsu8CznI=
|
github.com/stmcginnis/gofish v0.20.0/go.mod h1:PzF5i8ecRG9A2ol8XT64npKUunyraJ+7t0kYMpQAtqU=
|
||||||
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
|
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
|
||||||
github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw=
|
|
||||||
github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo=
|
|
||||||
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
|
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
|
||||||
github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
|
github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U=
|
||||||
github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU=
|
github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U=
|
||||||
github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo=
|
github.com/tklauser/go-sysconf v0.3.15 h1:VE89k0criAymJ/Os65CSn1IXaol+1wrsFHEB8Ol49K4=
|
||||||
github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg=
|
github.com/tklauser/go-sysconf v0.3.15/go.mod h1:Dmjwr6tYFIseJw7a3dRLJfsHAMXZ3nEnL/aZY+0IuI4=
|
||||||
github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
|
github.com/tklauser/numcpus v0.10.0 h1:18njr6LDBk1zuna922MgdjQuJFjrdppsZG60sHGfjso=
|
||||||
github.com/tklauser/go-sysconf v0.3.13 h1:GBUpcahXSpR2xN01jhkNAbTLRk2Yzgggk8IM08lq3r4=
|
github.com/tklauser/numcpus v0.10.0/go.mod h1:BiTKazU708GQTYF4mB+cmlpT2Is1gLk7XVuEeem8LsQ=
|
||||||
github.com/tklauser/go-sysconf v0.3.13/go.mod h1:zwleP4Q4OehZHGn4CYZDipCgg9usW5IJePewFCGVEa0=
|
go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto=
|
||||||
github.com/tklauser/numcpus v0.7.0 h1:yjuerZP127QG9m5Zh/mSO4wqurYil27tHrqwRoRjpr4=
|
go.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE=
|
||||||
github.com/tklauser/numcpus v0.7.0/go.mod h1:bb6dMVcj8A42tSE7i32fsIUCbQNllK5iDguyOZRUzAY=
|
go.yaml.in/yaml/v2 v2.4.2 h1:DzmwEr2rDGHl7lsFgAHxmNz/1NlQ7xLIrlN2h5d1eGI=
|
||||||
|
go.yaml.in/yaml/v2 v2.4.2/go.mod h1:081UH+NErpNdqlCXm3TtEran0rJZGxAYx9hb/ELlsPU=
|
||||||
golang.design/x/thread v0.0.0-20210122121316-335e9adffdf1 h1:P7S/GeHBAFEZIYp0ePPs2kHXoazz8q2KsyxHyQVGCJg=
|
golang.design/x/thread v0.0.0-20210122121316-335e9adffdf1 h1:P7S/GeHBAFEZIYp0ePPs2kHXoazz8q2KsyxHyQVGCJg=
|
||||||
golang.design/x/thread v0.0.0-20210122121316-335e9adffdf1/go.mod h1:9CWpnTUmlQkfdpdutA1nNf4iE5lAVt3QZOu0Z6hahBE=
|
golang.design/x/thread v0.0.0-20210122121316-335e9adffdf1/go.mod h1:9CWpnTUmlQkfdpdutA1nNf4iE5lAVt3QZOu0Z6hahBE=
|
||||||
golang.org/x/crypto v0.31.0 h1:ihbySMvVjLAeSH1IbfcRTkD/iNscyz8rGzjF/E5hV6U=
|
golang.org/x/crypto v0.42.0 h1:chiH31gIWm57EkTXpwnqf8qeuMUi0yekh6mT2AvFlqI=
|
||||||
golang.org/x/crypto v0.31.0/go.mod h1:kDsLvtWBEx7MV9tJOj9bnXsPbxwJQ6csT/x4KIN4Ssk=
|
golang.org/x/crypto v0.42.0/go.mod h1:4+rDnOTJhQCx2q7/j6rAN5XDw8kPjeaXEUR2eL94ix8=
|
||||||
golang.org/x/crypto v0.35.0 h1:b15kiHdrGCHrP6LvwaQ3c03kgNhhiMgvlhxHQhmg2Xs=
|
golang.org/x/exp v0.0.0-20250620022241-b7579e27df2b h1:M2rDM6z3Fhozi9O7NWsxAkg/yqS/lQJ6PmkyIV3YP+o=
|
||||||
golang.org/x/crypto v0.35.0/go.mod h1:dy7dXNW32cAb/6/PRuTNsix8T+vJAqvuIy5Bli/x0YQ=
|
golang.org/x/exp v0.0.0-20250620022241-b7579e27df2b/go.mod h1:3//PLf8L/X+8b4vuAfHzxeRUl04Adcb341+IGKfnqS8=
|
||||||
golang.org/x/exp v0.0.0-20250215185904-eff6e970281f h1:oFMYAjX0867ZD2jcNiLBrI9BdpmEkvPyi5YrBGXbamg=
|
golang.org/x/net v0.43.0 h1:lat02VYK2j4aLzMzecihNvTlJNQUq316m2Mr9rnM6YE=
|
||||||
golang.org/x/exp v0.0.0-20250215185904-eff6e970281f/go.mod h1:BHOTPb3L19zxehTsLoJXVaTktb06DFgmdW6Wb9s8jqk=
|
golang.org/x/net v0.43.0/go.mod h1:vhO1fvI4dGsIjh73sWfUVjj3N7CA9WkKJNQm2svM6Jg=
|
||||||
golang.org/x/net v0.31.0 h1:68CPQngjLL0r2AlUKiSxtQFKvzRVbnzLwMUn5SzcLHo=
|
|
||||||
golang.org/x/net v0.31.0/go.mod h1:P4fl1q7dY2hnZFxEk4pPSkDHF+QqjitcnDjUQyMM+pM=
|
|
||||||
golang.org/x/net v0.36.0 h1:vWF2fRbw4qslQsQzgFqZff+BItCvGFQqKzKIzx1rmoA=
|
|
||||||
golang.org/x/net v0.36.0/go.mod h1:bFmbeoIPfrw4sMHNhb4J9f6+tPziuGjq7Jk/38fxi1I=
|
|
||||||
golang.org/x/sys v0.0.0-20210122093101-04d7465088b8/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
golang.org/x/sys v0.0.0-20210122093101-04d7465088b8/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||||
golang.org/x/sys v0.28.0 h1:Fksou7UEQUWlKvIdsqzJmUmCX3cZuD2+P3XyyzwMhlA=
|
golang.org/x/sys v0.37.0 h1:fdNQudmxPjkdUTPnLn5mdQv7Zwvbvpaxqs831goi9kQ=
|
||||||
golang.org/x/sys v0.28.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
|
golang.org/x/sys v0.37.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks=
|
||||||
golang.org/x/sys v0.30.0 h1:QjkSwP/36a20jFYWkSue1YwXzLmsV5Gfq7Eiy72C1uc=
|
golang.org/x/time v0.13.0 h1:eUlYslOIt32DgYD6utsuUeHs4d7AsEYLuIAdg7FlYgI=
|
||||||
golang.org/x/sys v0.30.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
|
golang.org/x/time v0.13.0/go.mod h1:eL/Oa2bBBK0TkX57Fyni+NgnyQQN4LitPmob2Hjnqw4=
|
||||||
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
|
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
|
||||||
google.golang.org/protobuf v1.35.2 h1:8Ar7bF+apOIoThw1EdZl0p1oWvMqTHmpA2fRTyZO8io=
|
google.golang.org/protobuf v1.36.8 h1:xHScyCOEuuwZEc6UtSOvPbAT4zRh0xcNRYekJwfqyMc=
|
||||||
google.golang.org/protobuf v1.35.2/go.mod h1:9fA7Ob0pmnwhb644+1+CVWFRbNajQ6iRojtC/QF5bRE=
|
google.golang.org/protobuf v1.36.8/go.mod h1:fuxRtAxBytpl4zzqUh6/eyUujkJdNiuEkXntxiD/uRU=
|
||||||
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
|
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
|
||||||
gopkg.in/check.v1 v1.0.0-20200227125254-8fa46927fb4f/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
|
gopkg.in/check.v1 v1.0.0-20200227125254-8fa46927fb4f/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
|
||||||
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
|
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk=
|
||||||
|
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q=
|
||||||
gopkg.in/yaml.v3 v3.0.0-20200615113413-eeeca48fe776/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
|
gopkg.in/yaml.v3 v3.0.0-20200615113413-eeeca48fe776/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
|
||||||
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
|
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
|
||||||
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
|
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
|
||||||
|
@@ -1,3 +1,14 @@
|
|||||||
|
<!--
|
||||||
|
---
|
||||||
|
title: Metric Aggregator
|
||||||
|
description: Subsystem for evaluating expressions on metrics (deprecated)
|
||||||
|
categories: [cc-metric-collector]
|
||||||
|
tags: ['Developer']
|
||||||
|
weight: 1
|
||||||
|
hugo_path: docs/reference/cc-metric-collector/internal/metricaggregator/_index.md
|
||||||
|
---
|
||||||
|
-->
|
||||||
|
|
||||||
# The MetricAggregator
|
# The MetricAggregator
|
||||||
|
|
||||||
In some cases, further combination of metrics or raw values is required. For that strings like `foo + 1` with runtime dependent `foo` need to be evaluated. The MetricAggregator relies on the [`gval`](https://github.com/PaesslerAG/gval) Golang package to perform all expression evaluation. The `gval` package provides the basic arithmetic operations but the MetricAggregator defines additional ones.
|
In some cases, further combination of metrics or raw values is required. For that strings like `foo + 1` with runtime dependent `foo` need to be evaluated. The MetricAggregator relies on the [`gval`](https://github.com/PaesslerAG/gval) Golang package to perform all expression evaluation. The `gval` package provides the basic arithmetic operations but the MetricAggregator defines additional ones.
|
||||||
|
@@ -1,3 +1,10 @@
|
|||||||
|
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||||
|
// All rights reserved. This file is part of cc-lib.
|
||||||
|
// Use of this source code is governed by a MIT-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
// additional authors:
|
||||||
|
// Holger Obermaier (NHR@KIT)
|
||||||
|
|
||||||
package metricAggregator
|
package metricAggregator
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
@@ -1,3 +1,10 @@
|
|||||||
|
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||||
|
// All rights reserved. This file is part of cc-lib.
|
||||||
|
// Use of this source code is governed by a MIT-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
// additional authors:
|
||||||
|
// Holger Obermaier (NHR@KIT)
|
||||||
|
|
||||||
package metricAggregator
|
package metricAggregator
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
@@ -1,11 +1,22 @@
|
|||||||
|
<!--
|
||||||
|
---
|
||||||
|
title: Message Router
|
||||||
|
description: Routing component inside cc-metric-collector
|
||||||
|
categories: [cc-metric-collector]
|
||||||
|
tags: ['Developer']
|
||||||
|
weight: 1
|
||||||
|
hugo_path: docs/reference/cc-metric-collector/internal/metricrouter/_index.md
|
||||||
|
---
|
||||||
|
-->
|
||||||
|
|
||||||
# CC Metric Router
|
# CC Metric Router
|
||||||
|
|
||||||
The CCMetric router sits in between the collectors and the sinks and can be used to add and remove tags to/from traversing [CCMessages](https://pkg.go.dev/github.com/ClusterCockpit/cc-energy-manager@v0.0.0-20240919152819-92a17f2da4f7/pkg/cc-message.
|
The CCMetric router sits in between the collectors and the sinks and can be used to add and remove tags to/from traversing [CCMessages](https://pkg.go.dev/github.com/ClusterCockpit/cc-lib/ccMessage).
|
||||||
|
|
||||||
|
|
||||||
# Configuration
|
# Configuration
|
||||||
|
|
||||||
**Note**: Use the [message processor configuration](../../pkg/messageProcessor/README.md) with option `process_messages`.
|
**Note**: Use the [message processor configuration](https://github.com/ClusterCockpit/cc-lib/blob/main/messageProcessor/README.md) with option `process_messages`.
|
||||||
|
|
||||||
```json
|
```json
|
||||||
{
|
{
|
||||||
@@ -69,7 +80,7 @@ The CCMetric router sits in between the collectors and the sinks and can be used
|
|||||||
|
|
||||||
There are three main options `add_tags`, `delete_tags` and `interval_timestamp`. `add_tags` and `delete_tags` are lists consisting of dicts with `key`, `value` and `if`. The `value` can be omitted in the `delete_tags` part as it only uses the `key` for removal. The `interval_timestamp` setting means that a unique timestamp is applied to all metrics traversing the router during an interval.
|
There are three main options `add_tags`, `delete_tags` and `interval_timestamp`. `add_tags` and `delete_tags` are lists consisting of dicts with `key`, `value` and `if`. The `value` can be omitted in the `delete_tags` part as it only uses the `key` for removal. The `interval_timestamp` setting means that a unique timestamp is applied to all metrics traversing the router during an interval.
|
||||||
|
|
||||||
**Note**: Use the [message processor configuration](../../pkg/messageProcessor/README.md) (option `process_messages`) instead of `add_tags`, `delete_tags`, `drop_metrics`, `drop_metrics_if`, `rename_metrics`, `normalize_units` and `change_unit_prefix`. These options are deprecated and will be removed in future versions. Until then, they are added to the message processor.
|
**Note**: Use the [message processor configuration](https://github.com/ClusterCockpit/cc-lib/blob/main/messageProcessor/README.md) (option `process_messages`) instead of `add_tags`, `delete_tags`, `drop_metrics`, `drop_metrics_if`, `rename_metrics`, `normalize_units` and `change_unit_prefix`. These options are deprecated and will be removed in future versions. Until then, they are added to the message processor.
|
||||||
|
|
||||||
# Processing order in the router
|
# Processing order in the router
|
||||||
|
|
||||||
@@ -225,13 +236,13 @@ __deprecated__
|
|||||||
|
|
||||||
|
|
||||||
The cc-metric-collector tries to read the data from the system as it is reported. If available, it tries to read the metric unit from the system as well (e.g. from `/proc/meminfo`). The problem is that, depending on the source, the metric units are named differently. Just think about `byte`, `Byte`, `B`, `bytes`, ...
|
The cc-metric-collector tries to read the data from the system as it is reported. If available, it tries to read the metric unit from the system as well (e.g. from `/proc/meminfo`). The problem is that, depending on the source, the metric units are named differently. Just think about `byte`, `Byte`, `B`, `bytes`, ...
|
||||||
The [cc-units](https://github.com/ClusterCockpit/cc-units) package provides us a normalization option to use the same metric unit name for all metrics. It this option is set to true, all `unit` meta tags are normalized.
|
The [cc-units](https://github.com/ClusterCockpit/cc-lib/ccUnits) package provides us a normalization option to use the same metric unit name for all metrics. If this option is set to true, all `unit` meta tags are normalized.
|
||||||
|
|
||||||
## The `change_unit_prefix` section
|
## The `change_unit_prefix` section
|
||||||
|
|
||||||
__deprecated__
|
__deprecated__
|
||||||
|
|
||||||
It is often the case that metrics are reported by the system using a rather outdated unit prefix (like `/proc/meminfo` still uses kByte despite current memory sizes are in the GByte range). If you want to change the prefix of a unit, you can do that with the help of [cc-units](https://github.com/ClusterCockpit/cc-units). The setting works on the metric name and requires the new prefix for the metric. The cc-units package determines the scaling factor.
|
It is often the case that metrics are reported by the system using a rather outdated unit prefix (for example, `/proc/meminfo` still uses kByte even though current memory sizes are in the GByte range). If you want to change the prefix of a unit, you can do that with the help of [cc-units](https://github.com/ClusterCockpit/cc-lib/ccUnits). The setting works on the metric name and requires the new prefix for the metric. The cc-units package determines the scaling factor.
|
||||||
|
|
||||||
# Aggregate metric values of the current interval with the `interval_aggregates` option
|
# Aggregate metric values of the current interval with the `interval_aggregates` option
|
||||||
|
|
||||||
@@ -263,7 +274,7 @@ The above configuration, collects all metric values for metrics evaluating `if`
|
|||||||
If you are not interested in the input metrics `sub_metric_%d+` at all, you can add the same condition used here to the `drop_metrics_if` section to drop them.
|
If you are not interested in the input metrics `sub_metric_%d+` at all, you can add the same condition used here to the `drop_metrics_if` section to drop them.
|
||||||
|
|
||||||
Use cases for `interval_aggregates`:
|
Use cases for `interval_aggregates`:
|
||||||
- Combine multiple metrics of the a collector to a new one like the [MemstatCollector](../../collectors/memstatMetric.md) does it for `mem_used`)):
|
- Combine multiple metrics of a collector into a new one, as the [MemstatCollector](../../collectors/memstatMetric.md) does for `mem_used`:
|
||||||
```json
|
```json
|
||||||
{
|
{
|
||||||
"name" : "mem_used",
|
"name" : "mem_used",
|
||||||
|
@@ -1,3 +1,10 @@
|
|||||||
|
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||||
|
// All rights reserved. This file is part of cc-lib.
|
||||||
|
// Use of this source code is governed by a MIT-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
// additional authors:
|
||||||
|
// Holger Obermaier (NHR@KIT)
|
||||||
|
|
||||||
package metricRouter
|
package metricRouter
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
@@ -1,3 +1,10 @@
|
|||||||
|
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||||
|
// All rights reserved. This file is part of cc-lib.
|
||||||
|
// Use of this source code is governed by a MIT-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
// additional authors:
|
||||||
|
// Holger Obermaier (NHR@KIT)
|
||||||
|
|
||||||
package metricRouter
|
package metricRouter
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
@@ -1,3 +1,10 @@
|
|||||||
|
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||||
|
// All rights reserved. This file is part of cc-lib.
|
||||||
|
// Use of this source code is governed by a MIT-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
// additional authors:
|
||||||
|
// Holger Obermaier (NHR@KIT)
|
||||||
|
|
||||||
package ccTopology
|
package ccTopology
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
@@ -1,125 +0,0 @@
|
|||||||
package hostlist
|
|
||||||
|
|
||||||
import (
|
|
||||||
"fmt"
|
|
||||||
"regexp"
|
|
||||||
"sort"
|
|
||||||
"strconv"
|
|
||||||
"strings"
|
|
||||||
)
|
|
||||||
|
|
||||||
var (
	// hostlistEntryRe matches a single host list entry at the start of the
	// input: a DNS-style prefix, an optional bracketed range list and an
	// optional DNS-style suffix.
	hostlistEntryRe = regexp.MustCompile(`^([a-zA-Z0-9-]+)([[][0-9,-]+[]])?([a-zA-Z0-9-]+)?`)

	// hostlistRangesRe validates a complete bracketed range list such as
	// "[1-3,5,07-09]": comma separated numbers or start-end ranges.
	hostlistRangesRe = regexp.MustCompile(
		`^[[]([[:digit:]]+,|[[:digit:]]+-[[:digit:]]+,)*([[:digit:]]+|[[:digit:]]+-[[:digit:]]+)[]]$`)
)

// Expand turns a host list expression into the sorted, de-duplicated list of
// host names it describes.
//
// The input is a sequence of entries separated by commas and/or spaces. Each
// entry is either a plain host name ("n1") or a prefix followed by a bracketed
// list of numbers and ranges and an optional suffix ("n[01-04,06]-p").
// A range whose start and end have the same number of digits is expanded with
// zero padding to that width ("n[01-03]" -> n01, n02, n03); otherwise the
// numbers are printed without padding.
//
// An error (and a nil result) is returned when the input contains characters
// that cannot start a host name, when a bracketed list is malformed, when a
// range start is greater than its end, or when a range limit does not fit
// into an unsigned 64 bit integer.
func Expand(in string) (result []string, err error) {
	const delimiters = ", "

	// Skip leading delimiters
	in = strings.TrimLeft(in, delimiters)

	for len(in) > 0 {
		entry := hostlistEntryRe.FindStringSubmatch(in)
		if entry == nil {
			return nil, fmt.Errorf("not a hostlist: %s", in)
		}

		// Consume the matched entry and any delimiters following it
		in = strings.TrimLeft(in[len(entry[0]):], delimiters)

		// Matched prefix, range list and suffix
		hlPrefix, hlRanges, hlSuffix := entry[1], entry[2], entry[3]

		// Single node without ranges
		if hlRanges == "" {
			result = append(result, hlPrefix)
			continue
		}

		// Node with ranges
		if !hostlistRangesRe.MatchString(hlRanges) {
			return nil, fmt.Errorf("not at hostlist range: %s", hlRanges)
		}

		// Remove braces and split host ranges at ,
		hlRanges = hlRanges[1 : len(hlRanges)-1]
		for _, hlRange := range strings.Split(hlRanges, ",") {

			// Split host range at -
			limits := strings.Split(hlRange, "-")

			// Range is only a single number: keep it verbatim (including padding)
			if len(limits) == 1 {
				result = append(result, hlPrefix+limits[0]+hlSuffix)
				continue
			}

			// Range has a start and an end. The regular expression guarantees
			// digits only, so parsing can only fail on overflow. That failure
			// must not be ignored: ParseUint then returns MaxUint64, which
			// would turn the expansion loop below into an endless one.
			iStart, err := strconv.ParseUint(limits[0], 10, 64)
			if err != nil {
				return nil, fmt.Errorf("invalid range start in %s: %w", hlRange, err)
			}
			iEnd, err := strconv.ParseUint(limits[1], 10, 64)
			if err != nil {
				return nil, fmt.Errorf("invalid range end in %s: %w", hlRange, err)
			}
			if iStart > iEnd {
				return nil, fmt.Errorf("single range start is greater than end: %s", hlRange)
			}

			// Zero pad only if start and end are written with the same width
			width := 0
			if len(limits[0]) == len(limits[1]) {
				width = len(limits[0])
			}

			// Add nodes from this range. The exit test sits at the end of the
			// body so that the counter cannot wrap around when iEnd is the
			// largest representable value.
			for i := iStart; ; i++ {
				result = append(result, hlPrefix+fmt.Sprintf("%0*d", width, i)+hlSuffix)
				if i == iEnd {
					break
				}
			}
		}
	}

	if len(result) > 0 {
		// sort
		sort.Strings(result)

		// uniq: compact in place, keeping the first of each run of equal names
		unique := result[:1]
		for _, host := range result[1:] {
			if host != unique[len(unique)-1] {
				unique = append(unique, host)
			}
		}
		result = unique
	}

	return result, nil
}
|
|
@@ -1,126 +0,0 @@
|
|||||||
package hostlist
|
|
||||||
|
|
||||||
import (
|
|
||||||
"testing"
|
|
||||||
)
|
|
||||||
|
|
||||||
func TestExpand(t *testing.T) {
|
|
||||||
|
|
||||||
// Compare two slices of strings
|
|
||||||
equal := func(a, b []string) bool {
|
|
||||||
if len(a) != len(b) {
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
for i, v := range a {
|
|
||||||
if v != b[i] {
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return true
|
|
||||||
}
|
|
||||||
|
|
||||||
type testDefinition struct {
|
|
||||||
input string
|
|
||||||
resultExpected []string
|
|
||||||
errorExpected bool
|
|
||||||
}
|
|
||||||
|
|
||||||
expandTests := []testDefinition{
|
|
||||||
{
|
|
||||||
// Single node
|
|
||||||
input: "n1",
|
|
||||||
resultExpected: []string{"n1"},
|
|
||||||
errorExpected: false,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
// Single node, duplicated
|
|
||||||
input: "n1,n1",
|
|
||||||
resultExpected: []string{"n1"},
|
|
||||||
errorExpected: false,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
// Single node with padding
|
|
||||||
input: "n[01]",
|
|
||||||
resultExpected: []string{"n01"},
|
|
||||||
errorExpected: false,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
// Single node with suffix
|
|
||||||
input: "n[01]-p",
|
|
||||||
resultExpected: []string{"n01-p"},
|
|
||||||
errorExpected: false,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
// Multiple nodes with a single range
|
|
||||||
input: "n[1-2]",
|
|
||||||
resultExpected: []string{"n1", "n2"},
|
|
||||||
errorExpected: false,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
// Multiple nodes with a single range and a single index
|
|
||||||
input: "n[1-2,3]",
|
|
||||||
resultExpected: []string{"n1", "n2", "n3"},
|
|
||||||
errorExpected: false,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
// Multiple nodes with different prefixes
|
|
||||||
input: "n[1-2],m[1,2]",
|
|
||||||
resultExpected: []string{"m1", "m2", "n1", "n2"},
|
|
||||||
errorExpected: false,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
// Multiple nodes with different suffixes
|
|
||||||
input: "n[1-2]-p,n[1,2]-q",
|
|
||||||
resultExpected: []string{"n1-p", "n1-q", "n2-p", "n2-q"},
|
|
||||||
errorExpected: false,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
// Multiple nodes with and without node ranges
|
|
||||||
input: " n09, n[01-04,06-07,09] , , n10,n04",
|
|
||||||
resultExpected: []string{"n01", "n02", "n03", "n04", "n06", "n07", "n09", "n10"},
|
|
||||||
errorExpected: false,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
// Forbidden DNS character
|
|
||||||
input: "n@",
|
|
||||||
resultExpected: []string{},
|
|
||||||
errorExpected: true,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
// Forbidden range
|
|
||||||
input: "n[1-2-2,3]",
|
|
||||||
resultExpected: []string{},
|
|
||||||
errorExpected: true,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
// Forbidden range limits
|
|
||||||
input: "n[2-1]",
|
|
||||||
resultExpected: []string{},
|
|
||||||
errorExpected: true,
|
|
||||||
},
|
|
||||||
}
|
|
||||||
|
|
||||||
for _, expandTest := range expandTests {
|
|
||||||
result, err := Expand(expandTest.input)
|
|
||||||
|
|
||||||
hasError := err != nil
|
|
||||||
if hasError != expandTest.errorExpected && hasError {
|
|
||||||
t.Errorf("Expand('%s') failed: unexpected error '%v'",
|
|
||||||
expandTest.input, err)
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
if hasError != expandTest.errorExpected && !hasError {
|
|
||||||
t.Errorf("Expand('%s') did not fail as expected: got result '%+v'",
|
|
||||||
expandTest.input, result)
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
if !hasError && !equal(result, expandTest.resultExpected) {
|
|
||||||
t.Errorf("Expand('%s') failed: got result '%+v', expected result '%v'",
|
|
||||||
expandTest.input, result, expandTest.resultExpected)
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
|
|
||||||
t.Logf("Checked hostlist.Expand('%s'): result = '%+v', err = '%v'",
|
|
||||||
expandTest.input, result, err)
|
|
||||||
}
|
|
||||||
}
|
|
@@ -1,3 +1,14 @@
|
|||||||
|
<!--
|
||||||
|
---
|
||||||
|
title: Multi-channel Ticker
|
||||||
|
description: Timer ticker that sends out the tick to multiple channels
|
||||||
|
categories: [cc-metric-collector]
|
||||||
|
tags: ['Developer']
|
||||||
|
weight: 1
|
||||||
|
hugo_path: docs/reference/cc-metric-collector/pkg/multichanticker/_index.md
|
||||||
|
---
|
||||||
|
-->
|
||||||
|
|
||||||
# MultiChanTicker
|
# MultiChanTicker
|
||||||
|
|
||||||
The idea of this ticker is to multiply the output channels. The original Golang `time.Ticker` provides only a single output channel, so the signal can only be received by a single other class. This ticker allows to add multiple channels which get all notified about the time tick.
|
The idea of this ticker is to multiply the output channels. The original Golang `time.Ticker` provides only a single output channel, so the signal can only be received by a single other class. This ticker allows to add multiple channels which get all notified about the time tick.
|
||||||
|
@@ -1,3 +1,10 @@
|
|||||||
|
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||||
|
// All rights reserved. This file is part of cc-lib.
|
||||||
|
// Use of this source code is governed by a MIT-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
// additional authors:
|
||||||
|
// Holger Obermaier (NHR@KIT)
|
||||||
|
|
||||||
package multiChanTicker
|
package multiChanTicker
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
@@ -1,4 +1,6 @@
|
|||||||
Package: cc-metric-collector
|
Package: cc-metric-collector
|
||||||
|
Section: misc
|
||||||
|
Priority: optional
|
||||||
Version: {VERSION}
|
Version: {VERSION}
|
||||||
Installed-Size: {INSTALLED_SIZE}
|
Installed-Size: {INSTALLED_SIZE}
|
||||||
Architecture: {ARCH}
|
Architecture: {ARCH}
|
||||||
|
@@ -44,6 +44,8 @@ def group_to_json(groupfile):
|
|||||||
scope = "socket"
|
scope = "socket"
|
||||||
if "PWR" in calc:
|
if "PWR" in calc:
|
||||||
scope = "socket"
|
scope = "socket"
|
||||||
|
if "UMC" in calc:
|
||||||
|
scope = "socket"
|
||||||
|
|
||||||
m = {"name" : metric, "calc": calc, "type" : scope, "publish" : True}
|
m = {"name" : metric, "calc": calc, "type" : scope, "publish" : True}
|
||||||
metrics.append(m)
|
metrics.append(m)
|
||||||
|
Reference in New Issue
Block a user