Compare commits

...

49 Commits

Author SHA1 Message Date
Thomas Roehl
ce9e21c48e Move example configurations and update docs. Fixed #150 2025-10-20 17:11:13 +02:00
Thomas Roehl
6243203880 Fix startup error of iostat collector 2025-10-20 17:06:10 +02:00
Thomas Roehl
c7c9f8c273 Fix max clock metrics 2025-10-20 17:05:59 +02:00
Roland Pabel
6a4ad067ac return new error 2025-10-20 16:29:36 +02:00
Roland Pabel
ed2378f794 StartsWith -> HasPrefix 2025-10-20 16:29:36 +02:00
Roland Pabel
99e066ff5f docu update for sudo 2025-10-20 16:29:36 +02:00
Roland Pabel
67cdbefb02 getting filename from error doesn't work, mmpmon path must be provided when using sudo 2025-10-20 16:29:36 +02:00
Roland Pabel
b522aca693 fix config.Mmpmon is the empty string because of the error thrown 2025-10-20 16:29:36 +02:00
Roland Pabel
ea7c4f4ec7 correctly check for EACCESS when searching for mmpmon with exec.LookPath 2025-10-20 16:29:36 +02:00
Roland Pabel
09cf89a951 with sudo, ignore EPERM for exec.LookPath 2025-10-20 16:29:36 +02:00
Roland Pabel
d6499935a4 enable sudo support 2025-10-20 16:29:36 +02:00
dependabot[bot]
3e19c47ae4 Bump github.com/ClusterCockpit/cc-lib from 0.9.1 to 0.10.1
Bumps [github.com/ClusterCockpit/cc-lib](https://github.com/ClusterCockpit/cc-lib) from 0.9.1 to 0.10.1.
- [Release notes](https://github.com/ClusterCockpit/cc-lib/releases)
- [Commits](https://github.com/ClusterCockpit/cc-lib/compare/v0.9.1...v0.10.1)

---
updated-dependencies:
- dependency-name: github.com/ClusterCockpit/cc-lib
  dependency-version: 0.10.1
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
2025-10-20 16:14:43 +02:00
brinkcoder
97e09f13f4 fix numastat collector sending node metrics instead of memoryDomain metrics 2025-10-20 16:12:58 +02:00
Roland Pabel
e08bd3d926 fix wrong variable in calculation of gpfs_reads_rate 2025-10-15 17:20:08 +02:00
dependabot[bot]
fc525b7430 Bump golang.org/x/sys from 0.36.0 to 0.37.0
Bumps [golang.org/x/sys](https://github.com/golang/sys) from 0.36.0 to 0.37.0.
- [Commits](https://github.com/golang/sys/compare/v0.36.0...v0.37.0)

---
updated-dependencies:
- dependency-name: golang.org/x/sys
  dependency-version: 0.37.0
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
2025-10-13 12:04:12 +02:00
brinkcoder
69d4567ecf add support for passwordless sudo 2025-10-07 13:10:17 +02:00
brinkcoder
c5183feafc add slurm_cgroup Collector 2025-10-07 13:10:17 +02:00
dependabot[bot]
a45366646e Bump github.com/ClusterCockpit/cc-lib from 0.8.0 to 0.9.1
Bumps [github.com/ClusterCockpit/cc-lib](https://github.com/ClusterCockpit/cc-lib) from 0.8.0 to 0.9.1.
- [Release notes](https://github.com/ClusterCockpit/cc-lib/releases)
- [Commits](https://github.com/ClusterCockpit/cc-lib/compare/v0.8.0...v0.9.1)

---
updated-dependencies:
- dependency-name: github.com/ClusterCockpit/cc-lib
  dependency-version: 0.9.1
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
2025-10-06 12:07:06 +02:00
dependabot[bot]
a551616566 Bump github.com/ClusterCockpit/cc-lib from 0.7.0 to 0.8.0
Bumps [github.com/ClusterCockpit/cc-lib](https://github.com/ClusterCockpit/cc-lib) from 0.7.0 to 0.8.0.
- [Release notes](https://github.com/ClusterCockpit/cc-lib/releases)
- [Commits](https://github.com/ClusterCockpit/cc-lib/compare/v0.7.0...v0.8.0)

---
updated-dependencies:
- dependency-name: github.com/ClusterCockpit/cc-lib
  dependency-version: 0.8.0
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
2025-09-15 13:42:07 +02:00
dependabot[bot]
a9fa168117 Bump golang.org/x/sys from 0.35.0 to 0.36.0
Bumps [golang.org/x/sys](https://github.com/golang/sys) from 0.35.0 to 0.36.0.
- [Commits](https://github.com/golang/sys/compare/v0.35.0...v0.36.0)

---
updated-dependencies:
- dependency-name: golang.org/x/sys
  dependency-version: 0.36.0
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
2025-09-15 13:10:18 +02:00
Thomas Gruber
39d37597ab Update README.md 2025-09-09 14:43:07 +02:00
Thomas Gruber
aeaba0021b Update likwid_perfgroup_to_cc_config.py
Add "UMC" to socket-counters
2025-08-28 15:59:48 +02:00
dependabot[bot]
5ceffb44b4 Bump github.com/NVIDIA/go-nvml from 0.12.9-0 to 0.13.0-1
Bumps [github.com/NVIDIA/go-nvml](https://github.com/NVIDIA/go-nvml) from 0.12.9-0 to 0.13.0-1.
- [Release notes](https://github.com/NVIDIA/go-nvml/releases)
- [Commits](https://github.com/NVIDIA/go-nvml/compare/v0.12.9-0...v0.13.0-1)

---
updated-dependencies:
- dependency-name: github.com/NVIDIA/go-nvml
  dependency-version: 0.13.0-1
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
2025-08-25 14:14:21 +02:00
dependabot[bot]
e29942a4be Bump github.com/ClusterCockpit/cc-lib from 0.6.0 to 0.7.0
Bumps [github.com/ClusterCockpit/cc-lib](https://github.com/ClusterCockpit/cc-lib) from 0.6.0 to 0.7.0.
- [Release notes](https://github.com/ClusterCockpit/cc-lib/releases)
- [Commits](https://github.com/ClusterCockpit/cc-lib/compare/v0.6.0...v0.7.0)

---
updated-dependencies:
- dependency-name: github.com/ClusterCockpit/cc-lib
  dependency-version: 0.7.0
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
2025-08-11 12:24:44 +02:00
dependabot[bot]
0b9b9a6e68 Bump golang.org/x/sys from 0.34.0 to 0.35.0
Bumps [golang.org/x/sys](https://github.com/golang/sys) from 0.34.0 to 0.35.0.
- [Commits](https://github.com/golang/sys/compare/v0.34.0...v0.35.0)

---
updated-dependencies:
- dependency-name: golang.org/x/sys
  dependency-version: 0.35.0
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
2025-08-11 12:24:27 +02:00
dependabot[bot]
b47cb3a0c4 Merge pull request #163 from ClusterCockpit/dependabot/go_modules/github.com/ClusterCockpit/cc-lib-0.6.0 2025-07-28 05:18:37 +00:00
dependabot[bot]
b49ae7b612 Bump github.com/ClusterCockpit/cc-lib from 0.5.0 to 0.6.0
Bumps [github.com/ClusterCockpit/cc-lib](https://github.com/ClusterCockpit/cc-lib) from 0.5.0 to 0.6.0.
- [Release notes](https://github.com/ClusterCockpit/cc-lib/releases)
- [Commits](https://github.com/ClusterCockpit/cc-lib/compare/v0.5.0...v0.6.0)

---
updated-dependencies:
- dependency-name: github.com/ClusterCockpit/cc-lib
  dependency-version: 0.6.0
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
2025-07-28 04:49:00 +00:00
dependabot[bot]
1fc5cc8483 Bump golang.org/x/sys from 0.33.0 to 0.34.0 (#162)
Bumps [golang.org/x/sys](https://github.com/golang/sys) from 0.33.0 to 0.34.0.
- [Commits](https://github.com/golang/sys/compare/v0.33.0...v0.34.0)

---
updated-dependencies:
- dependency-name: golang.org/x/sys
  dependency-version: 0.34.0
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2025-07-13 21:49:13 -07:00
dependabot[bot]
e81099af8d Bump github.com/NVIDIA/go-nvml from 0.12.4-1 to 0.12.9-0 (#159)
Bumps [github.com/NVIDIA/go-nvml](https://github.com/NVIDIA/go-nvml) from 0.12.4-1 to 0.12.9-0.
- [Release notes](https://github.com/NVIDIA/go-nvml/releases)
- [Commits](https://github.com/NVIDIA/go-nvml/compare/v0.12.4-1...v0.12.9-0)

---
updated-dependencies:
- dependency-name: github.com/NVIDIA/go-nvml
  dependency-version: 0.12.9-0
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2025-07-08 18:36:41 +02:00
dependabot[bot]
eaca327d73 Bump github.com/ClusterCockpit/cc-lib from 0.2.0 to 0.5.0 (#160)
Bumps [github.com/ClusterCockpit/cc-lib](https://github.com/ClusterCockpit/cc-lib) from 0.2.0 to 0.5.0.
- [Release notes](https://github.com/ClusterCockpit/cc-lib/releases)
- [Commits](https://github.com/ClusterCockpit/cc-lib/compare/v0.2.0...v0.5.0)

---
updated-dependencies:
- dependency-name: github.com/ClusterCockpit/cc-lib
  dependency-version: 0.5.0
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2025-07-08 18:36:31 +02:00
dependabot[bot]
2e48996d87 Bump github.com/fsnotify/fsnotify from 1.7.0 to 1.9.0 (#161)
Bumps [github.com/fsnotify/fsnotify](https://github.com/fsnotify/fsnotify) from 1.7.0 to 1.9.0.
- [Release notes](https://github.com/fsnotify/fsnotify/releases)
- [Changelog](https://github.com/fsnotify/fsnotify/blob/main/CHANGELOG.md)
- [Commits](https://github.com/fsnotify/fsnotify/compare/v1.7.0...v1.9.0)

---
updated-dependencies:
- dependency-name: github.com/fsnotify/fsnotify
  dependency-version: 1.9.0
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2025-07-08 18:36:21 +02:00
dependabot[bot]
7cdbada522 Bump github.com/tklauser/go-sysconf from 0.3.13 to 0.3.15 (#158)
Bumps [github.com/tklauser/go-sysconf](https://github.com/tklauser/go-sysconf) from 0.3.13 to 0.3.15.
- [Release notes](https://github.com/tklauser/go-sysconf/releases)
- [Commits](https://github.com/tklauser/go-sysconf/compare/v0.3.13...v0.3.15)

---
updated-dependencies:
- dependency-name: github.com/tklauser/go-sysconf
  dependency-version: 0.3.15
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2025-07-08 18:34:46 +02:00
dependabot[bot]
babe1e020d Bump github.com/PaesslerAG/gval from 1.2.2 to 1.2.4 (#157)
Bumps [github.com/PaesslerAG/gval](https://github.com/PaesslerAG/gval) from 1.2.2 to 1.2.4.
- [Release notes](https://github.com/PaesslerAG/gval/releases)
- [Commits](https://github.com/PaesslerAG/gval/compare/v1.2.2...v1.2.4)

---
updated-dependencies:
- dependency-name: github.com/PaesslerAG/gval
  dependency-version: 1.2.4
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2025-07-08 18:34:34 +02:00
oscarminus
776af72231 Add meta operations and total values as value per second (#151)
Co-authored-by: Michael Schwarz <schwarz@uni-paderborn.de>
2025-07-03 14:57:59 +02:00
Thomas Gruber
2d4894b8f7 Update dependabot.yml 2025-07-03 14:39:46 +02:00
Thomas Roehl
35295b0b3a Add dependabot config 2025-07-03 14:38:46 +02:00
Thomas Roehl
1e734baa35 Merge branch 'main' of github.com:ClusterCockpit/cc-metric-collector 2025-07-03 14:37:33 +02:00
Michael Schwarz
aa6181a018 Read written bytes instead of read bytes 2025-07-02 13:43:57 +02:00
Michael Panzlaff
0a2a85f2ce Add missing 'Section' and 'Priority' to .deb.control 2025-06-23 14:01:57 +02:00
Thomas Roehl
48f5afe2be Update cc-lib to 0.2.0 2025-06-18 12:22:07 +02:00
Thomas Gruber
979192af4e Fix Golang RPM URLs in Release Action 2025-06-17 11:59:53 +02:00
Thomas Gruber
c1032ff329 Fix '+' in Golang RPM URLs 2025-06-17 11:51:34 +02:00
Thomas Gruber
6b03d3aee8 Update golang RPM links in CI 2025-06-17 11:51:34 +02:00
Thomas Gruber
b9665d0d68 Numastats: Read in config and send abs values by default. Fixes #146 (#147) 2025-06-17 11:51:34 +02:00
Thomas Röhl
4c7a0e064f Add copyright header to Golang files 2025-06-17 11:51:34 +02:00
Thomas Röhl
d8f10384a1 Remove hostlist, now in cc-lib 2025-06-17 11:51:34 +02:00
Thomas Gruber
f74d856e69 Nvidia energy metrics to Nvidia collector (#144)
* Add energy metrics from NVML to Nvidia NVML collector

* Add energy metrics to Nvidia collector README
2025-06-17 11:51:34 +02:00
Thomas Roehl
fabb37ea70 Fix URL to new location of cc-units 2025-06-17 11:51:34 +02:00
Thomas Gruber
3a0f148728 Fix Typo in gpfsMetric.md 2025-06-17 10:35:51 +02:00
59 changed files with 1215 additions and 461 deletions

11
.github/dependabot.yml vendored Normal file
View File

@@ -0,0 +1,11 @@
# To get started with Dependabot version updates, you'll need to specify which
# package ecosystems to update and where the package manifests are located.
# Please see the documentation for all configuration options:
# https://docs.github.com/code-security/dependabot/dependabot-version-updates/configuration-options-for-the-dependabot.yml-file
version: 2
updates:
  - package-ecosystem: "gomod"
    directory: "/"
    schedule:
      interval: "weekly"

View File

@@ -48,10 +48,10 @@ jobs:
- name: Setup Golang - name: Setup Golang
run: | run: |
dnf --assumeyes --disableplugin=subscription-manager install \ dnf --assumeyes --disableplugin=subscription-manager install \
https://repo.almalinux.org/almalinux/8/AppStream/x86_64/os/Packages/go-toolset-1.22.9-1.module_el8.10.0+3938+8c723e16.x86_64.rpm \ https://repo.almalinux.org/almalinux/8/AppStream/x86_64/os/Packages/go-toolset-1.23.9-1.module_el8.10.0+4000+1ad1b2cc.x86_64.rpm \
https://repo.almalinux.org/almalinux/8/AppStream/x86_64/os/Packages/golang-1.22.9-1.module_el8.10.0+3938+8c723e16.x86_64.rpm \ https://repo.almalinux.org/almalinux/8/AppStream/x86_64/os/Packages/golang-1.23.9-1.module_el8.10.0+4000+1ad1b2cc.x86_64.rpm \
https://repo.almalinux.org/almalinux/8/AppStream/x86_64/os/Packages/golang-bin-1.22.9-1.module_el8.10.0+3938+8c723e16.x86_64.rpm \ https://repo.almalinux.org/almalinux/8/AppStream/x86_64/os/Packages/golang-bin-1.23.9-1.module_el8.10.0+4000+1ad1b2cc.x86_64.rpm \
https://repo.almalinux.org/almalinux/8/AppStream/x86_64/os/Packages/golang-src-1.22.9-1.module_el8.10.0+3938+8c723e16.noarch.rpm https://repo.almalinux.org/almalinux/8/AppStream/x86_64/os/Packages/golang-src-1.23.9-1.module_el8.10.0+4000+1ad1b2cc.noarch.rpm
- name: RPM build MetricCollector - name: RPM build MetricCollector
id: rpmbuild id: rpmbuild
@@ -126,11 +126,11 @@ jobs:
- name: Setup Golang - name: Setup Golang
run: | run: |
dnf --assumeyes --disableplugin=subscription-manager install \ dnf --assumeyes --disableplugin=subscription-manager install \
https://repo.almalinux.org/almalinux/9/AppStream/x86_64/os/Packages/go-toolset-1.22.7-2.el9_5.x86_64.rpm \ https://repo.almalinux.org/almalinux/9/AppStream/x86_64/os/Packages/go-toolset-1.23.9-1.el9_6.x86_64.rpm \
https://repo.almalinux.org/almalinux/9/AppStream/x86_64/os/Packages/golang-1.22.7-2.el9_5.x86_64.rpm \ https://repo.almalinux.org/almalinux/9/AppStream/x86_64/os/Packages/golang-1.23.9-1.el9_6.x86_64.rpm \
https://repo.almalinux.org/almalinux/9/AppStream/x86_64/os/Packages/golang-bin-1.22.7-2.el9_5.x86_64.rpm \ https://repo.almalinux.org/almalinux/9/AppStream/x86_64/os/Packages/golang-bin-1.23.9-1.el9_6.x86_64.rpm \
https://repo.almalinux.org/almalinux/9/AppStream/x86_64/os/Packages/golang-src-1.22.7-2.el9_5.noarch.rpm \ https://repo.almalinux.org/almalinux/9/AppStream/x86_64/os/Packages/golang-src-1.23.9-1.el9_6.noarch.rpm \
https://repo.almalinux.org/almalinux/9/AppStream/x86_64/os/Packages/golang-race-1.22.7-2.el9_5.x86_64.rpm https://repo.almalinux.org/almalinux/9/AppStream/x86_64/os/Packages/golang-race-1.23.9-1.el9_6.x86_64.rpm
- name: RPM build MetricCollector - name: RPM build MetricCollector
id: rpmbuild id: rpmbuild
@@ -202,10 +202,10 @@ jobs:
- name: Setup Golang - name: Setup Golang
run: | run: |
dnf --assumeyes --disableplugin=subscription-manager install \ dnf --assumeyes --disableplugin=subscription-manager install \
https://repo.almalinux.org/almalinux/8/AppStream/x86_64/os/Packages/go-toolset-1.22.9-1.module_el8.10.0+3938+8c723e16.x86_64.rpm \ https://repo.almalinux.org/almalinux/8/AppStream/x86_64/os/Packages/go-toolset-1.23.9-1.module_el8.10.0+4000+1ad1b2cc.x86_64.rpm \
https://repo.almalinux.org/almalinux/8/AppStream/x86_64/os/Packages/golang-1.22.9-1.module_el8.10.0+3938+8c723e16.x86_64.rpm \ https://repo.almalinux.org/almalinux/8/AppStream/x86_64/os/Packages/golang-1.23.9-1.module_el8.10.0+4000+1ad1b2cc.x86_64.rpm \
https://repo.almalinux.org/almalinux/8/AppStream/x86_64/os/Packages/golang-bin-1.22.9-1.module_el8.10.0+3938+8c723e16.x86_64.rpm \ https://repo.almalinux.org/almalinux/8/AppStream/x86_64/os/Packages/golang-bin-1.23.9-1.module_el8.10.0+4000+1ad1b2cc.x86_64.rpm \
https://repo.almalinux.org/almalinux/8/AppStream/x86_64/os/Packages/golang-src-1.22.9-1.module_el8.10.0+3938+8c723e16.noarch.rpm https://repo.almalinux.org/almalinux/8/AppStream/x86_64/os/Packages/golang-src-1.23.9-1.module_el8.10.0+4000+1ad1b2cc.noarch.rpm
- name: RPM build MetricCollector - name: RPM build MetricCollector
id: rpmbuild id: rpmbuild
@@ -262,11 +262,11 @@ jobs:
- name: Setup Golang - name: Setup Golang
run: | run: |
dnf --assumeyes --disableplugin=subscription-manager install \ dnf --assumeyes --disableplugin=subscription-manager install \
https://repo.almalinux.org/almalinux/9/AppStream/x86_64/os/Packages/go-toolset-1.22.7-2.el9_5.x86_64.rpm \ https://repo.almalinux.org/almalinux/9/AppStream/x86_64/os/Packages/go-toolset-1.23.9-1.el9_6.x86_64.rpm \
https://repo.almalinux.org/almalinux/9/AppStream/x86_64/os/Packages/golang-1.22.7-2.el9_5.x86_64.rpm \ https://repo.almalinux.org/almalinux/9/AppStream/x86_64/os/Packages/golang-1.23.9-1.el9_6.x86_64.rpm \
https://repo.almalinux.org/almalinux/9/AppStream/x86_64/os/Packages/golang-bin-1.22.7-2.el9_5.x86_64.rpm \ https://repo.almalinux.org/almalinux/9/AppStream/x86_64/os/Packages/golang-bin-1.23.9-1.el9_6.x86_64.rpm \
https://repo.almalinux.org/almalinux/9/AppStream/x86_64/os/Packages/golang-src-1.22.7-2.el9_5.noarch.rpm \ https://repo.almalinux.org/almalinux/9/AppStream/x86_64/os/Packages/golang-src-1.23.9-1.el9_6.noarch.rpm \
https://repo.almalinux.org/almalinux/9/AppStream/x86_64/os/Packages/golang-race-1.22.7-2.el9_5.x86_64.rpm https://repo.almalinux.org/almalinux/9/AppStream/x86_64/os/Packages/golang-race-1.23.9-1.el9_6.x86_64.rpm
- name: RPM build MetricCollector - name: RPM build MetricCollector
id: rpmbuild id: rpmbuild

View File

@@ -71,10 +71,10 @@ jobs:
- name: Setup Golang - name: Setup Golang
run: | run: |
dnf --assumeyes --disableplugin=subscription-manager install \ dnf --assumeyes --disableplugin=subscription-manager install \
https://repo.almalinux.org/almalinux/8/AppStream/x86_64/os/Packages/go-toolset-1.22.9-1.module_el8.10.0+3938+8c723e16.x86_64.rpm \ https://repo.almalinux.org/almalinux/8/AppStream/x86_64/os/Packages/go-toolset-1.23.9-1.module_el8.10.0+4000+1ad1b2cc.x86_64.rpm \
https://repo.almalinux.org/almalinux/8/AppStream/x86_64/os/Packages/golang-1.22.9-1.module_el8.10.0+3938+8c723e16.x86_64.rpm \ https://repo.almalinux.org/almalinux/8/AppStream/x86_64/os/Packages/golang-1.23.9-1.module_el8.10.0+4000+1ad1b2cc.x86_64.rpm \
https://repo.almalinux.org/almalinux/8/AppStream/x86_64/os/Packages/golang-bin-1.22.9-1.module_el8.10.0+3938+8c723e16.x86_64.rpm \ https://repo.almalinux.org/almalinux/8/AppStream/x86_64/os/Packages/golang-bin-1.23.9-1.module_el8.10.0+4000+1ad1b2cc.x86_64.rpm \
https://repo.almalinux.org/almalinux/8/AppStream/x86_64/os/Packages/golang-src-1.22.9-1.module_el8.10.0+3938+8c723e16.noarch.rpm https://repo.almalinux.org/almalinux/8/AppStream/x86_64/os/Packages/golang-src-1.23.9-1.module_el8.10.0+4000+1ad1b2cc.noarch.rpm
- name: RPM build MetricCollector - name: RPM build MetricCollector
id: rpmbuild id: rpmbuild
@@ -116,11 +116,11 @@ jobs:
- name: Setup Golang - name: Setup Golang
run: | run: |
dnf --assumeyes --disableplugin=subscription-manager install \ dnf --assumeyes --disableplugin=subscription-manager install \
https://repo.almalinux.org/almalinux/9/AppStream/x86_64/os/Packages/go-toolset-1.22.7-2.el9_5.x86_64.rpm \ https://repo.almalinux.org/almalinux/9/AppStream/x86_64/os/Packages/go-toolset-1.23.9-1.el9_6.x86_64.rpm \
https://repo.almalinux.org/almalinux/9/AppStream/x86_64/os/Packages/golang-1.22.7-2.el9_5.x86_64.rpm \ https://repo.almalinux.org/almalinux/9/AppStream/x86_64/os/Packages/golang-1.23.9-1.el9_6.x86_64.rpm \
https://repo.almalinux.org/almalinux/9/AppStream/x86_64/os/Packages/golang-bin-1.22.7-2.el9_5.x86_64.rpm \ https://repo.almalinux.org/almalinux/9/AppStream/x86_64/os/Packages/golang-bin-1.23.9-1.el9_6.x86_64.rpm \
https://repo.almalinux.org/almalinux/9/AppStream/x86_64/os/Packages/golang-src-1.22.7-2.el9_5.noarch.rpm \ https://repo.almalinux.org/almalinux/9/AppStream/x86_64/os/Packages/golang-src-1.23.9-1.el9_6.noarch.rpm \
https://repo.almalinux.org/almalinux/9/AppStream/x86_64/os/Packages/golang-race-1.22.7-2.el9_5.x86_64.rpm https://repo.almalinux.org/almalinux/9/AppStream/x86_64/os/Packages/golang-race-1.23.9-1.el9_6.x86_64.rpm
- name: RPM build MetricCollector - name: RPM build MetricCollector
id: rpmbuild id: rpmbuild
@@ -160,10 +160,10 @@ jobs:
- name: Setup Golang - name: Setup Golang
run: | run: |
dnf --assumeyes --disableplugin=subscription-manager install \ dnf --assumeyes --disableplugin=subscription-manager install \
https://repo.almalinux.org/almalinux/8/AppStream/x86_64/os/Packages/go-toolset-1.22.9-1.module_el8.10.0+3938+8c723e16.x86_64.rpm \ https://repo.almalinux.org/almalinux/8/AppStream/x86_64/os/Packages/go-toolset-1.23.9-1.module_el8.10.0+4000+1ad1b2cc.x86_64.rpm \
https://repo.almalinux.org/almalinux/8/AppStream/x86_64/os/Packages/golang-1.22.9-1.module_el8.10.0+3938+8c723e16.x86_64.rpm \ https://repo.almalinux.org/almalinux/8/AppStream/x86_64/os/Packages/golang-1.23.9-1.module_el8.10.0+4000+1ad1b2cc.x86_64.rpm \
https://repo.almalinux.org/almalinux/8/AppStream/x86_64/os/Packages/golang-bin-1.22.9-1.module_el8.10.0+3938+8c723e16.x86_64.rpm \ https://repo.almalinux.org/almalinux/8/AppStream/x86_64/os/Packages/golang-bin-1.23.9-1.module_el8.10.0+4000+1ad1b2cc.x86_64.rpm \
https://repo.almalinux.org/almalinux/8/AppStream/x86_64/os/Packages/golang-src-1.22.9-1.module_el8.10.0+3938+8c723e16.noarch.rpm https://repo.almalinux.org/almalinux/8/AppStream/x86_64/os/Packages/golang-src-1.23.9-1.module_el8.10.0+4000+1ad1b2cc.noarch.rpm
- name: RPM build MetricCollector - name: RPM build MetricCollector
id: rpmbuild id: rpmbuild
@@ -202,11 +202,11 @@ jobs:
- name: Setup Golang - name: Setup Golang
run: | run: |
dnf --assumeyes --disableplugin=subscription-manager install \ dnf --assumeyes --disableplugin=subscription-manager install \
https://repo.almalinux.org/almalinux/9/AppStream/x86_64/os/Packages/go-toolset-1.22.7-2.el9_5.x86_64.rpm \ https://repo.almalinux.org/almalinux/9/AppStream/x86_64/os/Packages/go-toolset-1.23.9-1.el9_6.x86_64.rpm \
https://repo.almalinux.org/almalinux/9/AppStream/x86_64/os/Packages/golang-1.22.7-2.el9_5.x86_64.rpm \ https://repo.almalinux.org/almalinux/9/AppStream/x86_64/os/Packages/golang-1.23.9-1.el9_6.x86_64.rpm \
https://repo.almalinux.org/almalinux/9/AppStream/x86_64/os/Packages/golang-bin-1.22.7-2.el9_5.x86_64.rpm \ https://repo.almalinux.org/almalinux/9/AppStream/x86_64/os/Packages/golang-bin-1.23.9-1.el9_6.x86_64.rpm \
https://repo.almalinux.org/almalinux/9/AppStream/x86_64/os/Packages/golang-src-1.22.7-2.el9_5.noarch.rpm \ https://repo.almalinux.org/almalinux/9/AppStream/x86_64/os/Packages/golang-src-1.23.9-1.el9_6.noarch.rpm \
https://repo.almalinux.org/almalinux/9/AppStream/x86_64/os/Packages/golang-race-1.22.7-2.el9_5.x86_64.rpm https://repo.almalinux.org/almalinux/9/AppStream/x86_64/os/Packages/golang-race-1.23.9-1.el9_6.x86_64.rpm
- name: RPM build MetricCollector - name: RPM build MetricCollector
id: rpmbuild id: rpmbuild

View File

@@ -32,13 +32,15 @@ There is a main configuration file with basic settings that point to the other c
``` json ``` json
{ {
"sinks": "sinks.json", "sinks-file": "sinks.json",
"collectors" : "collectors.json", "collectors-file" : "collectors.json",
"receivers" : "receivers.json", "receivers-file" : "receivers.json",
"router" : "router.json", "router-file" : "router.json",
"main": {
"interval": "10s", "interval": "10s",
"duration": "1s" "duration": "1s"
} }
}
``` ```
The `interval` defines how often the metrics should be read and sent to the sink. The `duration` tells collectors how long one measurement has to take. This is important for some collectors, like the `likwid` collector. For more information, see [here](./docs/configuration.md). The `interval` defines how often the metrics should be read and sent to the sink. The `duration` tells collectors how long one measurement has to take. This is important for some collectors, like the `likwid` collector. For more information, see [here](./docs/configuration.md).

View File

@@ -1,3 +1,10 @@
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved. This file is part of cc-lib.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
// additional authors:
// Holger Obermaier (NHR@KIT)
package main package main
import ( import (

View File

@@ -52,6 +52,7 @@ In contrast to the configuration files for sinks and receivers, the collectors c
* [`beegfs_meta`](./beegfsmetaMetric.md) * [`beegfs_meta`](./beegfsmetaMetric.md)
* [`beegfs_storage`](./beegfsstorageMetric.md) * [`beegfs_storage`](./beegfsstorageMetric.md)
* [`rocm_smi`](./rocmsmiMetric.md) * [`rocm_smi`](./rocmsmiMetric.md)
* [`slurm_cgroup`](./slurmCgroupMetric.md)
## Todos ## Todos

View File

@@ -1,3 +1,10 @@
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved. This file is part of cc-lib.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
// additional authors:
// Holger Obermaier (NHR@KIT)
package collectors package collectors
import ( import (

View File

@@ -1,3 +1,10 @@
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved. This file is part of cc-lib.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
// additional authors:
// Holger Obermaier (NHR@KIT)
package collectors package collectors
import ( import (

View File

@@ -1,3 +1,10 @@
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved. This file is part of cc-lib.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
// additional authors:
// Holger Obermaier (NHR@KIT)
package collectors package collectors
import ( import (
@@ -40,6 +47,7 @@ var AvailableCollectors = map[string]MetricCollector{
"self": new(SelfCollector), "self": new(SelfCollector),
"schedstat": new(SchedstatCollector), "schedstat": new(SchedstatCollector),
"nfsiostat": new(NfsIOStatCollector), "nfsiostat": new(NfsIOStatCollector),
"slurm_cgroup": new(SlurmCgroupCollector),
} }
// Metric collector manager data structure // Metric collector manager data structure

View File

@@ -1,3 +1,10 @@
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved. This file is part of cc-lib.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
// additional authors:
// Holger Obermaier (NHR@KIT)
package collectors package collectors
import ( import (

View File

@@ -1,3 +1,10 @@
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved. This file is part of cc-lib.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
// additional authors:
// Holger Obermaier (NHR@KIT)
package collectors package collectors
import ( import (

View File

@@ -1,3 +1,10 @@
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved. This file is part of cc-lib.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
// additional authors:
// Holger Obermaier (NHR@KIT)
package collectors package collectors
import ( import (

View File

@@ -1,3 +1,10 @@
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved. This file is part of cc-lib.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
// additional authors:
// Holger Obermaier (NHR@KIT)
package collectors package collectors
import ( import (

View File

@@ -1,3 +1,10 @@
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved. This file is part of cc-lib.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
// additional authors:
// Holger Obermaier (NHR@KIT)
package collectors package collectors
import ( import (

View File

@@ -1,9 +1,17 @@
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved. This file is part of cc-lib.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
// additional authors:
// Holger Obermaier (NHR@KIT)
package collectors package collectors
import ( import (
"bufio" "bufio"
"bytes" "bytes"
"encoding/json" "encoding/json"
"errors"
"fmt" "fmt"
"io" "io"
"log" "log"
@@ -11,6 +19,7 @@ import (
"os/user" "os/user"
"strconv" "strconv"
"strings" "strings"
"syscall"
"time" "time"
cclog "github.com/ClusterCockpit/cc-lib/ccLogger" cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
@@ -20,8 +29,17 @@ import (
const DEFAULT_GPFS_CMD = "mmpmon" const DEFAULT_GPFS_CMD = "mmpmon"
type GpfsCollectorLastState struct { type GpfsCollectorLastState struct {
numOpens int64
numCloses int64
numReads int64
numWrites int64
numReaddirs int64
numInodeUpdates int64
bytesRead int64 bytesRead int64
bytesWritten int64 bytesWritten int64
bytesTotal int64
iops int64
metaops int64
} }
type GpfsCollector struct { type GpfsCollector struct {
@@ -30,9 +48,12 @@ type GpfsCollector struct {
config struct { config struct {
Mmpmon string `json:"mmpmon_path,omitempty"` Mmpmon string `json:"mmpmon_path,omitempty"`
ExcludeFilesystem []string `json:"exclude_filesystem,omitempty"` ExcludeFilesystem []string `json:"exclude_filesystem,omitempty"`
Sudo bool `json:"use_sudo,omitempty"`
SendBandwidths bool `json:"send_bandwidths"` SendBandwidths bool `json:"send_bandwidths"`
SendTotalValues bool `json:"send_total_values"` SendTotalValues bool `json:"send_total_values"`
SendDerivedValues bool `json:"send_derived_values"`
} }
sudoCmd string
skipFS map[string]struct{} skipFS map[string]struct{}
lastTimestamp time.Time // Store time stamp of last tick to derive bandwidths lastTimestamp time.Time // Store time stamp of last tick to derive bandwidths
lastState map[string]GpfsCollectorLastState lastState map[string]GpfsCollectorLastState
@@ -75,19 +96,44 @@ func (m *GpfsCollector) Init(config json.RawMessage) error {
m.lastState = make(map[string]GpfsCollectorLastState) m.lastState = make(map[string]GpfsCollectorLastState)
// GPFS / IBM Spectrum Scale file system statistics can only be queried by user root // GPFS / IBM Spectrum Scale file system statistics can only be queried by user root
if !m.config.Sudo {
user, err := user.Current() user, err := user.Current()
if err != nil { if err != nil {
return fmt.Errorf("failed to get current user: %v", err) cclog.ComponentError(m.name, "Failed to get current user:", err.Error())
return err
} }
if user.Uid != "0" { if user.Uid != "0" {
return fmt.Errorf("GPFS file system statistics can only be queried by user root") cclog.ComponentError(m.name, "GPFS file system statistics can only be queried by user root")
return err
}
} else {
p, err := exec.LookPath("sudo")
if err != nil {
cclog.ComponentError(m.name, "Cannot find 'sudo'")
return err
}
m.sudoCmd = p
}
// when using sudo, the full path of mmpmon must be specified because
// exec.LookPath will not work as mmpmon is not executable as user
if m.config.Sudo && !strings.HasPrefix(m.config.Mmpmon, "/") {
return fmt.Errorf("when using sudo, mmpmon_path must be provided and an absolute path: %s", m.config.Mmpmon)
} }
// Check if mmpmon is in executable search path // Check if mmpmon is in executable search path
p, err := exec.LookPath(m.config.Mmpmon) p, err := exec.LookPath(m.config.Mmpmon)
if err != nil { if err != nil {
// if using sudo, exec.lookPath will return EACCES (file mode r-x------), this can be ignored
if m.config.Sudo && errors.Is(err, syscall.EACCES) {
cclog.ComponentWarn(m.name, fmt.Sprintf("got error looking for mmpmon binary '%s': %v . This is expected when using sudo, continuing.", m.config.Mmpmon, err))
// the file was given in the config, use it
p = m.config.Mmpmon
} else {
cclog.ComponentError(m.name, fmt.Sprintf("failed to find mmpmon binary '%s': %v", m.config.Mmpmon, err))
return fmt.Errorf("failed to find mmpmon binary '%s': %v", m.config.Mmpmon, err) return fmt.Errorf("failed to find mmpmon binary '%s': %v", m.config.Mmpmon, err)
} }
}
m.config.Mmpmon = p m.config.Mmpmon = p
m.init = true m.init = true
@@ -111,7 +157,13 @@ func (m *GpfsCollector) Read(interval time.Duration, output chan lp.CCMessage) {
// -p: generate output that can be parsed // -p: generate output that can be parsed
// -s: suppress the prompt on input // -s: suppress the prompt on input
// fs_io_s: Displays I/O statistics per mounted file system // fs_io_s: Displays I/O statistics per mounted file system
cmd := exec.Command(m.config.Mmpmon, "-p", "-s") var cmd *exec.Cmd
if m.config.Sudo {
cmd = exec.Command(m.sudoCmd, m.config.Mmpmon, "-p", "-s")
} else {
cmd = exec.Command(m.config.Mmpmon, "-p", "-s")
}
cmd.Stdin = strings.NewReader("once fs_io_s\n") cmd.Stdin = strings.NewReader("once fs_io_s\n")
cmdStdout := new(bytes.Buffer) cmdStdout := new(bytes.Buffer)
cmdStderr := new(bytes.Buffer) cmdStderr := new(bytes.Buffer)
@@ -178,6 +230,22 @@ func (m *GpfsCollector) Read(interval time.Duration, output chan lp.CCMessage) {
} }
} }
if m.config.SendDerivedValues {
if _, ok := m.lastState[filesystem]; !ok {
m.lastState[filesystem] = GpfsCollectorLastState{
numReads: -1,
numWrites: -1,
numOpens: -1,
numCloses: -1,
numReaddirs: -1,
numInodeUpdates: -1,
bytesTotal: -1,
iops: -1,
metaops: -1,
}
}
}
// return code // return code
rc, err := strconv.Atoi(key_value["_rc_"]) rc, err := strconv.Atoi(key_value["_rc_"])
if err != nil { if err != nil {
@@ -271,7 +339,7 @@ func (m *GpfsCollector) Read(interval time.Duration, output chan lp.CCMessage) {
output <- y output <- y
} }
if m.config.SendBandwidths { if m.config.SendBandwidths {
if lastBytesWritten := m.lastState[filesystem].bytesRead; lastBytesWritten >= 0 { if lastBytesWritten := m.lastState[filesystem].bytesWritten; lastBytesWritten >= 0 {
bwWrite := float64(bytesWritten-lastBytesWritten) / timeDiff bwWrite := float64(bytesWritten-lastBytesWritten) / timeDiff
if y, err := if y, err :=
lp.NewMessage( lp.NewMessage(
@@ -289,13 +357,6 @@ func (m *GpfsCollector) Read(interval time.Duration, output chan lp.CCMessage) {
} }
} }
if m.config.SendBandwidths {
m.lastState[filesystem] = GpfsCollectorLastState{
bytesRead: bytesRead,
bytesWritten: bytesWritten,
}
}
// number of opens // number of opens
numOpens, err := strconv.ParseInt(key_value["_oc_"], 10, 64) numOpens, err := strconv.ParseInt(key_value["_oc_"], 10, 64)
if err != nil { if err != nil {
@@ -307,6 +368,24 @@ func (m *GpfsCollector) Read(interval time.Duration, output chan lp.CCMessage) {
if y, err := lp.NewMessage("gpfs_num_opens", m.tags, m.meta, map[string]interface{}{"value": numOpens}, timestamp); err == nil { if y, err := lp.NewMessage("gpfs_num_opens", m.tags, m.meta, map[string]interface{}{"value": numOpens}, timestamp); err == nil {
output <- y output <- y
} }
if m.config.SendDerivedValues {
if lastNumOpens := m.lastState[filesystem].numOpens; lastNumOpens >= 0 {
opensRate := float64(numOpens-lastNumOpens) / timeDiff
if y, err :=
lp.NewMessage(
"gpfs_opens_rate",
m.tags,
m.meta,
map[string]interface{}{
"value": opensRate,
},
timestamp,
); err == nil {
y.AddMeta("unit", "requests/sec")
output <- y
}
}
}
// number of closes // number of closes
numCloses, err := strconv.ParseInt(key_value["_cc_"], 10, 64) numCloses, err := strconv.ParseInt(key_value["_cc_"], 10, 64)
@@ -319,6 +398,24 @@ func (m *GpfsCollector) Read(interval time.Duration, output chan lp.CCMessage) {
if y, err := lp.NewMessage("gpfs_num_closes", m.tags, m.meta, map[string]interface{}{"value": numCloses}, timestamp); err == nil { if y, err := lp.NewMessage("gpfs_num_closes", m.tags, m.meta, map[string]interface{}{"value": numCloses}, timestamp); err == nil {
output <- y output <- y
} }
if m.config.SendDerivedValues {
if lastNumCloses := m.lastState[filesystem].numCloses; lastNumCloses >= 0 {
closesRate := float64(numCloses-lastNumCloses) / timeDiff
if y, err :=
lp.NewMessage(
"gpfs_closes_rate",
m.tags,
m.meta,
map[string]interface{}{
"value": closesRate,
},
timestamp,
); err == nil {
y.AddMeta("unit", "requests/sec")
output <- y
}
}
}
// number of reads // number of reads
numReads, err := strconv.ParseInt(key_value["_rdc_"], 10, 64) numReads, err := strconv.ParseInt(key_value["_rdc_"], 10, 64)
@@ -331,6 +428,24 @@ func (m *GpfsCollector) Read(interval time.Duration, output chan lp.CCMessage) {
if y, err := lp.NewMessage("gpfs_num_reads", m.tags, m.meta, map[string]interface{}{"value": numReads}, timestamp); err == nil { if y, err := lp.NewMessage("gpfs_num_reads", m.tags, m.meta, map[string]interface{}{"value": numReads}, timestamp); err == nil {
output <- y output <- y
} }
if m.config.SendDerivedValues {
if lastNumReads := m.lastState[filesystem].numReads; lastNumReads >= 0 {
readsRate := float64(numReads-lastNumReads) / timeDiff
if y, err :=
lp.NewMessage(
"gpfs_reads_rate",
m.tags,
m.meta,
map[string]interface{}{
"value": readsRate,
},
timestamp,
); err == nil {
y.AddMeta("unit", "requests/sec")
output <- y
}
}
}
// number of writes // number of writes
numWrites, err := strconv.ParseInt(key_value["_wc_"], 10, 64) numWrites, err := strconv.ParseInt(key_value["_wc_"], 10, 64)
@@ -343,6 +458,24 @@ func (m *GpfsCollector) Read(interval time.Duration, output chan lp.CCMessage) {
if y, err := lp.NewMessage("gpfs_num_writes", m.tags, m.meta, map[string]interface{}{"value": numWrites}, timestamp); err == nil { if y, err := lp.NewMessage("gpfs_num_writes", m.tags, m.meta, map[string]interface{}{"value": numWrites}, timestamp); err == nil {
output <- y output <- y
} }
if m.config.SendDerivedValues {
if lastNumWrites := m.lastState[filesystem].numWrites; lastNumWrites >= 0 {
writesRate := float64(numWrites-lastNumWrites) / timeDiff
if y, err :=
lp.NewMessage(
"gpfs_writes_rate",
m.tags,
m.meta,
map[string]interface{}{
"value": writesRate,
},
timestamp,
); err == nil {
y.AddMeta("unit", "requests/sec")
output <- y
}
}
}
// number of read directories // number of read directories
numReaddirs, err := strconv.ParseInt(key_value["_dir_"], 10, 64) numReaddirs, err := strconv.ParseInt(key_value["_dir_"], 10, 64)
@@ -355,6 +488,24 @@ func (m *GpfsCollector) Read(interval time.Duration, output chan lp.CCMessage) {
if y, err := lp.NewMessage("gpfs_num_readdirs", m.tags, m.meta, map[string]interface{}{"value": numReaddirs}, timestamp); err == nil { if y, err := lp.NewMessage("gpfs_num_readdirs", m.tags, m.meta, map[string]interface{}{"value": numReaddirs}, timestamp); err == nil {
output <- y output <- y
} }
if m.config.SendDerivedValues {
if lastNumReaddirs := m.lastState[filesystem].numReaddirs; lastNumReaddirs >= 0 {
readdirsRate := float64(numReaddirs-lastNumReaddirs) / timeDiff
if y, err :=
lp.NewMessage(
"gpfs_readdirs_rate",
m.tags,
m.meta,
map[string]interface{}{
"value": readdirsRate,
},
timestamp,
); err == nil {
y.AddMeta("unit", "requests/sec")
output <- y
}
}
}
// Number of inode updates // Number of inode updates
numInodeUpdates, err := strconv.ParseInt(key_value["_iu_"], 10, 64) numInodeUpdates, err := strconv.ParseInt(key_value["_iu_"], 10, 64)
@@ -367,10 +518,31 @@ func (m *GpfsCollector) Read(interval time.Duration, output chan lp.CCMessage) {
if y, err := lp.NewMessage("gpfs_num_inode_updates", m.tags, m.meta, map[string]interface{}{"value": numInodeUpdates}, timestamp); err == nil { if y, err := lp.NewMessage("gpfs_num_inode_updates", m.tags, m.meta, map[string]interface{}{"value": numInodeUpdates}, timestamp); err == nil {
output <- y output <- y
} }
if m.config.SendDerivedValues {
if lastNumInodeUpdates := m.lastState[filesystem].numInodeUpdates; lastNumInodeUpdates >= 0 {
inodeUpdatesRate := float64(numInodeUpdates-lastNumInodeUpdates) / timeDiff
if y, err :=
lp.NewMessage(
"gpfs_inode_updates_rate",
m.tags,
m.meta,
map[string]interface{}{
"value": inodeUpdatesRate,
},
timestamp,
); err == nil {
y.AddMeta("unit", "requests/sec")
output <- y
}
}
}
// Total values // Total values
bytesTotal := int64(-1);
iops := int64(-1);
metaops := int64(-1);
if m.config.SendTotalValues { if m.config.SendTotalValues {
bytesTotal := bytesRead + bytesWritten bytesTotal = bytesRead + bytesWritten
if y, err := if y, err :=
lp.NewMessage("gpfs_bytes_total", lp.NewMessage("gpfs_bytes_total",
m.tags, m.tags,
@@ -383,7 +555,26 @@ func (m *GpfsCollector) Read(interval time.Duration, output chan lp.CCMessage) {
y.AddMeta("unit", "bytes") y.AddMeta("unit", "bytes")
output <- y output <- y
} }
iops := numReads + numWrites if m.config.SendBandwidths {
if lastBytesTotal := m.lastState[filesystem].bytesTotal; lastBytesTotal >= 0 {
bwTotal := float64(bytesTotal-lastBytesTotal) / timeDiff
if y, err :=
lp.NewMessage(
"gpfs_bw_total",
m.tags,
m.meta,
map[string]interface{}{
"value": bwTotal,
},
timestamp,
); err == nil {
y.AddMeta("unit", "bytes/sec")
output <- y
}
}
}
iops = numReads + numWrites
if y, err := if y, err :=
lp.NewMessage("gpfs_iops", lp.NewMessage("gpfs_iops",
m.tags, m.tags,
@@ -395,7 +586,26 @@ func (m *GpfsCollector) Read(interval time.Duration, output chan lp.CCMessage) {
); err == nil { ); err == nil {
output <- y output <- y
} }
metaops := numInodeUpdates + numCloses + numOpens + numReaddirs if m.config.SendDerivedValues {
if lastIops := m.lastState[filesystem].iops; lastIops >= 0 {
iopsRate := float64(iops-lastIops) / timeDiff
if y, err :=
lp.NewMessage(
"gpfs_iops_rate",
m.tags,
m.meta,
map[string]interface{}{
"value": iopsRate,
},
timestamp,
); err == nil {
y.AddMeta("unit", "requests/sec")
output <- y
}
}
}
metaops = numInodeUpdates + numCloses + numOpens + numReaddirs
if y, err := if y, err :=
lp.NewMessage("gpfs_metaops", lp.NewMessage("gpfs_metaops",
m.tags, m.tags,
@@ -407,9 +617,43 @@ func (m *GpfsCollector) Read(interval time.Duration, output chan lp.CCMessage) {
); err == nil { ); err == nil {
output <- y output <- y
} }
if m.config.SendDerivedValues {
if lastMetaops := m.lastState[filesystem].metaops; lastMetaops >= 0 {
metaopsRate := float64(metaops-lastMetaops) / timeDiff
if y, err :=
lp.NewMessage(
"gpfs_metaops_rate",
m.tags,
m.meta,
map[string]interface{}{
"value": metaopsRate,
},
timestamp,
); err == nil {
y.AddMeta("unit", "requests/sec")
output <- y
} }
} }
} }
}
// Save last state
m.lastState[filesystem] = GpfsCollectorLastState{
bytesRead: bytesRead,
bytesWritten: bytesWritten,
numOpens: numOpens,
numCloses: numCloses,
numReads: numReads,
numWrites: numWrites,
numReaddirs: numReaddirs,
numInodeUpdates: numInodeUpdates,
bytesTotal: bytesTotal,
iops: iops,
metaops: metaops,
}
}
}
func (m *GpfsCollector) Close() { func (m *GpfsCollector) Close() {
m.init = false m.init = false

View File

@@ -12,13 +12,15 @@ hugo_path: docs/reference/cc-metric-collector/collectors/gpfs.md
## `gpfs` collector ## `gpfs` collector
```json ```json
"ibstat": { "gpfs": {
"mmpmon_path": "/path/to/mmpmon", "mmpmon_path": "/path/to/mmpmon",
"use_sudo": true,
"exclude_filesystem": [ "exclude_filesystem": [
"fs1" "fs1"
], ],
"send_bandwidths": true, "send_bandwidths": true,
"send_total_values": true "send_total_values": true,
"send_derived_values": true
} }
``` ```
@@ -31,6 +33,11 @@ in the configuration.
The path to the `mmpmon` command can be configured with the `mmpmon_path` option The path to the `mmpmon` command can be configured with the `mmpmon_path` option
in the configuration. If nothing is set, the collector searches in `$PATH` for `mmpmon`. in the configuration. If nothing is set, the collector searches in `$PATH` for `mmpmon`.
If cc-metric-collector is run as non-root, `sudo` can be enabled with `use_sudo`.
Because `mmpmon` is by default only executable as root, the Go procedure to
search for it in `$PATH` will fail. If you use `sudo`, you must specify the
complete path for `mmpmon` using the parameter `mmpmon_path`.
Metrics: Metrics:
* `gpfs_bytes_read` * `gpfs_bytes_read`
@@ -41,10 +48,19 @@ Metrics:
* `gpfs_num_writes` * `gpfs_num_writes`
* `gpfs_num_readdirs` * `gpfs_num_readdirs`
* `gpfs_num_inode_updates` * `gpfs_num_inode_updates`
* `gpfs_opens_rate` (if `send_derived_values == true`)
* `gpfs_closes_rate` (if `send_derived_values == true`)
* `gpfs_reads_rate` (if `send_derived_values == true`)
* `gpfs_writes_rate` (if `send_derived_values == true`)
* `gpfs_readdirs_rate` (if `send_derived_values == true`)
* `gpfs_inode_updates_rate` (if `send_derived_values == true`)
* `gpfs_bytes_total = gpfs_bytes_read + gpfs_bytes_written` (if `send_total_values == true`) * `gpfs_bytes_total = gpfs_bytes_read + gpfs_bytes_written` (if `send_total_values == true`)
* `gpfs_iops = gpfs_num_reads + gpfs_num_writes` (if `send_total_values == true`) * `gpfs_iops = gpfs_num_reads + gpfs_num_writes` (if `send_total_values == true`)
* `gpfs_iops_rate` (if `send_total_values == true` and `send_derived_values == true`)
* `gpfs_metaops = gpfs_num_inode_updates + gpfs_num_closes + gpfs_num_opens + gpfs_num_readdirs` (if `send_total_values == true`) * `gpfs_metaops = gpfs_num_inode_updates + gpfs_num_closes + gpfs_num_opens + gpfs_num_readdirs` (if `send_total_values == true`)
* `gpfs_metaops_rate` (if `send_total_values == true` and `send_derived_values == true`)
* `gpfs_bw_read` (if `send_bandwidths == true`) * `gpfs_bw_read` (if `send_bandwidths == true`)
* `gpfs_bw_write` (if `send_bandwidths == true`) * `gpfs_bw_write` (if `send_bandwidths == true`)
* `gpfs_bw_total` (if `send_bandwidths == true` and `send_total_values == true`)
The collector adds a `filesystem` tag to all metrics The collector adds a `filesystem` tag to all metrics

View File

@@ -1,3 +1,10 @@
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved. This file is part of cc-lib.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
// additional authors:
// Holger Obermaier (NHR@KIT)
package collectors package collectors
import ( import (

View File

@@ -1,3 +1,10 @@
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved. This file is part of cc-lib.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
// additional authors:
// Holger Obermaier (NHR@KIT)
package collectors package collectors
import ( import (
@@ -13,16 +20,15 @@ import (
lp "github.com/ClusterCockpit/cc-lib/ccMessage" lp "github.com/ClusterCockpit/cc-lib/ccMessage"
) )
// Konstante für den Pfad zu /proc/diskstats
const IOSTATFILE = `/proc/diskstats` const IOSTATFILE = `/proc/diskstats`
type IOstatCollectorConfig struct { type IOstatCollectorConfig struct {
ExcludeMetrics []string `json:"exclude_metrics,omitempty"` ExcludeMetrics []string `json:"exclude_metrics,omitempty"`
// Neues Feld zum Ausschließen von Devices per JSON-Konfiguration
ExcludeDevices []string `json:"exclude_devices,omitempty"` ExcludeDevices []string `json:"exclude_devices,omitempty"`
} }
type IOstatCollectorEntry struct { type IOstatCollectorEntry struct {
currentValues map[string]int64
lastValues map[string]int64 lastValues map[string]int64
tags map[string]string tags map[string]string
} }
@@ -98,16 +104,27 @@ func (m *IOstatCollector) Init(config json.RawMessage) error {
if _, skip := stringArrayContains(m.config.ExcludeDevices, device); skip { if _, skip := stringArrayContains(m.config.ExcludeDevices, device); skip {
continue continue
} }
values := make(map[string]int64) currentValues := make(map[string]int64)
lastValues := make(map[string]int64)
for m := range m.matches { for m := range m.matches {
values[m] = 0 currentValues[m] = 0
lastValues[m] = 0
}
for name, idx := range m.matches {
if idx < len(linefields) {
if value, err := strconv.ParseInt(linefields[idx], 0, 64); err == nil {
currentValues[name] = value
lastValues[name] = value // Set last to current for first read
}
}
} }
m.devices[device] = IOstatCollectorEntry{ m.devices[device] = IOstatCollectorEntry{
tags: map[string]string{ tags: map[string]string{
"device": device, "device": device,
"type": "node", "type": "node",
}, },
lastValues: values, currentValues: currentValues,
lastValues: lastValues,
} }
} }
m.init = true m.init = true
@@ -146,18 +163,22 @@ func (m *IOstatCollector) Read(interval time.Duration, output chan lp.CCMessage)
if _, ok := m.devices[device]; !ok { if _, ok := m.devices[device]; !ok {
continue continue
} }
// Update current and last values
entry := m.devices[device] entry := m.devices[device]
for name, idx := range m.matches { for name, idx := range m.matches {
if idx < len(linefields) { if idx < len(linefields) {
x, err := strconv.ParseInt(linefields[idx], 0, 64) x, err := strconv.ParseInt(linefields[idx], 0, 64)
if err == nil { if err == nil {
diff := x - entry.lastValues[name] // Calculate difference using previous current and new value
y, err := lp.NewMessage(name, entry.tags, m.meta, map[string]interface{}{"value": int(diff)}, time.Now()) diff := x - entry.currentValues[name]
y, err := lp.NewMetric(name, entry.tags, m.meta, int(diff), time.Now())
if err == nil { if err == nil {
output <- y output <- y
} }
// Update last to previous current, and current to new value
entry.lastValues[name] = entry.currentValues[name]
entry.currentValues[name] = x
} }
entry.lastValues[name] = x
} }
} }
m.devices[device] = entry m.devices[device] = entry

View File

@@ -1,3 +1,10 @@
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved. This file is part of cc-lib.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
// additional authors:
// Holger Obermaier (NHR@KIT)
package collectors package collectors
import ( import (

View File

@@ -1,3 +1,10 @@
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved. This file is part of cc-lib.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
// additional authors:
// Holger Obermaier (NHR@KIT)
package collectors package collectors
/* /*

View File

@@ -1,3 +1,10 @@
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved. This file is part of cc-lib.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
// additional authors:
// Holger Obermaier (NHR@KIT)
package collectors package collectors
import ( import (

View File

@@ -1,3 +1,10 @@
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved. This file is part of cc-lib.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
// additional authors:
// Holger Obermaier (NHR@KIT)
package collectors package collectors
import ( import (

View File

@@ -1,3 +1,10 @@
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved. This file is part of cc-lib.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
// additional authors:
// Holger Obermaier (NHR@KIT)
package collectors package collectors
import ( import (

View File

@@ -1,3 +1,10 @@
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved. This file is part of cc-lib.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
// additional authors:
// Holger Obermaier (NHR@KIT)
package collectors package collectors
import ( import (

View File

@@ -1,3 +1,10 @@
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved. This file is part of cc-lib.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
// additional authors:
// Holger Obermaier (NHR@KIT)
package collectors package collectors
import ( import (

View File

@@ -1,3 +1,10 @@
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved. This file is part of cc-lib.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
// additional authors:
// Holger Obermaier (NHR@KIT)
package collectors package collectors
import ( import (

View File

@@ -1,3 +1,10 @@
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved. This file is part of cc-lib.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
// additional authors:
// Holger Obermaier (NHR@KIT)
package collectors package collectors
import ( import (

View File

@@ -78,6 +78,14 @@ func (m *NUMAStatsCollector) Init(config json.RawMessage) error {
"group": "NUMA", "group": "NUMA",
} }
m.config.SendAbsoluteValues = true
if len(config) > 0 {
err := json.Unmarshal(config, &m.config)
if err != nil {
return fmt.Errorf("unable to unmarshal numastat configuration: %s", err.Error())
}
}
// Loop for all NUMA node directories // Loop for all NUMA node directories
base := "/sys/devices/system/node/node" base := "/sys/devices/system/node/node"
globPattern := base + "[0-9]*" globPattern := base + "[0-9]*"
@@ -95,7 +103,10 @@ func (m *NUMAStatsCollector) Init(config json.RawMessage) error {
m.topology = append(m.topology, m.topology = append(m.topology,
NUMAStatsCollectorTopolgy{ NUMAStatsCollectorTopolgy{
file: file, file: file,
tagSet: map[string]string{"memoryDomain": node}, tagSet: map[string]string{
"type": "memoryDomain",
"type-id": node,
},
previousValues: make(map[string]int64), previousValues: make(map[string]int64),
}) })
} }
@@ -145,11 +156,11 @@ func (m *NUMAStatsCollector) Read(interval time.Duration, output chan lp.CCMessa
} }
if m.config.SendAbsoluteValues { if m.config.SendAbsoluteValues {
msg, err := lp.NewMessage( msg, err := lp.NewMetric(
"numastats_"+key, "numastats_"+key,
t.tagSet, t.tagSet,
m.meta, m.meta,
map[string]interface{}{"value": value}, value,
now, now,
) )
if err == nil { if err == nil {
@@ -161,11 +172,11 @@ func (m *NUMAStatsCollector) Read(interval time.Duration, output chan lp.CCMessa
prev, ok := t.previousValues[key] prev, ok := t.previousValues[key]
if ok { if ok {
rate := float64(value-prev) / timeDiff rate := float64(value-prev) / timeDiff
msg, err := lp.NewMessage( msg, err := lp.NewMetric(
"numastats_"+key+"_rate", "numastats_"+key+"_rate",
t.tagSet, t.tagSet,
m.meta, m.meta,
map[string]interface{}{"value": rate}, rate,
now, now,
) )
if err == nil { if err == nil {

View File

@@ -1,3 +1,10 @@
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved. This file is part of cc-lib.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
// additional authors:
// Holger Obermaier (NHR@KIT)
package collectors package collectors
import ( import (
@@ -31,6 +38,8 @@ type NvidiaCollectorDevice struct {
excludeMetrics map[string]bool excludeMetrics map[string]bool
tags map[string]string tags map[string]string
meta map[string]string meta map[string]string
lastEnergyReading uint64
lastEnergyTimestamp time.Time
} }
type NvidiaCollector struct { type NvidiaCollector struct {
@@ -149,6 +158,8 @@ func (m *NvidiaCollector) Init(config json.RawMessage) error {
// Add device handle // Add device handle
g.device = device g.device = device
g.lastEnergyReading = 0
g.lastEnergyTimestamp = time.Now()
// Add tags // Add tags
g.tags = map[string]string{ g.tags = map[string]string{
@@ -206,7 +217,7 @@ func (m *NvidiaCollector) Init(config json.RawMessage) error {
return nil return nil
} }
func readMemoryInfo(device NvidiaCollectorDevice, output chan lp.CCMessage) error { func readMemoryInfo(device *NvidiaCollectorDevice, output chan lp.CCMessage) error {
if !device.excludeMetrics["nv_fb_mem_total"] || !device.excludeMetrics["nv_fb_mem_used"] || !device.excludeMetrics["nv_fb_mem_reserved"] { if !device.excludeMetrics["nv_fb_mem_total"] || !device.excludeMetrics["nv_fb_mem_used"] || !device.excludeMetrics["nv_fb_mem_reserved"] {
var total uint64 var total uint64
var used uint64 var used uint64
@@ -250,7 +261,7 @@ func readMemoryInfo(device NvidiaCollectorDevice, output chan lp.CCMessage) erro
return nil return nil
} }
func readBarMemoryInfo(device NvidiaCollectorDevice, output chan lp.CCMessage) error { func readBarMemoryInfo(device *NvidiaCollectorDevice, output chan lp.CCMessage) error {
if !device.excludeMetrics["nv_bar1_mem_total"] || !device.excludeMetrics["nv_bar1_mem_used"] { if !device.excludeMetrics["nv_bar1_mem_total"] || !device.excludeMetrics["nv_bar1_mem_used"] {
meminfo, ret := nvml.DeviceGetBAR1MemoryInfo(device.device) meminfo, ret := nvml.DeviceGetBAR1MemoryInfo(device.device)
if ret != nvml.SUCCESS { if ret != nvml.SUCCESS {
@@ -277,7 +288,7 @@ func readBarMemoryInfo(device NvidiaCollectorDevice, output chan lp.CCMessage) e
return nil return nil
} }
func readUtilization(device NvidiaCollectorDevice, output chan lp.CCMessage) error { func readUtilization(device *NvidiaCollectorDevice, output chan lp.CCMessage) error {
isMig, ret := nvml.DeviceIsMigDeviceHandle(device.device) isMig, ret := nvml.DeviceIsMigDeviceHandle(device.device)
if ret != nvml.SUCCESS { if ret != nvml.SUCCESS {
err := errors.New(nvml.ErrorString(ret)) err := errors.New(nvml.ErrorString(ret))
@@ -319,7 +330,7 @@ func readUtilization(device NvidiaCollectorDevice, output chan lp.CCMessage) err
return nil return nil
} }
func readTemp(device NvidiaCollectorDevice, output chan lp.CCMessage) error { func readTemp(device *NvidiaCollectorDevice, output chan lp.CCMessage) error {
if !device.excludeMetrics["nv_temp"] { if !device.excludeMetrics["nv_temp"] {
// Retrieves the current temperature readings for the device, in degrees C. // Retrieves the current temperature readings for the device, in degrees C.
// //
@@ -338,7 +349,7 @@ func readTemp(device NvidiaCollectorDevice, output chan lp.CCMessage) error {
return nil return nil
} }
func readFan(device NvidiaCollectorDevice, output chan lp.CCMessage) error { func readFan(device *NvidiaCollectorDevice, output chan lp.CCMessage) error {
if !device.excludeMetrics["nv_fan"] { if !device.excludeMetrics["nv_fan"] {
// Retrieves the intended operating speed of the device's fan. // Retrieves the intended operating speed of the device's fan.
// //
@@ -361,7 +372,7 @@ func readFan(device NvidiaCollectorDevice, output chan lp.CCMessage) error {
return nil return nil
} }
// func readFans(device NvidiaCollectorDevice, output chan lp.CCMessage) error { // func readFans(device *NvidiaCollectorDevice, output chan lp.CCMessage) error {
// if !device.excludeMetrics["nv_fan"] { // if !device.excludeMetrics["nv_fan"] {
// numFans, ret := nvml.DeviceGetNumFans(device.device) // numFans, ret := nvml.DeviceGetNumFans(device.device)
// if ret == nvml.SUCCESS { // if ret == nvml.SUCCESS {
@@ -382,7 +393,7 @@ func readFan(device NvidiaCollectorDevice, output chan lp.CCMessage) error {
// return nil // return nil
// } // }
func readEccMode(device NvidiaCollectorDevice, output chan lp.CCMessage) error { func readEccMode(device *NvidiaCollectorDevice, output chan lp.CCMessage) error {
if !device.excludeMetrics["nv_ecc_mode"] { if !device.excludeMetrics["nv_ecc_mode"] {
// Retrieves the current and pending ECC modes for the device. // Retrieves the current and pending ECC modes for the device.
// //
@@ -416,7 +427,7 @@ func readEccMode(device NvidiaCollectorDevice, output chan lp.CCMessage) error {
return nil return nil
} }
func readPerfState(device NvidiaCollectorDevice, output chan lp.CCMessage) error { func readPerfState(device *NvidiaCollectorDevice, output chan lp.CCMessage) error {
if !device.excludeMetrics["nv_perf_state"] { if !device.excludeMetrics["nv_perf_state"] {
// Retrieves the current performance state for the device. // Retrieves the current performance state for the device.
// //
@@ -436,13 +447,16 @@ func readPerfState(device NvidiaCollectorDevice, output chan lp.CCMessage) error
return nil return nil
} }
func readPowerUsage(device NvidiaCollectorDevice, output chan lp.CCMessage) error { func readPowerUsage(device *NvidiaCollectorDevice, output chan lp.CCMessage) error {
if !device.excludeMetrics["nv_power_usage"] { if !device.excludeMetrics["nv_power_usage"] {
// Retrieves power usage for this GPU in milliwatts and its associated circuitry (e.g. memory) // Retrieves power usage for this GPU in milliwatts and its associated circuitry (e.g. memory)
// //
// On Fermi and Kepler GPUs the reading is accurate to within +/- 5% of current power draw. // On Fermi and Kepler GPUs the reading is accurate to within +/- 5% of current power draw.
// On Ampere (except GA100) or newer GPUs, the API returns power averaged over 1 sec interval.
// On GA100 and older architectures, instantaneous power is returned.
// //
// It is only available if power management mode is supported // It is only available if power management mode is supported.
mode, ret := nvml.DeviceGetPowerManagementMode(device.device) mode, ret := nvml.DeviceGetPowerManagementMode(device.device)
if ret != nvml.SUCCESS { if ret != nvml.SUCCESS {
return nil return nil
@@ -461,7 +475,54 @@ func readPowerUsage(device NvidiaCollectorDevice, output chan lp.CCMessage) erro
return nil return nil
} }
func readClocks(device NvidiaCollectorDevice, output chan lp.CCMessage) error { func readEnergyConsumption(device *NvidiaCollectorDevice, output chan lp.CCMessage) error {
// Retrieves total energy consumption for this GPU in millijoules (mJ) since the driver was last reloaded
// For Volta or newer fully supported devices.
if (!device.excludeMetrics["nv_energy"]) && (!device.excludeMetrics["nv_energy_abs"]) && (!device.excludeMetrics["nv_average_power"]) {
now := time.Now()
mode, ret := nvml.DeviceGetPowerManagementMode(device.device)
if ret != nvml.SUCCESS {
return nil
}
if mode == nvml.FEATURE_ENABLED {
energy, ret := nvml.DeviceGetTotalEnergyConsumption(device.device)
if ret == nvml.SUCCESS {
if device.lastEnergyReading != 0 {
if !device.excludeMetrics["nv_energy"] {
y, err := lp.NewMetric("nv_energy", device.tags, device.meta, (energy-device.lastEnergyReading)/1000, now)
if err == nil {
y.AddMeta("unit", "Joules")
output <- y
}
}
if !device.excludeMetrics["nv_average_power"] {
energyDiff := (energy - device.lastEnergyReading) / 1000
timeDiff := now.Sub(device.lastEnergyTimestamp)
y, err := lp.NewMetric("nv_average_power", device.tags, device.meta, energyDiff/uint64(timeDiff.Seconds()), now)
if err == nil {
y.AddMeta("unit", "watts")
output <- y
}
}
}
if !device.excludeMetrics["nv_energy_abs"] {
y, err := lp.NewMetric("nv_energy_abs", device.tags, device.meta, energy/1000, now)
if err == nil {
y.AddMeta("unit", "Joules")
output <- y
}
}
device.lastEnergyReading = energy
device.lastEnergyTimestamp = time.Now()
}
}
}
return nil
}
func readClocks(device *NvidiaCollectorDevice, output chan lp.CCMessage) error {
// Retrieves the current clock speeds for the device. // Retrieves the current clock speeds for the device.
// //
// Available clock information: // Available clock information:
@@ -513,7 +574,7 @@ func readClocks(device NvidiaCollectorDevice, output chan lp.CCMessage) error {
return nil return nil
} }
func readMaxClocks(device NvidiaCollectorDevice, output chan lp.CCMessage) error { func readMaxClocks(device *NvidiaCollectorDevice, output chan lp.CCMessage) error {
// Retrieves the maximum clock speeds for the device. // Retrieves the maximum clock speeds for the device.
// //
// Available clock information: // Available clock information:
@@ -528,7 +589,7 @@ func readMaxClocks(device NvidiaCollectorDevice, output chan lp.CCMessage) error
if !device.excludeMetrics["nv_max_graphics_clock"] { if !device.excludeMetrics["nv_max_graphics_clock"] {
max_gclk, ret := nvml.DeviceGetMaxClockInfo(device.device, nvml.CLOCK_GRAPHICS) max_gclk, ret := nvml.DeviceGetMaxClockInfo(device.device, nvml.CLOCK_GRAPHICS)
if ret == nvml.SUCCESS { if ret == nvml.SUCCESS {
y, err := lp.NewMessage("nv_max_graphics_clock", device.tags, device.meta, map[string]interface{}{"value": float64(max_gclk)}, time.Now()) y, err := lp.NewMetric("nv_max_graphics_clock", device.tags, device.meta, float64(max_gclk), time.Now())
if err == nil { if err == nil {
y.AddMeta("unit", "MHz") y.AddMeta("unit", "MHz")
output <- y output <- y
@@ -537,9 +598,9 @@ func readMaxClocks(device NvidiaCollectorDevice, output chan lp.CCMessage) error
} }
if !device.excludeMetrics["nv_max_sm_clock"] { if !device.excludeMetrics["nv_max_sm_clock"] {
maxSmClock, ret := nvml.DeviceGetClockInfo(device.device, nvml.CLOCK_SM) maxSmClock, ret := nvml.DeviceGetMaxClockInfo(device.device, nvml.CLOCK_SM)
if ret == nvml.SUCCESS { if ret == nvml.SUCCESS {
y, err := lp.NewMessage("nv_max_sm_clock", device.tags, device.meta, map[string]interface{}{"value": float64(maxSmClock)}, time.Now()) y, err := lp.NewMetric("nv_max_sm_clock", device.tags, device.meta, float64(maxSmClock), time.Now())
if err == nil { if err == nil {
y.AddMeta("unit", "MHz") y.AddMeta("unit", "MHz")
output <- y output <- y
@@ -548,9 +609,9 @@ func readMaxClocks(device NvidiaCollectorDevice, output chan lp.CCMessage) error
} }
if !device.excludeMetrics["nv_max_mem_clock"] { if !device.excludeMetrics["nv_max_mem_clock"] {
maxMemClock, ret := nvml.DeviceGetClockInfo(device.device, nvml.CLOCK_MEM) maxMemClock, ret := nvml.DeviceGetMaxClockInfo(device.device, nvml.CLOCK_MEM)
if ret == nvml.SUCCESS { if ret == nvml.SUCCESS {
y, err := lp.NewMessage("nv_max_mem_clock", device.tags, device.meta, map[string]interface{}{"value": float64(maxMemClock)}, time.Now()) y, err := lp.NewMetric("nv_max_mem_clock", device.tags, device.meta, float64(maxMemClock), time.Now())
if err == nil { if err == nil {
y.AddMeta("unit", "MHz") y.AddMeta("unit", "MHz")
output <- y output <- y
@@ -559,9 +620,9 @@ func readMaxClocks(device NvidiaCollectorDevice, output chan lp.CCMessage) error
} }
if !device.excludeMetrics["nv_max_video_clock"] { if !device.excludeMetrics["nv_max_video_clock"] {
maxMemClock, ret := nvml.DeviceGetClockInfo(device.device, nvml.CLOCK_VIDEO) maxVideoClock, ret := nvml.DeviceGetMaxClockInfo(device.device, nvml.CLOCK_VIDEO)
if ret == nvml.SUCCESS { if ret == nvml.SUCCESS {
y, err := lp.NewMessage("nv_max_video_clock", device.tags, device.meta, map[string]interface{}{"value": float64(maxMemClock)}, time.Now()) y, err := lp.NewMetric("nv_max_video_clock", device.tags, device.meta, float64(maxVideoClock), time.Now())
if err == nil { if err == nil {
y.AddMeta("unit", "MHz") y.AddMeta("unit", "MHz")
output <- y output <- y
@@ -571,7 +632,7 @@ func readMaxClocks(device NvidiaCollectorDevice, output chan lp.CCMessage) error
return nil return nil
} }
func readEccErrors(device NvidiaCollectorDevice, output chan lp.CCMessage) error { func readEccErrors(device *NvidiaCollectorDevice, output chan lp.CCMessage) error {
if !device.excludeMetrics["nv_ecc_uncorrected_error"] { if !device.excludeMetrics["nv_ecc_uncorrected_error"] {
// Retrieves the total ECC error counts for the device. // Retrieves the total ECC error counts for the device.
// //
@@ -602,7 +663,7 @@ func readEccErrors(device NvidiaCollectorDevice, output chan lp.CCMessage) error
return nil return nil
} }
func readPowerLimit(device NvidiaCollectorDevice, output chan lp.CCMessage) error { func readPowerLimit(device *NvidiaCollectorDevice, output chan lp.CCMessage) error {
if !device.excludeMetrics["nv_power_max_limit"] { if !device.excludeMetrics["nv_power_max_limit"] {
// Retrieves the power management limit associated with this device. // Retrieves the power management limit associated with this device.
// //
@@ -622,7 +683,7 @@ func readPowerLimit(device NvidiaCollectorDevice, output chan lp.CCMessage) erro
return nil return nil
} }
func readEncUtilization(device NvidiaCollectorDevice, output chan lp.CCMessage) error { func readEncUtilization(device *NvidiaCollectorDevice, output chan lp.CCMessage) error {
isMig, ret := nvml.DeviceIsMigDeviceHandle(device.device) isMig, ret := nvml.DeviceIsMigDeviceHandle(device.device)
if ret != nvml.SUCCESS { if ret != nvml.SUCCESS {
err := errors.New(nvml.ErrorString(ret)) err := errors.New(nvml.ErrorString(ret))
@@ -649,7 +710,7 @@ func readEncUtilization(device NvidiaCollectorDevice, output chan lp.CCMessage)
return nil return nil
} }
func readDecUtilization(device NvidiaCollectorDevice, output chan lp.CCMessage) error { func readDecUtilization(device *NvidiaCollectorDevice, output chan lp.CCMessage) error {
isMig, ret := nvml.DeviceIsMigDeviceHandle(device.device) isMig, ret := nvml.DeviceIsMigDeviceHandle(device.device)
if ret != nvml.SUCCESS { if ret != nvml.SUCCESS {
err := errors.New(nvml.ErrorString(ret)) err := errors.New(nvml.ErrorString(ret))
@@ -676,7 +737,7 @@ func readDecUtilization(device NvidiaCollectorDevice, output chan lp.CCMessage)
return nil return nil
} }
func readRemappedRows(device NvidiaCollectorDevice, output chan lp.CCMessage) error { func readRemappedRows(device *NvidiaCollectorDevice, output chan lp.CCMessage) error {
if !device.excludeMetrics["nv_remapped_rows_corrected"] || if !device.excludeMetrics["nv_remapped_rows_corrected"] ||
!device.excludeMetrics["nv_remapped_rows_uncorrected"] || !device.excludeMetrics["nv_remapped_rows_uncorrected"] ||
!device.excludeMetrics["nv_remapped_rows_pending"] || !device.excludeMetrics["nv_remapped_rows_pending"] ||
@@ -729,7 +790,7 @@ func readRemappedRows(device NvidiaCollectorDevice, output chan lp.CCMessage) er
return nil return nil
} }
func readProcessCounts(device NvidiaCollectorDevice, output chan lp.CCMessage) error { func readProcessCounts(device *NvidiaCollectorDevice, output chan lp.CCMessage) error {
if !device.excludeMetrics["nv_compute_processes"] { if !device.excludeMetrics["nv_compute_processes"] {
// Get information about processes with a compute context on a device // Get information about processes with a compute context on a device
// //
@@ -821,7 +882,7 @@ func readProcessCounts(device NvidiaCollectorDevice, output chan lp.CCMessage) e
return nil return nil
} }
func readViolationStats(device NvidiaCollectorDevice, output chan lp.CCMessage) error { func readViolationStats(device *NvidiaCollectorDevice, output chan lp.CCMessage) error {
var violTime nvml.ViolationTime var violTime nvml.ViolationTime
var ret nvml.Return var ret nvml.Return
@@ -935,7 +996,7 @@ func readViolationStats(device NvidiaCollectorDevice, output chan lp.CCMessage)
return nil return nil
} }
func readNVLinkStats(device NvidiaCollectorDevice, output chan lp.CCMessage) error { func readNVLinkStats(device *NvidiaCollectorDevice, output chan lp.CCMessage) error {
// Retrieves the specified error counter value // Retrieves the specified error counter value
// Please refer to \a nvmlNvLinkErrorCounter_t for error counters that are available // Please refer to \a nvmlNvLinkErrorCounter_t for error counters that are available
// //
@@ -1070,7 +1131,7 @@ func (m *NvidiaCollector) Read(interval time.Duration, output chan lp.CCMessage)
return return
} }
readAll := func(device NvidiaCollectorDevice, output chan lp.CCMessage) { readAll := func(device *NvidiaCollectorDevice, output chan lp.CCMessage) {
name, ret := nvml.DeviceGetName(device.device) name, ret := nvml.DeviceGetName(device.device)
if ret != nvml.SUCCESS { if ret != nvml.SUCCESS {
name = "NoName" name = "NoName"
@@ -1110,6 +1171,11 @@ func (m *NvidiaCollector) Read(interval time.Duration, output chan lp.CCMessage)
cclog.ComponentDebug(m.name, "readPowerUsage for device", name, "failed") cclog.ComponentDebug(m.name, "readPowerUsage for device", name, "failed")
} }
err = readEnergyConsumption(device, output)
if err != nil {
cclog.ComponentDebug(m.name, "readEnergyConsumption for device", name, "failed")
}
err = readClocks(device, output) err = readClocks(device, output)
if err != nil { if err != nil {
cclog.ComponentDebug(m.name, "readClocks for device", name, "failed") cclog.ComponentDebug(m.name, "readClocks for device", name, "failed")
@@ -1169,7 +1235,7 @@ func (m *NvidiaCollector) Read(interval time.Duration, output chan lp.CCMessage)
// Actual read loop over all attached Nvidia GPUs // Actual read loop over all attached Nvidia GPUs
for i := 0; i < m.num_gpus; i++ { for i := 0; i < m.num_gpus; i++ {
readAll(m.gpus[i], output) readAll(&m.gpus[i], output)
// Iterate over all MIG devices if any // Iterate over all MIG devices if any
if m.config.ProcessMigDevices { if m.config.ProcessMigDevices {
@@ -1243,7 +1309,7 @@ func (m *NvidiaCollector) Read(interval time.Duration, output chan lp.CCMessage)
} }
} }
readAll(migDevice, output) readAll(&migDevice, output)
} }
} }
} }

View File

@@ -82,5 +82,8 @@ Metrics:
* `nv_nvlink_ecc_errors` * `nv_nvlink_ecc_errors`
* `nv_nvlink_replay_errors` * `nv_nvlink_replay_errors`
* `nv_nvlink_recovery_errors` * `nv_nvlink_recovery_errors`
* `nv_energy`
* `nv_energy_abs`
* `nv_average_power`
Some metrics add the additional sub type tag (`stype`) like the `nv_nvlink_*` metrics set `stype=nvlink,stype-id=<link_number>`. Some metrics add the additional sub type tag (`stype`) like the `nv_nvlink_*` metrics set `stype=nvlink,stype-id=<link_number>`.

View File

@@ -1,3 +1,10 @@
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved. This file is part of cc-lib.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
// additional authors:
// Holger Obermaier (NHR@KIT)
package collectors package collectors
import ( import (

View File

@@ -1,3 +1,10 @@
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved. This file is part of cc-lib.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
// additional authors:
// Holger Obermaier (NHR@KIT)
package collectors package collectors
import ( import (

View File

@@ -1,3 +1,10 @@
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved. This file is part of cc-lib.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
// additional authors:
// Holger Obermaier (NHR@KIT)
package collectors package collectors
import ( import (

View File

@@ -1,3 +1,10 @@
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved. This file is part of cc-lib.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
// additional authors:
// Holger Obermaier (NHR@KIT)
package collectors package collectors
import ( import (

View File

@@ -1,3 +1,10 @@
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved. This file is part of cc-lib.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
// additional authors:
// Holger Obermaier (NHR@KIT)
package collectors package collectors
import ( import (

View File

@@ -1,3 +1,10 @@
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved. This file is part of cc-lib.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
// additional authors:
// Holger Obermaier (NHR@KIT)
package collectors package collectors
import ( import (

View File

@@ -0,0 +1,349 @@
package collectors
import (
"encoding/json"
"fmt"
"os"
"os/exec"
"os/user"
"path/filepath"
"strconv"
"strings"
"time"
cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
lp "github.com/ClusterCockpit/cc-lib/ccMessage"
)
// SlurmJobData holds the values ReadJobData extracts from one job's cgroup
// directory. A field whose source file cannot be read or parsed keeps its
// initial value (0, or an empty slice for CpuSet).
type SlurmJobData struct {
// MemoryUsage is the number parsed from the job's memory.current file.
MemoryUsage float64
// MaxMemoryUsage is the number parsed from the job's memory.peak file.
MaxMemoryUsage float64
// LimitMemoryUsage is the number parsed from the job's memory.max file.
LimitMemoryUsage float64
// CpuUsageUser is user_usec as a percentage of usage_usec (both from cpu.stat).
CpuUsageUser float64
// CpuUsageSys is system_usec as a percentage of usage_usec (both from cpu.stat).
CpuUsageSys float64
// CpuSet lists the CPU IDs parsed from the job's cpuset.cpus file.
CpuSet []int
}
// SlurmCgroupsConfig is the JSON configuration accepted by
// SlurmCgroupCollector.Init.
type SlurmCgroupsConfig struct {
// CgroupBase, when non-empty, replaces defaultCgroupBase as the directory
// that is searched for job_* sub-directories.
CgroupBase string `json:"cgroup_base"`
// ExcludeMetrics names the metrics that must not be sent.
ExcludeMetrics []string `json:"exclude_metrics,omitempty"`
// UseSudo makes the collector read cgroup files through "sudo cat" and
// skips the root-user check performed in Init.
UseSudo bool `json:"use_sudo,omitempty"`
}
// SlurmCgroupCollector reads per-job values from job_* cgroup directories and
// reports them as per-hwthread metrics.
type SlurmCgroupCollector struct {
// metricCollector is the embedded base type (declared elsewhere in the
// package); this file uses its name, parallel and init fields and setup().
metricCollector
// config is the configuration decoded in Init.
config SlurmCgroupsConfig
// meta is the meta information passed to every message created in Read.
meta map[string]string
// tags is set to {"type": "hwthread"} in Init.
tags map[string]string
// allCPUs holds the online CPU IDs determined once in Init.
allCPUs []int
// cpuUsed marks the CPUs that appeared in some job's cpuset during the
// current Read call; it is emptied at the start of every Read.
cpuUsed map[int]bool
// cgroupBase is the directory searched for job_* sub-directories.
cgroupBase string
// excludeMetrics is the set form of config.ExcludeMetrics.
excludeMetrics map[string]struct{}
// useSudo mirrors config.UseSudo.
useSudo bool
}
// defaultCgroupBase is the cgroup directory used when the configuration does
// not provide a non-empty cgroup_base.
const defaultCgroupBase = "/sys/fs/cgroup/system.slice/slurmstepd.scope"
// ParseCPUs expands a comma-separated CPU list such as "0-3,8,10-11" into the
// individual CPU IDs it names, in the order they are listed.
//
// Every entry is either a single ID or an inclusive "start-end" range;
// whitespace around each number is ignored. An empty string yields an empty
// result without error. A range whose start is greater than its end
// contributes no IDs. Any entry that is not a number or a two-part range
// results in an error and a nil slice.
func ParseCPUs(cpuset string) ([]int, error) {
	var cpus []int
	if cpuset == "" {
		return cpus, nil
	}

	for _, entry := range strings.Split(cpuset, ",") {
		// The number of dash-separated parts decides the entry's form:
		// one part is a single ID, two parts are a range, more is malformed.
		bounds := strings.Split(entry, "-")
		switch len(bounds) {
		case 1:
			id, err := strconv.Atoi(strings.TrimSpace(entry))
			if err != nil {
				return nil, fmt.Errorf("invalid CPU ID: %s", entry)
			}
			cpus = append(cpus, id)
		case 2:
			first, err := strconv.Atoi(strings.TrimSpace(bounds[0]))
			if err != nil {
				return nil, fmt.Errorf("invalid CPU range start: %s", bounds[0])
			}
			last, err := strconv.Atoi(strings.TrimSpace(bounds[1]))
			if err != nil {
				return nil, fmt.Errorf("invalid CPU range end: %s", bounds[1])
			}
			for id := first; id <= last; id++ {
				cpus = append(cpus, id)
			}
		default:
			return nil, fmt.Errorf("invalid CPU range: %s", entry)
		}
	}
	return cpus, nil
}
// GetAllCPUs returns the CPU IDs listed in /sys/devices/system/cpu/online.
// The file content is trimmed and expanded with ParseCPUs; a read failure or
// a parse failure is returned as an error.
func GetAllCPUs() ([]int, error) {
	raw, readErr := os.ReadFile("/sys/devices/system/cpu/online")
	if readErr != nil {
		return nil, fmt.Errorf("failed to read /sys/devices/system/cpu/online: %v", readErr)
	}
	online := strings.TrimSpace(string(raw))
	return ParseCPUs(online)
}
// isExcluded reports whether metric is a member of the collector's exclusion
// set.
func (m *SlurmCgroupCollector) isExcluded(metric string) bool {
	if _, excluded := m.excludeMetrics[metric]; excluded {
		return true
	}
	return false
}
// readFile returns the content of path. With useSudo disabled the file is
// read directly; otherwise the standard output of "sudo cat <path>" is
// returned.
func (m *SlurmCgroupCollector) readFile(path string) ([]byte, error) {
	if !m.useSudo {
		return os.ReadFile(path)
	}
	return exec.Command("sudo", "cat", path).Output()
}
// Init prepares the collector for use.
//
// It sets the collector name, meta information and defaults, decodes the
// optional JSON config (exclusion list, cgroup base directory, sudo switch),
// verifies that the process runs as root unless use_sudo is enabled, and
// records the list of online CPUs. An error is returned when the config
// cannot be decoded, the current user cannot be determined, the process is
// not root while sudo is disabled, or the online CPU list cannot be read.
func (m *SlurmCgroupCollector) Init(config json.RawMessage) error {
	m.name = "SlurmCgroupCollector"
	m.setup()
	m.parallel = true
	m.meta = map[string]string{"source": m.name, "group": "SLURM"}
	m.tags = map[string]string{"type": "hwthread"}
	m.cpuUsed = make(map[int]bool)
	m.cgroupBase = defaultCgroupBase

	if len(config) > 0 {
		if err := json.Unmarshal(config, &m.config); err != nil {
			cclog.ComponentError(m.name, "Error reading config:", err.Error())
			return err
		}
		// Turn the exclusion list into a set for constant-time lookups.
		m.excludeMetrics = make(map[string]struct{})
		for _, name := range m.config.ExcludeMetrics {
			m.excludeMetrics[name] = struct{}{}
		}
		if base := m.config.CgroupBase; base != "" {
			m.cgroupBase = base
		}
	}

	m.useSudo = m.config.UseSudo
	if !m.useSudo {
		// Without sudo the cgroup files are read directly, so the process
		// itself has to be root.
		current, err := user.Current()
		if err != nil {
			cclog.ComponentError(m.name, "Failed to get current user:", err.Error())
			return err
		}
		if current.Uid != "0" {
			cclog.ComponentError(m.name, "Reading cgroup files requires root privileges (or enable use_sudo in config)")
			return fmt.Errorf("not root")
		}
	}

	var err error
	if m.allCPUs, err = GetAllCPUs(); err != nil {
		cclog.ComponentError(m.name, "Error reading online CPUs:", err.Error())
		return err
	}

	m.init = true
	return nil
}
// ReadJobData collects the cgroup values of the job directory jobdir, which
// is interpreted relative to the collector's cgroup base directory.
//
// It reads memory.current, memory.peak, memory.max, cpu.stat and cpuset.cpus
// in that order. A file that cannot be read or parsed is skipped silently and
// the corresponding field keeps its initial value. The returned error is
// always nil.
func (m *SlurmCgroupCollector) ReadJobData(jobdir string) (SlurmJobData, error) {
	jobdata := SlurmJobData{CpuSet: []int{}}

	pathOf := func(file string) string {
		return filepath.Join(m.cgroupBase, jobdir, file)
	}

	// readNumber stores the single number contained in file into dst and
	// leaves dst untouched when reading or parsing fails.
	readNumber := func(file string, dst *float64) {
		raw, err := m.readFile(pathOf(file))
		if err != nil {
			return
		}
		if v, err := strconv.ParseFloat(strings.TrimSpace(string(raw)), 64); err == nil {
			*dst = v
		}
	}

	readNumber("memory.current", &jobdata.MemoryUsage)
	readNumber("memory.peak", &jobdata.MaxMemoryUsage)
	readNumber("memory.max", &jobdata.LimitMemoryUsage)

	if raw, err := m.readFile(pathOf("cpu.stat")); err == nil {
		var totalTime, userTime, sysTime float64
		// cpu.stat is a list of "<key> <value>" lines; only three keys matter.
		for _, line := range strings.Split(strings.TrimSpace(string(raw)), "\n") {
			cols := strings.Fields(line)
			if len(cols) < 2 {
				continue
			}
			v, err := strconv.ParseFloat(cols[1], 64)
			if err != nil {
				continue
			}
			switch cols[0] {
			case "usage_usec":
				totalTime = v
			case "user_usec":
				userTime = v
			case "system_usec":
				sysTime = v
			}
		}
		// Express user and system time as a percentage of the total time.
		if totalTime > 0 {
			jobdata.CpuUsageUser = (userTime * 100 / totalTime)
			jobdata.CpuUsageSys = (sysTime * 100 / totalTime)
		}
	}

	if raw, err := m.readFile(pathOf("cpuset.cpus")); err == nil {
		if ids, err := ParseCPUs(strings.TrimSpace(string(raw))); err == nil {
			jobdata.CpuSet = ids
		}
	}

	return jobdata, nil
}
// Read sends the per-hwthread job metrics for all job_* cgroup directories
// found below the collector's cgroup base directory.
//
// For every job, each job-level value is divided evenly by the number of CPUs
// in the job's cpuset and sent once per CPU of that cpuset. Every online CPU
// that is not part of any job's cpuset afterwards gets the same metrics with
// the value 0. Metrics named in the exclusion list are not sent. The interval
// argument is not used.
func (m *SlurmCgroupCollector) Read(interval time.Duration, output chan lp.CCMessage) {
	timestamp := time.Now()

	// Forget which CPUs were in use during the previous Read call.
	for k := range m.cpuUsed {
		delete(m.cpuUsed, k)
	}

	jobDirs, err := filepath.Glob(filepath.Join(m.cgroupBase, "job_*"))
	if err != nil {
		cclog.ComponentError(m.name, "Error globbing job directories:", err.Error())
		return
	}

	for _, jdir := range jobDirs {
		jKey := filepath.Base(jdir)
		jobdata, err := m.ReadJobData(jKey)
		if err != nil {
			cclog.ComponentError(m.name, "Error reading job data for", jKey, ":", err.Error())
			continue
		}
		if len(jobdata.CpuSet) == 0 {
			continue
		}

		// The per-CPU share is the same for all CPUs of the job, so it is
		// computed once per job.
		coreCount := float64(len(jobdata.CpuSet))
		share := SlurmJobData{
			MemoryUsage:      jobdata.MemoryUsage / coreCount,
			MaxMemoryUsage:   jobdata.MaxMemoryUsage / coreCount,
			LimitMemoryUsage: jobdata.LimitMemoryUsage / coreCount,
			CpuUsageUser:     jobdata.CpuUsageUser / coreCount,
			CpuUsageSys:      jobdata.CpuUsageSys / coreCount,
		}
		for _, cpu := range jobdata.CpuSet {
			m.sendJobMetrics(cpu, share, timestamp, output)
			m.cpuUsed[cpu] = true
		}
	}

	// CPUs without a job report zero. The zero value of SlurmJobData carries
	// float64 zeros, so idle and busy CPUs send the same field type for a
	// given metric name (the untyped constant 0 would be sent as an int).
	for _, cpu := range m.allCPUs {
		if m.cpuUsed[cpu] {
			continue
		}
		m.sendJobMetrics(cpu, SlurmJobData{}, timestamp, output)
	}
}

// sendJobMetrics sends the five job metrics for the hardware thread cpu, using
// the values in share as they are. Excluded metrics and metrics whose message
// cannot be created are skipped.
func (m *SlurmCgroupCollector) sendJobMetrics(cpu int, share SlurmJobData, timestamp time.Time, output chan lp.CCMessage) {
	coreTags := map[string]string{
		"type":    "hwthread",
		"type-id": strconv.Itoa(cpu),
	}

	metrics := [...]struct {
		name  string
		unit  string
		value float64
	}{
		{"job_mem_used", "Bytes", share.MemoryUsage},
		{"job_max_mem_used", "Bytes", share.MaxMemoryUsage},
		{"job_mem_limit", "Bytes", share.LimitMemoryUsage},
		{"job_user_cpu", "%", share.CpuUsageUser},
		{"job_sys_cpu", "%", share.CpuUsageSys},
	}

	for _, metric := range metrics {
		if m.isExcluded(metric.name) {
			continue
		}
		y, err := lp.NewMessage(metric.name, coreTags, m.meta, map[string]interface{}{"value": metric.value}, timestamp)
		if err != nil {
			continue
		}
		y.AddMeta("unit", metric.unit)
		output <- y
	}
}
// Close marks the collector as no longer initialized by clearing its init
// flag.
func (m *SlurmCgroupCollector) Close() {
m.init = false
}

View File

@@ -0,0 +1,50 @@
<!--
---
title: Slurm cgroup metric collector
description: Collect per-core memory and CPU usage for SLURM jobs from cgroup v2
categories: [cc-metric-collector]
tags: ['Admin']
weight: 3
hugo_path: docs/reference/cc-metric-collector/collectors/slurm_cgroup.md
---
-->
## `slurm_cgroup` collector
The `slurm_cgroup` collector reads job-specific resource metrics from the cgroup v2 filesystem and provides **hwthread** metrics for memory and CPU usage of running SLURM jobs.
### Example configuration
```json
"slurm_cgroup": {
"cgroup_base": "/sys/fs/cgroup/system.slice/slurmstepd.scope",
"exclude_metrics": [
"job_sys_cpu",
"job_mem_limit"
],
"use_sudo": false
}
```
* The `cgroup_base` parameter (optional) can be set to specify the root path to SLURM job cgroups. The default is `/sys/fs/cgroup/system.slice/slurmstepd.scope`.
* The `exclude_metrics` array can be used to suppress individual metrics from being sent to the sink.
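* The `use_sudo` parameter (optional, default `false`): the cgroup files are only readable by root. If the collector does not run as root, it refuses to initialize unless `use_sudo` is set to `true`; in that case the files are read with `sudo cat`, which requires password-less sudo to be configured.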
### Reported metrics
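All metrics are reported **per hardware thread**: each job-level value is divided evenly among the hardware threads in the job's cpuset, and hardware threads that belong to no job report `0`.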
* `job_mem_used` (`unit=Bytes`): Current memory usage of the job
* `job_max_mem_used` (`unit=Bytes`): Peak memory usage
* `job_mem_limit` (`unit=Bytes`): Cgroup memory limit
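* `job_user_cpu` (`unit=%`): Percentage of the job's accumulated CPU time (`usage_usec` in `cpu.stat`) spent in user mode
* `job_sys_cpu` (`unit=%`): Percentage of the job's accumulated CPU time (`usage_usec` in `cpu.stat`) spent in system mode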
Each metric has tags:
* `type=hwthread`
* `type-id=<hwthread_id>`
### Limitations
* **cgroups v2 required:** This collector only supports systems running with cgroups v2 (unified hierarchy).

View File

@@ -1,3 +1,10 @@
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved. This file is part of cc-lib.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
// additional authors:
// Holger Obermaier (NHR@KIT)
package collectors package collectors
import ( import (

View File

@@ -1,3 +1,10 @@
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved. This file is part of cc-lib.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
// additional authors:
// Holger Obermaier (NHR@KIT)
package collectors package collectors
import ( import (

View File

@@ -4,7 +4,7 @@ The configuration of the CC metric collector consists of five configuration file
## Global configuration ## Global configuration
The global file contains the paths to the other four files and some global options. The global file contains the paths to the other four files and some global options. You can find examples in `example_configs`.
```json ```json
{ {

53
go.mod
View File

@@ -1,48 +1,45 @@
module github.com/ClusterCockpit/cc-metric-collector module github.com/ClusterCockpit/cc-metric-collector
go 1.23.4 go 1.24.0
toolchain go1.23.7
require ( require (
github.com/ClusterCockpit/cc-lib v0.1.1 github.com/ClusterCockpit/cc-lib v0.10.1
github.com/ClusterCockpit/go-rocm-smi v0.3.0 github.com/ClusterCockpit/go-rocm-smi v0.3.0
github.com/NVIDIA/go-nvml v0.12.0-2 github.com/NVIDIA/go-nvml v0.13.0-1
github.com/PaesslerAG/gval v1.2.2 github.com/PaesslerAG/gval v1.2.4
github.com/fsnotify/fsnotify v1.7.0 github.com/fsnotify/fsnotify v1.9.0
github.com/gorilla/mux v1.8.1
github.com/influxdata/influxdb-client-go/v2 v2.14.0
github.com/influxdata/line-protocol v0.0.0-20210922203350-b1ad95c89adf github.com/influxdata/line-protocol v0.0.0-20210922203350-b1ad95c89adf
github.com/influxdata/line-protocol/v2 v2.2.1 github.com/tklauser/go-sysconf v0.3.15
github.com/nats-io/nats.go v1.39.0
github.com/prometheus/client_golang v1.20.5
github.com/stmcginnis/gofish v0.15.0
github.com/tklauser/go-sysconf v0.3.13
golang.design/x/thread v0.0.0-20210122121316-335e9adffdf1 golang.design/x/thread v0.0.0-20210122121316-335e9adffdf1
golang.org/x/exp v0.0.0-20250215185904-eff6e970281f golang.org/x/exp v0.0.0-20250620022241-b7579e27df2b
golang.org/x/sys v0.30.0 golang.org/x/sys v0.37.0
) )
require ( require (
github.com/ClusterCockpit/cc-backend v1.4.2 // indirect
github.com/ClusterCockpit/cc-units v0.4.0 // indirect
github.com/apapsch/go-jsonmerge/v2 v2.0.0 // indirect github.com/apapsch/go-jsonmerge/v2 v2.0.0 // indirect
github.com/beorn7/perks v1.0.1 // indirect github.com/beorn7/perks v1.0.1 // indirect
github.com/cespare/xxhash/v2 v2.3.0 // indirect github.com/cespare/xxhash/v2 v2.3.0 // indirect
github.com/expr-lang/expr v1.17.0 // indirect github.com/expr-lang/expr v1.17.6 // indirect
github.com/google/uuid v1.6.0 // indirect github.com/google/uuid v1.6.0 // indirect
github.com/klauspost/compress v1.17.9 // indirect github.com/gorilla/mux v1.8.1 // indirect
github.com/influxdata/influxdb-client-go/v2 v2.14.0 // indirect
github.com/influxdata/line-protocol/v2 v2.2.1 // indirect
github.com/klauspost/compress v1.18.0 // indirect
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect
github.com/nats-io/nkeys v0.4.9 // indirect github.com/nats-io/nats.go v1.46.1 // indirect
github.com/nats-io/nkeys v0.4.11 // indirect
github.com/nats-io/nuid v1.0.1 // indirect github.com/nats-io/nuid v1.0.1 // indirect
github.com/oapi-codegen/runtime v1.1.1 // indirect github.com/oapi-codegen/runtime v1.1.1 // indirect
github.com/prometheus/client_model v0.6.1 // indirect github.com/prometheus/client_golang v1.23.2 // indirect
github.com/prometheus/common v0.55.0 // indirect github.com/prometheus/client_model v0.6.2 // indirect
github.com/prometheus/procfs v0.15.1 // indirect github.com/prometheus/common v0.66.1 // indirect
github.com/prometheus/procfs v0.16.1 // indirect
github.com/santhosh-tekuri/jsonschema/v5 v5.3.1 // indirect github.com/santhosh-tekuri/jsonschema/v5 v5.3.1 // indirect
github.com/shopspring/decimal v1.3.1 // indirect github.com/shopspring/decimal v1.3.1 // indirect
github.com/tklauser/numcpus v0.7.0 // indirect github.com/stmcginnis/gofish v0.20.0 // indirect
golang.org/x/crypto v0.35.0 // indirect github.com/tklauser/numcpus v0.10.0 // indirect
golang.org/x/net v0.36.0 // indirect go.yaml.in/yaml/v2 v2.4.2 // indirect
google.golang.org/protobuf v1.35.2 // indirect golang.org/x/crypto v0.42.0 // indirect
golang.org/x/net v0.43.0 // indirect
google.golang.org/protobuf v1.36.8 // indirect
) )

126
go.sum
View File

@@ -1,21 +1,17 @@
github.com/ClusterCockpit/cc-backend v1.4.2 h1:kTOzqkh9N0564N9nqQThnSs7TAfg8RLgvSm00e5HtIc= github.com/ClusterCockpit/cc-lib v0.10.1 h1:tjGEH8mFGgznYxO8BKLiiar0eZR1Oytk8x5iIQHZR5s=
github.com/ClusterCockpit/cc-backend v1.4.2/go.mod h1:g8TNHXe4AXej26snu2//jO3mUF980elT93iV/k11O/c= github.com/ClusterCockpit/cc-lib v0.10.1/go.mod h1:nvTZuxFCTwlos8I1rL5O1RPab7vRtkU8E/PGiaF6pQA=
github.com/ClusterCockpit/cc-lib v0.1.0-beta.1 h1:dz9j0g2cod8+SMDjuoIY6ISpiHHeekhX6yQaeiwiwJw=
github.com/ClusterCockpit/cc-lib v0.1.0-beta.1/go.mod h1:kXMskla1i5ZSfXW0vVRIHgGeXMU5zu2PzYOYnUaOr80=
github.com/ClusterCockpit/cc-lib v0.1.1 h1:AXZWYUzgTaE/WdxLNSWPR7FJoA5WlzvYZxw4gIw3gNw=
github.com/ClusterCockpit/cc-lib v0.1.1/go.mod h1:SHKcWW/+kN+pcofAtHJFxvmx1FV0VIJuQv5PuT0HDcc=
github.com/ClusterCockpit/cc-units v0.4.0 h1:zP5DOu99GmErW0tCDf0gcLrlWt42RQ9dpoONEOh4cI0=
github.com/ClusterCockpit/cc-units v0.4.0/go.mod h1:3S3PAhAayS3pbgcT4q9Vn9VJw22Op51X0YimtG77zBw=
github.com/ClusterCockpit/go-rocm-smi v0.3.0 h1:1qZnSpG7/NyLtc7AjqnUL9Jb8xtqG1nMVgp69rJfaR8= github.com/ClusterCockpit/go-rocm-smi v0.3.0 h1:1qZnSpG7/NyLtc7AjqnUL9Jb8xtqG1nMVgp69rJfaR8=
github.com/ClusterCockpit/go-rocm-smi v0.3.0/go.mod h1:+I3UMeX3OlizXDf1WpGD43W4KGZZGVSGmny6rTeOnWA= github.com/ClusterCockpit/go-rocm-smi v0.3.0/go.mod h1:+I3UMeX3OlizXDf1WpGD43W4KGZZGVSGmny6rTeOnWA=
github.com/NVIDIA/go-nvml v0.11.6-0/go.mod h1:hy7HYeQy335x6nEss0Ne3PYqleRa6Ct+VKD9RQ4nyFs= github.com/NVIDIA/go-nvml v0.11.6-0/go.mod h1:hy7HYeQy335x6nEss0Ne3PYqleRa6Ct+VKD9RQ4nyFs=
github.com/NVIDIA/go-nvml v0.12.0-2 h1:Sg239yy7jmopu/cuvYauoMj9fOpcGMngxVxxS1EBXeY= github.com/NVIDIA/go-nvml v0.13.0-1 h1:OLX8Jq3dONuPOQPC7rndB6+iDmDakw0XTYgzMxObkEw=
github.com/NVIDIA/go-nvml v0.12.0-2/go.mod h1:7ruy85eOM73muOc/I37euONSwEyFqZsv5ED9AogD4G0= github.com/NVIDIA/go-nvml v0.13.0-1/go.mod h1:+KNA7c7gIBH7SKSJ1ntlwkfN80zdx8ovl4hrK3LmPt4=
github.com/PaesslerAG/gval v1.2.2 h1:Y7iBzhgE09IGTt5QgGQ2IdaYYYOU134YGHBThD+wm9E= github.com/PaesslerAG/gval v1.2.4 h1:rhX7MpjJlcxYwL2eTTYIOBUyEKZ+A96T9vQySWkVUiU=
github.com/PaesslerAG/gval v1.2.2/go.mod h1:XRFLwvmkTEdYziLdaCeCa5ImcGVrfQbeNUbVR+C6xac= github.com/PaesslerAG/gval v1.2.4/go.mod h1:XRFLwvmkTEdYziLdaCeCa5ImcGVrfQbeNUbVR+C6xac=
github.com/PaesslerAG/jsonpath v0.1.0 h1:gADYeifvlqK3R3i2cR5B4DGgxLXIPb3TRTH1mGi0jPI= github.com/PaesslerAG/jsonpath v0.1.0 h1:gADYeifvlqK3R3i2cR5B4DGgxLXIPb3TRTH1mGi0jPI=
github.com/PaesslerAG/jsonpath v0.1.0/go.mod h1:4BzmtoM/PI8fPO4aQGIusjGxGir2BzcV0grWtFzq1Y8= github.com/PaesslerAG/jsonpath v0.1.0/go.mod h1:4BzmtoM/PI8fPO4aQGIusjGxGir2BzcV0grWtFzq1Y8=
github.com/RaveNoX/go-jsoncommentstrip v1.0.0/go.mod h1:78ihd09MekBnJnxpICcwzCMzGrKSKYe4AqU6PDYYpjk= github.com/RaveNoX/go-jsoncommentstrip v1.0.0/go.mod h1:78ihd09MekBnJnxpICcwzCMzGrKSKYe4AqU6PDYYpjk=
github.com/antithesishq/antithesis-sdk-go v0.4.3-default-no-op h1:+OSa/t11TFhqfrX0EOSqQBDJ0YlpmK0rDSiB19dg9M0=
github.com/antithesishq/antithesis-sdk-go v0.4.3-default-no-op/go.mod h1:IUpT2DPAKh6i/YhSbt6Gl3v2yvUZjmKncl7U91fup7E=
github.com/apapsch/go-jsonmerge/v2 v2.0.0 h1:axGnT1gRIfimI7gJifB699GoE/oq+F2MU7Dml6nw9rQ= github.com/apapsch/go-jsonmerge/v2 v2.0.0 h1:axGnT1gRIfimI7gJifB699GoE/oq+F2MU7Dml6nw9rQ=
github.com/apapsch/go-jsonmerge/v2 v2.0.0/go.mod h1:lvDnEdqiQrp0O42VQGgmlKpxL1AP2+08jFMw88y4klk= github.com/apapsch/go-jsonmerge/v2 v2.0.0/go.mod h1:lvDnEdqiQrp0O42VQGgmlKpxL1AP2+08jFMw88y4klk=
github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM=
@@ -27,20 +23,20 @@ github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ3
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/expr-lang/expr v1.16.9 h1:WUAzmR0JNI9JCiF0/ewwHB1gmcGw5wW7nWt8gc6PpCI= github.com/expr-lang/expr v1.17.6 h1:1h6i8ONk9cexhDmowO/A64VPxHScu7qfSl2k8OlINec=
github.com/expr-lang/expr v1.16.9/go.mod h1:8/vRC7+7HBzESEqt5kKpYXxrxkr31SaO8r40VO/1IT4= github.com/expr-lang/expr v1.17.6/go.mod h1:8/vRC7+7HBzESEqt5kKpYXxrxkr31SaO8r40VO/1IT4=
github.com/expr-lang/expr v1.17.0 h1:+vpszOyzKLQXC9VF+wA8cVA0tlA984/Wabc/1hF9Whg=
github.com/expr-lang/expr v1.17.0/go.mod h1:8/vRC7+7HBzESEqt5kKpYXxrxkr31SaO8r40VO/1IT4=
github.com/frankban/quicktest v1.11.0/go.mod h1:K+q6oSqb0W0Ininfk863uOk1lMy69l/P6txr3mVT54s= github.com/frankban/quicktest v1.11.0/go.mod h1:K+q6oSqb0W0Ininfk863uOk1lMy69l/P6txr3mVT54s=
github.com/frankban/quicktest v1.11.2/go.mod h1:K+q6oSqb0W0Ininfk863uOk1lMy69l/P6txr3mVT54s= github.com/frankban/quicktest v1.11.2/go.mod h1:K+q6oSqb0W0Ininfk863uOk1lMy69l/P6txr3mVT54s=
github.com/frankban/quicktest v1.13.0 h1:yNZif1OkDfNoDfb9zZa9aXIpejNR4F23Wely0c+Qdqk= github.com/frankban/quicktest v1.13.0 h1:yNZif1OkDfNoDfb9zZa9aXIpejNR4F23Wely0c+Qdqk=
github.com/frankban/quicktest v1.13.0/go.mod h1:qLE0fzW0VuyUAJgPU19zByoIr0HtCHN/r/VLSOOIySU= github.com/frankban/quicktest v1.13.0/go.mod h1:qLE0fzW0VuyUAJgPU19zByoIr0HtCHN/r/VLSOOIySU=
github.com/fsnotify/fsnotify v1.7.0 h1:8JEhPFa5W2WU7YfeZzPNqzMP6Lwt7L2715Ggo0nosvA= github.com/fsnotify/fsnotify v1.9.0 h1:2Ml+OJNzbYCTzsxtv8vKSFD9PbJjmhYF14k/jKC7S9k=
github.com/fsnotify/fsnotify v1.7.0/go.mod h1:40Bi/Hjc2AVfZrqy+aj+yEI+/bRxZnMJyTJwOpGvigM= github.com/fsnotify/fsnotify v1.9.0/go.mod h1:8jBTzvmWwFyi3Pb8djgCCO5IBqzKJ/Jwo8TRcHyHii0=
github.com/google/go-cmp v0.5.2/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.2/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI= github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8=
github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU=
github.com/google/go-tpm v0.9.5 h1:ocUmnDebX54dnW+MQWGQRbdaAcJELsa6PqZhJ48KwVU=
github.com/google/go-tpm v0.9.5/go.mod h1:h9jEsEECg7gtLis0upRBQU+GhYVH6jMjrFxI8u6bVUY=
github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
github.com/gorilla/mux v1.8.1 h1:TuBL49tXwgrFYWhqrNgrUNEY92u81SPhu7sTdzQEiWY= github.com/gorilla/mux v1.8.1 h1:TuBL49tXwgrFYWhqrNgrUNEY92u81SPhu7sTdzQEiWY=
@@ -57,8 +53,8 @@ github.com/influxdata/line-protocol/v2 v2.1.0/go.mod h1:QKw43hdUBg3GTk2iC3iyCxks
github.com/influxdata/line-protocol/v2 v2.2.1 h1:EAPkqJ9Km4uAxtMRgUubJyqAr6zgWM0dznKMLRauQRE= github.com/influxdata/line-protocol/v2 v2.2.1 h1:EAPkqJ9Km4uAxtMRgUubJyqAr6zgWM0dznKMLRauQRE=
github.com/influxdata/line-protocol/v2 v2.2.1/go.mod h1:DmB3Cnh+3oxmG6LOBIxce4oaL4CPj3OmMPgvauXh+tM= github.com/influxdata/line-protocol/v2 v2.2.1/go.mod h1:DmB3Cnh+3oxmG6LOBIxce4oaL4CPj3OmMPgvauXh+tM=
github.com/juju/gnuflag v0.0.0-20171113085948-2ce1bb71843d/go.mod h1:2PavIy+JPciBPrBUjwbNvtwB6RQlve+hkpll6QSNmOE= github.com/juju/gnuflag v0.0.0-20171113085948-2ce1bb71843d/go.mod h1:2PavIy+JPciBPrBUjwbNvtwB6RQlve+hkpll6QSNmOE=
github.com/klauspost/compress v1.17.9 h1:6KIumPrER1LHsvBVuDa0r5xaG0Es51mhhB9BQB2qeMA= github.com/klauspost/compress v1.18.0 h1:c/Cqfb0r+Yi+JtIEq73FWXVkRonBlf0CRNYc8Zttxdo=
github.com/klauspost/compress v1.17.9/go.mod h1:Di0epgTjJY877eYKx5yC51cX2A2Vl2ibi7bDH9ttBbw= github.com/klauspost/compress v1.18.0/go.mod h1:2Pp+KzxcywXVXMr50+X0Q/Lsb43OQHYWRCY2AiWywWQ=
github.com/kr/pretty v0.2.1/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI= github.com/kr/pretty v0.2.1/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI=
github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE=
github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk= github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk=
@@ -68,12 +64,18 @@ github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
github.com/kylelemons/godebug v1.1.0 h1:RPNrshWIDI6G2gRW9EHilWtl7Z6Sb1BR0xunSBf0SNc= github.com/kylelemons/godebug v1.1.0 h1:RPNrshWIDI6G2gRW9EHilWtl7Z6Sb1BR0xunSBf0SNc=
github.com/kylelemons/godebug v1.1.0/go.mod h1:9/0rRGxNHcop5bhtWyNeEfOS8JIWk580+fNqagV/RAw= github.com/kylelemons/godebug v1.1.0/go.mod h1:9/0rRGxNHcop5bhtWyNeEfOS8JIWk580+fNqagV/RAw=
github.com/minio/highwayhash v1.0.3 h1:kbnuUMoHYyVl7szWjSxJnxw11k2U709jqFPPmIUyD6Q=
github.com/minio/highwayhash v1.0.3/go.mod h1:GGYsuwP/fPD6Y9hMiXuapVvlIUEhFhMTh0rxU3ik1LQ=
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA= github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA=
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ= github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ=
github.com/nats-io/nats.go v1.39.0 h1:2/yg2JQjiYYKLwDuBzV0FbB2sIV+eFNkEevlRi4n9lI= github.com/nats-io/jwt/v2 v2.8.0 h1:K7uzyz50+yGZDO5o772eRE7atlcSEENpL7P+b74JV1g=
github.com/nats-io/nats.go v1.39.0/go.mod h1:MgRb8oOdigA6cYpEPhXJuRVH6UE/V4jblJ2jQ27IXYM= github.com/nats-io/jwt/v2 v2.8.0/go.mod h1:me11pOkwObtcBNR8AiMrUbtVOUGkqYjMQZ6jnSdVUIA=
github.com/nats-io/nkeys v0.4.9 h1:qe9Faq2Gxwi6RZnZMXfmGMZkg3afLLOtrU+gDZJ35b0= github.com/nats-io/nats-server/v2 v2.12.0 h1:OIwe8jZUqJFrh+hhiyKu8snNib66qsx806OslqJuo74=
github.com/nats-io/nkeys v0.4.9/go.mod h1:jcMqs+FLG+W5YO36OX6wFIFcmpdAns+w1Wm6D3I/evE= github.com/nats-io/nats-server/v2 v2.12.0/go.mod h1:nr8dhzqkP5E/lDwmn+A2CvQPMd1yDKXQI7iGg3lAvww=
github.com/nats-io/nats.go v1.46.1 h1:bqQ2ZcxVd2lpYI97xYASeRTY3I5boe/IVmuUDPitHfo=
github.com/nats-io/nats.go v1.46.1/go.mod h1:iRWIPokVIFbVijxuMQq4y9ttaBTMe0SFdlZfMDd+33g=
github.com/nats-io/nkeys v0.4.11 h1:q44qGV008kYd9W1b1nEBkNzvnWxtRSQ7A8BoqRrcfa0=
github.com/nats-io/nkeys v0.4.11/go.mod h1:szDimtgmfOi9n25JpfIdGw12tZFYXqhGxjhVxsatHVE=
github.com/nats-io/nuid v1.0.1 h1:5iA8DT8V7q8WK2EScv2padNa/rTESc1KdnPw4TC2paw= github.com/nats-io/nuid v1.0.1 h1:5iA8DT8V7q8WK2EScv2padNa/rTESc1KdnPw4TC2paw=
github.com/nats-io/nuid v1.0.1/go.mod h1:19wcPz3Ph3q0Jbyiqsd0kePYG7A95tJPxeL+1OSON2c= github.com/nats-io/nuid v1.0.1/go.mod h1:19wcPz3Ph3q0Jbyiqsd0kePYG7A95tJPxeL+1OSON2c=
github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e/go.mod h1:zD1mROLANZcx1PVRCS0qkT7pwLkGfwJo4zjcN/Tysno= github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e/go.mod h1:zD1mROLANZcx1PVRCS0qkT7pwLkGfwJo4zjcN/Tysno=
@@ -81,14 +83,14 @@ github.com/oapi-codegen/runtime v1.1.1 h1:EXLHh0DXIJnWhdRPN2w4MXAzFyE4CskzhNLUmt
github.com/oapi-codegen/runtime v1.1.1/go.mod h1:SK9X900oXmPWilYR5/WKPzt3Kqxn/uS/+lbpREv+eCg= github.com/oapi-codegen/runtime v1.1.1/go.mod h1:SK9X900oXmPWilYR5/WKPzt3Kqxn/uS/+lbpREv+eCg=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/prometheus/client_golang v1.20.5 h1:cxppBPuYhUnsO6yo/aoRol4L7q7UFfdm+bR9r+8l63Y= github.com/prometheus/client_golang v1.23.2 h1:Je96obch5RDVy3FDMndoUsjAhG5Edi49h0RJWRi/o0o=
github.com/prometheus/client_golang v1.20.5/go.mod h1:PIEt8X02hGcP8JWbeHyeZ53Y/jReSnHgO035n//V5WE= github.com/prometheus/client_golang v1.23.2/go.mod h1:Tb1a6LWHB3/SPIzCoaDXI4I8UHKeFTEQ1YCr+0Gyqmg=
github.com/prometheus/client_model v0.6.1 h1:ZKSh/rekM+n3CeS952MLRAdFwIKqeY8b62p8ais2e9E= github.com/prometheus/client_model v0.6.2 h1:oBsgwpGs7iVziMvrGhE53c/GrLUsZdHnqNwqPLxwZyk=
github.com/prometheus/client_model v0.6.1/go.mod h1:OrxVMOVHjw3lKMa8+x6HeMGkHMQyHDk9E3jmP2AmGiY= github.com/prometheus/client_model v0.6.2/go.mod h1:y3m2F6Gdpfy6Ut/GBsUqTWZqCUvMVzSfMLjcu6wAwpE=
github.com/prometheus/common v0.55.0 h1:KEi6DK7lXW/m7Ig5i47x0vRzuBsHuvJdi5ee6Y3G1dc= github.com/prometheus/common v0.66.1 h1:h5E0h5/Y8niHc5DlaLlWLArTQI7tMrsfQjHV+d9ZoGs=
github.com/prometheus/common v0.55.0/go.mod h1:2SECS4xJG1kd8XF9IcM1gMX6510RAEL65zxzNImwdc8= github.com/prometheus/common v0.66.1/go.mod h1:gcaUsgf3KfRSwHY4dIMXLPV0K/Wg1oZ8+SbZk/HH/dA=
github.com/prometheus/procfs v0.15.1 h1:YagwOFzUgYfKKHX6Dr+sHT7km/hxC76UB0learggepc= github.com/prometheus/procfs v0.16.1 h1:hZ15bTNuirocR6u0JZ6BAHHmwS1p8B4P6MRqxtzMyRg=
github.com/prometheus/procfs v0.15.1/go.mod h1:fB45yRUv8NstnjriLhBQLuOUt+WW4BsoGhij/e3PBqk= github.com/prometheus/procfs v0.16.1/go.mod h1:teAbpZRB1iIAJYREa1LsoWUXykVXA1KlTmWl8x/U+Is=
github.com/rogpeppe/go-internal v1.10.0 h1:TMyTOH3F/DB16zRVcYyreMH6GnZZrwQVAoYjRBZyWFQ= github.com/rogpeppe/go-internal v1.10.0 h1:TMyTOH3F/DB16zRVcYyreMH6GnZZrwQVAoYjRBZyWFQ=
github.com/rogpeppe/go-internal v1.10.0/go.mod h1:UQnix2H7Ngw/k4C5ijL5+65zddjncjaFoBhdsK/akog= github.com/rogpeppe/go-internal v1.10.0/go.mod h1:UQnix2H7Ngw/k4C5ijL5+65zddjncjaFoBhdsK/akog=
github.com/santhosh-tekuri/jsonschema/v5 v5.3.1 h1:lZUw3E0/J3roVtGQ+SCrUrg3ON6NgVqpn3+iol9aGu4= github.com/santhosh-tekuri/jsonschema/v5 v5.3.1 h1:lZUw3E0/J3roVtGQ+SCrUrg3ON6NgVqpn3+iol9aGu4=
@@ -96,44 +98,40 @@ github.com/santhosh-tekuri/jsonschema/v5 v5.3.1/go.mod h1:uToXkOrWAZ6/Oc07xWQrPO
github.com/shopspring/decimal v1.3.1 h1:2Usl1nmF/WZucqkFZhnfFYxxxu8LG21F6nPQBE5gKV8= github.com/shopspring/decimal v1.3.1 h1:2Usl1nmF/WZucqkFZhnfFYxxxu8LG21F6nPQBE5gKV8=
github.com/shopspring/decimal v1.3.1/go.mod h1:DKyhrW/HYNuLGql+MJL6WCR6knT2jwCFRcu2hWCYk4o= github.com/shopspring/decimal v1.3.1/go.mod h1:DKyhrW/HYNuLGql+MJL6WCR6knT2jwCFRcu2hWCYk4o=
github.com/spkg/bom v0.0.0-20160624110644-59b7046e48ad/go.mod h1:qLr4V1qq6nMqFKkMo8ZTx3f+BZEkzsRUY10Xsm2mwU0= github.com/spkg/bom v0.0.0-20160624110644-59b7046e48ad/go.mod h1:qLr4V1qq6nMqFKkMo8ZTx3f+BZEkzsRUY10Xsm2mwU0=
github.com/stmcginnis/gofish v0.15.0 h1:8TG41+lvJk/0Nf8CIIYErxbMlQUy80W0JFRZP3Ld82A= github.com/stmcginnis/gofish v0.20.0 h1:hH2V2Qe898F2wWT1loApnkDUrXXiLKqbSlMaH3Y1n08=
github.com/stmcginnis/gofish v0.15.0/go.mod h1:BLDSFTp8pDlf/xDbLZa+F7f7eW0E/CHCboggsu8CznI= github.com/stmcginnis/gofish v0.20.0/go.mod h1:PzF5i8ecRG9A2ol8XT64npKUunyraJ+7t0kYMpQAtqU=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw=
github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo=
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U=
github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U=
github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= github.com/tklauser/go-sysconf v0.3.15 h1:VE89k0criAymJ/Os65CSn1IXaol+1wrsFHEB8Ol49K4=
github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg= github.com/tklauser/go-sysconf v0.3.15/go.mod h1:Dmjwr6tYFIseJw7a3dRLJfsHAMXZ3nEnL/aZY+0IuI4=
github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= github.com/tklauser/numcpus v0.10.0 h1:18njr6LDBk1zuna922MgdjQuJFjrdppsZG60sHGfjso=
github.com/tklauser/go-sysconf v0.3.13 h1:GBUpcahXSpR2xN01jhkNAbTLRk2Yzgggk8IM08lq3r4= github.com/tklauser/numcpus v0.10.0/go.mod h1:BiTKazU708GQTYF4mB+cmlpT2Is1gLk7XVuEeem8LsQ=
github.com/tklauser/go-sysconf v0.3.13/go.mod h1:zwleP4Q4OehZHGn4CYZDipCgg9usW5IJePewFCGVEa0= go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto=
github.com/tklauser/numcpus v0.7.0 h1:yjuerZP127QG9m5Zh/mSO4wqurYil27tHrqwRoRjpr4= go.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE=
github.com/tklauser/numcpus v0.7.0/go.mod h1:bb6dMVcj8A42tSE7i32fsIUCbQNllK5iDguyOZRUzAY= go.yaml.in/yaml/v2 v2.4.2 h1:DzmwEr2rDGHl7lsFgAHxmNz/1NlQ7xLIrlN2h5d1eGI=
go.yaml.in/yaml/v2 v2.4.2/go.mod h1:081UH+NErpNdqlCXm3TtEran0rJZGxAYx9hb/ELlsPU=
golang.design/x/thread v0.0.0-20210122121316-335e9adffdf1 h1:P7S/GeHBAFEZIYp0ePPs2kHXoazz8q2KsyxHyQVGCJg= golang.design/x/thread v0.0.0-20210122121316-335e9adffdf1 h1:P7S/GeHBAFEZIYp0ePPs2kHXoazz8q2KsyxHyQVGCJg=
golang.design/x/thread v0.0.0-20210122121316-335e9adffdf1/go.mod h1:9CWpnTUmlQkfdpdutA1nNf4iE5lAVt3QZOu0Z6hahBE= golang.design/x/thread v0.0.0-20210122121316-335e9adffdf1/go.mod h1:9CWpnTUmlQkfdpdutA1nNf4iE5lAVt3QZOu0Z6hahBE=
golang.org/x/crypto v0.31.0 h1:ihbySMvVjLAeSH1IbfcRTkD/iNscyz8rGzjF/E5hV6U= golang.org/x/crypto v0.42.0 h1:chiH31gIWm57EkTXpwnqf8qeuMUi0yekh6mT2AvFlqI=
golang.org/x/crypto v0.31.0/go.mod h1:kDsLvtWBEx7MV9tJOj9bnXsPbxwJQ6csT/x4KIN4Ssk= golang.org/x/crypto v0.42.0/go.mod h1:4+rDnOTJhQCx2q7/j6rAN5XDw8kPjeaXEUR2eL94ix8=
golang.org/x/crypto v0.35.0 h1:b15kiHdrGCHrP6LvwaQ3c03kgNhhiMgvlhxHQhmg2Xs= golang.org/x/exp v0.0.0-20250620022241-b7579e27df2b h1:M2rDM6z3Fhozi9O7NWsxAkg/yqS/lQJ6PmkyIV3YP+o=
golang.org/x/crypto v0.35.0/go.mod h1:dy7dXNW32cAb/6/PRuTNsix8T+vJAqvuIy5Bli/x0YQ= golang.org/x/exp v0.0.0-20250620022241-b7579e27df2b/go.mod h1:3//PLf8L/X+8b4vuAfHzxeRUl04Adcb341+IGKfnqS8=
golang.org/x/exp v0.0.0-20250215185904-eff6e970281f h1:oFMYAjX0867ZD2jcNiLBrI9BdpmEkvPyi5YrBGXbamg= golang.org/x/net v0.43.0 h1:lat02VYK2j4aLzMzecihNvTlJNQUq316m2Mr9rnM6YE=
golang.org/x/exp v0.0.0-20250215185904-eff6e970281f/go.mod h1:BHOTPb3L19zxehTsLoJXVaTktb06DFgmdW6Wb9s8jqk= golang.org/x/net v0.43.0/go.mod h1:vhO1fvI4dGsIjh73sWfUVjj3N7CA9WkKJNQm2svM6Jg=
golang.org/x/net v0.31.0 h1:68CPQngjLL0r2AlUKiSxtQFKvzRVbnzLwMUn5SzcLHo=
golang.org/x/net v0.31.0/go.mod h1:P4fl1q7dY2hnZFxEk4pPSkDHF+QqjitcnDjUQyMM+pM=
golang.org/x/net v0.36.0 h1:vWF2fRbw4qslQsQzgFqZff+BItCvGFQqKzKIzx1rmoA=
golang.org/x/net v0.36.0/go.mod h1:bFmbeoIPfrw4sMHNhb4J9f6+tPziuGjq7Jk/38fxi1I=
golang.org/x/sys v0.0.0-20210122093101-04d7465088b8/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210122093101-04d7465088b8/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.28.0 h1:Fksou7UEQUWlKvIdsqzJmUmCX3cZuD2+P3XyyzwMhlA= golang.org/x/sys v0.37.0 h1:fdNQudmxPjkdUTPnLn5mdQv7Zwvbvpaxqs831goi9kQ=
golang.org/x/sys v0.28.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/sys v0.37.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks=
golang.org/x/sys v0.30.0 h1:QjkSwP/36a20jFYWkSue1YwXzLmsV5Gfq7Eiy72C1uc= golang.org/x/time v0.13.0 h1:eUlYslOIt32DgYD6utsuUeHs4d7AsEYLuIAdg7FlYgI=
golang.org/x/sys v0.30.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/time v0.13.0/go.mod h1:eL/Oa2bBBK0TkX57Fyni+NgnyQQN4LitPmob2Hjnqw4=
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
google.golang.org/protobuf v1.35.2 h1:8Ar7bF+apOIoThw1EdZl0p1oWvMqTHmpA2fRTyZO8io= google.golang.org/protobuf v1.36.8 h1:xHScyCOEuuwZEc6UtSOvPbAT4zRh0xcNRYekJwfqyMc=
google.golang.org/protobuf v1.35.2/go.mod h1:9fA7Ob0pmnwhb644+1+CVWFRbNajQ6iRojtC/QF5bRE= google.golang.org/protobuf v1.36.8/go.mod h1:fuxRtAxBytpl4zzqUh6/eyUujkJdNiuEkXntxiD/uRU=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/check.v1 v1.0.0-20200227125254-8fa46927fb4f/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20200227125254-8fa46927fb4f/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk=
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q=
gopkg.in/yaml.v3 v3.0.0-20200615113413-eeeca48fe776/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= gopkg.in/yaml.v3 v3.0.0-20200615113413-eeeca48fe776/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=

View File

@@ -1,3 +1,10 @@
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved. This file is part of cc-lib.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
// additional authors:
// Holger Obermaier (NHR@KIT)
package metricAggregator package metricAggregator
import ( import (

View File

@@ -1,3 +1,10 @@
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved. This file is part of cc-lib.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
// additional authors:
// Holger Obermaier (NHR@KIT)
package metricAggregator package metricAggregator
import ( import (

View File

@@ -236,13 +236,13 @@ __deprecated__
The cc-metric-collector tries to read the data from the system as it is reported. If available, it tries to read the metric unit from the system as well (e.g. from `/proc/meminfo`). The problem is that, depending on the source, the metric units are named differently. Just think about `byte`, `Byte`, `B`, `bytes`, ... The cc-metric-collector tries to read the data from the system as it is reported. If available, it tries to read the metric unit from the system as well (e.g. from `/proc/meminfo`). The problem is that, depending on the source, the metric units are named differently. Just think about `byte`, `Byte`, `B`, `bytes`, ...
The [cc-units](https://github.com/ClusterCockpit/cc-units) package provides us a normalization option to use the same metric unit name for all metrics. If this option is set to true, all `unit` meta tags are normalized. The [cc-units](https://github.com/ClusterCockpit/cc-lib/ccUnits) package provides us a normalization option to use the same metric unit name for all metrics. If this option is set to true, all `unit` meta tags are normalized.
## The `change_unit_prefix` section ## The `change_unit_prefix` section
__deprecated__ __deprecated__
It is often the case that metrics are reported by the system using a rather outdated unit prefix (like `/proc/meminfo` still uses kByte even though current memory sizes are in the GByte range). If you want to change the prefix of a unit, you can do that with the help of [cc-units](https://github.com/ClusterCockpit/cc-units). The setting works on the metric name and requires the new prefix for the metric. The cc-units package determines the scaling factor. It is often the case that metrics are reported by the system using a rather outdated unit prefix (like `/proc/meminfo` still uses kByte even though current memory sizes are in the GByte range). If you want to change the prefix of a unit, you can do that with the help of [cc-units](https://github.com/ClusterCockpit/cc-lib/ccUnits). The setting works on the metric name and requires the new prefix for the metric. The cc-units package determines the scaling factor.
# Aggregate metric values of the current interval with the `interval_aggregates` option # Aggregate metric values of the current interval with the `interval_aggregates` option

View File

@@ -1,3 +1,10 @@
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved. This file is part of cc-lib.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
// additional authors:
// Holger Obermaier (NHR@KIT)
package metricRouter package metricRouter
import ( import (

View File

@@ -1,3 +1,10 @@
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved. This file is part of cc-lib.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
// additional authors:
// Holger Obermaier (NHR@KIT)
package metricRouter package metricRouter
import ( import (

View File

@@ -1,3 +1,10 @@
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved. This file is part of cc-lib.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
// additional authors:
// Holger Obermaier (NHR@KIT)
package ccTopology package ccTopology
import ( import (

View File

@@ -1,125 +0,0 @@
package hostlist
import (
"fmt"
"regexp"
"sort"
"strconv"
"strings"
)
// Regular expressions used by Expand, compiled once at package initialization
// instead of on every call.
var (
	// hostlistRangesRE validates a bracketed range list such as "[1,3-5,07]":
	// comma separated numbers or "start-end" number ranges, no empty entries.
	hostlistRangesRE = regexp.MustCompile(
		`[[]([[:digit:]]+,|[[:digit:]]+-[[:digit:]]+,)*([[:digit:]]+|[[:digit:]]+-[[:digit:]]+)[]]`)

	// hostlistEntryRE matches one host list entry at the start of the input:
	// a prefix of DNS characters, an optional bracketed range list and an
	// optional suffix of DNS characters.
	hostlistEntryRE = regexp.MustCompile(
		`^([a-zA-Z0-9-]+)([[][0-9,-]+[]])?([a-zA-Z0-9-]+)?`)
)

// Expand converts a host list expression such as "n[01-03,05]-ib,m1" into the
// sorted, duplicate-free list of host names it describes.
//
// Entries are separated by commas and/or spaces. Each entry consists of a
// prefix, an optional bracketed list of numbers and number ranges, and an
// optional suffix. A range "a-b" is zero padded to the common width when a and
// b are written with the same number of digits.
//
// An error (and a nil result) is returned when the input is not a valid host
// list, when a range list is malformed, when a range start is greater than its
// end, or when a range bound does not fit into an unsigned 64 bit integer.
// An input without any entries yields a nil result and a nil error.
func Expand(in string) (result []string, err error) {
	const delimiters = ", "

	// Remove all leading delimiters from the input
	in = strings.TrimLeft(in, delimiters)
	for len(in) > 0 {
		match := hostlistEntryRE.FindStringSubmatch(in)
		if match == nil {
			return nil, fmt.Errorf("not a hostlist: %s", in)
		}

		// Consume the matched entry and the delimiters following it
		in = strings.TrimLeft(in[len(match[0]):], delimiters)

		hlPrefix, hlRanges, hlSuffix := match[1], match[2], match[3]

		// Single node without ranges
		if hlRanges == "" {
			result = append(result, hlPrefix)
			continue
		}

		// Node with ranges: validate the bracket expression first
		if !hostlistRangesRE.MatchString(hlRanges) {
			return nil, fmt.Errorf("not at hostlist range: %s", hlRanges)
		}

		// Strip the braces and handle every comma separated item
		for _, hlRange := range strings.Split(hlRanges[1:len(hlRanges)-1], ",") {
			bounds := strings.Split(hlRange, "-")

			// Item is only a single number: keep it verbatim (including padding)
			if len(bounds) == 1 {
				result = append(result, hlPrefix+bounds[0]+hlSuffix)
				continue
			}

			// Item has a start and an end. The regular expression guarantees
			// digits only, so parsing can only fail on values that overflow
			// uint64. These must be rejected: silently using the clamped
			// value would make the expansion loop below run forever.
			iStart, errStart := strconv.ParseUint(bounds[0], 10, 64)
			if errStart != nil {
				return nil, fmt.Errorf("invalid range start in %s: %w", hlRange, errStart)
			}
			iEnd, errEnd := strconv.ParseUint(bounds[1], 10, 64)
			if errEnd != nil {
				return nil, fmt.Errorf("invalid range end in %s: %w", hlRange, errEnd)
			}
			if iStart > iEnd {
				return nil, fmt.Errorf("single range start is greater than end: %s", hlRange)
			}

			// Zero pad only when start and end are written with equal width
			numberFormat := "%d"
			if len(bounds[0]) == len(bounds[1]) {
				numberFormat = "%0" + strconv.Itoa(len(bounds[0])) + "d"
			}

			// Add nodes from this range. The loop exits on equality instead
			// of testing i <= iEnd so that it also terminates when iEnd is
			// the largest uint64 value (i++ would wrap around to 0).
			for i := iStart; ; i++ {
				result = append(result, hlPrefix+fmt.Sprintf(numberFormat, i)+hlSuffix)
				if i == iEnd {
					break
				}
			}
		}
	}

	if len(result) > 0 {
		// sort
		sort.Strings(result)

		// uniq: compact in place, keeping the first of each run of equals
		unique := result[:1]
		for _, host := range result[1:] {
			if host != unique[len(unique)-1] {
				unique = append(unique, host)
			}
		}
		result = unique
	}

	return result, nil
}

View File

@@ -1,126 +0,0 @@
package hostlist
import (
"testing"
)
// TestExpand runs Expand against a table of host list expressions and checks
// for each one either the expanded host names or that an error is reported.
func TestExpand(t *testing.T) {
	// expandCase describes one input together with its expected outcome.
	type expandCase struct {
		input          string
		resultExpected []string
		errorExpected  bool
	}

	// sameStrings reports whether both slices hold the same strings in the
	// same order.
	sameStrings := func(got, want []string) bool {
		if len(got) != len(want) {
			return false
		}
		for idx := range got {
			if got[idx] != want[idx] {
				return false
			}
		}
		return true
	}

	cases := []expandCase{
		// Single node
		{input: "n1", resultExpected: []string{"n1"}},
		// Single node, duplicated
		{input: "n1,n1", resultExpected: []string{"n1"}},
		// Single node with padding
		{input: "n[01]", resultExpected: []string{"n01"}},
		// Single node with suffix
		{input: "n[01]-p", resultExpected: []string{"n01-p"}},
		// Multiple nodes with a single range
		{input: "n[1-2]", resultExpected: []string{"n1", "n2"}},
		// Multiple nodes with a single range and a single index
		{input: "n[1-2,3]", resultExpected: []string{"n1", "n2", "n3"}},
		// Multiple nodes with different prefixes
		{input: "n[1-2],m[1,2]", resultExpected: []string{"m1", "m2", "n1", "n2"}},
		// Multiple nodes with different suffixes
		{input: "n[1-2]-p,n[1,2]-q", resultExpected: []string{"n1-p", "n1-q", "n2-p", "n2-q"}},
		// Multiple nodes with and without node ranges
		{
			input:          " n09, n[01-04,06-07,09] , , n10,n04",
			resultExpected: []string{"n01", "n02", "n03", "n04", "n06", "n07", "n09", "n10"},
		},
		// Forbidden DNS character
		{input: "n@", resultExpected: []string{}, errorExpected: true},
		// Forbidden range
		{input: "n[1-2-2,3]", resultExpected: []string{}, errorExpected: true},
		// Forbidden range limits
		{input: "n[2-1]", resultExpected: []string{}, errorExpected: true},
	}

	for _, tc := range cases {
		result, err := Expand(tc.input)
		failed := err != nil

		switch {
		case failed && !tc.errorExpected:
			// Expand reported an error for a valid host list
			t.Errorf("Expand('%s') failed: unexpected error '%v'",
				tc.input, err)
			continue
		case !failed && tc.errorExpected:
			// Expand accepted an invalid host list
			t.Errorf("Expand('%s') did not fail as expected: got result '%+v'",
				tc.input, result)
			continue
		case !failed && !sameStrings(result, tc.resultExpected):
			// Expand succeeded but produced the wrong host names
			t.Errorf("Expand('%s') failed: got result '%+v', expected result '%v'",
				tc.input, result, tc.resultExpected)
			continue
		}

		t.Logf("Checked hostlist.Expand('%s'): result = '%+v', err = '%v'",
			tc.input, result, err)
	}
}

View File

@@ -1,3 +1,10 @@
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved. This file is part of cc-lib.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
// additional authors:
// Holger Obermaier (NHR@KIT)
package multiChanTicker package multiChanTicker
import ( import (

View File

@@ -1,4 +1,6 @@
Package: cc-metric-collector Package: cc-metric-collector
Section: misc
Priority: optional
Version: {VERSION} Version: {VERSION}
Installed-Size: {INSTALLED_SIZE} Installed-Size: {INSTALLED_SIZE}
Architecture: {ARCH} Architecture: {ARCH}

View File

@@ -44,6 +44,8 @@ def group_to_json(groupfile):
scope = "socket" scope = "socket"
if "PWR" in calc: if "PWR" in calc:
scope = "socket" scope = "socket"
if "UMC" in calc:
scope = "socket"
m = {"name" : metric, "calc": calc, "type" : scope, "publish" : True} m = {"name" : metric, "calc": calc, "type" : scope, "publish" : True}
metrics.append(m) metrics.append(m)