mirror of
https://github.com/ClusterCockpit/cc-metric-collector.git
synced 2026-02-13 14:41:45 +01:00
Compare commits
19 Commits
golangci-l
...
develop
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
6b10797556 | ||
|
|
f964a9d065 | ||
|
|
8c5a3cc07b | ||
|
|
4804576ad6 | ||
|
|
623391d271 | ||
|
|
6af85fe52f | ||
|
|
dd62929b42 | ||
|
|
88588ca80b | ||
|
|
0cc01c93c1 | ||
|
|
412ae24b20 | ||
|
|
40fd936f48 | ||
|
|
a9b301b7d4 | ||
|
|
a5a0474573 | ||
|
|
c85c4eeb21 | ||
|
|
e03e21021d | ||
|
|
0e730c9720 | ||
|
|
eb452770a5 | ||
|
|
f060a1bb17 | ||
|
|
f8b2ac0d2c |
129
.github/workflows/Release.yml
vendored
129
.github/workflows/Release.yml
vendored
@@ -5,10 +5,10 @@ name: Release
|
||||
|
||||
# Run on tag push
|
||||
on:
|
||||
push:
|
||||
tags:
|
||||
- '**'
|
||||
workflow_dispatch:
|
||||
push:
|
||||
tags:
|
||||
- '**'
|
||||
workflow_dispatch:
|
||||
|
||||
jobs:
|
||||
|
||||
@@ -36,14 +36,22 @@ jobs:
|
||||
# fetch-depth must be 0 to use git describe
|
||||
# See: https://github.com/marketplace/actions/checkout
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v6
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
submodules: recursive
|
||||
fetch-depth: 0
|
||||
|
||||
# - name: Setup Golang
|
||||
# uses: actions/setup-go@v5
|
||||
# with:
|
||||
# go-version: 'stable'
|
||||
- name: Setup Golang
|
||||
run: |
|
||||
dnf --assumeyes --disableplugin=subscription-manager --enablerepo appstream install go-toolset
|
||||
dnf --assumeyes --disableplugin=subscription-manager install \
|
||||
https://repo.almalinux.org/almalinux/8/AppStream/x86_64/os/Packages/go-toolset-1.23.9-1.module_el8.10.0+4000+1ad1b2cc.x86_64.rpm \
|
||||
https://repo.almalinux.org/almalinux/8/AppStream/x86_64/os/Packages/golang-1.23.9-1.module_el8.10.0+4000+1ad1b2cc.x86_64.rpm \
|
||||
https://repo.almalinux.org/almalinux/8/AppStream/x86_64/os/Packages/golang-bin-1.23.9-1.module_el8.10.0+4000+1ad1b2cc.x86_64.rpm \
|
||||
https://repo.almalinux.org/almalinux/8/AppStream/x86_64/os/Packages/golang-src-1.23.9-1.module_el8.10.0+4000+1ad1b2cc.noarch.rpm
|
||||
|
||||
- name: RPM build MetricCollector
|
||||
id: rpmbuild
|
||||
@@ -70,13 +78,13 @@ jobs:
|
||||
|
||||
# See: https://github.com/actions/upload-artifact
|
||||
- name: Save RPM as artifact
|
||||
uses: actions/upload-artifact@v6
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: cc-metric-collector RPM for AlmaLinux 8
|
||||
path: ${{ steps.rpmrename.outputs.RPM }}
|
||||
overwrite: true
|
||||
- name: Save SRPM as artifact
|
||||
uses: actions/upload-artifact@v6
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: cc-metric-collector SRPM for AlmaLinux 8
|
||||
path: ${{ steps.rpmrename.outputs.SRPM }}
|
||||
@@ -106,14 +114,23 @@ jobs:
|
||||
# fetch-depth must be 0 to use git describe
|
||||
# See: https://github.com/marketplace/actions/checkout
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v6
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
submodules: recursive
|
||||
fetch-depth: 0
|
||||
|
||||
# - name: Setup Golang
|
||||
# uses: actions/setup-go@v5
|
||||
# with:
|
||||
# go-version: 'stable'
|
||||
- name: Setup Golang
|
||||
run: |
|
||||
dnf --assumeyes --disableplugin=subscription-manager --enablerepo appstream install go-toolset
|
||||
dnf --assumeyes --disableplugin=subscription-manager install \
|
||||
https://repo.almalinux.org/almalinux/9/AppStream/x86_64/os/Packages/go-toolset-1.23.9-1.el9_6.x86_64.rpm \
|
||||
https://repo.almalinux.org/almalinux/9/AppStream/x86_64/os/Packages/golang-1.23.9-1.el9_6.x86_64.rpm \
|
||||
https://repo.almalinux.org/almalinux/9/AppStream/x86_64/os/Packages/golang-bin-1.23.9-1.el9_6.x86_64.rpm \
|
||||
https://repo.almalinux.org/almalinux/9/AppStream/x86_64/os/Packages/golang-src-1.23.9-1.el9_6.noarch.rpm \
|
||||
https://repo.almalinux.org/almalinux/9/AppStream/x86_64/os/Packages/golang-race-1.23.9-1.el9_6.x86_64.rpm
|
||||
|
||||
- name: RPM build MetricCollector
|
||||
id: rpmbuild
|
||||
@@ -140,26 +157,25 @@ jobs:
|
||||
|
||||
# See: https://github.com/actions/upload-artifact
|
||||
- name: Save RPM as artifact
|
||||
uses: actions/upload-artifact@v6
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: cc-metric-collector RPM for AlmaLinux 9
|
||||
path: ${{ steps.rpmrename.outputs.RPM }}
|
||||
overwrite: true
|
||||
- name: Save SRPM as artifact
|
||||
uses: actions/upload-artifact@v6
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: cc-metric-collector SRPM for AlmaLinux 9
|
||||
path: ${{ steps.rpmrename.outputs.SRPM }}
|
||||
overwrite: true
|
||||
|
||||
#
|
||||
# Build on Red Hat Universal Base Image (UBI 8) using go-toolset
|
||||
# Build on UBI 8 using go-toolset
|
||||
#
|
||||
UBI-8-RPM-build:
|
||||
runs-on: ubuntu-latest
|
||||
# See: https://catalog.redhat.com/en/search?searchType=Containers&q=Red+Hat+Universal+Base+Image+8
|
||||
# https://hub.docker.com/r/redhat/ubi8
|
||||
container: redhat/ubi8
|
||||
# See: https://catalog.redhat.com/software/containers/ubi8/ubi/5c35984d70cc534b3a3784e?container-tabs=gti
|
||||
container: registry.access.redhat.com/ubi8/ubi:8.8-1032.1692772289
|
||||
# The job outputs link to the outputs of the 'rpmbuild' step
|
||||
outputs:
|
||||
rpm : ${{steps.rpmbuild.outputs.RPM}}
|
||||
@@ -174,14 +190,22 @@ jobs:
|
||||
# fetch-depth must be 0 to use git describe
|
||||
# See: https://github.com/marketplace/actions/checkout
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v6
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
submodules: recursive
|
||||
fetch-depth: 0
|
||||
|
||||
# - name: Setup Golang
|
||||
# uses: actions/setup-go@v5
|
||||
# with:
|
||||
# go-version: 'stable'
|
||||
- name: Setup Golang
|
||||
run: |
|
||||
dnf --assumeyes --disableplugin=subscription-manager --enablerepo ubi-8-appstream-rpms install go-toolset
|
||||
dnf --assumeyes --disableplugin=subscription-manager install \
|
||||
https://repo.almalinux.org/almalinux/8/AppStream/x86_64/os/Packages/go-toolset-1.23.9-1.module_el8.10.0+4000+1ad1b2cc.x86_64.rpm \
|
||||
https://repo.almalinux.org/almalinux/8/AppStream/x86_64/os/Packages/golang-1.23.9-1.module_el8.10.0+4000+1ad1b2cc.x86_64.rpm \
|
||||
https://repo.almalinux.org/almalinux/8/AppStream/x86_64/os/Packages/golang-bin-1.23.9-1.module_el8.10.0+4000+1ad1b2cc.x86_64.rpm \
|
||||
https://repo.almalinux.org/almalinux/8/AppStream/x86_64/os/Packages/golang-src-1.23.9-1.module_el8.10.0+4000+1ad1b2cc.noarch.rpm
|
||||
|
||||
- name: RPM build MetricCollector
|
||||
id: rpmbuild
|
||||
@@ -191,25 +215,24 @@ jobs:
|
||||
|
||||
# See: https://github.com/actions/upload-artifact
|
||||
- name: Save RPM as artifact
|
||||
uses: actions/upload-artifact@v6
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: cc-metric-collector RPM for UBI 8
|
||||
path: ${{ steps.rpmbuild.outputs.RPM }}
|
||||
overwrite: true
|
||||
- name: Save SRPM as artifact
|
||||
uses: actions/upload-artifact@v6
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: cc-metric-collector SRPM for UBI 8
|
||||
path: ${{ steps.rpmbuild.outputs.SRPM }}
|
||||
overwrite: true
|
||||
|
||||
#
|
||||
# Build on Red Hat Universal Base Image (UBI 9) using go-toolset
|
||||
# Build on UBI 9 using go-toolset
|
||||
#
|
||||
UBI-9-RPM-build:
|
||||
runs-on: ubuntu-latest
|
||||
# See: https://catalog.redhat.com/en/search?searchType=Containers&q=Red+Hat+Universal+Base+Image+9
|
||||
# https://hub.docker.com/r/redhat/ubi9
|
||||
# See: https://catalog.redhat.com/software/containers/ubi8/ubi/5c359854d70cc534b3a3784e?container-tabs=gti
|
||||
container: redhat/ubi9
|
||||
# The job outputs link to the outputs of the 'rpmbuild' step
|
||||
# The job outputs link to the outputs of the 'rpmbuild' step
|
||||
@@ -220,20 +243,30 @@ jobs:
|
||||
|
||||
# Use dnf to install development packages
|
||||
- name: Install development packages
|
||||
run: dnf --assumeyes --disableplugin=subscription-manager install rpm-build go-srpm-macros gcc make python39 git wget openssl-devel diffutils delve
|
||||
run: dnf --assumeyes --disableplugin=subscription-manager install rpm-build go-srpm-macros gcc make python39 git wget openssl-devel diffutils delve
|
||||
|
||||
# Checkout git repository and submodules
|
||||
# fetch-depth must be 0 to use git describe
|
||||
# See: https://github.com/marketplace/actions/checkout
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v6
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
submodules: recursive
|
||||
fetch-depth: 0
|
||||
|
||||
# See: https://github.com/marketplace/actions/setup-go-environment
|
||||
# - name: Setup Golang
|
||||
# uses: actions/setup-go@v5
|
||||
# with:
|
||||
# go-version: 'stable'
|
||||
- name: Setup Golang
|
||||
run: |
|
||||
dnf --assumeyes --disableplugin=subscription-manager --enablerepo ubi-9-appstream-rpms install go-toolset
|
||||
dnf --assumeyes --disableplugin=subscription-manager install \
|
||||
https://repo.almalinux.org/almalinux/9/AppStream/x86_64/os/Packages/go-toolset-1.23.9-1.el9_6.x86_64.rpm \
|
||||
https://repo.almalinux.org/almalinux/9/AppStream/x86_64/os/Packages/golang-1.23.9-1.el9_6.x86_64.rpm \
|
||||
https://repo.almalinux.org/almalinux/9/AppStream/x86_64/os/Packages/golang-bin-1.23.9-1.el9_6.x86_64.rpm \
|
||||
https://repo.almalinux.org/almalinux/9/AppStream/x86_64/os/Packages/golang-src-1.23.9-1.el9_6.noarch.rpm \
|
||||
https://repo.almalinux.org/almalinux/9/AppStream/x86_64/os/Packages/golang-race-1.23.9-1.el9_6.x86_64.rpm
|
||||
|
||||
- name: RPM build MetricCollector
|
||||
id: rpmbuild
|
||||
@@ -243,13 +276,13 @@ jobs:
|
||||
|
||||
# See: https://github.com/actions/upload-artifact
|
||||
- name: Save RPM as artifact
|
||||
uses: actions/upload-artifact@v6
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: cc-metric-collector RPM for UBI 9
|
||||
path: ${{ steps.rpmbuild.outputs.RPM }}
|
||||
overwrite: true
|
||||
- name: Save SRPM as artifact
|
||||
uses: actions/upload-artifact@v6
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: cc-metric-collector SRPM for UBI 9
|
||||
path: ${{ steps.rpmbuild.outputs.SRPM }}
|
||||
@@ -275,14 +308,13 @@ jobs:
|
||||
# fetch-depth must be 0 to use git describe
|
||||
# See: https://github.com/marketplace/actions/checkout
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v6
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
submodules: recursive
|
||||
fetch-depth: 0
|
||||
# Use official golang package
|
||||
# See: https://github.com/marketplace/actions/setup-go-environment
|
||||
|
||||
- name: Setup Golang
|
||||
uses: actions/setup-go@v6
|
||||
uses: actions/setup-go@v5
|
||||
with:
|
||||
go-version: 'stable'
|
||||
|
||||
@@ -300,13 +332,13 @@ jobs:
|
||||
echo "DEB=${NEW_DEB_FILE}" >> $GITHUB_OUTPUT
|
||||
# See: https://github.com/actions/upload-artifact
|
||||
- name: Save DEB as artifact
|
||||
uses: actions/upload-artifact@v6
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: cc-metric-collector DEB for Ubuntu 22.04
|
||||
path: ${{ steps.debrename.outputs.DEB }}
|
||||
overwrite: true
|
||||
|
||||
#
|
||||
#
|
||||
# Build on Ubuntu 24.04 using official go package
|
||||
#
|
||||
Ubuntu-noblenumbat-build:
|
||||
@@ -326,14 +358,13 @@ jobs:
|
||||
# fetch-depth must be 0 to use git describe
|
||||
# See: https://github.com/marketplace/actions/checkout
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v6
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
submodules: recursive
|
||||
fetch-depth: 0
|
||||
# Use official golang package
|
||||
# See: https://github.com/marketplace/actions/setup-go-environment
|
||||
|
||||
- name: Setup Golang
|
||||
uses: actions/setup-go@v6
|
||||
uses: actions/setup-go@v5
|
||||
with:
|
||||
go-version: 'stable'
|
||||
|
||||
@@ -351,7 +382,7 @@ jobs:
|
||||
echo "DEB=${NEW_DEB_FILE}" >> $GITHUB_OUTPUT
|
||||
# See: https://github.com/actions/upload-artifact
|
||||
- name: Save DEB as artifact
|
||||
uses: actions/upload-artifact@v6
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: cc-metric-collector DEB for Ubuntu 24.04
|
||||
path: ${{ steps.debrename.outputs.DEB }}
|
||||
@@ -369,48 +400,48 @@ jobs:
|
||||
steps:
|
||||
# See: https://github.com/actions/download-artifact
|
||||
- name: Download AlmaLinux 8 RPM
|
||||
uses: actions/download-artifact@v7
|
||||
uses: actions/download-artifact@v4
|
||||
with:
|
||||
name: cc-metric-collector RPM for AlmaLinux 8
|
||||
- name: Download AlmaLinux 8 SRPM
|
||||
uses: actions/download-artifact@v7
|
||||
uses: actions/download-artifact@v4
|
||||
with:
|
||||
name: cc-metric-collector SRPM for AlmaLinux 8
|
||||
|
||||
- name: Download AlmaLinux 9 RPM
|
||||
uses: actions/download-artifact@v7
|
||||
uses: actions/download-artifact@v4
|
||||
with:
|
||||
name: cc-metric-collector RPM for AlmaLinux 9
|
||||
- name: Download AlmaLinux 9 SRPM
|
||||
uses: actions/download-artifact@v7
|
||||
uses: actions/download-artifact@v4
|
||||
with:
|
||||
name: cc-metric-collector SRPM for AlmaLinux 9
|
||||
|
||||
- name: Download UBI 8 RPM
|
||||
uses: actions/download-artifact@v7
|
||||
uses: actions/download-artifact@v4
|
||||
with:
|
||||
name: cc-metric-collector RPM for UBI 8
|
||||
- name: Download UBI 8 SRPM
|
||||
uses: actions/download-artifact@v7
|
||||
uses: actions/download-artifact@v4
|
||||
with:
|
||||
name: cc-metric-collector SRPM for UBI 8
|
||||
|
||||
- name: Download UBI 9 RPM
|
||||
uses: actions/download-artifact@v7
|
||||
uses: actions/download-artifact@v4
|
||||
with:
|
||||
name: cc-metric-collector RPM for UBI 9
|
||||
- name: Download UBI 9 SRPM
|
||||
uses: actions/download-artifact@v7
|
||||
uses: actions/download-artifact@v4
|
||||
with:
|
||||
name: cc-metric-collector SRPM for UBI 9
|
||||
|
||||
- name: Download Ubuntu 22.04 DEB
|
||||
uses: actions/download-artifact@v7
|
||||
uses: actions/download-artifact@v4
|
||||
with:
|
||||
name: cc-metric-collector DEB for Ubuntu 22.04
|
||||
|
||||
- name: Download Ubuntu 24.04 DEB
|
||||
uses: actions/download-artifact@v7
|
||||
uses: actions/download-artifact@v4
|
||||
with:
|
||||
name: cc-metric-collector DEB for Ubuntu 24.04
|
||||
|
||||
|
||||
186
.github/workflows/runonce.yml
vendored
186
.github/workflows/runonce.yml
vendored
@@ -20,60 +20,25 @@ jobs:
|
||||
# See: https://github.com/marketplace/actions/checkout
|
||||
# Checkout git repository and submodules
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v6
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
submodules: recursive
|
||||
|
||||
# See: https://github.com/marketplace/actions/setup-go-environment
|
||||
- name: Setup Golang
|
||||
uses: actions/setup-go@v6
|
||||
uses: actions/setup-go@v5
|
||||
with:
|
||||
go-version: 'stable'
|
||||
go-version: '1.21'
|
||||
check-latest: true
|
||||
|
||||
- name: Install reviewdog
|
||||
run: |
|
||||
go install github.com/reviewdog/reviewdog/cmd/reviewdog@latest
|
||||
|
||||
# See: https://staticcheck.io
|
||||
- name: Install staticcheck
|
||||
run: |
|
||||
go install honnef.co/go/tools/cmd/staticcheck@latest
|
||||
|
||||
# See: https://golangci-lint.run
|
||||
- name: Install GolangCI-Lint
|
||||
run: |
|
||||
go install github.com/golangci/golangci-lint/v2/cmd/golangci-lint@latest
|
||||
|
||||
- name: Build MetricCollector
|
||||
run: make
|
||||
|
||||
- name: Run MetricCollector once
|
||||
run: ./cc-metric-collector --once --config .github/ci-config.json
|
||||
|
||||
# Running the linter requires likwid.h, which gets downloaded in the build step
|
||||
- name: Static Analysis with GolangCI-Lint and Upload Report with reviewdog
|
||||
run: |
|
||||
golangci-lint run | reviewdog -f=golangci-lint -name "Check golangci-lint on build-latest" -reporter=github-check -filter-mode=nofilter -fail-level none
|
||||
env:
|
||||
REVIEWDOG_GITHUB_API_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
||||
# Running the linter requires likwid.h, which gets downloaded in the build step
|
||||
- name: Run Static Analysis with go vet and Upload Report with reviewdog
|
||||
run: |
|
||||
go vet ./... | reviewdog -f=govet -name "Check govet on build-latest" -reporter=github-check -filter-mode=nofilter -fail-level none
|
||||
env:
|
||||
REVIEWDOG_GITHUB_API_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
||||
# Running the linter requires likwid.h, which gets downloaded in the build step
|
||||
- name: Run Static Analysis with staticcheck and Upload Report with reviewdog
|
||||
run: |
|
||||
staticcheck ./... | reviewdog -f=staticcheck -name "Check staticcheck on build-latest" -reporter=github-check -filter-mode=nofilter -fail-level none
|
||||
env:
|
||||
REVIEWDOG_GITHUB_API_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
||||
#
|
||||
# Build on AlmaLinux 8 using go-toolset
|
||||
# Build on AlmaLinux 8
|
||||
#
|
||||
AlmaLinux8-RPM-build:
|
||||
runs-on: ubuntu-latest
|
||||
@@ -93,14 +58,23 @@ jobs:
|
||||
# fetch-depth must be 0 to use git describe
|
||||
# See: https://github.com/marketplace/actions/checkout
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v6
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
submodules: recursive
|
||||
fetch-depth: 0
|
||||
|
||||
# See: https://github.com/marketplace/actions/setup-go-environment
|
||||
# - name: Setup Golang
|
||||
# uses: actions/setup-go@v5
|
||||
# with:
|
||||
# go-version: 'stable'
|
||||
- name: Setup Golang
|
||||
run: |
|
||||
dnf --assumeyes --disableplugin=subscription-manager --enablerepo appstream install go-toolset
|
||||
dnf --assumeyes --disableplugin=subscription-manager install \
|
||||
https://repo.almalinux.org/almalinux/8/AppStream/x86_64/os/Packages/go-toolset-1.23.9-1.module_el8.10.0+4000+1ad1b2cc.x86_64.rpm \
|
||||
https://repo.almalinux.org/almalinux/8/AppStream/x86_64/os/Packages/golang-1.23.9-1.module_el8.10.0+4000+1ad1b2cc.x86_64.rpm \
|
||||
https://repo.almalinux.org/almalinux/8/AppStream/x86_64/os/Packages/golang-bin-1.23.9-1.module_el8.10.0+4000+1ad1b2cc.x86_64.rpm \
|
||||
https://repo.almalinux.org/almalinux/8/AppStream/x86_64/os/Packages/golang-src-1.23.9-1.module_el8.10.0+4000+1ad1b2cc.noarch.rpm
|
||||
|
||||
- name: RPM build MetricCollector
|
||||
id: rpmbuild
|
||||
@@ -109,7 +83,7 @@ jobs:
|
||||
make RPM
|
||||
|
||||
#
|
||||
# Build on AlmaLinux 9 using go-toolset
|
||||
# Build on AlmaLinux 9
|
||||
#
|
||||
AlmaLinux9-RPM-build:
|
||||
runs-on: ubuntu-latest
|
||||
@@ -129,14 +103,24 @@ jobs:
|
||||
# fetch-depth must be 0 to use git describe
|
||||
# See: https://github.com/marketplace/actions/checkout
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v6
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
submodules: recursive
|
||||
fetch-depth: 0
|
||||
|
||||
# See: https://github.com/marketplace/actions/setup-go-environment
|
||||
# - name: Setup Golang
|
||||
# uses: actions/setup-go@v5
|
||||
# with:
|
||||
# go-version: 'stable'
|
||||
- name: Setup Golang
|
||||
run: |
|
||||
dnf --assumeyes --disableplugin=subscription-manager --enablerepo appstream install go-toolset
|
||||
dnf --assumeyes --disableplugin=subscription-manager install \
|
||||
https://repo.almalinux.org/almalinux/9/AppStream/x86_64/os/Packages/go-toolset-1.23.9-1.el9_6.x86_64.rpm \
|
||||
https://repo.almalinux.org/almalinux/9/AppStream/x86_64/os/Packages/golang-1.23.9-1.el9_6.x86_64.rpm \
|
||||
https://repo.almalinux.org/almalinux/9/AppStream/x86_64/os/Packages/golang-bin-1.23.9-1.el9_6.x86_64.rpm \
|
||||
https://repo.almalinux.org/almalinux/9/AppStream/x86_64/os/Packages/golang-src-1.23.9-1.el9_6.noarch.rpm \
|
||||
https://repo.almalinux.org/almalinux/9/AppStream/x86_64/os/Packages/golang-race-1.23.9-1.el9_6.x86_64.rpm
|
||||
|
||||
- name: RPM build MetricCollector
|
||||
id: rpmbuild
|
||||
@@ -144,49 +128,13 @@ jobs:
|
||||
git config --global --add safe.directory /__w/cc-metric-collector/cc-metric-collector
|
||||
make RPM
|
||||
|
||||
#
|
||||
# Build on AlmaLinux 10 using go-toolset
|
||||
#
|
||||
AlmaLinux10-RPM-build:
|
||||
runs-on: ubuntu-latest
|
||||
# See: https://hub.docker.com/_/almalinux
|
||||
container: almalinux:10
|
||||
# The job outputs link to the outputs of the 'rpmrename' step
|
||||
# Only job outputs can be used in child jobs
|
||||
steps:
|
||||
|
||||
# Use dnf to install development packages
|
||||
- name: Install development packages
|
||||
run: |
|
||||
dnf --assumeyes group install "Development Tools" "RPM Development Tools"
|
||||
dnf --assumeyes install wget openssl-devel diffutils delve which
|
||||
|
||||
# Checkout git repository and submodules
|
||||
# fetch-depth must be 0 to use git describe
|
||||
# See: https://github.com/marketplace/actions/checkout
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v6
|
||||
with:
|
||||
submodules: recursive
|
||||
fetch-depth: 0
|
||||
|
||||
- name: Setup Golang
|
||||
run: |
|
||||
dnf --assumeyes --disableplugin=subscription-manager --enablerepo appstream install go-toolset
|
||||
|
||||
- name: RPM build MetricCollector
|
||||
id: rpmbuild
|
||||
run: |
|
||||
git config --global --add safe.directory /__w/cc-metric-collector/cc-metric-collector
|
||||
make RPM
|
||||
|
||||
#
|
||||
# Build on Red Hat Universal Base Image (UBI 8) using go-toolset
|
||||
# Build on UBI 8 using go-toolset
|
||||
#
|
||||
UBI-8-RPM-build:
|
||||
runs-on: ubuntu-latest
|
||||
# See: https://catalog.redhat.com/en/search?searchType=Containers&q=Red+Hat+Universal+Base+Image+8
|
||||
# https://hub.docker.com/r/redhat/ubi8
|
||||
# See: https://catalog.redhat.com/software/containers/ubi8/ubi/5c359854d70cc534b3a3784e?container-tabs=gti
|
||||
container: redhat/ubi8
|
||||
# The job outputs link to the outputs of the 'rpmbuild' step
|
||||
steps:
|
||||
@@ -199,14 +147,23 @@ jobs:
|
||||
# fetch-depth must be 0 to use git describe
|
||||
# See: https://github.com/marketplace/actions/checkout
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v6
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
submodules: recursive
|
||||
fetch-depth: 0
|
||||
|
||||
# See: https://github.com/marketplace/actions/setup-go-environment
|
||||
# - name: Setup Golang
|
||||
# uses: actions/setup-go@v5
|
||||
# with:
|
||||
# go-version: 'stable'
|
||||
- name: Setup Golang
|
||||
run: |
|
||||
dnf --assumeyes --disableplugin=subscription-manager --enablerepo ubi-8-appstream-rpms install go-toolset
|
||||
dnf --assumeyes --disableplugin=subscription-manager install \
|
||||
https://repo.almalinux.org/almalinux/8/AppStream/x86_64/os/Packages/go-toolset-1.23.9-1.module_el8.10.0+4000+1ad1b2cc.x86_64.rpm \
|
||||
https://repo.almalinux.org/almalinux/8/AppStream/x86_64/os/Packages/golang-1.23.9-1.module_el8.10.0+4000+1ad1b2cc.x86_64.rpm \
|
||||
https://repo.almalinux.org/almalinux/8/AppStream/x86_64/os/Packages/golang-bin-1.23.9-1.module_el8.10.0+4000+1ad1b2cc.x86_64.rpm \
|
||||
https://repo.almalinux.org/almalinux/8/AppStream/x86_64/os/Packages/golang-src-1.23.9-1.module_el8.10.0+4000+1ad1b2cc.noarch.rpm
|
||||
|
||||
- name: RPM build MetricCollector
|
||||
id: rpmbuild
|
||||
@@ -215,12 +172,11 @@ jobs:
|
||||
make RPM
|
||||
|
||||
#
|
||||
# Build on Red Hat Universal Base Image (UBI 9) using go-toolset
|
||||
# Build on UBI 9 using go-toolset
|
||||
#
|
||||
UBI-9-RPM-build:
|
||||
runs-on: ubuntu-latest
|
||||
# See: https://catalog.redhat.com/en/search?searchType=Containers&q=Red+Hat+Universal+Base+Image+9
|
||||
# https://hub.docker.com/r/redhat/ubi9
|
||||
# See: https://catalog.redhat.com/software/containers/ubi8/ubi/5c359854d70cc534b3a3784e?container-tabs=gti
|
||||
container: redhat/ubi9
|
||||
# The job outputs link to the outputs of the 'rpmbuild' step
|
||||
steps:
|
||||
@@ -233,48 +189,24 @@ jobs:
|
||||
# fetch-depth must be 0 to use git describe
|
||||
# See: https://github.com/marketplace/actions/checkout
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v6
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
submodules: recursive
|
||||
fetch-depth: 0
|
||||
|
||||
# See: https://github.com/marketplace/actions/setup-go-environment
|
||||
# - name: Setup Golang
|
||||
# uses: actions/setup-go@v5
|
||||
# with:
|
||||
# go-version: 'stable'
|
||||
- name: Setup Golang
|
||||
run: |
|
||||
dnf --assumeyes --disableplugin=subscription-manager --enablerepo ubi-9-appstream-rpms install go-toolset
|
||||
|
||||
- name: RPM build MetricCollector
|
||||
id: rpmbuild
|
||||
run: |
|
||||
git config --global --add safe.directory /__w/cc-metric-collector/cc-metric-collector
|
||||
make RPM
|
||||
|
||||
#
|
||||
# Build on Red Hat Universal Base Image (UBI 10) using go-toolset
|
||||
#
|
||||
UBI-10-RPM-build:
|
||||
runs-on: ubuntu-latest
|
||||
# See: https://catalog.redhat.com/en/search?searchType=Containers&q=Red+Hat+Universal+Base+Image+10
|
||||
# https://hub.docker.com/r/redhat/ubi10
|
||||
container: redhat/ubi10
|
||||
# The job outputs link to the outputs of the 'rpmbuild' step
|
||||
steps:
|
||||
|
||||
# Use dnf to install development packages
|
||||
- name: Install development packages
|
||||
run: dnf --assumeyes --disableplugin=subscription-manager install rpm-build go-srpm-macros gcc make python3 git wget openssl-devel diffutils delve
|
||||
|
||||
# Checkout git repository and submodules
|
||||
# fetch-depth must be 0 to use git describe
|
||||
# See: https://github.com/marketplace/actions/checkout
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v6
|
||||
with:
|
||||
submodules: recursive
|
||||
fetch-depth: 0
|
||||
|
||||
- name: Setup Golang
|
||||
run: |
|
||||
dnf --assumeyes --disableplugin=subscription-manager --enablerepo ubi-10-for-x86_64-appstream-rpms install go-toolset
|
||||
dnf --assumeyes --disableplugin=subscription-manager install \
|
||||
https://repo.almalinux.org/almalinux/9/AppStream/x86_64/os/Packages/go-toolset-1.23.9-1.el9_6.x86_64.rpm \
|
||||
https://repo.almalinux.org/almalinux/9/AppStream/x86_64/os/Packages/golang-1.23.9-1.el9_6.x86_64.rpm \
|
||||
https://repo.almalinux.org/almalinux/9/AppStream/x86_64/os/Packages/golang-bin-1.23.9-1.el9_6.x86_64.rpm \
|
||||
https://repo.almalinux.org/almalinux/9/AppStream/x86_64/os/Packages/golang-src-1.23.9-1.el9_6.noarch.rpm \
|
||||
https://repo.almalinux.org/almalinux/9/AppStream/x86_64/os/Packages/golang-race-1.23.9-1.el9_6.x86_64.rpm
|
||||
|
||||
- name: RPM build MetricCollector
|
||||
id: rpmbuild
|
||||
@@ -299,14 +231,14 @@ jobs:
|
||||
# fetch-depth must be 0 to use git describe
|
||||
# See: https://github.com/marketplace/actions/checkout
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v6
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
submodules: recursive
|
||||
fetch-depth: 0
|
||||
# Use official golang package
|
||||
# See: https://github.com/marketplace/actions/setup-go-environment
|
||||
- name: Setup Golang
|
||||
uses: actions/setup-go@v6
|
||||
uses: actions/setup-go@v5
|
||||
with:
|
||||
go-version: 'stable'
|
||||
|
||||
@@ -333,14 +265,14 @@ jobs:
|
||||
# fetch-depth must be 0 to use git describe
|
||||
# See: https://github.com/marketplace/actions/checkout
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v6
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
submodules: recursive
|
||||
fetch-depth: 0
|
||||
# Use official golang package
|
||||
# See: https://github.com/marketplace/actions/setup-go-environment
|
||||
- name: Setup Golang
|
||||
uses: actions/setup-go@v6
|
||||
uses: actions/setup-go@v5
|
||||
with:
|
||||
go-version: 'stable'
|
||||
|
||||
|
||||
5
Makefile
5
Makefile
@@ -72,11 +72,6 @@ staticcheck:
|
||||
$(GOBIN) install honnef.co/go/tools/cmd/staticcheck@latest
|
||||
$$($(GOBIN) env GOPATH)/bin/staticcheck ./...
|
||||
|
||||
.PHONY: golangci-lint
|
||||
golangci-lint:
|
||||
$(GOBIN) install github.com/golangci/golangci-lint/v2/cmd/golangci-lint@latest
|
||||
$$($(GOBIN) env GOPATH)/bin/golangci-lint run
|
||||
|
||||
.ONESHELL:
|
||||
.PHONY: RPM
|
||||
RPM: scripts/cc-metric-collector.spec
|
||||
|
||||
@@ -54,11 +54,14 @@ See the component READMEs for their configuration:
|
||||
|
||||
# Installation
|
||||
|
||||
Dependecies:
|
||||
- golang
|
||||
- hwloc
|
||||
|
||||
```
|
||||
$ git clone git@github.com:ClusterCockpit/cc-metric-collector.git
|
||||
$ export CGO_LDFLAGS="-L/path/to/hwloc/lib/dir"
|
||||
$ make (downloads LIKWID, builds it as static library with 'direct' accessmode and copies all required files for the collector)
|
||||
$ go get (requires at least golang 1.16)
|
||||
$ make
|
||||
```
|
||||
|
||||
For more information, see [here](./docs/building.md).
|
||||
|
||||
@@ -14,17 +14,17 @@ import (
|
||||
"os/signal"
|
||||
"syscall"
|
||||
|
||||
"github.com/ClusterCockpit/cc-lib/v2/receivers"
|
||||
"github.com/ClusterCockpit/cc-lib/v2/sinks"
|
||||
"github.com/ClusterCockpit/cc-lib/receivers"
|
||||
"github.com/ClusterCockpit/cc-lib/sinks"
|
||||
"github.com/ClusterCockpit/cc-metric-collector/collectors"
|
||||
|
||||
// "strings"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
ccconf "github.com/ClusterCockpit/cc-lib/v2/ccConfig"
|
||||
cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger"
|
||||
lp "github.com/ClusterCockpit/cc-lib/v2/ccMessage"
|
||||
ccconf "github.com/ClusterCockpit/cc-lib/ccConfig"
|
||||
cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
|
||||
lp "github.com/ClusterCockpit/cc-lib/ccMessage"
|
||||
mr "github.com/ClusterCockpit/cc-metric-collector/internal/metricRouter"
|
||||
mct "github.com/ClusterCockpit/cc-metric-collector/pkg/multiChanTicker"
|
||||
)
|
||||
|
||||
@@ -1,19 +1,6 @@
|
||||
{
|
||||
"cpufreq": {},
|
||||
"cpufreq_cpuinfo": {},
|
||||
"cpustat": {
|
||||
"exclude_metrics": [
|
||||
"cpu_idle"
|
||||
]
|
||||
},
|
||||
"diskstat": {
|
||||
"exclude_metrics": [
|
||||
"disk_total"
|
||||
],
|
||||
"exclude_mounts": [
|
||||
"slurm-tmpfs"
|
||||
]
|
||||
},
|
||||
"gpfs": {
|
||||
"exclude_filesystem": [
|
||||
"test_fs"
|
||||
@@ -34,8 +21,6 @@
|
||||
},
|
||||
"numastats": {},
|
||||
"nvidia": {},
|
||||
"schedstat": {
|
||||
},
|
||||
"tempstat": {
|
||||
"report_max_temperature": true,
|
||||
"report_critical_temperature": true,
|
||||
@@ -53,4 +38,4 @@
|
||||
"topprocs": {
|
||||
"num_procs": 5
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -52,7 +52,6 @@ In contrast to the configuration files for sinks and receivers, the collectors c
|
||||
* [`beegfs_meta`](./beegfsmetaMetric.md)
|
||||
* [`beegfs_storage`](./beegfsstorageMetric.md)
|
||||
* [`rocm_smi`](./rocmsmiMetric.md)
|
||||
* [`slurm_cgroup`](./slurmCgroupMetric.md)
|
||||
|
||||
## Todos
|
||||
|
||||
@@ -67,7 +66,7 @@ A collector reads data from any source, parses it to metrics and submits these m
|
||||
* `Read(duration time.Duration, output chan ccMessage.CCMessage)`: Read, parse and submit data to the `output` channel as [`CCMessage`](https://github.com/ClusterCockpit/cc-lib/blob/main/ccMessage/README.md). If the collector has to measure anything for some duration, use the provided function argument `duration`.
|
||||
* `Close()`: Closes down the collector.
|
||||
|
||||
It is recommended to call `setup()` in the `Init()` function.
|
||||
It is recommanded to call `setup()` in the `Init()` function.
|
||||
|
||||
Finally, the collector needs to be registered in the `collectorManager.go`. There is a list of collectors called `AvailableCollectors` which is a map (`collector_type_string` -> `pointer to MetricCollector interface`). Add a new entry with a descriptive name and the new collector.
|
||||
|
||||
@@ -100,12 +99,11 @@ func (m *SampleCollector) Init(config json.RawMessage) error {
|
||||
}
|
||||
|
||||
m.name = "SampleCollector"
|
||||
if err := m.setup(); err != nil {
|
||||
return fmt.Errorf("%s Init(): setup() call failed: %w", m.name, err)
|
||||
}
|
||||
m.setup()
|
||||
if len(config) > 0 {
|
||||
if err := json.Unmarshal(config, &m.config); err != nil {
|
||||
return fmt.Errorf("%s Init(): json.Unmarshal() call failed: %w", m.name, err)
|
||||
err := json.Unmarshal(config, &m.config)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
m.meta = map[string]string{"source": m.name, "group": "Sample"}
|
||||
|
||||
@@ -17,13 +17,12 @@ import (
|
||||
"os/exec"
|
||||
"os/user"
|
||||
"regexp"
|
||||
"slices"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger"
|
||||
lp "github.com/ClusterCockpit/cc-lib/v2/ccMessage"
|
||||
cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
|
||||
lp "github.com/ClusterCockpit/cc-lib/ccMessage"
|
||||
)
|
||||
|
||||
const DEFAULT_BEEGFS_CMD = "beegfs-ctl"
|
||||
@@ -62,9 +61,7 @@ func (m *BeegfsMetaCollector) Init(config json.RawMessage) error {
|
||||
"rmXA", "setXA", "mirror"}
|
||||
|
||||
m.name = "BeegfsMetaCollector"
|
||||
if err := m.setup(); err != nil {
|
||||
return fmt.Errorf("%s Init(): setup() call failed: %w", m.name, err)
|
||||
}
|
||||
m.setup()
|
||||
m.parallel = true
|
||||
// Set default beegfs-ctl binary
|
||||
|
||||
@@ -81,7 +78,8 @@ func (m *BeegfsMetaCollector) Init(config json.RawMessage) error {
|
||||
//create map with possible variables
|
||||
m.matches = make(map[string]string)
|
||||
for _, value := range nodeMdstat_array {
|
||||
if slices.Contains(m.config.ExcludeMetrics, value) {
|
||||
_, skip := stringArrayContains(m.config.ExcludeMetrics, value)
|
||||
if skip {
|
||||
m.matches["other"] = "0"
|
||||
} else {
|
||||
m.matches["beegfs_cmeta_"+value] = "0"
|
||||
|
||||
@@ -17,13 +17,12 @@ import (
|
||||
"os/exec"
|
||||
"os/user"
|
||||
"regexp"
|
||||
"slices"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger"
|
||||
lp "github.com/ClusterCockpit/cc-lib/v2/ccMessage"
|
||||
cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
|
||||
lp "github.com/ClusterCockpit/cc-lib/ccMessage"
|
||||
)
|
||||
|
||||
// Struct for the collector-specific JSON config
|
||||
@@ -55,9 +54,7 @@ func (m *BeegfsStorageCollector) Init(config json.RawMessage) error {
|
||||
"storInf", "unlnk"}
|
||||
|
||||
m.name = "BeegfsStorageCollector"
|
||||
if err := m.setup(); err != nil {
|
||||
return fmt.Errorf("%s Init(): setup() call failed: %w", m.name, err)
|
||||
}
|
||||
m.setup()
|
||||
m.parallel = true
|
||||
// Set default beegfs-ctl binary
|
||||
|
||||
@@ -74,7 +71,8 @@ func (m *BeegfsStorageCollector) Init(config json.RawMessage) error {
|
||||
//create map with possible variables
|
||||
m.matches = make(map[string]string)
|
||||
for _, value := range storageStat_array {
|
||||
if slices.Contains(m.config.ExcludeMetrics, value) {
|
||||
_, skip := stringArrayContains(m.config.ExcludeMetrics, value)
|
||||
if skip {
|
||||
m.matches["other"] = "0"
|
||||
} else {
|
||||
m.matches["beegfs_cstorage_"+value] = "0"
|
||||
|
||||
@@ -9,12 +9,11 @@ package collectors
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger"
|
||||
lp "github.com/ClusterCockpit/cc-lib/v2/ccMessage"
|
||||
cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
|
||||
lp "github.com/ClusterCockpit/cc-lib/ccMessage"
|
||||
mct "github.com/ClusterCockpit/cc-metric-collector/pkg/multiChanTicker"
|
||||
)
|
||||
|
||||
@@ -48,7 +47,6 @@ var AvailableCollectors = map[string]MetricCollector{
|
||||
"self": new(SelfCollector),
|
||||
"schedstat": new(SchedstatCollector),
|
||||
"nfsiostat": new(NfsIOStatCollector),
|
||||
"slurm_cgroup": new(SlurmCgroupCollector),
|
||||
}
|
||||
|
||||
// Metric collector manager data structure
|
||||
@@ -105,7 +103,7 @@ func (cm *collectorManager) Init(ticker mct.MultiChanTicker, duration time.Durat
|
||||
|
||||
err = collector.Init(collectorCfg)
|
||||
if err != nil {
|
||||
cclog.ComponentError("CollectorManager", fmt.Sprintf("Collector %s initialization failed: %v", collectorName, err))
|
||||
cclog.ComponentError("CollectorManager", "Collector", collectorName, "initialization failed:", err.Error())
|
||||
continue
|
||||
}
|
||||
cclog.ComponentDebug("CollectorManager", "ADD COLLECTOR", collector.Name())
|
||||
|
||||
@@ -17,8 +17,8 @@ import (
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger"
|
||||
lp "github.com/ClusterCockpit/cc-lib/v2/ccMessage"
|
||||
cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
|
||||
lp "github.com/ClusterCockpit/cc-lib/ccMessage"
|
||||
)
|
||||
|
||||
// CPUFreqCollector
|
||||
@@ -41,10 +41,9 @@ func (m *CPUFreqCpuInfoCollector) Init(config json.RawMessage) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
m.setup()
|
||||
|
||||
m.name = "CPUFreqCpuInfoCollector"
|
||||
if err := m.setup(); err != nil {
|
||||
return fmt.Errorf("%s Init(): setup() call failed: %w", m.name, err)
|
||||
}
|
||||
m.parallel = true
|
||||
m.meta = map[string]string{
|
||||
"source": m.name,
|
||||
@@ -57,6 +56,7 @@ func (m *CPUFreqCpuInfoCollector) Init(config json.RawMessage) error {
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to open file '%s': %v", cpuInfoFile, err)
|
||||
}
|
||||
defer file.Close()
|
||||
|
||||
// Collect topology information from file cpuinfo
|
||||
foundFreq := false
|
||||
@@ -86,10 +86,6 @@ func (m *CPUFreqCpuInfoCollector) Init(config json.RawMessage) error {
|
||||
}
|
||||
}
|
||||
|
||||
if err := file.Close(); err != nil {
|
||||
return fmt.Errorf("%s Init(): Call to file.Close() failed: %w", m.name, err)
|
||||
}
|
||||
|
||||
// were all topology information collected?
|
||||
if foundFreq &&
|
||||
len(processor) > 0 &&
|
||||
@@ -144,13 +140,7 @@ func (m *CPUFreqCpuInfoCollector) Read(interval time.Duration, output chan lp.CC
|
||||
fmt.Sprintf("Read(): Failed to open file '%s': %v", cpuInfoFile, err))
|
||||
return
|
||||
}
|
||||
defer func() {
|
||||
if err := file.Close(); err != nil {
|
||||
cclog.ComponentError(
|
||||
m.name,
|
||||
fmt.Sprintf("Read(): Failed to close file '%s': %v", cpuInfoFile, err))
|
||||
}
|
||||
}()
|
||||
defer file.Close()
|
||||
|
||||
processorCounter := 0
|
||||
now := time.Now()
|
||||
|
||||
@@ -16,8 +16,8 @@ import (
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger"
|
||||
lp "github.com/ClusterCockpit/cc-lib/v2/ccMessage"
|
||||
cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
|
||||
lp "github.com/ClusterCockpit/cc-lib/ccMessage"
|
||||
"github.com/ClusterCockpit/cc-metric-collector/pkg/ccTopology"
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
@@ -48,9 +48,7 @@ func (m *CPUFreqCollector) Init(config json.RawMessage) error {
|
||||
}
|
||||
|
||||
m.name = "CPUFreqCollector"
|
||||
if err := m.setup(); err != nil {
|
||||
return fmt.Errorf("%s Init(): setup() call failed: %w", m.name, err)
|
||||
}
|
||||
m.setup()
|
||||
m.parallel = true
|
||||
if len(config) > 0 {
|
||||
err := json.Unmarshal(config, &m.config)
|
||||
|
||||
@@ -12,13 +12,12 @@ import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"os"
|
||||
"slices"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger"
|
||||
lp "github.com/ClusterCockpit/cc-lib/v2/ccMessage"
|
||||
cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
|
||||
lp "github.com/ClusterCockpit/cc-lib/ccMessage"
|
||||
sysconf "github.com/tklauser/go-sysconf"
|
||||
)
|
||||
|
||||
@@ -40,17 +39,10 @@ type CpustatCollector struct {
|
||||
|
||||
func (m *CpustatCollector) Init(config json.RawMessage) error {
|
||||
m.name = "CpustatCollector"
|
||||
if err := m.setup(); err != nil {
|
||||
return fmt.Errorf("%s Init(): setup() call failed: %w", m.name, err)
|
||||
}
|
||||
m.setup()
|
||||
m.parallel = true
|
||||
m.meta = map[string]string{
|
||||
"source": m.name,
|
||||
"group": "CPU",
|
||||
}
|
||||
m.nodetags = map[string]string{
|
||||
"type": "node",
|
||||
}
|
||||
m.meta = map[string]string{"source": m.name, "group": "CPU"}
|
||||
m.nodetags = map[string]string{"type": "node"}
|
||||
if len(config) > 0 {
|
||||
err := json.Unmarshal(config, &m.config)
|
||||
if err != nil {
|
||||
@@ -72,7 +64,14 @@ func (m *CpustatCollector) Init(config json.RawMessage) error {
|
||||
|
||||
m.matches = make(map[string]int)
|
||||
for match, index := range matches {
|
||||
if !slices.Contains(m.config.ExcludeMetrics, match) {
|
||||
doExclude := false
|
||||
for _, exclude := range m.config.ExcludeMetrics {
|
||||
if match == exclude {
|
||||
doExclude = true
|
||||
break
|
||||
}
|
||||
}
|
||||
if !doExclude {
|
||||
m.matches[match] = index
|
||||
}
|
||||
}
|
||||
@@ -80,17 +79,9 @@ func (m *CpustatCollector) Init(config json.RawMessage) error {
|
||||
// Check input file
|
||||
file, err := os.Open(string(CPUSTATFILE))
|
||||
if err != nil {
|
||||
cclog.ComponentError(
|
||||
m.name,
|
||||
fmt.Sprintf("Init(): Failed to open file '%s': %v", string(CPUSTATFILE), err))
|
||||
cclog.ComponentError(m.name, err.Error())
|
||||
}
|
||||
defer func() {
|
||||
if err := file.Close(); err != nil {
|
||||
cclog.ComponentError(
|
||||
m.name,
|
||||
fmt.Sprintf("Init(): Failed to close file '%s': %v", string(CPUSTATFILE), err))
|
||||
}
|
||||
}()
|
||||
defer file.Close()
|
||||
|
||||
// Pre-generate tags for all CPUs
|
||||
num_cpus := 0
|
||||
@@ -164,17 +155,9 @@ func (m *CpustatCollector) Read(interval time.Duration, output chan lp.CCMessage
|
||||
|
||||
file, err := os.Open(string(CPUSTATFILE))
|
||||
if err != nil {
|
||||
cclog.ComponentError(
|
||||
m.name,
|
||||
fmt.Sprintf("Read(): Failed to open file '%s': %v", string(CPUSTATFILE), err))
|
||||
cclog.ComponentError(m.name, err.Error())
|
||||
}
|
||||
defer func() {
|
||||
if err := file.Close(); err != nil {
|
||||
cclog.ComponentError(
|
||||
m.name,
|
||||
fmt.Sprintf("Read(): Failed to close file '%s': %v", string(CPUSTATFILE), err))
|
||||
}
|
||||
}()
|
||||
defer file.Close()
|
||||
|
||||
scanner := bufio.NewScanner(file)
|
||||
for scanner.Scan() {
|
||||
|
||||
@@ -10,15 +10,13 @@ package collectors
|
||||
import (
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"log"
|
||||
"os"
|
||||
"os/exec"
|
||||
"slices"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
lp "github.com/ClusterCockpit/cc-lib/v2/ccMessage"
|
||||
lp "github.com/ClusterCockpit/cc-lib/ccMessage"
|
||||
influx "github.com/influxdata/line-protocol"
|
||||
)
|
||||
|
||||
@@ -51,16 +49,11 @@ func (m *CustomCmdCollector) Init(config json.RawMessage) error {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if err := m.setup(); err != nil {
|
||||
return fmt.Errorf("%s Init(): setup() call failed: %w", m.name, err)
|
||||
}
|
||||
m.setup()
|
||||
for _, c := range m.config.Commands {
|
||||
cmdfields := strings.Fields(c)
|
||||
command := exec.Command(cmdfields[0], cmdfields[1:]...)
|
||||
if err := command.Wait(); err != nil {
|
||||
log.Print(err)
|
||||
continue
|
||||
}
|
||||
command := exec.Command(cmdfields[0], strings.Join(cmdfields[1:], " "))
|
||||
command.Wait()
|
||||
_, err = command.Output()
|
||||
if err == nil {
|
||||
m.commands = append(m.commands, c)
|
||||
@@ -95,11 +88,8 @@ func (m *CustomCmdCollector) Read(interval time.Duration, output chan lp.CCMessa
|
||||
}
|
||||
for _, cmd := range m.commands {
|
||||
cmdfields := strings.Fields(cmd)
|
||||
command := exec.Command(cmdfields[0], cmdfields[1:]...)
|
||||
if err := command.Wait(); err != nil {
|
||||
log.Print(err)
|
||||
continue
|
||||
}
|
||||
command := exec.Command(cmdfields[0], strings.Join(cmdfields[1:], " "))
|
||||
command.Wait()
|
||||
stdout, err := command.Output()
|
||||
if err != nil {
|
||||
log.Print(err)
|
||||
@@ -111,7 +101,8 @@ func (m *CustomCmdCollector) Read(interval time.Duration, output chan lp.CCMessa
|
||||
continue
|
||||
}
|
||||
for _, c := range cmdmetrics {
|
||||
if slices.Contains(m.config.ExcludeMetrics, c.Name()) {
|
||||
_, skip := stringArrayContains(m.config.ExcludeMetrics, c.Name())
|
||||
if skip {
|
||||
continue
|
||||
}
|
||||
|
||||
@@ -130,7 +121,8 @@ func (m *CustomCmdCollector) Read(interval time.Duration, output chan lp.CCMessa
|
||||
continue
|
||||
}
|
||||
for _, f := range fmetrics {
|
||||
if slices.Contains(m.config.ExcludeMetrics, f.Name()) {
|
||||
_, skip := stringArrayContains(m.config.ExcludeMetrics, f.Name())
|
||||
if skip {
|
||||
continue
|
||||
}
|
||||
output <- lp.FromInfluxMetric(f)
|
||||
|
||||
@@ -10,14 +10,13 @@ package collectors
|
||||
import (
|
||||
"bufio"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"os"
|
||||
"strings"
|
||||
"syscall"
|
||||
"time"
|
||||
|
||||
cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger"
|
||||
lp "github.com/ClusterCockpit/cc-lib/v2/ccMessage"
|
||||
cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
|
||||
lp "github.com/ClusterCockpit/cc-lib/ccMessage"
|
||||
)
|
||||
|
||||
const MOUNTFILE = `/proc/self/mounts`
|
||||
@@ -37,9 +36,7 @@ func (m *DiskstatCollector) Init(config json.RawMessage) error {
|
||||
m.name = "DiskstatCollector"
|
||||
m.parallel = true
|
||||
m.meta = map[string]string{"source": m.name, "group": "Disk"}
|
||||
if err := m.setup(); err != nil {
|
||||
return fmt.Errorf("%s Init(): setup() call failed: %w", m.name, err)
|
||||
}
|
||||
m.setup()
|
||||
if len(config) > 0 {
|
||||
if err := json.Unmarshal(config, &m.config); err != nil {
|
||||
return err
|
||||
@@ -57,11 +54,10 @@ func (m *DiskstatCollector) Init(config json.RawMessage) error {
|
||||
}
|
||||
file, err := os.Open(MOUNTFILE)
|
||||
if err != nil {
|
||||
return fmt.Errorf("%s Init(): file open for file \"%s\" failed: %w", m.name, MOUNTFILE, err)
|
||||
}
|
||||
if err := file.Close(); err != nil {
|
||||
return fmt.Errorf("%s Init(): file close for file \"%s\" failed: %w", m.name, MOUNTFILE, err)
|
||||
cclog.ComponentError(m.name, err.Error())
|
||||
return err
|
||||
}
|
||||
defer file.Close()
|
||||
m.init = true
|
||||
return nil
|
||||
}
|
||||
@@ -73,18 +69,10 @@ func (m *DiskstatCollector) Read(interval time.Duration, output chan lp.CCMessag
|
||||
|
||||
file, err := os.Open(MOUNTFILE)
|
||||
if err != nil {
|
||||
cclog.ComponentError(
|
||||
m.name,
|
||||
fmt.Sprintf("Read(): Failed to open file '%s': %v", MOUNTFILE, err))
|
||||
cclog.ComponentError(m.name, err.Error())
|
||||
return
|
||||
}
|
||||
defer func() {
|
||||
if err := file.Close(); err != nil {
|
||||
cclog.ComponentError(
|
||||
m.name,
|
||||
fmt.Sprintf("Read(): Failed to close file '%s': %v", MOUNTFILE, err))
|
||||
}
|
||||
}()
|
||||
defer file.Close()
|
||||
|
||||
part_max_used := uint64(0)
|
||||
scanner := bufio.NewScanner(file)
|
||||
@@ -105,7 +93,7 @@ mountLoop:
|
||||
continue
|
||||
}
|
||||
|
||||
mountPath := strings.ReplaceAll(linefields[1], `\040`, " ")
|
||||
mountPath := strings.Replace(linefields[1], `\040`, " ", -1)
|
||||
|
||||
for _, excl := range m.config.ExcludeMounts {
|
||||
if strings.Contains(mountPath, excl) {
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -14,18 +14,12 @@ hugo_path: docs/reference/cc-metric-collector/collectors/gpfs.md
|
||||
```json
|
||||
"gpfs": {
|
||||
"mmpmon_path": "/path/to/mmpmon",
|
||||
"use_sudo": "true",
|
||||
"exclude_filesystem": [
|
||||
"fs1"
|
||||
],
|
||||
"exclude_metrics": [
|
||||
"gpfs_bytes_written"
|
||||
],
|
||||
"send_abs_values": true,
|
||||
"send_diff_values": true,
|
||||
"send_derived_values": true,
|
||||
"send_bandwidths": true,
|
||||
"send_total_values": true,
|
||||
"send_bandwidths": true
|
||||
"send_derived_values": true
|
||||
}
|
||||
```
|
||||
|
||||
@@ -34,50 +28,33 @@ GPFS / IBM Spectrum Scale filesystems.
|
||||
|
||||
The reported filesystems can be filtered with the `exclude_filesystem` option
|
||||
in the configuration.
|
||||
Individual metrics can be disabled for reporting using option `exclude_metrics`.
|
||||
|
||||
The path to the `mmpmon` command can be configured with the `mmpmon_path` option
|
||||
in the configuration. If nothing is set, the collector searches in `$PATH` for `mmpmon`.
|
||||
|
||||
If cc-metric-collector is run as non-root, password-less `sudo` can be enabled with `use_sudo`.
|
||||
Because `mmpmon` is by default only executable as root, the Go procedure to
|
||||
search for it in `$PATH` will fail. If you use `sudo`, you must specify the
|
||||
complete path for `mmpmon` using the parameter `mmpmon_path`.
|
||||
|
||||
|
||||
Metrics:
|
||||
* `gpfs_bytes_read` (if `send_abs_values == true`)
|
||||
* `gpfs_bytes_written` (if `send_abs_values == true`)
|
||||
* `gpfs_num_opens` (if `send_abs_values == true`)
|
||||
* `gpfs_num_closes` (if `send_abs_values == true`)
|
||||
* `gpfs_num_reads` (if `send_abs_values == true`)
|
||||
* `gpfs_num_writes` (if `send_abs_values == true`)
|
||||
* `gpfs_num_readdirs` (if `send_abs_values == true`)
|
||||
* `gpfs_num_inode_updates` (if `send_abs_values == true`)
|
||||
* `gpfs_bytes_read_diff` (if `send_diff_values == true`)
|
||||
* `gpfs_bytes_written_diff` (if `send_diff_values == true`)
|
||||
* `gpfs_num_opens_diff` (if `send_diff_values == true`)
|
||||
* `gpfs_num_closes_diff` (if `send_diff_values == true`)
|
||||
* `gpfs_num_reads_diff` (if `send_diff_values == true`)
|
||||
* `gpfs_num_writes_diff` (if `send_diff_values == true`)
|
||||
* `gpfs_num_readdirs_diff` (if `send_diff_values == true`)
|
||||
* `gpfs_num_inode_updates_diff` (if `send_diff_values == true`)
|
||||
* `gpfs_bw_read` (if `send_derived_values == true` or `send_bandwidths == true`)
|
||||
* `gpfs_bw_write` (if `send_derived_values == true` or `send_bandwidths == true`)
|
||||
* `gpfs_bytes_read`
|
||||
* `gpfs_bytes_written`
|
||||
* `gpfs_num_opens`
|
||||
* `gpfs_num_closes`
|
||||
* `gpfs_num_reads`
|
||||
* `gpfs_num_writes`
|
||||
* `gpfs_num_readdirs`
|
||||
* `gpfs_num_inode_updates`
|
||||
* `gpfs_opens_rate` (if `send_derived_values == true`)
|
||||
* `gpfs_closes_rate` (if `send_derived_values == true`)
|
||||
* `gpfs_reads_rate` (if `send_derived_values == true`)
|
||||
* `gpfs_writes_rate` (if `send_derived_values == true`)
|
||||
* `gpfs_readdirs_rate` (if `send_derived_values == true`)
|
||||
* `gpfs_inode_updates_rate` (if `send_derived_values == true`)
|
||||
* `gpfs_bytes_total = gpfs_bytes_read + gpfs_bytes_written` (if `send_total_values == true` and `send_abs_values == true`)
|
||||
* `gpfs_bytes_total_diff` (if `send_total_values == true` and `send_diff_values == true`)
|
||||
* `gpfs_bw_total` ((if `send_total_values == true` and `send_derived_values == true`) or `send_bandwidths == true`)
|
||||
* `gpfs_iops = gpfs_num_reads + gpfs_num_writes` (if `send_total_values == true` and `send_abs_values == true`)
|
||||
* `gpfs_iops_diff` (if `send_total_values == true` and `send_diff_values == true`)
|
||||
* `gpfs_bytes_total = gpfs_bytes_read + gpfs_bytes_written` (if `send_total_values == true`)
|
||||
* `gpfs_iops = gpfs_num_reads + gpfs_num_writes` (if `send_total_values == true`)
|
||||
* `gpfs_iops_rate` (if `send_total_values == true` and `send_derived_values == true`)
|
||||
* `gpfs_metaops = gpfs_num_inode_updates + gpfs_num_closes + gpfs_num_opens + gpfs_num_readdirs` (if `send_total_values == true` and `send_abs_values == true`)
|
||||
* `gpfs_metaops_diff` (if `send_total_values == true` and `send_diff_values == true`)
|
||||
* `gpfs_metaops = gpfs_num_inode_updates + gpfs_num_closes + gpfs_num_opens + gpfs_num_readdirs` (if `send_total_values == true`)
|
||||
* `gpfs_metaops_rate` (if `send_total_values == true` and `send_derived_values == true`)
|
||||
* `gpfs_bw_read` (if `send_bandwidths == true`)
|
||||
* `gpfs_bw_write` (if `send_bandwidths == true`)
|
||||
* `gpfs_bw_total` (if `send_bandwidths == true` and `send_total_values == true`)
|
||||
|
||||
The collector adds a `filesystem` tag to all metrics
|
||||
|
||||
@@ -11,8 +11,8 @@ import (
|
||||
"fmt"
|
||||
"os"
|
||||
|
||||
cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger"
|
||||
lp "github.com/ClusterCockpit/cc-lib/v2/ccMessage"
|
||||
cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
|
||||
lp "github.com/ClusterCockpit/cc-lib/ccMessage"
|
||||
"golang.org/x/sys/unix"
|
||||
|
||||
"encoding/json"
|
||||
@@ -65,9 +65,7 @@ func (m *InfinibandCollector) Init(config json.RawMessage) error {
|
||||
|
||||
var err error
|
||||
m.name = "InfinibandCollector"
|
||||
if err := m.setup(); err != nil {
|
||||
return fmt.Errorf("%s Init(): setup() call failed: %w", m.name, err)
|
||||
}
|
||||
m.setup()
|
||||
m.parallel = true
|
||||
m.meta = map[string]string{
|
||||
"source": m.name,
|
||||
|
||||
@@ -11,28 +11,27 @@ import (
|
||||
"bufio"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"os"
|
||||
"slices"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger"
|
||||
lp "github.com/ClusterCockpit/cc-lib/v2/ccMessage"
|
||||
cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
|
||||
lp "github.com/ClusterCockpit/cc-lib/ccMessage"
|
||||
)
|
||||
|
||||
// Konstante für den Pfad zu /proc/diskstats
|
||||
const IOSTATFILE = `/proc/diskstats`
|
||||
|
||||
type IOstatCollectorConfig struct {
|
||||
ExcludeMetrics []string `json:"exclude_metrics,omitempty"`
|
||||
// Neues Feld zum Ausschließen von Devices per JSON-Konfiguration
|
||||
ExcludeDevices []string `json:"exclude_devices,omitempty"`
|
||||
}
|
||||
|
||||
type IOstatCollectorEntry struct {
|
||||
currentValues map[string]int64
|
||||
lastValues map[string]int64
|
||||
tags map[string]string
|
||||
lastValues map[string]int64
|
||||
tags map[string]string
|
||||
}
|
||||
|
||||
type IOstatCollector struct {
|
||||
@@ -47,9 +46,7 @@ func (m *IOstatCollector) Init(config json.RawMessage) error {
|
||||
m.name = "IOstatCollector"
|
||||
m.parallel = true
|
||||
m.meta = map[string]string{"source": m.name, "group": "Disk"}
|
||||
if err := m.setup(); err != nil {
|
||||
return fmt.Errorf("%s Init(): setup() call failed: %w", m.name, err)
|
||||
}
|
||||
m.setup()
|
||||
if len(config) > 0 {
|
||||
err = json.Unmarshal(config, &m.config)
|
||||
if err != nil {
|
||||
@@ -79,7 +76,7 @@ func (m *IOstatCollector) Init(config json.RawMessage) error {
|
||||
m.devices = make(map[string]IOstatCollectorEntry)
|
||||
m.matches = make(map[string]int)
|
||||
for k, v := range matches {
|
||||
if !slices.Contains(m.config.ExcludeMetrics, k) {
|
||||
if _, skip := stringArrayContains(m.config.ExcludeMetrics, k); !skip {
|
||||
m.matches[k] = v
|
||||
}
|
||||
}
|
||||
@@ -88,8 +85,10 @@ func (m *IOstatCollector) Init(config json.RawMessage) error {
|
||||
}
|
||||
file, err := os.Open(IOSTATFILE)
|
||||
if err != nil {
|
||||
return fmt.Errorf("%s Init(): Failed to open file \"%s\": %w", m.name, IOSTATFILE, err)
|
||||
cclog.ComponentError(m.name, err.Error())
|
||||
return err
|
||||
}
|
||||
defer file.Close()
|
||||
|
||||
scanner := bufio.NewScanner(file)
|
||||
for scanner.Scan() {
|
||||
@@ -103,36 +102,21 @@ func (m *IOstatCollector) Init(config json.RawMessage) error {
|
||||
if strings.Contains(device, "loop") {
|
||||
continue
|
||||
}
|
||||
if slices.Contains(m.config.ExcludeDevices, device) {
|
||||
if _, skip := stringArrayContains(m.config.ExcludeDevices, device); skip {
|
||||
continue
|
||||
}
|
||||
currentValues := make(map[string]int64)
|
||||
lastValues := make(map[string]int64)
|
||||
values := make(map[string]int64)
|
||||
for m := range m.matches {
|
||||
currentValues[m] = 0
|
||||
lastValues[m] = 0
|
||||
}
|
||||
for name, idx := range m.matches {
|
||||
if idx < len(linefields) {
|
||||
if value, err := strconv.ParseInt(linefields[idx], 0, 64); err == nil {
|
||||
currentValues[name] = value
|
||||
lastValues[name] = value // Set last to current for first read
|
||||
}
|
||||
}
|
||||
values[m] = 0
|
||||
}
|
||||
m.devices[device] = IOstatCollectorEntry{
|
||||
tags: map[string]string{
|
||||
"device": device,
|
||||
"type": "node",
|
||||
},
|
||||
currentValues: currentValues,
|
||||
lastValues: lastValues,
|
||||
lastValues: values,
|
||||
}
|
||||
}
|
||||
if err := file.Close(); err != nil {
|
||||
return fmt.Errorf("%s Init(): Failed to close file \"%s\": %w", m.name, IOSTATFILE, err)
|
||||
}
|
||||
|
||||
m.init = true
|
||||
return err
|
||||
}
|
||||
@@ -144,18 +128,10 @@ func (m *IOstatCollector) Read(interval time.Duration, output chan lp.CCMessage)
|
||||
|
||||
file, err := os.Open(IOSTATFILE)
|
||||
if err != nil {
|
||||
cclog.ComponentError(
|
||||
m.name,
|
||||
fmt.Sprintf("Read(): Failed to open file '%s': %v", IOSTATFILE, err))
|
||||
cclog.ComponentError(m.name, err.Error())
|
||||
return
|
||||
}
|
||||
defer func() {
|
||||
if err := file.Close(); err != nil {
|
||||
cclog.ComponentError(
|
||||
m.name,
|
||||
fmt.Sprintf("Read(): Failed to close file '%s': %v", IOSTATFILE, err))
|
||||
}
|
||||
}()
|
||||
defer file.Close()
|
||||
|
||||
scanner := bufio.NewScanner(file)
|
||||
for scanner.Scan() {
|
||||
@@ -171,28 +147,24 @@ func (m *IOstatCollector) Read(interval time.Duration, output chan lp.CCMessage)
|
||||
if strings.Contains(device, "loop") {
|
||||
continue
|
||||
}
|
||||
if slices.Contains(m.config.ExcludeDevices, device) {
|
||||
if _, skip := stringArrayContains(m.config.ExcludeDevices, device); skip {
|
||||
continue
|
||||
}
|
||||
if _, ok := m.devices[device]; !ok {
|
||||
continue
|
||||
}
|
||||
// Update current and last values
|
||||
entry := m.devices[device]
|
||||
for name, idx := range m.matches {
|
||||
if idx < len(linefields) {
|
||||
x, err := strconv.ParseInt(linefields[idx], 0, 64)
|
||||
if err == nil {
|
||||
// Calculate difference using previous current and new value
|
||||
diff := x - entry.currentValues[name]
|
||||
y, err := lp.NewMetric(name, entry.tags, m.meta, int(diff), time.Now())
|
||||
diff := x - entry.lastValues[name]
|
||||
y, err := lp.NewMessage(name, entry.tags, m.meta, map[string]interface{}{"value": int(diff)}, time.Now())
|
||||
if err == nil {
|
||||
output <- y
|
||||
}
|
||||
// Update last to previous current, and current to new value
|
||||
entry.lastValues[name] = entry.currentValues[name]
|
||||
entry.currentValues[name] = x
|
||||
}
|
||||
entry.lastValues[name] = x
|
||||
}
|
||||
}
|
||||
m.devices[device] = entry
|
||||
|
||||
@@ -14,13 +14,14 @@ import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"log"
|
||||
"os/exec"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger"
|
||||
lp "github.com/ClusterCockpit/cc-lib/v2/ccMessage"
|
||||
cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
|
||||
lp "github.com/ClusterCockpit/cc-lib/ccMessage"
|
||||
)
|
||||
|
||||
const IPMISENSORS_PATH = `ipmi-sensors`
|
||||
@@ -43,9 +44,7 @@ func (m *IpmiCollector) Init(config json.RawMessage) error {
|
||||
}
|
||||
|
||||
m.name = "IpmiCollector"
|
||||
if err := m.setup(); err != nil {
|
||||
return fmt.Errorf("%s Init(): setup() call failed: %w", m.name, err)
|
||||
}
|
||||
m.setup()
|
||||
m.parallel = true
|
||||
m.meta = map[string]string{
|
||||
"source": m.name,
|
||||
@@ -117,16 +116,15 @@ func (m *IpmiCollector) readIpmiTool(cmd string, output chan lp.CCMessage) {
|
||||
}
|
||||
v, err := strconv.ParseFloat(strings.TrimSpace(lv[1]), 64)
|
||||
if err == nil {
|
||||
name := strings.ToLower(strings.ReplaceAll(strings.TrimSpace(lv[0]), " ", "_"))
|
||||
name := strings.ToLower(strings.Replace(strings.TrimSpace(lv[0]), " ", "_", -1))
|
||||
unit := strings.TrimSpace(lv[2])
|
||||
switch unit {
|
||||
case "Volts":
|
||||
if unit == "Volts" {
|
||||
unit = "Volts"
|
||||
case "degrees C":
|
||||
} else if unit == "degrees C" {
|
||||
unit = "degC"
|
||||
case "degrees F":
|
||||
} else if unit == "degrees F" {
|
||||
unit = "degF"
|
||||
case "Watts":
|
||||
} else if unit == "Watts" {
|
||||
unit = "Watts"
|
||||
}
|
||||
|
||||
@@ -152,29 +150,22 @@ func (m *IpmiCollector) readIpmiTool(cmd string, output chan lp.CCMessage) {
|
||||
|
||||
func (m *IpmiCollector) readIpmiSensors(cmd string, output chan lp.CCMessage) {
|
||||
|
||||
// Setup ipmisensors command
|
||||
command := exec.Command(cmd, "--comma-separated-output", "--sdr-cache-recreate")
|
||||
stdout, _ := command.StdoutPipe()
|
||||
errBuf := new(bytes.Buffer)
|
||||
command.Stderr = errBuf
|
||||
|
||||
// start command
|
||||
if err := command.Start(); err != nil {
|
||||
cclog.ComponentError(
|
||||
m.name,
|
||||
fmt.Sprintf("readIpmiSensors(): Failed to start command \"%s\": %v", command.String(), err),
|
||||
)
|
||||
command.Wait()
|
||||
stdout, err := command.Output()
|
||||
if err != nil {
|
||||
log.Print(err)
|
||||
return
|
||||
}
|
||||
|
||||
// Read command output
|
||||
scanner := bufio.NewScanner(stdout)
|
||||
for scanner.Scan() {
|
||||
lv := strings.Split(scanner.Text(), ",")
|
||||
ll := strings.Split(string(stdout), "\n")
|
||||
|
||||
for _, line := range ll {
|
||||
lv := strings.Split(line, ",")
|
||||
if len(lv) > 3 {
|
||||
v, err := strconv.ParseFloat(lv[3], 64)
|
||||
if err == nil {
|
||||
name := strings.ToLower(strings.ReplaceAll(lv[1], " ", "_"))
|
||||
name := strings.ToLower(strings.Replace(lv[1], " ", "_", -1))
|
||||
y, err := lp.NewMessage(name, map[string]string{"type": "node"}, m.meta, map[string]interface{}{"value": v}, time.Now())
|
||||
if err == nil {
|
||||
if len(lv) > 4 {
|
||||
@@ -185,18 +176,6 @@ func (m *IpmiCollector) readIpmiSensors(cmd string, output chan lp.CCMessage) {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Wait for command end
|
||||
if err := command.Wait(); err != nil {
|
||||
errMsg, _ := io.ReadAll(errBuf)
|
||||
cclog.ComponentError(
|
||||
m.name,
|
||||
fmt.Sprintf("readIpmiSensors(): Failed to wait for the end of command \"%s\": %v\n", command.String(), err),
|
||||
)
|
||||
cclog.ComponentError(m.name, fmt.Sprintf("readIpmiSensors(): command stderr: \"%s\"\n", strings.TrimSpace(string(errMsg))))
|
||||
return
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
func (m *IpmiCollector) Read(interval time.Duration, output chan lp.CCMessage) {
|
||||
|
||||
@@ -12,6 +12,12 @@ package collectors
|
||||
#cgo LDFLAGS: -Wl,--unresolved-symbols=ignore-in-object-files
|
||||
#include <stdlib.h>
|
||||
#include <likwid.h>
|
||||
|
||||
int _HPMaddThread(int cpuid) {
|
||||
return HPMaddThread(cpuid);
|
||||
}
|
||||
|
||||
|
||||
*/
|
||||
import "C"
|
||||
|
||||
@@ -19,7 +25,6 @@ import (
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"maps"
|
||||
"math"
|
||||
"os"
|
||||
"os/signal"
|
||||
@@ -32,8 +37,8 @@ import (
|
||||
"time"
|
||||
"unsafe"
|
||||
|
||||
cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger"
|
||||
lp "github.com/ClusterCockpit/cc-lib/v2/ccMessage"
|
||||
cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
|
||||
lp "github.com/ClusterCockpit/cc-lib/ccMessage"
|
||||
agg "github.com/ClusterCockpit/cc-metric-collector/internal/metricAggregator"
|
||||
topo "github.com/ClusterCockpit/cc-metric-collector/pkg/ccTopology"
|
||||
"github.com/NVIDIA/go-nvml/pkg/dl"
|
||||
@@ -188,7 +193,7 @@ func getBaseFreq() float64 {
|
||||
for _, f := range files {
|
||||
buffer, err := os.ReadFile(f)
|
||||
if err == nil {
|
||||
data := strings.ReplaceAll(string(buffer), "\n", "")
|
||||
data := strings.Replace(string(buffer), "\n", "", -1)
|
||||
x, err := strconv.ParseInt(data, 0, 64)
|
||||
if err == nil {
|
||||
freq = float64(x)
|
||||
@@ -231,13 +236,13 @@ func (m *LikwidCollector) Init(config json.RawMessage) error {
|
||||
|
||||
if m.config.ForceOverwrite {
|
||||
cclog.ComponentDebug(m.name, "Set LIKWID_FORCE=1")
|
||||
if err := os.Setenv("LIKWID_FORCE", "1"); err != nil {
|
||||
return fmt.Errorf("error setting environment variable LIKWID_FORCE=1: %v", err)
|
||||
}
|
||||
}
|
||||
if err := m.setup(); err != nil {
|
||||
return fmt.Errorf("%s Init(): setup() call failed: %w", m.name, err)
|
||||
os.Setenv("LIKWID_FORCE", "1")
|
||||
}
|
||||
m.setup()
|
||||
major := C.likwid_getMajorVersion()
|
||||
minor := C.likwid_getMinorVersion()
|
||||
bugfix := C.likwid_getBugfixVersion()
|
||||
cclog.ComponentDebug(m.name, fmt.Sprintf("Using LIKWID library %d.%d.%d at %s with %s access", major, minor, bugfix, m.config.LibraryPath, m.config.AccessMode))
|
||||
|
||||
m.meta = map[string]string{"group": "PerfCounter"}
|
||||
cclog.ComponentDebug(m.name, "Get cpulist and init maps and lists")
|
||||
@@ -321,14 +326,7 @@ func (m *LikwidCollector) Init(config json.RawMessage) error {
|
||||
case "accessdaemon":
|
||||
if len(m.config.DaemonPath) > 0 {
|
||||
p := os.Getenv("PATH")
|
||||
if len(p) > 0 {
|
||||
p = m.config.DaemonPath + ":" + p
|
||||
} else {
|
||||
p = m.config.DaemonPath
|
||||
}
|
||||
if err := os.Setenv("PATH", p); err != nil {
|
||||
return fmt.Errorf("error setting environment variable PATH=%s: %v", p, err)
|
||||
}
|
||||
os.Setenv("PATH", m.config.DaemonPath+":"+p)
|
||||
}
|
||||
C.HPMmode(1)
|
||||
retCode := C.HPMinit()
|
||||
@@ -339,7 +337,7 @@ func (m *LikwidCollector) Init(config json.RawMessage) error {
|
||||
for _, c := range m.cpulist {
|
||||
m.measureThread.Call(
|
||||
func() {
|
||||
retCode := C.HPMaddThread(c)
|
||||
retCode := C._HPMaddThread(C.int(c))
|
||||
if retCode != 0 {
|
||||
err := fmt.Errorf("C.HPMaddThread(%v) failed with return code %v", c, retCode)
|
||||
cclog.ComponentError(m.name, err.Error())
|
||||
@@ -387,18 +385,10 @@ func (m *LikwidCollector) takeMeasurement(evidx int, evset LikwidEventsetConfig,
|
||||
// Watch changes for the lock file ()
|
||||
watcher, err := fsnotify.NewWatcher()
|
||||
if err != nil {
|
||||
cclog.ComponentError(
|
||||
m.name,
|
||||
fmt.Sprintf("takeMeasurement(): Failed to create a new fsnotify.Watcher: %v", err))
|
||||
cclog.ComponentError(m.name, err.Error())
|
||||
return true, err
|
||||
}
|
||||
defer func() {
|
||||
if err := watcher.Close(); err != nil {
|
||||
cclog.ComponentError(
|
||||
m.name,
|
||||
fmt.Sprintf("takeMeasurement(): Failed to close fsnotify.Watcher: %v", err))
|
||||
}
|
||||
}()
|
||||
defer watcher.Close()
|
||||
if len(m.config.LockfilePath) > 0 {
|
||||
// Check if the lock file exists
|
||||
info, err := os.Stat(m.config.LockfilePath)
|
||||
@@ -408,9 +398,7 @@ func (m *LikwidCollector) takeMeasurement(evidx int, evset LikwidEventsetConfig,
|
||||
if createErr != nil {
|
||||
return true, fmt.Errorf("failed to create lock file: %v", createErr)
|
||||
}
|
||||
if err := file.Close(); err != nil {
|
||||
return true, fmt.Errorf("failed to close lock file: %v", err)
|
||||
}
|
||||
file.Close()
|
||||
info, err = os.Stat(m.config.LockfilePath) // Recheck the file after creation
|
||||
}
|
||||
if err != nil {
|
||||
@@ -450,9 +438,13 @@ func (m *LikwidCollector) takeMeasurement(evidx int, evset LikwidEventsetConfig,
|
||||
case e := <-watcher.Events:
|
||||
ret = -1
|
||||
if e.Op != fsnotify.Chmod {
|
||||
C.HPMfinalize()
|
||||
C.HPMinit()
|
||||
ret = C.perfmon_init(C.int(len(m.cpulist)), &m.cpulist[0])
|
||||
}
|
||||
default:
|
||||
C.HPMfinalize()
|
||||
C.HPMinit()
|
||||
ret = C.perfmon_init(C.int(len(m.cpulist)), &m.cpulist[0])
|
||||
}
|
||||
if ret != 0 {
|
||||
@@ -770,7 +762,9 @@ func (m *LikwidCollector) calcGlobalMetrics(groups []LikwidEventsetConfig, inter
|
||||
// Here we generate parameter list
|
||||
params := make(map[string]float64)
|
||||
for _, evset := range groups {
|
||||
maps.Copy(params, evset.metrics[tid])
|
||||
for mname, mres := range evset.metrics[tid] {
|
||||
params[mname] = mres
|
||||
}
|
||||
}
|
||||
params["gotime"] = interval.Seconds()
|
||||
// Evaluate the metric
|
||||
@@ -833,21 +827,13 @@ func (m *LikwidCollector) ReadThread(interval time.Duration, output chan lp.CCMe
|
||||
|
||||
if !skip {
|
||||
// read measurements and derive event set metrics
|
||||
err = m.calcEventsetMetrics(e, interval, output)
|
||||
if err != nil {
|
||||
cclog.ComponentError(m.name, err.Error())
|
||||
return
|
||||
}
|
||||
m.calcEventsetMetrics(e, interval, output)
|
||||
groups = append(groups, e)
|
||||
}
|
||||
}
|
||||
if len(groups) > 0 {
|
||||
// calculate global metrics
|
||||
err = m.calcGlobalMetrics(groups, interval, output)
|
||||
if err != nil {
|
||||
cclog.ComponentError(m.name, err.Error())
|
||||
return
|
||||
}
|
||||
m.calcGlobalMetrics(groups, interval, output)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -11,13 +11,12 @@ import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"os"
|
||||
"slices"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger"
|
||||
lp "github.com/ClusterCockpit/cc-lib/v2/ccMessage"
|
||||
cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
|
||||
lp "github.com/ClusterCockpit/cc-lib/ccMessage"
|
||||
)
|
||||
|
||||
// LoadavgCollector collects:
|
||||
@@ -43,9 +42,7 @@ type LoadavgCollector struct {
|
||||
func (m *LoadavgCollector) Init(config json.RawMessage) error {
|
||||
m.name = "LoadavgCollector"
|
||||
m.parallel = true
|
||||
if err := m.setup(); err != nil {
|
||||
return fmt.Errorf("%s Init(): setup() call failed: %w", m.name, err)
|
||||
}
|
||||
m.setup()
|
||||
if len(config) > 0 {
|
||||
err := json.Unmarshal(config, &m.config)
|
||||
if err != nil {
|
||||
@@ -67,10 +64,10 @@ func (m *LoadavgCollector) Init(config json.RawMessage) error {
|
||||
m.proc_skips = make([]bool, len(m.proc_matches))
|
||||
|
||||
for i, name := range m.load_matches {
|
||||
m.load_skips[i] = slices.Contains(m.config.ExcludeMetrics, name)
|
||||
_, m.load_skips[i] = stringArrayContains(m.config.ExcludeMetrics, name)
|
||||
}
|
||||
for i, name := range m.proc_matches {
|
||||
m.proc_skips[i] = slices.Contains(m.config.ExcludeMetrics, name)
|
||||
_, m.proc_skips[i] = stringArrayContains(m.config.ExcludeMetrics, name)
|
||||
}
|
||||
m.init = true
|
||||
return nil
|
||||
|
||||
@@ -13,13 +13,12 @@ import (
|
||||
"fmt"
|
||||
"os/exec"
|
||||
"os/user"
|
||||
"slices"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger"
|
||||
lp "github.com/ClusterCockpit/cc-lib/v2/ccMessage"
|
||||
cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
|
||||
lp "github.com/ClusterCockpit/cc-lib/ccMessage"
|
||||
)
|
||||
|
||||
const LUSTRE_SYSFS = `/sys/fs/lustre`
|
||||
@@ -62,6 +61,7 @@ func (m *LustreCollector) getDeviceDataCommand(device string) []string {
|
||||
} else {
|
||||
command = exec.Command(m.lctl, LCTL_OPTION, statsfile)
|
||||
}
|
||||
command.Wait()
|
||||
stdout, _ := command.Output()
|
||||
return strings.Split(string(stdout), "\n")
|
||||
}
|
||||
@@ -302,9 +302,7 @@ func (m *LustreCollector) Init(config json.RawMessage) error {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if err := m.setup(); err != nil {
|
||||
return fmt.Errorf("%s Init(): setup() call failed: %w", m.name, err)
|
||||
}
|
||||
m.setup()
|
||||
m.tags = map[string]string{"type": "node"}
|
||||
m.meta = map[string]string{"source": m.name, "group": "Lustre"}
|
||||
|
||||
@@ -341,21 +339,21 @@ func (m *LustreCollector) Init(config json.RawMessage) error {
|
||||
m.definitions = []LustreMetricDefinition{}
|
||||
if m.config.SendAbsoluteValues {
|
||||
for _, def := range LustreAbsMetrics {
|
||||
if !slices.Contains(m.config.ExcludeMetrics, def.name) {
|
||||
if _, skip := stringArrayContains(m.config.ExcludeMetrics, def.name); !skip {
|
||||
m.definitions = append(m.definitions, def)
|
||||
}
|
||||
}
|
||||
}
|
||||
if m.config.SendDiffValues {
|
||||
for _, def := range LustreDiffMetrics {
|
||||
if !slices.Contains(m.config.ExcludeMetrics, def.name) {
|
||||
if _, skip := stringArrayContains(m.config.ExcludeMetrics, def.name); !skip {
|
||||
m.definitions = append(m.definitions, def)
|
||||
}
|
||||
}
|
||||
}
|
||||
if m.config.SendDerivedValues {
|
||||
for _, def := range LustreDeriveMetrics {
|
||||
if !slices.Contains(m.config.ExcludeMetrics, def.name) {
|
||||
if _, skip := stringArrayContains(m.config.ExcludeMetrics, def.name); !skip {
|
||||
m.definitions = append(m.definitions, def)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -15,13 +15,12 @@ import (
|
||||
"os"
|
||||
"path/filepath"
|
||||
"regexp"
|
||||
"slices"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger"
|
||||
lp "github.com/ClusterCockpit/cc-lib/v2/ccMessage"
|
||||
cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
|
||||
lp "github.com/ClusterCockpit/cc-lib/ccMessage"
|
||||
)
|
||||
|
||||
const MEMSTATFILE = "/proc/meminfo"
|
||||
@@ -59,11 +58,7 @@ func getStats(filename string) map[string]MemstatStats {
|
||||
if err != nil {
|
||||
cclog.Error(err.Error())
|
||||
}
|
||||
defer func() {
|
||||
if err := file.Close(); err != nil {
|
||||
cclog.Error(err.Error())
|
||||
}
|
||||
}()
|
||||
defer file.Close()
|
||||
|
||||
scanner := bufio.NewScanner(file)
|
||||
for scanner.Scan() {
|
||||
@@ -120,20 +115,19 @@ func (m *MemstatCollector) Init(config json.RawMessage) error {
|
||||
"MemShared": "mem_shared",
|
||||
}
|
||||
for k, v := range matches {
|
||||
if !slices.Contains(m.config.ExcludeMetrics, k) {
|
||||
_, skip := stringArrayContains(m.config.ExcludeMetrics, k)
|
||||
if !skip {
|
||||
m.matches[k] = v
|
||||
}
|
||||
}
|
||||
m.sendMemUsed = false
|
||||
if !slices.Contains(m.config.ExcludeMetrics, "mem_used") {
|
||||
if _, skip := stringArrayContains(m.config.ExcludeMetrics, "mem_used"); !skip {
|
||||
m.sendMemUsed = true
|
||||
}
|
||||
if len(m.matches) == 0 {
|
||||
return errors.New("no metrics to collect")
|
||||
}
|
||||
if err := m.setup(); err != nil {
|
||||
return fmt.Errorf("%s Init(): setup() call failed: %w", m.name, err)
|
||||
}
|
||||
m.setup()
|
||||
|
||||
if m.config.NodeStats {
|
||||
if stats := getStats(MEMSTATFILE); len(stats) == 0 {
|
||||
@@ -180,7 +174,7 @@ func (m *MemstatCollector) Read(interval time.Duration, output chan lp.CCMessage
|
||||
sendStats := func(stats map[string]MemstatStats, tags map[string]string) {
|
||||
for match, name := range m.matches {
|
||||
var value float64 = 0
|
||||
unit := ""
|
||||
var unit string = ""
|
||||
if v, ok := stats[match]; ok {
|
||||
value = v.value
|
||||
if len(v.unit) > 0 {
|
||||
|
||||
@@ -12,7 +12,7 @@ import (
|
||||
"fmt"
|
||||
"time"
|
||||
|
||||
lp "github.com/ClusterCockpit/cc-lib/v2/ccMessage"
|
||||
lp "github.com/ClusterCockpit/cc-lib/ccMessage"
|
||||
)
|
||||
|
||||
type MetricCollector interface {
|
||||
@@ -51,6 +51,30 @@ func (c *metricCollector) Initialized() bool {
|
||||
return c.init
|
||||
}
|
||||
|
||||
// intArrayContains scans an array of ints if the value str is present in the array
|
||||
// If the specified value is found, the corresponding array index is returned.
|
||||
// The bool value is used to signal success or failure
|
||||
func intArrayContains(array []int, str int) (int, bool) {
|
||||
for i, a := range array {
|
||||
if a == str {
|
||||
return i, true
|
||||
}
|
||||
}
|
||||
return -1, false
|
||||
}
|
||||
|
||||
// stringArrayContains scans an array of strings if the value str is present in the array
|
||||
// If the specified value is found, the corresponding array index is returned.
|
||||
// The bool value is used to signal success or failure
|
||||
func stringArrayContains(array []string, str string) (int, bool) {
|
||||
for i, a := range array {
|
||||
if a == str {
|
||||
return i, true
|
||||
}
|
||||
}
|
||||
return -1, false
|
||||
}
|
||||
|
||||
// RemoveFromStringList removes the string r from the array of strings s
|
||||
// If r is not contained in the array an error is returned
|
||||
func RemoveFromStringList(s []string, r string) ([]string, error) {
|
||||
|
||||
@@ -10,15 +10,14 @@ package collectors
|
||||
import (
|
||||
"bufio"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"errors"
|
||||
"os"
|
||||
"slices"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger"
|
||||
lp "github.com/ClusterCockpit/cc-lib/v2/ccMessage"
|
||||
cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
|
||||
lp "github.com/ClusterCockpit/cc-lib/ccMessage"
|
||||
)
|
||||
|
||||
const NETSTATFILE = "/proc/net/dev"
|
||||
@@ -66,9 +65,7 @@ func getCanonicalName(raw string, aliasToCanonical map[string]string) string {
|
||||
func (m *NetstatCollector) Init(config json.RawMessage) error {
|
||||
m.name = "NetstatCollector"
|
||||
m.parallel = true
|
||||
if err := m.setup(); err != nil {
|
||||
return fmt.Errorf("%s Init(): setup() call failed: %w", m.name, err)
|
||||
}
|
||||
m.setup()
|
||||
m.lastTimestamp = time.Now()
|
||||
|
||||
const (
|
||||
@@ -110,8 +107,10 @@ func (m *NetstatCollector) Init(config json.RawMessage) error {
|
||||
// Check access to net statistic file
|
||||
file, err := os.Open(NETSTATFILE)
|
||||
if err != nil {
|
||||
return fmt.Errorf("%s Init(): failed to open netstat file \"%s\": %w", m.name, NETSTATFILE, err)
|
||||
cclog.ComponentError(m.name, err.Error())
|
||||
return err
|
||||
}
|
||||
defer file.Close()
|
||||
|
||||
scanner := bufio.NewScanner(file)
|
||||
for scanner.Scan() {
|
||||
@@ -130,7 +129,7 @@ func (m *NetstatCollector) Init(config json.RawMessage) error {
|
||||
canonical := getCanonicalName(raw, m.aliasToCanonical)
|
||||
|
||||
// Check if device is a included device
|
||||
if slices.Contains(m.config.IncludeDevices, canonical) {
|
||||
if _, ok := stringArrayContains(m.config.IncludeDevices, canonical); ok {
|
||||
// Tag will contain original device name (raw).
|
||||
tags := map[string]string{"stype": "network", "stype-id": raw, "type": "node"}
|
||||
meta_unit_byte := map[string]string{"source": m.name, "group": "Network", "unit": "bytes"}
|
||||
@@ -175,13 +174,8 @@ func (m *NetstatCollector) Init(config json.RawMessage) error {
|
||||
}
|
||||
}
|
||||
|
||||
// Close netstat file
|
||||
if err := file.Close(); err != nil {
|
||||
return fmt.Errorf("%s Init(): failed to close netstat file \"%s\": %w", m.name, NETSTATFILE, err)
|
||||
}
|
||||
|
||||
if len(m.matches) == 0 {
|
||||
return fmt.Errorf("%s Init(): no devices to collect metrics found", m.name)
|
||||
return errors.New("no devices to collector metrics found")
|
||||
}
|
||||
m.init = true
|
||||
return nil
|
||||
@@ -200,18 +194,10 @@ func (m *NetstatCollector) Read(interval time.Duration, output chan lp.CCMessage
|
||||
|
||||
file, err := os.Open(NETSTATFILE)
|
||||
if err != nil {
|
||||
cclog.ComponentError(
|
||||
m.name,
|
||||
fmt.Sprintf("Read(): Failed to open file '%s': %v", NETSTATFILE, err))
|
||||
cclog.ComponentError(m.name, err.Error())
|
||||
return
|
||||
}
|
||||
defer func() {
|
||||
if err := file.Close(); err != nil {
|
||||
cclog.ComponentError(
|
||||
m.name,
|
||||
fmt.Sprintf("Read(): Failed to close file '%s': %v", NETSTATFILE, err))
|
||||
}
|
||||
}()
|
||||
defer file.Close()
|
||||
|
||||
scanner := bufio.NewScanner(file)
|
||||
for scanner.Scan() {
|
||||
|
||||
@@ -11,7 +11,6 @@ import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"log"
|
||||
"slices"
|
||||
|
||||
// "os"
|
||||
"os/exec"
|
||||
@@ -19,8 +18,7 @@ import (
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger"
|
||||
lp "github.com/ClusterCockpit/cc-lib/v2/ccMessage"
|
||||
lp "github.com/ClusterCockpit/cc-lib/ccMessage"
|
||||
)
|
||||
|
||||
// First part contains the code for the general NfsCollector.
|
||||
@@ -46,15 +44,10 @@ type nfsCollector struct {
|
||||
|
||||
func (m *nfsCollector) initStats() error {
|
||||
cmd := exec.Command(m.config.Nfsstats, `-l`, `--all`)
|
||||
|
||||
// Wait for cmd end
|
||||
if err := cmd.Wait(); err != nil {
|
||||
return fmt.Errorf("initStats(): %w", err)
|
||||
}
|
||||
|
||||
cmd.Wait()
|
||||
buffer, err := cmd.Output()
|
||||
if err == nil {
|
||||
for line := range strings.Lines(string(buffer)) {
|
||||
for _, line := range strings.Split(string(buffer), "\n") {
|
||||
lf := strings.Fields(line)
|
||||
if len(lf) != 5 {
|
||||
continue
|
||||
@@ -78,15 +71,10 @@ func (m *nfsCollector) initStats() error {
|
||||
|
||||
func (m *nfsCollector) updateStats() error {
|
||||
cmd := exec.Command(m.config.Nfsstats, `-l`, `--all`)
|
||||
|
||||
// Wait for cmd end
|
||||
if err := cmd.Wait(); err != nil {
|
||||
return fmt.Errorf("updateStats(): %w", err)
|
||||
}
|
||||
|
||||
cmd.Wait()
|
||||
buffer, err := cmd.Output()
|
||||
if err == nil {
|
||||
for line := range strings.Lines(string(buffer)) {
|
||||
for _, line := range strings.Split(string(buffer), "\n") {
|
||||
lf := strings.Fields(line)
|
||||
if len(lf) != 5 {
|
||||
continue
|
||||
@@ -131,9 +119,7 @@ func (m *nfsCollector) MainInit(config json.RawMessage) error {
|
||||
return fmt.Errorf("NfsCollector.Init(): Failed to find nfsstat binary '%s': %v", m.config.Nfsstats, err)
|
||||
}
|
||||
m.data = make(map[string]NfsCollectorData)
|
||||
if err := m.initStats(); err != nil {
|
||||
return fmt.Errorf("NfsCollector.Init(): %w", err)
|
||||
}
|
||||
m.initStats()
|
||||
m.init = true
|
||||
m.parallel = true
|
||||
return nil
|
||||
@@ -145,13 +131,7 @@ func (m *nfsCollector) Read(interval time.Duration, output chan lp.CCMessage) {
|
||||
}
|
||||
timestamp := time.Now()
|
||||
|
||||
if err := m.updateStats(); err != nil {
|
||||
cclog.ComponentError(
|
||||
m.name,
|
||||
fmt.Sprintf("Read(): updateStats() failed: %v", err),
|
||||
)
|
||||
return
|
||||
}
|
||||
m.updateStats()
|
||||
prefix := ""
|
||||
switch m.version {
|
||||
case "v3":
|
||||
@@ -163,7 +143,7 @@ func (m *nfsCollector) Read(interval time.Duration, output chan lp.CCMessage) {
|
||||
}
|
||||
|
||||
for name, data := range m.data {
|
||||
if slices.Contains(m.config.ExcludeMetrics, name) {
|
||||
if _, skip := stringArrayContains(m.config.ExcludeMetrics, name); skip {
|
||||
continue
|
||||
}
|
||||
value := data.current - data.last
|
||||
@@ -190,17 +170,13 @@ type Nfs4Collector struct {
|
||||
func (m *Nfs3Collector) Init(config json.RawMessage) error {
|
||||
m.name = "Nfs3Collector"
|
||||
m.version = `v3`
|
||||
if err := m.setup(); err != nil {
|
||||
return fmt.Errorf("%s Init(): setup() call failed: %w", m.name, err)
|
||||
}
|
||||
m.setup()
|
||||
return m.MainInit(config)
|
||||
}
|
||||
|
||||
func (m *Nfs4Collector) Init(config json.RawMessage) error {
|
||||
m.name = "Nfs4Collector"
|
||||
m.version = `v4`
|
||||
if err := m.setup(); err != nil {
|
||||
return fmt.Errorf("%s Init(): setup() call failed: %w", m.name, err)
|
||||
}
|
||||
m.setup()
|
||||
return m.MainInit(config)
|
||||
}
|
||||
|
||||
@@ -12,13 +12,12 @@ import (
|
||||
"fmt"
|
||||
"os"
|
||||
"regexp"
|
||||
"slices"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger"
|
||||
lp "github.com/ClusterCockpit/cc-lib/v2/ccMessage"
|
||||
cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
|
||||
lp "github.com/ClusterCockpit/cc-lib/ccMessage"
|
||||
)
|
||||
|
||||
// These are the fields we read from the JSON configuration
|
||||
@@ -72,7 +71,7 @@ func (m *NfsIOStatCollector) readNfsiostats() map[string]map[string]int64 {
|
||||
// Is this a device line with mount point, remote target and NFS version?
|
||||
dev := resolve_regex_fields(l, deviceRegex)
|
||||
if len(dev) > 0 {
|
||||
if !slices.Contains(m.config.ExcludeFilesystem, dev[m.key]) {
|
||||
if _, ok := stringArrayContains(m.config.ExcludeFilesystem, dev[m.key]); !ok {
|
||||
current = dev
|
||||
if len(current["version"]) == 0 {
|
||||
current["version"] = "3"
|
||||
@@ -86,7 +85,7 @@ func (m *NfsIOStatCollector) readNfsiostats() map[string]map[string]int64 {
|
||||
if len(bytes) > 0 {
|
||||
data[current[m.key]] = make(map[string]int64)
|
||||
for name, sval := range bytes {
|
||||
if !slices.Contains(m.config.ExcludeMetrics, name) {
|
||||
if _, ok := stringArrayContains(m.config.ExcludeMetrics, name); !ok {
|
||||
val, err := strconv.ParseInt(sval, 10, 64)
|
||||
if err == nil {
|
||||
data[current[m.key]][name] = val
|
||||
@@ -103,9 +102,7 @@ func (m *NfsIOStatCollector) readNfsiostats() map[string]map[string]int64 {
|
||||
func (m *NfsIOStatCollector) Init(config json.RawMessage) error {
|
||||
var err error = nil
|
||||
m.name = "NfsIOStatCollector"
|
||||
if err := m.setup(); err != nil {
|
||||
return fmt.Errorf("%s Init(): setup() call failed: %w", m.name, err)
|
||||
}
|
||||
m.setup()
|
||||
m.parallel = true
|
||||
m.meta = map[string]string{"source": m.name, "group": "NFS", "unit": "bytes"}
|
||||
m.tags = map[string]string{"type": "node"}
|
||||
|
||||
@@ -10,8 +10,8 @@ import (
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger"
|
||||
lp "github.com/ClusterCockpit/cc-lib/v2/ccMessage"
|
||||
cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
|
||||
lp "github.com/ClusterCockpit/cc-lib/ccMessage"
|
||||
)
|
||||
|
||||
type NUMAStatsCollectorConfig struct {
|
||||
@@ -72,9 +72,7 @@ func (m *NUMAStatsCollector) Init(config json.RawMessage) error {
|
||||
|
||||
m.name = "NUMAStatsCollector"
|
||||
m.parallel = true
|
||||
if err := m.setup(); err != nil {
|
||||
return fmt.Errorf("%s Init(): setup() call failed: %w", m.name, err)
|
||||
}
|
||||
m.setup()
|
||||
m.meta = map[string]string{
|
||||
"source": m.name,
|
||||
"group": "NUMA",
|
||||
@@ -104,11 +102,8 @@ func (m *NUMAStatsCollector) Init(config json.RawMessage) error {
|
||||
file := filepath.Join(dir, "numastat")
|
||||
m.topology = append(m.topology,
|
||||
NUMAStatsCollectorTopolgy{
|
||||
file: file,
|
||||
tagSet: map[string]string{
|
||||
"type": "memoryDomain",
|
||||
"type-id": node,
|
||||
},
|
||||
file: file,
|
||||
tagSet: map[string]string{"memoryDomain": node},
|
||||
previousValues: make(map[string]int64),
|
||||
})
|
||||
}
|
||||
@@ -188,11 +183,7 @@ func (m *NUMAStatsCollector) Read(interval time.Duration, output chan lp.CCMessa
|
||||
t.previousValues[key] = value
|
||||
}
|
||||
}
|
||||
if err := file.Close(); err != nil {
|
||||
cclog.ComponentError(
|
||||
m.name,
|
||||
fmt.Sprintf("Read(): Failed to close file '%s': %v", t.file, err))
|
||||
}
|
||||
file.Close()
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -12,13 +12,11 @@ import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"log"
|
||||
"maps"
|
||||
"slices"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger"
|
||||
lp "github.com/ClusterCockpit/cc-lib/v2/ccMessage"
|
||||
cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
|
||||
lp "github.com/ClusterCockpit/cc-lib/ccMessage"
|
||||
"github.com/NVIDIA/go-nvml/pkg/nvml"
|
||||
)
|
||||
|
||||
@@ -66,9 +64,7 @@ func (m *NvidiaCollector) Init(config json.RawMessage) error {
|
||||
m.config.ProcessMigDevices = false
|
||||
m.config.UseUuidForMigDevices = false
|
||||
m.config.UseSliceForMigDevices = false
|
||||
if err := m.setup(); err != nil {
|
||||
return fmt.Errorf("%s Init(): setup() call failed: %w", m.name, err)
|
||||
}
|
||||
m.setup()
|
||||
if len(config) > 0 {
|
||||
err = json.Unmarshal(config, &m.config)
|
||||
if err != nil {
|
||||
@@ -113,7 +109,7 @@ func (m *NvidiaCollector) Init(config json.RawMessage) error {
|
||||
|
||||
// Skip excluded devices by ID
|
||||
str_i := fmt.Sprintf("%d", i)
|
||||
if slices.Contains(m.config.ExcludeDevices, str_i) {
|
||||
if _, skip := stringArrayContains(m.config.ExcludeDevices, str_i); skip {
|
||||
cclog.ComponentDebug(m.name, "Skipping excluded device", str_i)
|
||||
continue
|
||||
}
|
||||
@@ -141,7 +137,7 @@ func (m *NvidiaCollector) Init(config json.RawMessage) error {
|
||||
pciInfo.Device)
|
||||
|
||||
// Skip excluded devices specified by PCI ID
|
||||
if slices.Contains(m.config.ExcludeDevices, pci_id) {
|
||||
if _, skip := stringArrayContains(m.config.ExcludeDevices, pci_id); skip {
|
||||
cclog.ComponentDebug(m.name, "Skipping excluded device", pci_id)
|
||||
continue
|
||||
}
|
||||
@@ -226,15 +222,13 @@ func readMemoryInfo(device *NvidiaCollectorDevice, output chan lp.CCMessage) err
|
||||
var total uint64
|
||||
var used uint64
|
||||
var reserved uint64 = 0
|
||||
v2 := false
|
||||
var v2 bool = false
|
||||
meminfo, ret := nvml.DeviceGetMemoryInfo(device.device)
|
||||
if ret != nvml.SUCCESS {
|
||||
err := errors.New(nvml.ErrorString(ret))
|
||||
return err
|
||||
}
|
||||
// Total physical device memory (in bytes)
|
||||
total = meminfo.Total
|
||||
// Sum of Reserved and Allocated device memory (in bytes)
|
||||
used = meminfo.Used
|
||||
|
||||
if !device.excludeMetrics["nv_fb_mem_total"] {
|
||||
@@ -409,8 +403,7 @@ func readEccMode(device *NvidiaCollectorDevice, output chan lp.CCMessage) error
|
||||
// Changing ECC modes requires a reboot.
|
||||
// The "pending" ECC mode refers to the target mode following the next reboot.
|
||||
_, ecc_pend, ret := nvml.DeviceGetEccMode(device.device)
|
||||
switch ret {
|
||||
case nvml.SUCCESS:
|
||||
if ret == nvml.SUCCESS {
|
||||
var y lp.CCMessage
|
||||
var err error
|
||||
switch ecc_pend {
|
||||
@@ -424,7 +417,7 @@ func readEccMode(device *NvidiaCollectorDevice, output chan lp.CCMessage) error
|
||||
if err == nil {
|
||||
output <- y
|
||||
}
|
||||
case nvml.ERROR_NOT_SUPPORTED:
|
||||
} else if ret == nvml.ERROR_NOT_SUPPORTED {
|
||||
y, err := lp.NewMessage("nv_ecc_mode", device.tags, device.meta, map[string]interface{}{"value": "N/A"}, time.Now())
|
||||
if err == nil {
|
||||
output <- y
|
||||
@@ -596,7 +589,7 @@ func readMaxClocks(device *NvidiaCollectorDevice, output chan lp.CCMessage) erro
|
||||
if !device.excludeMetrics["nv_max_graphics_clock"] {
|
||||
max_gclk, ret := nvml.DeviceGetMaxClockInfo(device.device, nvml.CLOCK_GRAPHICS)
|
||||
if ret == nvml.SUCCESS {
|
||||
y, err := lp.NewMetric("nv_max_graphics_clock", device.tags, device.meta, float64(max_gclk), time.Now())
|
||||
y, err := lp.NewMessage("nv_max_graphics_clock", device.tags, device.meta, map[string]interface{}{"value": float64(max_gclk)}, time.Now())
|
||||
if err == nil {
|
||||
y.AddMeta("unit", "MHz")
|
||||
output <- y
|
||||
@@ -605,9 +598,9 @@ func readMaxClocks(device *NvidiaCollectorDevice, output chan lp.CCMessage) erro
|
||||
}
|
||||
|
||||
if !device.excludeMetrics["nv_max_sm_clock"] {
|
||||
maxSmClock, ret := nvml.DeviceGetMaxClockInfo(device.device, nvml.CLOCK_SM)
|
||||
maxSmClock, ret := nvml.DeviceGetClockInfo(device.device, nvml.CLOCK_SM)
|
||||
if ret == nvml.SUCCESS {
|
||||
y, err := lp.NewMetric("nv_max_sm_clock", device.tags, device.meta, float64(maxSmClock), time.Now())
|
||||
y, err := lp.NewMessage("nv_max_sm_clock", device.tags, device.meta, map[string]interface{}{"value": float64(maxSmClock)}, time.Now())
|
||||
if err == nil {
|
||||
y.AddMeta("unit", "MHz")
|
||||
output <- y
|
||||
@@ -616,9 +609,9 @@ func readMaxClocks(device *NvidiaCollectorDevice, output chan lp.CCMessage) erro
|
||||
}
|
||||
|
||||
if !device.excludeMetrics["nv_max_mem_clock"] {
|
||||
maxMemClock, ret := nvml.DeviceGetMaxClockInfo(device.device, nvml.CLOCK_MEM)
|
||||
maxMemClock, ret := nvml.DeviceGetClockInfo(device.device, nvml.CLOCK_MEM)
|
||||
if ret == nvml.SUCCESS {
|
||||
y, err := lp.NewMetric("nv_max_mem_clock", device.tags, device.meta, float64(maxMemClock), time.Now())
|
||||
y, err := lp.NewMessage("nv_max_mem_clock", device.tags, device.meta, map[string]interface{}{"value": float64(maxMemClock)}, time.Now())
|
||||
if err == nil {
|
||||
y.AddMeta("unit", "MHz")
|
||||
output <- y
|
||||
@@ -627,9 +620,9 @@ func readMaxClocks(device *NvidiaCollectorDevice, output chan lp.CCMessage) erro
|
||||
}
|
||||
|
||||
if !device.excludeMetrics["nv_max_video_clock"] {
|
||||
maxVideoClock, ret := nvml.DeviceGetMaxClockInfo(device.device, nvml.CLOCK_VIDEO)
|
||||
maxMemClock, ret := nvml.DeviceGetClockInfo(device.device, nvml.CLOCK_VIDEO)
|
||||
if ret == nvml.SUCCESS {
|
||||
y, err := lp.NewMetric("nv_max_video_clock", device.tags, device.meta, float64(maxVideoClock), time.Now())
|
||||
y, err := lp.NewMessage("nv_max_video_clock", device.tags, device.meta, map[string]interface{}{"value": float64(maxMemClock)}, time.Now())
|
||||
if err == nil {
|
||||
y.AddMeta("unit", "MHz")
|
||||
output <- y
|
||||
@@ -773,7 +766,7 @@ func readRemappedRows(device *NvidiaCollectorDevice, output chan lp.CCMessage) e
|
||||
}
|
||||
}
|
||||
if !device.excludeMetrics["nv_remapped_rows_pending"] {
|
||||
p := 0
|
||||
var p int = 0
|
||||
if pending {
|
||||
p = 1
|
||||
}
|
||||
@@ -783,7 +776,7 @@ func readRemappedRows(device *NvidiaCollectorDevice, output chan lp.CCMessage) e
|
||||
}
|
||||
}
|
||||
if !device.excludeMetrics["nv_remapped_rows_failure"] {
|
||||
f := 0
|
||||
var f int = 0
|
||||
if failure {
|
||||
f = 1
|
||||
}
|
||||
@@ -1280,7 +1273,9 @@ func (m *NvidiaCollector) Read(interval time.Duration, output chan lp.CCMessage)
|
||||
meta: map[string]string{},
|
||||
excludeMetrics: excludeMetrics,
|
||||
}
|
||||
maps.Copy(migDevice.tags, m.gpus[i].tags)
|
||||
for k, v := range m.gpus[i].tags {
|
||||
migDevice.tags[k] = v
|
||||
}
|
||||
migDevice.tags["stype"] = "mig"
|
||||
if m.config.UseUuidForMigDevices {
|
||||
uuid, ret := nvml.DeviceGetUUID(mdev)
|
||||
@@ -1294,8 +1289,8 @@ func (m *NvidiaCollector) Read(interval time.Duration, output chan lp.CCMessage)
|
||||
if ret == nvml.SUCCESS {
|
||||
mname, ret := nvml.DeviceGetName(mdev)
|
||||
if ret == nvml.SUCCESS {
|
||||
x := strings.ReplaceAll(mname, name, "")
|
||||
x = strings.ReplaceAll(x, "MIG", "")
|
||||
x := strings.Replace(mname, name, "", -1)
|
||||
x = strings.Replace(x, "MIG", "", -1)
|
||||
x = strings.TrimSpace(x)
|
||||
migDevice.tags["stype-id"] = x
|
||||
}
|
||||
@@ -1304,7 +1299,9 @@ func (m *NvidiaCollector) Read(interval time.Duration, output chan lp.CCMessage)
|
||||
if _, ok := migDevice.tags["stype-id"]; !ok {
|
||||
migDevice.tags["stype-id"] = fmt.Sprintf("%d", j)
|
||||
}
|
||||
maps.Copy(migDevice.meta, m.gpus[i].meta)
|
||||
for k, v := range m.gpus[i].meta {
|
||||
migDevice.meta[k] = v
|
||||
}
|
||||
if _, ok := migDevice.meta["uuid"]; ok && !m.config.UseUuidForMigDevices {
|
||||
uuid, ret := nvml.DeviceGetUUID(mdev)
|
||||
if ret == nvml.SUCCESS {
|
||||
@@ -1320,9 +1317,7 @@ func (m *NvidiaCollector) Read(interval time.Duration, output chan lp.CCMessage)
|
||||
|
||||
func (m *NvidiaCollector) Close() {
|
||||
if m.init {
|
||||
if ret := nvml.Shutdown(); ret != nvml.SUCCESS {
|
||||
cclog.ComponentError(m.name, "nvml.Shutdown() not successful")
|
||||
}
|
||||
nvml.Shutdown()
|
||||
m.init = false
|
||||
}
|
||||
}
|
||||
|
||||
@@ -16,8 +16,8 @@ import (
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger"
|
||||
lp "github.com/ClusterCockpit/cc-lib/v2/ccMessage"
|
||||
cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
|
||||
lp "github.com/ClusterCockpit/cc-lib/ccMessage"
|
||||
)
|
||||
|
||||
// running average power limit (RAPL) monitoring attributes for a zone
|
||||
@@ -54,10 +54,9 @@ func (m *RAPLCollector) Init(config json.RawMessage) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
var err error = nil
|
||||
m.name = "RAPLCollector"
|
||||
if err := m.setup(); err != nil {
|
||||
return fmt.Errorf("%s Init(): setup() call failed: %w", m.name, err)
|
||||
}
|
||||
m.setup()
|
||||
m.parallel = true
|
||||
m.meta = map[string]string{
|
||||
"source": m.name,
|
||||
@@ -67,7 +66,7 @@ func (m *RAPLCollector) Init(config json.RawMessage) error {
|
||||
|
||||
// Read in the JSON configuration
|
||||
if len(config) > 0 {
|
||||
err := json.Unmarshal(config, &m.config)
|
||||
err = json.Unmarshal(config, &m.config)
|
||||
if err != nil {
|
||||
cclog.ComponentError(m.name, "Error reading config:", err.Error())
|
||||
return err
|
||||
|
||||
@@ -13,8 +13,8 @@ import (
|
||||
"fmt"
|
||||
"time"
|
||||
|
||||
cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger"
|
||||
lp "github.com/ClusterCockpit/cc-lib/v2/ccMessage"
|
||||
cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
|
||||
lp "github.com/ClusterCockpit/cc-lib/ccMessage"
|
||||
"github.com/ClusterCockpit/go-rocm-smi/pkg/rocm_smi"
|
||||
)
|
||||
|
||||
@@ -52,9 +52,7 @@ func (m *RocmSmiCollector) Init(config json.RawMessage) error {
|
||||
// Always set the name early in Init() to use it in cclog.Component* functions
|
||||
m.name = "RocmSmiCollector"
|
||||
// This is for later use, also call it early
|
||||
if err := m.setup(); err != nil {
|
||||
return fmt.Errorf("%s Init(): setup() call failed: %w", m.name, err)
|
||||
}
|
||||
m.setup()
|
||||
// Define meta information sent with each metric
|
||||
// (Can also be dynamic or this is the basic set with extension through AddMeta())
|
||||
//m.meta = map[string]string{"source": m.name, "group": "AMD"}
|
||||
|
||||
@@ -9,11 +9,10 @@ package collectors
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"time"
|
||||
|
||||
cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger"
|
||||
lp "github.com/ClusterCockpit/cc-lib/v2/ccMessage"
|
||||
cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
|
||||
lp "github.com/ClusterCockpit/cc-lib/ccMessage"
|
||||
)
|
||||
|
||||
// These are the fields we read from the JSON configuration
|
||||
@@ -42,9 +41,7 @@ func (m *SampleCollector) Init(config json.RawMessage) error {
|
||||
// Always set the name early in Init() to use it in cclog.Component* functions
|
||||
m.name = "SampleCollector"
|
||||
// This is for later use, also call it early
|
||||
if err := m.setup(); err != nil {
|
||||
return fmt.Errorf("%s Init(): setup() call failed: %w", m.name, err)
|
||||
}
|
||||
m.setup()
|
||||
// Tell whether the collector should be run in parallel with others (reading files, ...)
|
||||
// or it should be run serially, mostly for collectors actually doing measurements
|
||||
// because they should not measure the execution of the other collectors
|
||||
|
||||
@@ -9,12 +9,11 @@ package collectors
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger"
|
||||
lp "github.com/ClusterCockpit/cc-lib/v2/ccMessage"
|
||||
cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
|
||||
lp "github.com/ClusterCockpit/cc-lib/ccMessage"
|
||||
)
|
||||
|
||||
// These are the fields we read from the JSON configuration
|
||||
@@ -41,9 +40,7 @@ func (m *SampleTimerCollector) Init(name string, config json.RawMessage) error {
|
||||
// Always set the name early in Init() to use it in cclog.Component* functions
|
||||
m.name = "SampleTimerCollector"
|
||||
// This is for later use, also call it early
|
||||
if err := m.setup(); err != nil {
|
||||
return fmt.Errorf("%s Init(): setup() call failed: %w", m.name, err)
|
||||
}
|
||||
m.setup()
|
||||
// Define meta information sent with each metric
|
||||
// (Can also be dynamic or this is the basic set with extension through AddMeta())
|
||||
m.meta = map[string]string{"source": m.name, "group": "SAMPLE"}
|
||||
|
||||
@@ -11,13 +11,14 @@ import (
|
||||
"bufio"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"math"
|
||||
"os"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger"
|
||||
lp "github.com/ClusterCockpit/cc-lib/v2/ccMessage"
|
||||
cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
|
||||
lp "github.com/ClusterCockpit/cc-lib/ccMessage"
|
||||
)
|
||||
|
||||
const SCHEDSTATFILE = `/proc/schedstat`
|
||||
@@ -46,37 +47,37 @@ type SchedstatCollector struct {
|
||||
// Called once by the collector manager
|
||||
// All tags, meta data tags and metrics that do not change over the runtime should be set here
|
||||
func (m *SchedstatCollector) Init(config json.RawMessage) error {
|
||||
var err error = nil
|
||||
// Always set the name early in Init() to use it in cclog.Component* functions
|
||||
m.name = "SchedstatCollector"
|
||||
// This is for later use, also call it early
|
||||
if err := m.setup(); err != nil {
|
||||
return fmt.Errorf("%s Init(): setup() call failed: %w", m.name, err)
|
||||
}
|
||||
m.setup()
|
||||
// Tell whether the collector should be run in parallel with others (reading files, ...)
|
||||
// or it should be run serially, mostly for collectors acutally doing measurements
|
||||
// because they should not measure the execution of the other collectors
|
||||
m.parallel = true
|
||||
// Define meta information sent with each metric
|
||||
// (Can also be dynamic or this is the basic set with extension through AddMeta())
|
||||
m.meta = map[string]string{
|
||||
"source": m.name,
|
||||
"group": "SCHEDSTAT",
|
||||
}
|
||||
m.meta = map[string]string{"source": m.name, "group": "SCHEDSTAT"}
|
||||
|
||||
// Read in the JSON configuration
|
||||
if len(config) > 0 {
|
||||
if err := json.Unmarshal(config, &m.config); err != nil {
|
||||
return fmt.Errorf("%s Init(): Error reading config: %w", m.name, err)
|
||||
err = json.Unmarshal(config, &m.config)
|
||||
if err != nil {
|
||||
cclog.ComponentError(m.name, "Error reading config:", err.Error())
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
// Check input file
|
||||
file, err := os.Open(SCHEDSTATFILE)
|
||||
file, err := os.Open(string(SCHEDSTATFILE))
|
||||
if err != nil {
|
||||
return fmt.Errorf("%s Init(): Failed opening scheduler statistics file \"%s\": %w", m.name, SCHEDSTATFILE, err)
|
||||
cclog.ComponentError(m.name, err.Error())
|
||||
}
|
||||
defer file.Close()
|
||||
|
||||
// Pre-generate tags for all CPUs
|
||||
num_cpus := 0
|
||||
m.cputags = make(map[string]map[string]string)
|
||||
m.olddata = make(map[string]map[string]int64)
|
||||
scanner := bufio.NewScanner(file)
|
||||
@@ -88,19 +89,11 @@ func (m *SchedstatCollector) Init(config json.RawMessage) error {
|
||||
cpu, _ := strconv.Atoi(cpustr)
|
||||
running, _ := strconv.ParseInt(linefields[7], 10, 64)
|
||||
waiting, _ := strconv.ParseInt(linefields[8], 10, 64)
|
||||
m.cputags[linefields[0]] = map[string]string{
|
||||
"type": "hwthread",
|
||||
"type-id": fmt.Sprintf("%d", cpu),
|
||||
}
|
||||
m.olddata[linefields[0]] = map[string]int64{
|
||||
"running": running,
|
||||
"waiting": waiting,
|
||||
}
|
||||
m.cputags[linefields[0]] = map[string]string{"type": "hwthread", "type-id": fmt.Sprintf("%d", cpu)}
|
||||
m.olddata[linefields[0]] = map[string]int64{"running": running, "waiting": waiting}
|
||||
num_cpus++
|
||||
}
|
||||
}
|
||||
if err := file.Close(); err != nil {
|
||||
return fmt.Errorf("%s Init(): Failed closing scheduler statistics file \"%s\": %w", m.name, SCHEDSTATFILE, err)
|
||||
}
|
||||
|
||||
// Save current timestamp
|
||||
m.lastTimestamp = time.Now()
|
||||
@@ -116,8 +109,8 @@ func (m *SchedstatCollector) ParseProcLine(linefields []string, tags map[string]
|
||||
diff_running := running - m.olddata[linefields[0]]["running"]
|
||||
diff_waiting := waiting - m.olddata[linefields[0]]["waiting"]
|
||||
|
||||
l_running := float64(diff_running) / tsdelta.Seconds() / 1000_000_000
|
||||
l_waiting := float64(diff_waiting) / tsdelta.Seconds() / 1000_000_000
|
||||
var l_running float64 = float64(diff_running) / tsdelta.Seconds() / (math.Pow(1000, 3))
|
||||
var l_waiting float64 = float64(diff_waiting) / tsdelta.Seconds() / (math.Pow(1000, 3))
|
||||
|
||||
m.olddata[linefields[0]]["running"] = running
|
||||
m.olddata[linefields[0]]["waiting"] = waiting
|
||||
@@ -141,19 +134,11 @@ func (m *SchedstatCollector) Read(interval time.Duration, output chan lp.CCMessa
|
||||
now := time.Now()
|
||||
tsdelta := now.Sub(m.lastTimestamp)
|
||||
|
||||
file, err := os.Open(SCHEDSTATFILE)
|
||||
file, err := os.Open(string(SCHEDSTATFILE))
|
||||
if err != nil {
|
||||
cclog.ComponentError(
|
||||
m.name,
|
||||
fmt.Sprintf("Read(): Failed to open file '%s': %v", SCHEDSTATFILE, err))
|
||||
cclog.ComponentError(m.name, err.Error())
|
||||
}
|
||||
defer func() {
|
||||
if err := file.Close(); err != nil {
|
||||
cclog.ComponentError(
|
||||
m.name,
|
||||
fmt.Sprintf("Read(): Failed to close file '%s': %v", SCHEDSTATFILE, err))
|
||||
}
|
||||
}()
|
||||
defer file.Close()
|
||||
|
||||
scanner := bufio.NewScanner(file)
|
||||
for scanner.Scan() {
|
||||
|
||||
@@ -9,13 +9,12 @@ package collectors
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"runtime"
|
||||
"syscall"
|
||||
"time"
|
||||
|
||||
cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger"
|
||||
lp "github.com/ClusterCockpit/cc-lib/v2/ccMessage"
|
||||
cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
|
||||
lp "github.com/ClusterCockpit/cc-lib/ccMessage"
|
||||
)
|
||||
|
||||
type SelfCollectorConfig struct {
|
||||
@@ -35,9 +34,7 @@ type SelfCollector struct {
|
||||
func (m *SelfCollector) Init(config json.RawMessage) error {
|
||||
var err error = nil
|
||||
m.name = "SelfCollector"
|
||||
if err := m.setup(); err != nil {
|
||||
return fmt.Errorf("%s Init(): setup() call failed: %w", m.name, err)
|
||||
}
|
||||
m.setup()
|
||||
m.parallel = true
|
||||
m.meta = map[string]string{"source": m.name, "group": "Self"}
|
||||
m.tags = map[string]string{"type": "node"}
|
||||
|
||||
@@ -1,350 +0,0 @@
|
||||
package collectors
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"os"
|
||||
"os/exec"
|
||||
"os/user"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger"
|
||||
lp "github.com/ClusterCockpit/cc-lib/v2/ccMessage"
|
||||
)
|
||||
|
||||
type SlurmJobData struct {
|
||||
MemoryUsage float64
|
||||
MaxMemoryUsage float64
|
||||
LimitMemoryUsage float64
|
||||
CpuUsageUser float64
|
||||
CpuUsageSys float64
|
||||
CpuSet []int
|
||||
}
|
||||
|
||||
type SlurmCgroupsConfig struct {
|
||||
CgroupBase string `json:"cgroup_base"`
|
||||
ExcludeMetrics []string `json:"exclude_metrics,omitempty"`
|
||||
UseSudo bool `json:"use_sudo,omitempty"`
|
||||
}
|
||||
|
||||
type SlurmCgroupCollector struct {
|
||||
metricCollector
|
||||
config SlurmCgroupsConfig
|
||||
meta map[string]string
|
||||
tags map[string]string
|
||||
allCPUs []int
|
||||
cpuUsed map[int]bool
|
||||
cgroupBase string
|
||||
excludeMetrics map[string]struct{}
|
||||
useSudo bool
|
||||
}
|
||||
|
||||
const defaultCgroupBase = "/sys/fs/cgroup/system.slice/slurmstepd.scope"
|
||||
|
||||
func ParseCPUs(cpuset string) ([]int, error) {
|
||||
var result []int
|
||||
if cpuset == "" {
|
||||
return result, nil
|
||||
}
|
||||
|
||||
for r := range strings.SplitSeq(cpuset, ",") {
|
||||
if strings.Contains(r, "-") {
|
||||
parts := strings.Split(r, "-")
|
||||
if len(parts) != 2 {
|
||||
return nil, fmt.Errorf("invalid CPU range: %s", r)
|
||||
}
|
||||
start, err := strconv.Atoi(strings.TrimSpace(parts[0]))
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("invalid CPU range start: %s", parts[0])
|
||||
}
|
||||
end, err := strconv.Atoi(strings.TrimSpace(parts[1]))
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("invalid CPU range end: %s", parts[1])
|
||||
}
|
||||
for i := start; i <= end; i++ {
|
||||
result = append(result, i)
|
||||
}
|
||||
} else {
|
||||
cpu, err := strconv.Atoi(strings.TrimSpace(r))
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("invalid CPU ID: %s", r)
|
||||
}
|
||||
result = append(result, cpu)
|
||||
}
|
||||
}
|
||||
return result, nil
|
||||
}
|
||||
|
||||
func GetAllCPUs() ([]int, error) {
|
||||
data, err := os.ReadFile("/sys/devices/system/cpu/online")
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to read /sys/devices/system/cpu/online: %v", err)
|
||||
}
|
||||
return ParseCPUs(strings.TrimSpace(string(data)))
|
||||
}
|
||||
|
||||
func (m *SlurmCgroupCollector) isExcluded(metric string) bool {
|
||||
_, found := m.excludeMetrics[metric]
|
||||
return found
|
||||
}
|
||||
|
||||
func (m *SlurmCgroupCollector) readFile(path string) ([]byte, error) {
|
||||
if m.useSudo {
|
||||
cmd := exec.Command("sudo", "cat", path)
|
||||
return cmd.Output()
|
||||
}
|
||||
return os.ReadFile(path)
|
||||
}
|
||||
|
||||
func (m *SlurmCgroupCollector) Init(config json.RawMessage) error {
|
||||
var err error
|
||||
m.name = "SlurmCgroupCollector"
|
||||
if err := m.setup(); err != nil {
|
||||
return fmt.Errorf("%s Init(): setup() call failed: %w", m.name, err)
|
||||
}
|
||||
m.parallel = true
|
||||
m.meta = map[string]string{"source": m.name, "group": "SLURM"}
|
||||
m.tags = map[string]string{"type": "hwthread"}
|
||||
m.cpuUsed = make(map[int]bool)
|
||||
m.cgroupBase = defaultCgroupBase
|
||||
|
||||
if len(config) > 0 {
|
||||
err = json.Unmarshal(config, &m.config)
|
||||
if err != nil {
|
||||
cclog.ComponentError(m.name, "Error reading config:", err.Error())
|
||||
return err
|
||||
}
|
||||
m.excludeMetrics = make(map[string]struct{})
|
||||
for _, metric := range m.config.ExcludeMetrics {
|
||||
m.excludeMetrics[metric] = struct{}{}
|
||||
}
|
||||
if m.config.CgroupBase != "" {
|
||||
m.cgroupBase = m.config.CgroupBase
|
||||
}
|
||||
}
|
||||
|
||||
m.useSudo = m.config.UseSudo
|
||||
if !m.useSudo {
|
||||
user, err := user.Current()
|
||||
if err != nil {
|
||||
cclog.ComponentError(m.name, "Failed to get current user:", err.Error())
|
||||
return err
|
||||
}
|
||||
if user.Uid != "0" {
|
||||
cclog.ComponentError(m.name, "Reading cgroup files requires root privileges (or enable use_sudo in config)")
|
||||
return fmt.Errorf("not root")
|
||||
}
|
||||
}
|
||||
|
||||
m.allCPUs, err = GetAllCPUs()
|
||||
if err != nil {
|
||||
cclog.ComponentError(m.name, "Error reading online CPUs:", err.Error())
|
||||
return err
|
||||
}
|
||||
|
||||
m.init = true
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *SlurmCgroupCollector) ReadJobData(jobdir string) (SlurmJobData, error) {
|
||||
jobdata := SlurmJobData{
|
||||
MemoryUsage: 0,
|
||||
MaxMemoryUsage: 0,
|
||||
LimitMemoryUsage: 0,
|
||||
CpuUsageUser: 0,
|
||||
CpuUsageSys: 0,
|
||||
CpuSet: []int{},
|
||||
}
|
||||
|
||||
cg := func(f string) string { return filepath.Join(m.cgroupBase, jobdir, f) }
|
||||
|
||||
memUsage, err := m.readFile(cg("memory.current"))
|
||||
if err == nil {
|
||||
x, err := strconv.ParseFloat(strings.TrimSpace(string(memUsage)), 64)
|
||||
if err == nil {
|
||||
jobdata.MemoryUsage = x
|
||||
}
|
||||
}
|
||||
|
||||
maxMem, err := m.readFile(cg("memory.peak"))
|
||||
if err == nil {
|
||||
x, err := strconv.ParseFloat(strings.TrimSpace(string(maxMem)), 64)
|
||||
if err == nil {
|
||||
jobdata.MaxMemoryUsage = x
|
||||
}
|
||||
}
|
||||
|
||||
limitMem, err := m.readFile(cg("memory.max"))
|
||||
if err == nil {
|
||||
x, err := strconv.ParseFloat(strings.TrimSpace(string(limitMem)), 64)
|
||||
if err == nil {
|
||||
jobdata.LimitMemoryUsage = x
|
||||
}
|
||||
}
|
||||
|
||||
cpuStat, err := m.readFile(cg("cpu.stat"))
|
||||
if err == nil {
|
||||
lines := strings.Split(strings.TrimSpace(string(cpuStat)), "\n")
|
||||
var usageUsec, userUsec, systemUsec float64
|
||||
for _, line := range lines {
|
||||
fields := strings.Fields(line)
|
||||
if len(fields) < 2 {
|
||||
continue
|
||||
}
|
||||
value, err := strconv.ParseFloat(fields[1], 64)
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
switch fields[0] {
|
||||
case "usage_usec":
|
||||
usageUsec = value
|
||||
case "user_usec":
|
||||
userUsec = value
|
||||
case "system_usec":
|
||||
systemUsec = value
|
||||
}
|
||||
}
|
||||
if usageUsec > 0 {
|
||||
jobdata.CpuUsageUser = (userUsec * 100 / usageUsec)
|
||||
jobdata.CpuUsageSys = (systemUsec * 100 / usageUsec)
|
||||
}
|
||||
}
|
||||
|
||||
cpuSet, err := m.readFile(cg("cpuset.cpus"))
|
||||
if err == nil {
|
||||
cpus, err := ParseCPUs(strings.TrimSpace(string(cpuSet)))
|
||||
if err == nil {
|
||||
jobdata.CpuSet = cpus
|
||||
}
|
||||
}
|
||||
|
||||
return jobdata, nil
|
||||
}
|
||||
|
||||
func (m *SlurmCgroupCollector) Read(interval time.Duration, output chan lp.CCMessage) {
|
||||
timestamp := time.Now()
|
||||
|
||||
for k := range m.cpuUsed {
|
||||
delete(m.cpuUsed, k)
|
||||
}
|
||||
|
||||
globPattern := filepath.Join(m.cgroupBase, "job_*")
|
||||
jobDirs, err := filepath.Glob(globPattern)
|
||||
if err != nil {
|
||||
cclog.ComponentError(m.name, "Error globbing job directories:", err.Error())
|
||||
return
|
||||
}
|
||||
|
||||
for _, jdir := range jobDirs {
|
||||
jKey := filepath.Base(jdir)
|
||||
|
||||
jobdata, err := m.ReadJobData(jKey)
|
||||
if err != nil {
|
||||
cclog.ComponentError(m.name, "Error reading job data for", jKey, ":", err.Error())
|
||||
continue
|
||||
}
|
||||
|
||||
if len(jobdata.CpuSet) > 0 {
|
||||
coreCount := float64(len(jobdata.CpuSet))
|
||||
for _, cpu := range jobdata.CpuSet {
|
||||
coreTags := map[string]string{
|
||||
"type": "hwthread",
|
||||
"type-id": fmt.Sprintf("%d", cpu),
|
||||
}
|
||||
|
||||
if coreCount > 0 && !m.isExcluded("job_mem_used") {
|
||||
memPerCore := jobdata.MemoryUsage / coreCount
|
||||
if y, err := lp.NewMessage("job_mem_used", coreTags, m.meta, map[string]interface{}{"value": memPerCore}, timestamp); err == nil {
|
||||
y.AddMeta("unit", "Bytes")
|
||||
output <- y
|
||||
}
|
||||
}
|
||||
|
||||
if coreCount > 0 && !m.isExcluded("job_max_mem_used") {
|
||||
maxMemPerCore := jobdata.MaxMemoryUsage / coreCount
|
||||
if y, err := lp.NewMessage("job_max_mem_used", coreTags, m.meta, map[string]interface{}{"value": maxMemPerCore}, timestamp); err == nil {
|
||||
y.AddMeta("unit", "Bytes")
|
||||
output <- y
|
||||
}
|
||||
}
|
||||
|
||||
if coreCount > 0 && !m.isExcluded("job_mem_limit") {
|
||||
limitPerCore := jobdata.LimitMemoryUsage / coreCount
|
||||
if y, err := lp.NewMessage("job_mem_limit", coreTags, m.meta, map[string]interface{}{"value": limitPerCore}, timestamp); err == nil {
|
||||
y.AddMeta("unit", "Bytes")
|
||||
output <- y
|
||||
}
|
||||
}
|
||||
|
||||
if coreCount > 0 && !m.isExcluded("job_user_cpu") {
|
||||
cpuUserPerCore := jobdata.CpuUsageUser / coreCount
|
||||
if y, err := lp.NewMessage("job_user_cpu", coreTags, m.meta, map[string]interface{}{"value": cpuUserPerCore}, timestamp); err == nil {
|
||||
y.AddMeta("unit", "%")
|
||||
output <- y
|
||||
}
|
||||
}
|
||||
|
||||
if coreCount > 0 && !m.isExcluded("job_sys_cpu") {
|
||||
cpuSysPerCore := jobdata.CpuUsageSys / coreCount
|
||||
if y, err := lp.NewMessage("job_sys_cpu", coreTags, m.meta, map[string]interface{}{"value": cpuSysPerCore}, timestamp); err == nil {
|
||||
y.AddMeta("unit", "%")
|
||||
output <- y
|
||||
}
|
||||
}
|
||||
|
||||
m.cpuUsed[cpu] = true
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for _, cpu := range m.allCPUs {
|
||||
if !m.cpuUsed[cpu] {
|
||||
coreTags := map[string]string{
|
||||
"type": "hwthread",
|
||||
"type-id": fmt.Sprintf("%d", cpu),
|
||||
}
|
||||
|
||||
if !m.isExcluded("job_mem_used") {
|
||||
if y, err := lp.NewMessage("job_mem_used", coreTags, m.meta, map[string]interface{}{"value": 0}, timestamp); err == nil {
|
||||
y.AddMeta("unit", "Bytes")
|
||||
output <- y
|
||||
}
|
||||
}
|
||||
|
||||
if !m.isExcluded("job_max_mem_used") {
|
||||
if y, err := lp.NewMessage("job_max_mem_used", coreTags, m.meta, map[string]interface{}{"value": 0}, timestamp); err == nil {
|
||||
y.AddMeta("unit", "Bytes")
|
||||
output <- y
|
||||
}
|
||||
}
|
||||
|
||||
if !m.isExcluded("job_mem_limit") {
|
||||
if y, err := lp.NewMessage("job_mem_limit", coreTags, m.meta, map[string]interface{}{"value": 0}, timestamp); err == nil {
|
||||
y.AddMeta("unit", "Bytes")
|
||||
output <- y
|
||||
}
|
||||
}
|
||||
|
||||
if !m.isExcluded("job_user_cpu") {
|
||||
if y, err := lp.NewMessage("job_user_cpu", coreTags, m.meta, map[string]interface{}{"value": 0}, timestamp); err == nil {
|
||||
y.AddMeta("unit", "%")
|
||||
output <- y
|
||||
}
|
||||
}
|
||||
|
||||
if !m.isExcluded("job_sys_cpu") {
|
||||
if y, err := lp.NewMessage("job_sys_cpu", coreTags, m.meta, map[string]interface{}{"value": 0}, timestamp); err == nil {
|
||||
y.AddMeta("unit", "%")
|
||||
output <- y
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (m *SlurmCgroupCollector) Close() {
|
||||
m.init = false
|
||||
}
|
||||
@@ -1,50 +0,0 @@
|
||||
<!--
|
||||
---
|
||||
title: Slurm cgroup metric collector
|
||||
description: Collect per-core memory and CPU usage for SLURM jobs from cgroup v2
|
||||
categories: [cc-metric-collector]
|
||||
tags: ['Admin']
|
||||
weight: 3
|
||||
hugo_path: docs/reference/cc-metric-collector/collectors/slurm_cgroup.md
|
||||
---
|
||||
-->
|
||||
|
||||
## `slurm_cgroup` collector
|
||||
|
||||
The `slurm_cgroup` collector reads job-specific resource metrics from the cgroup v2 filesystem and provides **hwthread** metrics for memory and CPU usage of running SLURM jobs.
|
||||
|
||||
### Example configuration
|
||||
|
||||
```json
|
||||
"slurm_cgroup": {
|
||||
"cgroup_base": "/sys/fs/cgroup/system.slice/slurmstepd.scope",
|
||||
"exclude_metrics": [
|
||||
"job_sys_cpu",
|
||||
"job_mem_limit"
|
||||
],
|
||||
"use_sudo": false
|
||||
}
|
||||
```
|
||||
|
||||
* The `cgroup_base` parameter (optional) can be set to specify the root path to SLURM job cgroups. The default is `/sys/fs/cgroup/system.slice/slurmstepd.scope`.
|
||||
* The `exclude_metrics` array can be used to suppress individual metrics from being sent to the sink.
|
||||
* The cgroups metrics are only available for root users. If password-less sudo is configured, you can enable sudo in the configuration.
|
||||
|
||||
### Reported metrics
|
||||
|
||||
All metrics are available **per hardware thread** :
|
||||
|
||||
* `job_mem_used` (`unit=Bytes`): Current memory usage of the job
|
||||
* `job_max_mem_used` (`unit=Bytes`): Peak memory usage
|
||||
* `job_mem_limit` (`unit=Bytes`): Cgroup memory limit
|
||||
* `job_user_cpu` (`unit=%`): User CPU utilization percentage
|
||||
* `job_sys_cpu` (`unit=%`): System CPU utilization percentage
|
||||
|
||||
Each metric has tags:
|
||||
|
||||
* `type=hwthread`
|
||||
* `type-id=<core_id>`
|
||||
|
||||
### Limitations
|
||||
|
||||
* **cgroups v2 required:** This collector only supports systems running with cgroups v2 (unified hierarchy).
|
||||
@@ -16,8 +16,8 @@ import (
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger"
|
||||
lp "github.com/ClusterCockpit/cc-lib/v2/ccMessage"
|
||||
cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
|
||||
lp "github.com/ClusterCockpit/cc-lib/ccMessage"
|
||||
)
|
||||
|
||||
// See: https://www.kernel.org/doc/html/latest/hwmon/sysfs-interface.html
|
||||
@@ -58,9 +58,7 @@ func (m *TempCollector) Init(config json.RawMessage) error {
|
||||
|
||||
m.name = "TempCollector"
|
||||
m.parallel = true
|
||||
if err := m.setup(); err != nil {
|
||||
return fmt.Errorf("%s Init(): setup() call failed: %w", m.name, err)
|
||||
}
|
||||
m.setup()
|
||||
if len(config) > 0 {
|
||||
err := json.Unmarshal(config, &m.config)
|
||||
if err != nil {
|
||||
@@ -119,7 +117,7 @@ func (m *TempCollector) Init(config json.RawMessage) error {
|
||||
sensor.metricName = sensor.label
|
||||
}
|
||||
sensor.metricName = strings.ToLower(sensor.metricName)
|
||||
sensor.metricName = strings.ReplaceAll(sensor.metricName, " ", "_")
|
||||
sensor.metricName = strings.Replace(sensor.metricName, " ", "_", -1)
|
||||
// Add temperature prefix, if required
|
||||
if !strings.Contains(sensor.metricName, "temp") {
|
||||
sensor.metricName = "temp_" + sensor.metricName
|
||||
|
||||
@@ -9,13 +9,14 @@ package collectors
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"log"
|
||||
"os/exec"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger"
|
||||
lp "github.com/ClusterCockpit/cc-lib/v2/ccMessage"
|
||||
lp "github.com/ClusterCockpit/cc-lib/ccMessage"
|
||||
)
|
||||
|
||||
const MAX_NUM_PROCS = 10
|
||||
@@ -35,17 +36,12 @@ func (m *TopProcsCollector) Init(config json.RawMessage) error {
|
||||
var err error
|
||||
m.name = "TopProcsCollector"
|
||||
m.parallel = true
|
||||
m.tags = map[string]string{
|
||||
"type": "node",
|
||||
}
|
||||
m.meta = map[string]string{
|
||||
"source": m.name,
|
||||
"group": "TopProcs",
|
||||
}
|
||||
m.tags = map[string]string{"type": "node"}
|
||||
m.meta = map[string]string{"source": m.name, "group": "TopProcs"}
|
||||
if len(config) > 0 {
|
||||
err = json.Unmarshal(config, &m.config)
|
||||
if err != nil {
|
||||
return fmt.Errorf("%s Init(): json.Unmarshal() failed: %w", m.name, err)
|
||||
return err
|
||||
}
|
||||
} else {
|
||||
m.config.Num_procs = int(DEFAULT_NUM_PROCS)
|
||||
@@ -53,13 +49,12 @@ func (m *TopProcsCollector) Init(config json.RawMessage) error {
|
||||
if m.config.Num_procs <= 0 || m.config.Num_procs > MAX_NUM_PROCS {
|
||||
return fmt.Errorf("num_procs option must be set in 'topprocs' config (range: 1-%d)", MAX_NUM_PROCS)
|
||||
}
|
||||
if err := m.setup(); err != nil {
|
||||
return fmt.Errorf("%s Init(): setup() call failed: %w", m.name, err)
|
||||
}
|
||||
m.setup()
|
||||
command := exec.Command("ps", "-Ao", "comm", "--sort=-pcpu")
|
||||
command.Wait()
|
||||
_, err = command.Output()
|
||||
if err != nil {
|
||||
return fmt.Errorf("%s Init(): failed to get output from command: %w", m.name, err)
|
||||
return errors.New("failed to execute command")
|
||||
}
|
||||
m.init = true
|
||||
return nil
|
||||
@@ -70,11 +65,10 @@ func (m *TopProcsCollector) Read(interval time.Duration, output chan lp.CCMessag
|
||||
return
|
||||
}
|
||||
command := exec.Command("ps", "-Ao", "comm", "--sort=-pcpu")
|
||||
command.Wait()
|
||||
stdout, err := command.Output()
|
||||
if err != nil {
|
||||
cclog.ComponentError(
|
||||
m.name,
|
||||
fmt.Sprintf("Read(): Failed to read output from command \"%s\": %v", command.String(), err))
|
||||
log.Print(m.name, err)
|
||||
return
|
||||
}
|
||||
|
||||
|
||||
@@ -1,6 +1,15 @@
|
||||
# Building the cc-metric-collector
|
||||
|
||||
In most cases, a simple `make` in the main folder is enough to get a `cc-metric-collector` binary. It is basically a `go build` but some collectors require additional tasks. There is currently no Golang interface to LIKWID, so it uses `cgo` to create bindings but `cgo` requires the LIKWID header files. Therefore, it checks whether LIKWID is installed and if not it downloads LIKWID and copies the headers.
|
||||
Dependencies:
|
||||
- golang
|
||||
- hwloc
|
||||
|
||||
```
|
||||
$ export CGO_LDFLAGS="-L/path/to/hwloc/lib/dir"
|
||||
$ make
|
||||
```
|
||||
|
||||
In most cases, a simple `make` in the main folder is enough to get a `cc-metric-collector` binary as long as hwloc is in default locations. It is basically a `go build` but some collectors require additional tasks. There is currently no Golang interface to LIKWID, so it uses `cgo` to create bindings but `cgo` requires the LIKWID header files. Therefore, it checks whether LIKWID is installed and if not it downloads LIKWID and copies the headers.
|
||||
|
||||
## System integration
|
||||
|
||||
|
||||
@@ -4,7 +4,7 @@ The configuration of the CC metric collector consists of five configuration file
|
||||
|
||||
## Global configuration
|
||||
|
||||
The global file contains the paths to the other four files and some global options. You can find examples in `example_configs`.
|
||||
The global file contains the paths to the other four files and some global options.
|
||||
|
||||
```json
|
||||
{
|
||||
|
||||
43
go.mod
43
go.mod
@@ -1,45 +1,46 @@
|
||||
module github.com/ClusterCockpit/cc-metric-collector
|
||||
|
||||
go 1.24.0
|
||||
go 1.23.4
|
||||
|
||||
toolchain go1.23.7
|
||||
|
||||
require (
|
||||
github.com/ClusterCockpit/cc-lib/v2 v2.1.0
|
||||
github.com/ClusterCockpit/cc-lib v0.5.0
|
||||
github.com/ClusterCockpit/go-rocm-smi v0.3.0
|
||||
github.com/NVIDIA/go-nvml v0.13.0-1
|
||||
github.com/PaesslerAG/gval v1.2.4
|
||||
github.com/NVIDIA/go-nvml v0.12.9-0
|
||||
github.com/PaesslerAG/gval v1.2.2
|
||||
github.com/fsnotify/fsnotify v1.9.0
|
||||
github.com/influxdata/line-protocol v0.0.0-20210922203350-b1ad95c89adf
|
||||
github.com/tklauser/go-sysconf v0.3.16
|
||||
github.com/tklauser/go-sysconf v0.3.13
|
||||
golang.design/x/thread v0.0.0-20210122121316-335e9adffdf1
|
||||
golang.org/x/sys v0.40.0
|
||||
golang.org/x/exp v0.0.0-20250620022241-b7579e27df2b
|
||||
golang.org/x/sys v0.33.0
|
||||
)
|
||||
|
||||
require (
|
||||
github.com/apapsch/go-jsonmerge/v2 v2.0.0 // indirect
|
||||
github.com/beorn7/perks v1.0.1 // indirect
|
||||
github.com/cespare/xxhash/v2 v2.3.0 // indirect
|
||||
github.com/expr-lang/expr v1.17.7 // indirect
|
||||
github.com/expr-lang/expr v1.17.5 // indirect
|
||||
github.com/google/uuid v1.6.0 // indirect
|
||||
github.com/gorilla/mux v1.8.1 // indirect
|
||||
github.com/influxdata/influxdb-client-go/v2 v2.14.0 // indirect
|
||||
github.com/influxdata/line-protocol/v2 v2.2.1 // indirect
|
||||
github.com/klauspost/compress v1.18.2 // indirect
|
||||
github.com/klauspost/compress v1.18.0 // indirect
|
||||
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect
|
||||
github.com/nats-io/nats.go v1.48.0 // indirect
|
||||
github.com/nats-io/nkeys v0.4.12 // indirect
|
||||
github.com/nats-io/nats.go v1.43.0 // indirect
|
||||
github.com/nats-io/nkeys v0.4.11 // indirect
|
||||
github.com/nats-io/nuid v1.0.1 // indirect
|
||||
github.com/oapi-codegen/runtime v1.1.2 // indirect
|
||||
github.com/prometheus/client_golang v1.23.2 // indirect
|
||||
github.com/oapi-codegen/runtime v1.1.1 // indirect
|
||||
github.com/prometheus/client_golang v1.22.0 // indirect
|
||||
github.com/prometheus/client_model v0.6.2 // indirect
|
||||
github.com/prometheus/common v0.67.5 // indirect
|
||||
github.com/prometheus/procfs v0.19.2 // indirect
|
||||
github.com/prometheus/common v0.65.0 // indirect
|
||||
github.com/prometheus/procfs v0.16.1 // indirect
|
||||
github.com/santhosh-tekuri/jsonschema/v5 v5.3.1 // indirect
|
||||
github.com/shopspring/decimal v1.4.0 // indirect
|
||||
github.com/shopspring/decimal v1.3.1 // indirect
|
||||
github.com/stmcginnis/gofish v0.20.0 // indirect
|
||||
github.com/tklauser/numcpus v0.11.0 // indirect
|
||||
go.yaml.in/yaml/v2 v2.4.3 // indirect
|
||||
golang.org/x/crypto v0.47.0 // indirect
|
||||
golang.org/x/exp v0.0.0-20260112195511-716be5621a96 // indirect
|
||||
golang.org/x/net v0.49.0 // indirect
|
||||
google.golang.org/protobuf v1.36.11 // indirect
|
||||
github.com/tklauser/numcpus v0.7.0 // indirect
|
||||
golang.org/x/crypto v0.39.0 // indirect
|
||||
golang.org/x/net v0.41.0 // indirect
|
||||
google.golang.org/protobuf v1.36.6 // indirect
|
||||
)
|
||||
|
||||
93
go.sum
93
go.sum
@@ -1,17 +1,15 @@
|
||||
github.com/ClusterCockpit/cc-lib/v2 v2.1.0 h1:B6l6h0IjfEuY9DU6aVM3fSsj24lQ1eudXK9QTKmJjqg=
|
||||
github.com/ClusterCockpit/cc-lib/v2 v2.1.0/go.mod h1:JuxMAuEOaLLNEnnL9U3ejha8kMvsSatLdKPZEgJw6iw=
|
||||
github.com/ClusterCockpit/cc-lib v0.5.0 h1:DSKAD1TxjVWyd1x3GWvxFeEkANF9o13T97nirj3CbRU=
|
||||
github.com/ClusterCockpit/cc-lib v0.5.0/go.mod h1:0zLbJprwOWLA+OSNQ+OlUKLscZszwf9J2j8Ly5ztplk=
|
||||
github.com/ClusterCockpit/go-rocm-smi v0.3.0 h1:1qZnSpG7/NyLtc7AjqnUL9Jb8xtqG1nMVgp69rJfaR8=
|
||||
github.com/ClusterCockpit/go-rocm-smi v0.3.0/go.mod h1:+I3UMeX3OlizXDf1WpGD43W4KGZZGVSGmny6rTeOnWA=
|
||||
github.com/NVIDIA/go-nvml v0.11.6-0/go.mod h1:hy7HYeQy335x6nEss0Ne3PYqleRa6Ct+VKD9RQ4nyFs=
|
||||
github.com/NVIDIA/go-nvml v0.13.0-1 h1:OLX8Jq3dONuPOQPC7rndB6+iDmDakw0XTYgzMxObkEw=
|
||||
github.com/NVIDIA/go-nvml v0.13.0-1/go.mod h1:+KNA7c7gIBH7SKSJ1ntlwkfN80zdx8ovl4hrK3LmPt4=
|
||||
github.com/NVIDIA/go-nvml v0.12.9-0 h1:e344UK8ZkeMeeLkdQtRhmXRxNf+u532LDZPGMtkdus0=
|
||||
github.com/NVIDIA/go-nvml v0.12.9-0/go.mod h1:+KNA7c7gIBH7SKSJ1ntlwkfN80zdx8ovl4hrK3LmPt4=
|
||||
github.com/PaesslerAG/gval v1.2.4 h1:rhX7MpjJlcxYwL2eTTYIOBUyEKZ+A96T9vQySWkVUiU=
|
||||
github.com/PaesslerAG/gval v1.2.4/go.mod h1:XRFLwvmkTEdYziLdaCeCa5ImcGVrfQbeNUbVR+C6xac=
|
||||
github.com/PaesslerAG/jsonpath v0.1.0 h1:gADYeifvlqK3R3i2cR5B4DGgxLXIPb3TRTH1mGi0jPI=
|
||||
github.com/PaesslerAG/jsonpath v0.1.0/go.mod h1:4BzmtoM/PI8fPO4aQGIusjGxGir2BzcV0grWtFzq1Y8=
|
||||
github.com/RaveNoX/go-jsoncommentstrip v1.0.0/go.mod h1:78ihd09MekBnJnxpICcwzCMzGrKSKYe4AqU6PDYYpjk=
|
||||
github.com/antithesishq/antithesis-sdk-go v0.5.0-default-no-op h1:Ucf+QxEKMbPogRO5guBNe5cgd9uZgfoJLOYs8WWhtjM=
|
||||
github.com/antithesishq/antithesis-sdk-go v0.5.0-default-no-op/go.mod h1:IUpT2DPAKh6i/YhSbt6Gl3v2yvUZjmKncl7U91fup7E=
|
||||
github.com/apapsch/go-jsonmerge/v2 v2.0.0 h1:axGnT1gRIfimI7gJifB699GoE/oq+F2MU7Dml6nw9rQ=
|
||||
github.com/apapsch/go-jsonmerge/v2 v2.0.0/go.mod h1:lvDnEdqiQrp0O42VQGgmlKpxL1AP2+08jFMw88y4klk=
|
||||
github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM=
|
||||
@@ -23,8 +21,8 @@ github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ3
|
||||
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
|
||||
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||
github.com/expr-lang/expr v1.17.7 h1:Q0xY/e/2aCIp8g9s/LGvMDCC5PxYlvHgDZRQ4y16JX8=
|
||||
github.com/expr-lang/expr v1.17.7/go.mod h1:8/vRC7+7HBzESEqt5kKpYXxrxkr31SaO8r40VO/1IT4=
|
||||
github.com/expr-lang/expr v1.17.5 h1:i1WrMvcdLF249nSNlpQZN1S6NXuW9WaOfF5tPi3aw3k=
|
||||
github.com/expr-lang/expr v1.17.5/go.mod h1:8/vRC7+7HBzESEqt5kKpYXxrxkr31SaO8r40VO/1IT4=
|
||||
github.com/frankban/quicktest v1.11.0/go.mod h1:K+q6oSqb0W0Ininfk863uOk1lMy69l/P6txr3mVT54s=
|
||||
github.com/frankban/quicktest v1.11.2/go.mod h1:K+q6oSqb0W0Ininfk863uOk1lMy69l/P6txr3mVT54s=
|
||||
github.com/frankban/quicktest v1.13.0 h1:yNZif1OkDfNoDfb9zZa9aXIpejNR4F23Wely0c+Qdqk=
|
||||
@@ -35,8 +33,6 @@ github.com/google/go-cmp v0.5.2/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/
|
||||
github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
|
||||
github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8=
|
||||
github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU=
|
||||
github.com/google/go-tpm v0.9.7 h1:u89J4tUUeDTlH8xxC3CTW7OHZjbjKoHdQ9W7gCUhtxA=
|
||||
github.com/google/go-tpm v0.9.7/go.mod h1:h9jEsEECg7gtLis0upRBQU+GhYVH6jMjrFxI8u6bVUY=
|
||||
github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
|
||||
github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
|
||||
github.com/gorilla/mux v1.8.1 h1:TuBL49tXwgrFYWhqrNgrUNEY92u81SPhu7sTdzQEiWY=
|
||||
@@ -53,8 +49,8 @@ github.com/influxdata/line-protocol/v2 v2.1.0/go.mod h1:QKw43hdUBg3GTk2iC3iyCxks
|
||||
github.com/influxdata/line-protocol/v2 v2.2.1 h1:EAPkqJ9Km4uAxtMRgUubJyqAr6zgWM0dznKMLRauQRE=
|
||||
github.com/influxdata/line-protocol/v2 v2.2.1/go.mod h1:DmB3Cnh+3oxmG6LOBIxce4oaL4CPj3OmMPgvauXh+tM=
|
||||
github.com/juju/gnuflag v0.0.0-20171113085948-2ce1bb71843d/go.mod h1:2PavIy+JPciBPrBUjwbNvtwB6RQlve+hkpll6QSNmOE=
|
||||
github.com/klauspost/compress v1.18.2 h1:iiPHWW0YrcFgpBYhsA6D1+fqHssJscY/Tm/y2Uqnapk=
|
||||
github.com/klauspost/compress v1.18.2/go.mod h1:R0h/fSBs8DE4ENlcrlib3PsXS61voFxhIs2DeRhCvJ4=
|
||||
github.com/klauspost/compress v1.18.0 h1:c/Cqfb0r+Yi+JtIEq73FWXVkRonBlf0CRNYc8Zttxdo=
|
||||
github.com/klauspost/compress v1.18.0/go.mod h1:2Pp+KzxcywXVXMr50+X0Q/Lsb43OQHYWRCY2AiWywWQ=
|
||||
github.com/kr/pretty v0.2.1/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI=
|
||||
github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE=
|
||||
github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk=
|
||||
@@ -64,75 +60,60 @@ github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
|
||||
github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
|
||||
github.com/kylelemons/godebug v1.1.0 h1:RPNrshWIDI6G2gRW9EHilWtl7Z6Sb1BR0xunSBf0SNc=
|
||||
github.com/kylelemons/godebug v1.1.0/go.mod h1:9/0rRGxNHcop5bhtWyNeEfOS8JIWk580+fNqagV/RAw=
|
||||
github.com/minio/highwayhash v1.0.4-0.20251030100505-070ab1a87a76 h1:KGuD/pM2JpL9FAYvBrnBBeENKZNh6eNtjqytV6TYjnk=
|
||||
github.com/minio/highwayhash v1.0.4-0.20251030100505-070ab1a87a76/go.mod h1:GGYsuwP/fPD6Y9hMiXuapVvlIUEhFhMTh0rxU3ik1LQ=
|
||||
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA=
|
||||
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ=
|
||||
github.com/nats-io/jwt/v2 v2.8.0 h1:K7uzyz50+yGZDO5o772eRE7atlcSEENpL7P+b74JV1g=
|
||||
github.com/nats-io/jwt/v2 v2.8.0/go.mod h1:me11pOkwObtcBNR8AiMrUbtVOUGkqYjMQZ6jnSdVUIA=
|
||||
github.com/nats-io/nats-server/v2 v2.12.3 h1:KRv+1n7lddMVgkJPQer+pt36TcO0ENxjilBmeWdjcHs=
|
||||
github.com/nats-io/nats-server/v2 v2.12.3/go.mod h1:MQXjG9WjyXKz9koWzUc3jYUMKD8x3CLmTNy91IQQz3Y=
|
||||
github.com/nats-io/nats.go v1.48.0 h1:pSFyXApG+yWU/TgbKCjmm5K4wrHu86231/w84qRVR+U=
|
||||
github.com/nats-io/nats.go v1.48.0/go.mod h1:iRWIPokVIFbVijxuMQq4y9ttaBTMe0SFdlZfMDd+33g=
|
||||
github.com/nats-io/nkeys v0.4.12 h1:nssm7JKOG9/x4J8II47VWCL1Ds29avyiQDRn0ckMvDc=
|
||||
github.com/nats-io/nkeys v0.4.12/go.mod h1:MT59A1HYcjIcyQDJStTfaOY6vhy9XTUjOFo+SVsvpBg=
|
||||
github.com/nats-io/nats.go v1.43.0 h1:uRFZ2FEoRvP64+UUhaTokyS18XBCR/xM2vQZKO4i8ug=
|
||||
github.com/nats-io/nats.go v1.43.0/go.mod h1:iRWIPokVIFbVijxuMQq4y9ttaBTMe0SFdlZfMDd+33g=
|
||||
github.com/nats-io/nkeys v0.4.11 h1:q44qGV008kYd9W1b1nEBkNzvnWxtRSQ7A8BoqRrcfa0=
|
||||
github.com/nats-io/nkeys v0.4.11/go.mod h1:szDimtgmfOi9n25JpfIdGw12tZFYXqhGxjhVxsatHVE=
|
||||
github.com/nats-io/nuid v1.0.1 h1:5iA8DT8V7q8WK2EScv2padNa/rTESc1KdnPw4TC2paw=
|
||||
github.com/nats-io/nuid v1.0.1/go.mod h1:19wcPz3Ph3q0Jbyiqsd0kePYG7A95tJPxeL+1OSON2c=
|
||||
github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e/go.mod h1:zD1mROLANZcx1PVRCS0qkT7pwLkGfwJo4zjcN/Tysno=
|
||||
github.com/oapi-codegen/runtime v1.1.2 h1:P2+CubHq8fO4Q6fV1tqDBZHCwpVpvPg7oKiYzQgXIyI=
|
||||
github.com/oapi-codegen/runtime v1.1.2/go.mod h1:SK9X900oXmPWilYR5/WKPzt3Kqxn/uS/+lbpREv+eCg=
|
||||
github.com/oapi-codegen/runtime v1.1.1 h1:EXLHh0DXIJnWhdRPN2w4MXAzFyE4CskzhNLUmtpMYro=
|
||||
github.com/oapi-codegen/runtime v1.1.1/go.mod h1:SK9X900oXmPWilYR5/WKPzt3Kqxn/uS/+lbpREv+eCg=
|
||||
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
|
||||
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
|
||||
github.com/prometheus/client_golang v1.23.2 h1:Je96obch5RDVy3FDMndoUsjAhG5Edi49h0RJWRi/o0o=
|
||||
github.com/prometheus/client_golang v1.23.2/go.mod h1:Tb1a6LWHB3/SPIzCoaDXI4I8UHKeFTEQ1YCr+0Gyqmg=
|
||||
github.com/prometheus/client_golang v1.22.0 h1:rb93p9lokFEsctTys46VnV1kLCDpVZ0a/Y92Vm0Zc6Q=
|
||||
github.com/prometheus/client_golang v1.22.0/go.mod h1:R7ljNsLXhuQXYZYtw6GAE9AZg8Y7vEW5scdCXrWRXC0=
|
||||
github.com/prometheus/client_model v0.6.2 h1:oBsgwpGs7iVziMvrGhE53c/GrLUsZdHnqNwqPLxwZyk=
|
||||
github.com/prometheus/client_model v0.6.2/go.mod h1:y3m2F6Gdpfy6Ut/GBsUqTWZqCUvMVzSfMLjcu6wAwpE=
|
||||
github.com/prometheus/common v0.67.5 h1:pIgK94WWlQt1WLwAC5j2ynLaBRDiinoAb86HZHTUGI4=
|
||||
github.com/prometheus/common v0.67.5/go.mod h1:SjE/0MzDEEAyrdr5Gqc6G+sXI67maCxzaT3A2+HqjUw=
|
||||
github.com/prometheus/procfs v0.19.2 h1:zUMhqEW66Ex7OXIiDkll3tl9a1ZdilUOd/F6ZXw4Vws=
|
||||
github.com/prometheus/procfs v0.19.2/go.mod h1:M0aotyiemPhBCM0z5w87kL22CxfcH05ZpYlu+b4J7mw=
|
||||
github.com/prometheus/common v0.65.0 h1:QDwzd+G1twt//Kwj/Ww6E9FQq1iVMmODnILtW1t2VzE=
|
||||
github.com/prometheus/common v0.65.0/go.mod h1:0gZns+BLRQ3V6NdaerOhMbwwRbNh9hkGINtQAsP5GS8=
|
||||
github.com/prometheus/procfs v0.16.1 h1:hZ15bTNuirocR6u0JZ6BAHHmwS1p8B4P6MRqxtzMyRg=
|
||||
github.com/prometheus/procfs v0.16.1/go.mod h1:teAbpZRB1iIAJYREa1LsoWUXykVXA1KlTmWl8x/U+Is=
|
||||
github.com/rogpeppe/go-internal v1.10.0 h1:TMyTOH3F/DB16zRVcYyreMH6GnZZrwQVAoYjRBZyWFQ=
|
||||
github.com/rogpeppe/go-internal v1.10.0/go.mod h1:UQnix2H7Ngw/k4C5ijL5+65zddjncjaFoBhdsK/akog=
|
||||
github.com/santhosh-tekuri/jsonschema/v5 v5.3.1 h1:lZUw3E0/J3roVtGQ+SCrUrg3ON6NgVqpn3+iol9aGu4=
|
||||
github.com/santhosh-tekuri/jsonschema/v5 v5.3.1/go.mod h1:uToXkOrWAZ6/Oc07xWQrPOhJotwFIyu2bBVN41fcDUY=
|
||||
github.com/shopspring/decimal v1.3.1 h1:2Usl1nmF/WZucqkFZhnfFYxxxu8LG21F6nPQBE5gKV8=
|
||||
github.com/shopspring/decimal v1.3.1/go.mod h1:DKyhrW/HYNuLGql+MJL6WCR6knT2jwCFRcu2hWCYk4o=
|
||||
github.com/shopspring/decimal v1.4.0 h1:bxl37RwXBklmTi0C79JfXCEBD1cqqHt0bbgBAGFp81k=
|
||||
github.com/shopspring/decimal v1.4.0/go.mod h1:gawqmDU56v4yIKSwfBSFip1HdCCXN8/+DMd9qYNcwME=
|
||||
github.com/spkg/bom v0.0.0-20160624110644-59b7046e48ad/go.mod h1:qLr4V1qq6nMqFKkMo8ZTx3f+BZEkzsRUY10Xsm2mwU0=
|
||||
github.com/stmcginnis/gofish v0.20.0 h1:hH2V2Qe898F2wWT1loApnkDUrXXiLKqbSlMaH3Y1n08=
|
||||
github.com/stmcginnis/gofish v0.20.0/go.mod h1:PzF5i8ecRG9A2ol8XT64npKUunyraJ+7t0kYMpQAtqU=
|
||||
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
|
||||
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
|
||||
github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U=
|
||||
github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U=
|
||||
github.com/tklauser/go-sysconf v0.3.16 h1:frioLaCQSsF5Cy1jgRBrzr6t502KIIwQ0MArYICU0nA=
|
||||
github.com/tklauser/go-sysconf v0.3.16/go.mod h1:/qNL9xxDhc7tx3HSRsLWNnuzbVfh3e7gh/BmM179nYI=
|
||||
github.com/tklauser/numcpus v0.11.0 h1:nSTwhKH5e1dMNsCdVBukSZrURJRoHbSEQjdEbY+9RXw=
|
||||
github.com/tklauser/numcpus v0.11.0/go.mod h1:z+LwcLq54uWZTX0u/bGobaV34u6V7KNlTZejzM6/3MQ=
|
||||
go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto=
|
||||
go.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE=
|
||||
go.yaml.in/yaml/v2 v2.4.3 h1:6gvOSjQoTB3vt1l+CU+tSyi/HOjfOjRLJ4YwYZGwRO0=
|
||||
go.yaml.in/yaml/v2 v2.4.3/go.mod h1:zSxWcmIDjOzPXpjlTTbAsKokqkDNAVtZO0WOMiT90s8=
|
||||
github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA=
|
||||
github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
|
||||
github.com/tklauser/go-sysconf v0.3.13 h1:GBUpcahXSpR2xN01jhkNAbTLRk2Yzgggk8IM08lq3r4=
|
||||
github.com/tklauser/go-sysconf v0.3.13/go.mod h1:zwleP4Q4OehZHGn4CYZDipCgg9usW5IJePewFCGVEa0=
|
||||
github.com/tklauser/numcpus v0.7.0 h1:yjuerZP127QG9m5Zh/mSO4wqurYil27tHrqwRoRjpr4=
|
||||
github.com/tklauser/numcpus v0.7.0/go.mod h1:bb6dMVcj8A42tSE7i32fsIUCbQNllK5iDguyOZRUzAY=
|
||||
golang.design/x/thread v0.0.0-20210122121316-335e9adffdf1 h1:P7S/GeHBAFEZIYp0ePPs2kHXoazz8q2KsyxHyQVGCJg=
|
||||
golang.design/x/thread v0.0.0-20210122121316-335e9adffdf1/go.mod h1:9CWpnTUmlQkfdpdutA1nNf4iE5lAVt3QZOu0Z6hahBE=
|
||||
golang.org/x/crypto v0.47.0 h1:V6e3FRj+n4dbpw86FJ8Fv7XVOql7TEwpHapKoMJ/GO8=
|
||||
golang.org/x/crypto v0.47.0/go.mod h1:ff3Y9VzzKbwSSEzWqJsJVBnWmRwRSHt/6Op5n9bQc4A=
|
||||
golang.org/x/exp v0.0.0-20260112195511-716be5621a96 h1:Z/6YuSHTLOHfNFdb8zVZomZr7cqNgTJvA8+Qz75D8gU=
|
||||
golang.org/x/exp v0.0.0-20260112195511-716be5621a96/go.mod h1:nzimsREAkjBCIEFtHiYkrJyT+2uy9YZJB7H1k68CXZU=
|
||||
golang.org/x/net v0.49.0 h1:eeHFmOGUTtaaPSGNmjBKpbng9MulQsJURQUAfUwY++o=
|
||||
golang.org/x/net v0.49.0/go.mod h1:/ysNB2EvaqvesRkuLAyjI1ycPZlQHM3q01F02UY/MV8=
|
||||
golang.org/x/crypto v0.39.0 h1:SHs+kF4LP+f+p14esP5jAoDpHU8Gu/v9lFRK6IT5imM=
|
||||
golang.org/x/crypto v0.39.0/go.mod h1:L+Xg3Wf6HoL4Bn4238Z6ft6KfEpN0tJGo53AAPC632U=
|
||||
golang.org/x/exp v0.0.0-20250620022241-b7579e27df2b h1:M2rDM6z3Fhozi9O7NWsxAkg/yqS/lQJ6PmkyIV3YP+o=
|
||||
golang.org/x/exp v0.0.0-20250620022241-b7579e27df2b/go.mod h1:3//PLf8L/X+8b4vuAfHzxeRUl04Adcb341+IGKfnqS8=
|
||||
golang.org/x/net v0.41.0 h1:vBTly1HeNPEn3wtREYfy4GZ/NECgw2Cnl+nK6Nz3uvw=
|
||||
golang.org/x/net v0.41.0/go.mod h1:B/K4NNqkfmg07DQYrbwvSluqCJOOXwUjeb/5lOisjbA=
|
||||
golang.org/x/sys v0.0.0-20210122093101-04d7465088b8/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.40.0 h1:DBZZqJ2Rkml6QMQsZywtnjnnGvHza6BTfYFWY9kjEWQ=
|
||||
golang.org/x/sys v0.40.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks=
|
||||
golang.org/x/time v0.14.0 h1:MRx4UaLrDotUKUdCIqzPC48t1Y9hANFKIRpNx+Te8PI=
|
||||
golang.org/x/time v0.14.0/go.mod h1:eL/Oa2bBBK0TkX57Fyni+NgnyQQN4LitPmob2Hjnqw4=
|
||||
golang.org/x/sys v0.33.0 h1:q3i8TbbEz+JRD9ywIRlyRAQbM0qF7hu24q3teo2hbuw=
|
||||
golang.org/x/sys v0.33.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k=
|
||||
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
|
||||
google.golang.org/protobuf v1.36.11 h1:fV6ZwhNocDyBLK0dj+fg8ektcVegBBuEolpbTQyBNVE=
|
||||
google.golang.org/protobuf v1.36.11/go.mod h1:HTf+CrKn2C3g5S8VImy6tdcUvCska2kB7j23XfzDpco=
|
||||
google.golang.org/protobuf v1.36.6 h1:z1NpPI8ku2WgiWnf+t9wTPsn6eP1L7ksHUlkfLvd9xY=
|
||||
google.golang.org/protobuf v1.36.6/go.mod h1:jduwjTPXsFjZGTmRluh+L6NjiWu7pchiJ2/5YcXBHnY=
|
||||
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
|
||||
gopkg.in/check.v1 v1.0.0-20200227125254-8fa46927fb4f/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
|
||||
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk=
|
||||
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q=
|
||||
gopkg.in/yaml.v3 v3.0.0-20200615113413-eeeca48fe776/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
|
||||
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
|
||||
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
|
||||
|
||||
@@ -10,16 +10,15 @@ package metricAggregator
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"maps"
|
||||
"math"
|
||||
"os"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger"
|
||||
cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
|
||||
|
||||
lp "github.com/ClusterCockpit/cc-lib/v2/ccMessage"
|
||||
lp "github.com/ClusterCockpit/cc-lib/ccMessage"
|
||||
topo "github.com/ClusterCockpit/cc-metric-collector/pkg/ccTopology"
|
||||
|
||||
"github.com/PaesslerAG/gval"
|
||||
@@ -122,7 +121,9 @@ func (c *metricAggregator) Init(output chan lp.CCMessage) error {
|
||||
|
||||
func (c *metricAggregator) Eval(starttime time.Time, endtime time.Time, metrics []lp.CCMessage) {
|
||||
vars := make(map[string]interface{})
|
||||
maps.Copy(vars, c.constants)
|
||||
for k, v := range c.constants {
|
||||
vars[k] = v
|
||||
}
|
||||
vars["starttime"] = starttime
|
||||
vars["endtime"] = endtime
|
||||
for _, f := range c.functions {
|
||||
|
||||
@@ -11,9 +11,10 @@ import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"regexp"
|
||||
"slices"
|
||||
"strings"
|
||||
|
||||
"golang.org/x/exp/slices"
|
||||
|
||||
topo "github.com/ClusterCockpit/cc-metric-collector/pkg/ccTopology"
|
||||
)
|
||||
|
||||
@@ -168,7 +169,7 @@ func medianfunc(args interface{}) (interface{}, error) {
|
||||
|
||||
func lenfunc(args interface{}) (interface{}, error) {
|
||||
var err error = nil
|
||||
length := 0
|
||||
var length int = 0
|
||||
switch values := args.(type) {
|
||||
case []float64:
|
||||
length = len(values)
|
||||
@@ -237,7 +238,7 @@ func matchfunc(args ...interface{}) (interface{}, error) {
|
||||
case string:
|
||||
switch total := args[1].(type) {
|
||||
case string:
|
||||
smatch := strings.ReplaceAll(match, "%", "\\")
|
||||
smatch := strings.Replace(match, "%", "\\", -1)
|
||||
regex, err := regexp.Compile(smatch)
|
||||
if err != nil {
|
||||
return false, err
|
||||
|
||||
@@ -11,9 +11,9 @@ import (
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger"
|
||||
cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
|
||||
|
||||
lp "github.com/ClusterCockpit/cc-lib/v2/ccMessage"
|
||||
lp "github.com/ClusterCockpit/cc-lib/ccMessage"
|
||||
agg "github.com/ClusterCockpit/cc-metric-collector/internal/metricAggregator"
|
||||
mct "github.com/ClusterCockpit/cc-metric-collector/pkg/multiChanTicker"
|
||||
)
|
||||
@@ -51,7 +51,7 @@ type MetricCache interface {
|
||||
}
|
||||
|
||||
func (c *metricCache) Init(output chan lp.CCMessage, ticker mct.MultiChanTicker, wg *sync.WaitGroup, numPeriods int) error {
|
||||
var err error
|
||||
var err error = nil
|
||||
c.done = make(chan bool)
|
||||
c.wg = wg
|
||||
c.ticker = ticker
|
||||
@@ -161,8 +161,8 @@ func (c *metricCache) DeleteAggregation(name string) error {
|
||||
// is the current one, index=1 the last interval and so on. Returns and empty array if a wrong index
|
||||
// is given (negative index, index larger than configured number of total intervals, ...)
|
||||
func (c *metricCache) GetPeriod(index int) (time.Time, time.Time, []lp.CCMessage) {
|
||||
start := time.Now()
|
||||
stop := time.Now()
|
||||
var start time.Time = time.Now()
|
||||
var stop time.Time = time.Now()
|
||||
var metrics []lp.CCMessage
|
||||
if index >= 0 && index < c.numPeriods {
|
||||
pindex := c.curPeriod - index
|
||||
|
||||
@@ -15,10 +15,10 @@ import (
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger"
|
||||
cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
|
||||
|
||||
lp "github.com/ClusterCockpit/cc-lib/v2/ccMessage"
|
||||
mp "github.com/ClusterCockpit/cc-lib/v2/messageProcessor"
|
||||
lp "github.com/ClusterCockpit/cc-lib/ccMessage"
|
||||
mp "github.com/ClusterCockpit/cc-lib/messageProcessor"
|
||||
agg "github.com/ClusterCockpit/cc-metric-collector/internal/metricAggregator"
|
||||
mct "github.com/ClusterCockpit/cc-metric-collector/pkg/multiChanTicker"
|
||||
)
|
||||
@@ -107,8 +107,10 @@ func (r *metricRouter) Init(ticker mct.MultiChanTicker, wg *sync.WaitGroup, rout
|
||||
cclog.ComponentError("MetricRouter", err.Error())
|
||||
return err
|
||||
}
|
||||
r.maxForward = max(1, r.config.MaxForward)
|
||||
|
||||
r.maxForward = 1
|
||||
if r.config.MaxForward > r.maxForward {
|
||||
r.maxForward = r.config.MaxForward
|
||||
}
|
||||
if r.config.NumCacheIntervals > 0 {
|
||||
r.cache, err = NewCache(r.cache_input, r.ticker, &r.cachewg, r.config.NumCacheIntervals)
|
||||
if err != nil {
|
||||
@@ -116,74 +118,50 @@ func (r *metricRouter) Init(ticker mct.MultiChanTicker, wg *sync.WaitGroup, rout
|
||||
return err
|
||||
}
|
||||
for _, agg := range r.config.IntervalAgg {
|
||||
err = r.cache.AddAggregation(agg.Name, agg.Function, agg.Condition, agg.Tags, agg.Meta)
|
||||
if err != nil {
|
||||
return fmt.Errorf("MetricCache AddAggregation() failed: %w", err)
|
||||
}
|
||||
r.cache.AddAggregation(agg.Name, agg.Function, agg.Condition, agg.Tags, agg.Meta)
|
||||
}
|
||||
}
|
||||
p, err := mp.NewMessageProcessor()
|
||||
if err != nil {
|
||||
return fmt.Errorf("MessageProcessor NewMessageProcessor() failed: %w", err)
|
||||
return fmt.Errorf("initialization of message processor failed: %v", err.Error())
|
||||
}
|
||||
r.mp = p
|
||||
|
||||
if len(r.config.MessageProcessor) > 0 {
|
||||
err = r.mp.FromConfigJSON(r.config.MessageProcessor)
|
||||
if err != nil {
|
||||
return fmt.Errorf("MessageProcessor FromConfigJSON() failed: %w", err)
|
||||
return fmt.Errorf("failed parsing JSON for message processor: %v", err.Error())
|
||||
}
|
||||
}
|
||||
for _, mname := range r.config.DropMetrics {
|
||||
err = r.mp.AddDropMessagesByName(mname)
|
||||
if err != nil {
|
||||
return fmt.Errorf("MessageProcessor AddDropMessagesByName() failed: %w", err)
|
||||
}
|
||||
r.mp.AddDropMessagesByName(mname)
|
||||
}
|
||||
for _, cond := range r.config.DropMetricsIf {
|
||||
err = r.mp.AddDropMessagesByCondition(cond)
|
||||
if err != nil {
|
||||
return fmt.Errorf("MessageProcessor AddDropMessagesByCondition() failed: %w", err)
|
||||
}
|
||||
r.mp.AddDropMessagesByCondition(cond)
|
||||
}
|
||||
for _, data := range r.config.AddTags {
|
||||
cond := data.Condition
|
||||
if cond == "*" {
|
||||
cond = "true"
|
||||
}
|
||||
err = r.mp.AddAddTagsByCondition(cond, data.Key, data.Value)
|
||||
if err != nil {
|
||||
return fmt.Errorf("MessageProcessor AddAddTagsByCondition() failed: %w", err)
|
||||
}
|
||||
r.mp.AddAddTagsByCondition(cond, data.Key, data.Value)
|
||||
}
|
||||
for _, data := range r.config.DelTags {
|
||||
cond := data.Condition
|
||||
if cond == "*" {
|
||||
cond = "true"
|
||||
}
|
||||
err = r.mp.AddDeleteTagsByCondition(cond, data.Key, data.Value)
|
||||
if err != nil {
|
||||
return fmt.Errorf("MessageProcessor AddDeleteTagsByCondition() failed: %w", err)
|
||||
}
|
||||
r.mp.AddDeleteTagsByCondition(cond, data.Key, data.Value)
|
||||
}
|
||||
for oldname, newname := range r.config.RenameMetrics {
|
||||
err = r.mp.AddRenameMetricByName(oldname, newname)
|
||||
if err != nil {
|
||||
return fmt.Errorf("MessageProcessor AddRenameMetricByName() failed: %w", err)
|
||||
}
|
||||
r.mp.AddRenameMetricByName(oldname, newname)
|
||||
}
|
||||
for metricName, prefix := range r.config.ChangeUnitPrefix {
|
||||
err = r.mp.AddChangeUnitPrefix(fmt.Sprintf("name == '%s'", metricName), prefix)
|
||||
if err != nil {
|
||||
return fmt.Errorf("MessageProcessor AddChangeUnitPrefix() failed: %w", err)
|
||||
}
|
||||
r.mp.AddChangeUnitPrefix(fmt.Sprintf("name == '%s'", metricName), prefix)
|
||||
}
|
||||
r.mp.SetNormalizeUnits(r.config.NormalizeUnits)
|
||||
|
||||
err = r.mp.AddAddTagsByCondition("true", r.config.HostnameTagName, r.hostname)
|
||||
if err != nil {
|
||||
return fmt.Errorf("MessageProcessor AddAddTagsByCondition() failed: %w", err)
|
||||
}
|
||||
r.mp.AddAddTagsByCondition("true", r.config.HostnameTagName, r.hostname)
|
||||
|
||||
// r.config.dropMetrics = make(map[string]bool)
|
||||
// for _, mname := range r.config.DropMetrics {
|
||||
|
||||
@@ -13,11 +13,11 @@ import (
|
||||
"os"
|
||||
"path/filepath"
|
||||
"regexp"
|
||||
"slices"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
cclogger "github.com/ClusterCockpit/cc-lib/v2/ccLogger"
|
||||
cclogger "github.com/ClusterCockpit/cc-lib/ccLogger"
|
||||
"golang.org/x/exp/slices"
|
||||
)
|
||||
|
||||
const SYSFS_CPUBASE = `/sys/devices/system/cpu`
|
||||
@@ -80,7 +80,7 @@ func fileToList(path string) []int {
|
||||
// Create list
|
||||
list := make([]int, 0)
|
||||
stringBuffer := strings.TrimSpace(string(buffer))
|
||||
for valueRangeString := range strings.SplitSeq(stringBuffer, ",") {
|
||||
for _, valueRangeString := range strings.Split(stringBuffer, ",") {
|
||||
valueRange := strings.Split(valueRangeString, "-")
|
||||
switch len(valueRange) {
|
||||
case 1:
|
||||
|
||||
@@ -10,7 +10,7 @@ package multiChanTicker
|
||||
import (
|
||||
"time"
|
||||
|
||||
cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger"
|
||||
cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
|
||||
)
|
||||
|
||||
type multiChanTicker struct {
|
||||
|
||||
@@ -44,8 +44,6 @@ def group_to_json(groupfile):
|
||||
scope = "socket"
|
||||
if "PWR" in calc:
|
||||
scope = "socket"
|
||||
if "UMC" in calc:
|
||||
scope = "socket"
|
||||
|
||||
m = {"name" : metric, "calc": calc, "type" : scope, "publish" : True}
|
||||
metrics.append(m)
|
||||
|
||||
Reference in New Issue
Block a user