mirror of
https://github.com/ClusterCockpit/cc-metric-collector.git
synced 2025-07-19 11:21:41 +02:00
Compare commits
14 Commits
cc_lib_swi
...
v0.7.1
Author | SHA1 | Date | |
---|---|---|---|
|
979192af4e | ||
|
c1032ff329 | ||
|
6b03d3aee8 | ||
|
b9665d0d68 | ||
|
4c7a0e064f | ||
|
d8f10384a1 | ||
|
f74d856e69 | ||
|
fabb37ea70 | ||
|
3a0f148728 | ||
|
03cd965099 | ||
|
bd04e19c96 | ||
|
c9f2378813 | ||
|
c1395ec2ed | ||
|
16faa70867 |
36
.github/workflows/Release.yml
vendored
36
.github/workflows/Release.yml
vendored
@@ -48,10 +48,10 @@ jobs:
|
||||
- name: Setup Golang
|
||||
run: |
|
||||
dnf --assumeyes --disableplugin=subscription-manager install \
|
||||
https://repo.almalinux.org/almalinux/8/AppStream/x86_64/os/Packages/go-toolset-1.22.9-1.module_el8.10.0+3938+8c723e16.x86_64.rpm \
|
||||
https://repo.almalinux.org/almalinux/8/AppStream/x86_64/os/Packages/golang-1.22.9-1.module_el8.10.0+3938+8c723e16.x86_64.rpm \
|
||||
https://repo.almalinux.org/almalinux/8/AppStream/x86_64/os/Packages/golang-bin-1.22.9-1.module_el8.10.0+3938+8c723e16.x86_64.rpm \
|
||||
https://repo.almalinux.org/almalinux/8/AppStream/x86_64/os/Packages/golang-src-1.22.9-1.module_el8.10.0+3938+8c723e16.noarch.rpm
|
||||
https://repo.almalinux.org/almalinux/8/AppStream/x86_64/os/Packages/go-toolset-1.23.9-1.module_el8.10.0+4000+1ad1b2cc.x86_64.rpm \
|
||||
https://repo.almalinux.org/almalinux/8/AppStream/x86_64/os/Packages/golang-1.23.9-1.module_el8.10.0+4000+1ad1b2cc.x86_64.rpm \
|
||||
https://repo.almalinux.org/almalinux/8/AppStream/x86_64/os/Packages/golang-bin-1.23.9-1.module_el8.10.0+4000+1ad1b2cc.x86_64.rpm \
|
||||
https://repo.almalinux.org/almalinux/8/AppStream/x86_64/os/Packages/golang-src-1.23.9-1.module_el8.10.0+4000+1ad1b2cc.noarch.rpm
|
||||
|
||||
- name: RPM build MetricCollector
|
||||
id: rpmbuild
|
||||
@@ -126,11 +126,11 @@ jobs:
|
||||
- name: Setup Golang
|
||||
run: |
|
||||
dnf --assumeyes --disableplugin=subscription-manager install \
|
||||
https://repo.almalinux.org/almalinux/9/AppStream/x86_64/os/Packages/go-toolset-1.22.7-2.el9_5.x86_64.rpm \
|
||||
https://repo.almalinux.org/almalinux/9/AppStream/x86_64/os/Packages/golang-1.22.7-2.el9_5.x86_64.rpm \
|
||||
https://repo.almalinux.org/almalinux/9/AppStream/x86_64/os/Packages/golang-bin-1.22.7-2.el9_5.x86_64.rpm \
|
||||
https://repo.almalinux.org/almalinux/9/AppStream/x86_64/os/Packages/golang-src-1.22.7-2.el9_5.noarch.rpm \
|
||||
https://repo.almalinux.org/almalinux/9/AppStream/x86_64/os/Packages/golang-race-1.22.7-2.el9_5.x86_64.rpm
|
||||
https://repo.almalinux.org/almalinux/9/AppStream/x86_64/os/Packages/go-toolset-1.23.9-1.el9_6.x86_64.rpm \
|
||||
https://repo.almalinux.org/almalinux/9/AppStream/x86_64/os/Packages/golang-1.23.9-1.el9_6.x86_64.rpm \
|
||||
https://repo.almalinux.org/almalinux/9/AppStream/x86_64/os/Packages/golang-bin-1.23.9-1.el9_6.x86_64.rpm \
|
||||
https://repo.almalinux.org/almalinux/9/AppStream/x86_64/os/Packages/golang-src-1.23.9-1.el9_6.noarch.rpm \
|
||||
https://repo.almalinux.org/almalinux/9/AppStream/x86_64/os/Packages/golang-race-1.23.9-1.el9_6.x86_64.rpm
|
||||
|
||||
- name: RPM build MetricCollector
|
||||
id: rpmbuild
|
||||
@@ -202,10 +202,10 @@ jobs:
|
||||
- name: Setup Golang
|
||||
run: |
|
||||
dnf --assumeyes --disableplugin=subscription-manager install \
|
||||
https://repo.almalinux.org/almalinux/8/AppStream/x86_64/os/Packages/go-toolset-1.22.9-1.module_el8.10.0+3938+8c723e16.x86_64.rpm \
|
||||
https://repo.almalinux.org/almalinux/8/AppStream/x86_64/os/Packages/golang-1.22.9-1.module_el8.10.0+3938+8c723e16.x86_64.rpm \
|
||||
https://repo.almalinux.org/almalinux/8/AppStream/x86_64/os/Packages/golang-bin-1.22.9-1.module_el8.10.0+3938+8c723e16.x86_64.rpm \
|
||||
https://repo.almalinux.org/almalinux/8/AppStream/x86_64/os/Packages/golang-src-1.22.9-1.module_el8.10.0+3938+8c723e16.noarch.rpm
|
||||
https://repo.almalinux.org/almalinux/8/AppStream/x86_64/os/Packages/go-toolset-1.23.9-1.module_el8.10.0+4000+1ad1b2cc.x86_64.rpm \
|
||||
https://repo.almalinux.org/almalinux/8/AppStream/x86_64/os/Packages/golang-1.23.9-1.module_el8.10.0+4000+1ad1b2cc.x86_64.rpm \
|
||||
https://repo.almalinux.org/almalinux/8/AppStream/x86_64/os/Packages/golang-bin-1.23.9-1.module_el8.10.0+4000+1ad1b2cc.x86_64.rpm \
|
||||
https://repo.almalinux.org/almalinux/8/AppStream/x86_64/os/Packages/golang-src-1.23.9-1.module_el8.10.0+4000+1ad1b2cc.noarch.rpm
|
||||
|
||||
- name: RPM build MetricCollector
|
||||
id: rpmbuild
|
||||
@@ -262,11 +262,11 @@ jobs:
|
||||
- name: Setup Golang
|
||||
run: |
|
||||
dnf --assumeyes --disableplugin=subscription-manager install \
|
||||
https://repo.almalinux.org/almalinux/9/AppStream/x86_64/os/Packages/go-toolset-1.22.7-2.el9_5.x86_64.rpm \
|
||||
https://repo.almalinux.org/almalinux/9/AppStream/x86_64/os/Packages/golang-1.22.7-2.el9_5.x86_64.rpm \
|
||||
https://repo.almalinux.org/almalinux/9/AppStream/x86_64/os/Packages/golang-bin-1.22.7-2.el9_5.x86_64.rpm \
|
||||
https://repo.almalinux.org/almalinux/9/AppStream/x86_64/os/Packages/golang-src-1.22.7-2.el9_5.noarch.rpm \
|
||||
https://repo.almalinux.org/almalinux/9/AppStream/x86_64/os/Packages/golang-race-1.22.7-2.el9_5.x86_64.rpm
|
||||
https://repo.almalinux.org/almalinux/9/AppStream/x86_64/os/Packages/go-toolset-1.23.9-1.el9_6.x86_64.rpm \
|
||||
https://repo.almalinux.org/almalinux/9/AppStream/x86_64/os/Packages/golang-1.23.9-1.el9_6.x86_64.rpm \
|
||||
https://repo.almalinux.org/almalinux/9/AppStream/x86_64/os/Packages/golang-bin-1.23.9-1.el9_6.x86_64.rpm \
|
||||
https://repo.almalinux.org/almalinux/9/AppStream/x86_64/os/Packages/golang-src-1.23.9-1.el9_6.noarch.rpm \
|
||||
https://repo.almalinux.org/almalinux/9/AppStream/x86_64/os/Packages/golang-race-1.23.9-1.el9_6.x86_64.rpm
|
||||
|
||||
- name: RPM build MetricCollector
|
||||
id: rpmbuild
|
||||
|
36
.github/workflows/runonce.yml
vendored
36
.github/workflows/runonce.yml
vendored
@@ -71,10 +71,10 @@ jobs:
|
||||
- name: Setup Golang
|
||||
run: |
|
||||
dnf --assumeyes --disableplugin=subscription-manager install \
|
||||
https://repo.almalinux.org/almalinux/8/AppStream/x86_64/os/Packages/go-toolset-1.22.9-1.module_el8.10.0+3938+8c723e16.x86_64.rpm \
|
||||
https://repo.almalinux.org/almalinux/8/AppStream/x86_64/os/Packages/golang-1.22.9-1.module_el8.10.0+3938+8c723e16.x86_64.rpm \
|
||||
https://repo.almalinux.org/almalinux/8/AppStream/x86_64/os/Packages/golang-bin-1.22.9-1.module_el8.10.0+3938+8c723e16.x86_64.rpm \
|
||||
https://repo.almalinux.org/almalinux/8/AppStream/x86_64/os/Packages/golang-src-1.22.9-1.module_el8.10.0+3938+8c723e16.noarch.rpm
|
||||
https://repo.almalinux.org/almalinux/8/AppStream/x86_64/os/Packages/go-toolset-1.23.9-1.module_el8.10.0+4000+1ad1b2cc.x86_64.rpm \
|
||||
https://repo.almalinux.org/almalinux/8/AppStream/x86_64/os/Packages/golang-1.23.9-1.module_el8.10.0+4000+1ad1b2cc.x86_64.rpm \
|
||||
https://repo.almalinux.org/almalinux/8/AppStream/x86_64/os/Packages/golang-bin-1.23.9-1.module_el8.10.0+4000+1ad1b2cc.x86_64.rpm \
|
||||
https://repo.almalinux.org/almalinux/8/AppStream/x86_64/os/Packages/golang-src-1.23.9-1.module_el8.10.0+4000+1ad1b2cc.noarch.rpm
|
||||
|
||||
- name: RPM build MetricCollector
|
||||
id: rpmbuild
|
||||
@@ -116,11 +116,11 @@ jobs:
|
||||
- name: Setup Golang
|
||||
run: |
|
||||
dnf --assumeyes --disableplugin=subscription-manager install \
|
||||
https://repo.almalinux.org/almalinux/9/AppStream/x86_64/os/Packages/go-toolset-1.22.7-2.el9_5.x86_64.rpm \
|
||||
https://repo.almalinux.org/almalinux/9/AppStream/x86_64/os/Packages/golang-1.22.7-2.el9_5.x86_64.rpm \
|
||||
https://repo.almalinux.org/almalinux/9/AppStream/x86_64/os/Packages/golang-bin-1.22.7-2.el9_5.x86_64.rpm \
|
||||
https://repo.almalinux.org/almalinux/9/AppStream/x86_64/os/Packages/golang-src-1.22.7-2.el9_5.noarch.rpm \
|
||||
https://repo.almalinux.org/almalinux/9/AppStream/x86_64/os/Packages/golang-race-1.22.7-2.el9_5.x86_64.rpm
|
||||
https://repo.almalinux.org/almalinux/9/AppStream/x86_64/os/Packages/go-toolset-1.23.9-1.el9_6.x86_64.rpm \
|
||||
https://repo.almalinux.org/almalinux/9/AppStream/x86_64/os/Packages/golang-1.23.9-1.el9_6.x86_64.rpm \
|
||||
https://repo.almalinux.org/almalinux/9/AppStream/x86_64/os/Packages/golang-bin-1.23.9-1.el9_6.x86_64.rpm \
|
||||
https://repo.almalinux.org/almalinux/9/AppStream/x86_64/os/Packages/golang-src-1.23.9-1.el9_6.noarch.rpm \
|
||||
https://repo.almalinux.org/almalinux/9/AppStream/x86_64/os/Packages/golang-race-1.23.9-1.el9_6.x86_64.rpm
|
||||
|
||||
- name: RPM build MetricCollector
|
||||
id: rpmbuild
|
||||
@@ -160,10 +160,10 @@ jobs:
|
||||
- name: Setup Golang
|
||||
run: |
|
||||
dnf --assumeyes --disableplugin=subscription-manager install \
|
||||
https://repo.almalinux.org/almalinux/8/AppStream/x86_64/os/Packages/go-toolset-1.22.9-1.module_el8.10.0+3938+8c723e16.x86_64.rpm \
|
||||
https://repo.almalinux.org/almalinux/8/AppStream/x86_64/os/Packages/golang-1.22.9-1.module_el8.10.0+3938+8c723e16.x86_64.rpm \
|
||||
https://repo.almalinux.org/almalinux/8/AppStream/x86_64/os/Packages/golang-bin-1.22.9-1.module_el8.10.0+3938+8c723e16.x86_64.rpm \
|
||||
https://repo.almalinux.org/almalinux/8/AppStream/x86_64/os/Packages/golang-src-1.22.9-1.module_el8.10.0+3938+8c723e16.noarch.rpm
|
||||
https://repo.almalinux.org/almalinux/8/AppStream/x86_64/os/Packages/go-toolset-1.23.9-1.module_el8.10.0+4000+1ad1b2cc.x86_64.rpm \
|
||||
https://repo.almalinux.org/almalinux/8/AppStream/x86_64/os/Packages/golang-1.23.9-1.module_el8.10.0+4000+1ad1b2cc.x86_64.rpm \
|
||||
https://repo.almalinux.org/almalinux/8/AppStream/x86_64/os/Packages/golang-bin-1.23.9-1.module_el8.10.0+4000+1ad1b2cc.x86_64.rpm \
|
||||
https://repo.almalinux.org/almalinux/8/AppStream/x86_64/os/Packages/golang-src-1.23.9-1.module_el8.10.0+4000+1ad1b2cc.noarch.rpm
|
||||
|
||||
- name: RPM build MetricCollector
|
||||
id: rpmbuild
|
||||
@@ -202,11 +202,11 @@ jobs:
|
||||
- name: Setup Golang
|
||||
run: |
|
||||
dnf --assumeyes --disableplugin=subscription-manager install \
|
||||
https://repo.almalinux.org/almalinux/9/AppStream/x86_64/os/Packages/go-toolset-1.22.7-2.el9_5.x86_64.rpm \
|
||||
https://repo.almalinux.org/almalinux/9/AppStream/x86_64/os/Packages/golang-1.22.7-2.el9_5.x86_64.rpm \
|
||||
https://repo.almalinux.org/almalinux/9/AppStream/x86_64/os/Packages/golang-bin-1.22.7-2.el9_5.x86_64.rpm \
|
||||
https://repo.almalinux.org/almalinux/9/AppStream/x86_64/os/Packages/golang-src-1.22.7-2.el9_5.noarch.rpm \
|
||||
https://repo.almalinux.org/almalinux/9/AppStream/x86_64/os/Packages/golang-race-1.22.7-2.el9_5.x86_64.rpm
|
||||
https://repo.almalinux.org/almalinux/9/AppStream/x86_64/os/Packages/go-toolset-1.23.9-1.el9_6.x86_64.rpm \
|
||||
https://repo.almalinux.org/almalinux/9/AppStream/x86_64/os/Packages/golang-1.23.9-1.el9_6.x86_64.rpm \
|
||||
https://repo.almalinux.org/almalinux/9/AppStream/x86_64/os/Packages/golang-bin-1.23.9-1.el9_6.x86_64.rpm \
|
||||
https://repo.almalinux.org/almalinux/9/AppStream/x86_64/os/Packages/golang-src-1.23.9-1.el9_6.noarch.rpm \
|
||||
https://repo.almalinux.org/almalinux/9/AppStream/x86_64/os/Packages/golang-race-1.23.9-1.el9_6.x86_64.rpm
|
||||
|
||||
- name: RPM build MetricCollector
|
||||
id: rpmbuild
|
||||
|
17
README.md
17
README.md
@@ -1,6 +1,17 @@
|
||||
<!--
|
||||
---
|
||||
title: cc-metric-collector
|
||||
description: Metric collecting node agent
|
||||
categories: [cc-metric-collector]
|
||||
tags: ['Admin']
|
||||
weight: 2
|
||||
hugo_path: docs/reference/cc-metric-collector/_index.md
|
||||
---
|
||||
-->
|
||||
|
||||
# cc-metric-collector
|
||||
|
||||
A node agent for measuring, processing and forwarding node level metrics. It is part of the [ClusterCockpit ecosystem](./docs/introduction.md).
|
||||
A node agent for measuring, processing and forwarding node level metrics. It is part of the [ClusterCockpit ecosystem](https://clustercockpit.org/docs/overview/).
|
||||
|
||||
The metric collector sends (and receives) metrics in the [InfluxDB line protocol](https://docs.influxdata.com/influxdb/cloud/reference/syntax/line-protocol/) as it provides flexibility while providing a separation between tags (like index columns in relational databases) and fields (like data columns).
|
||||
|
||||
@@ -35,8 +46,8 @@ The `interval` defines how often the metrics should be read and send to the sink
|
||||
See the component READMEs for their configuration:
|
||||
|
||||
* [`collectors`](./collectors/README.md)
|
||||
* [`sinks`](./sinks/README.md)
|
||||
* [`receivers`](./receivers/README.md)
|
||||
* [`sinks`](https://github.com/ClusterCockpit/cc-lib/blob/main/sinks/README.md)
|
||||
* [`receivers`](https://github.com/ClusterCockpit/cc-lib/blob/main/receivers/README.md)
|
||||
* [`router`](./internal/metricRouter/README.md)
|
||||
|
||||
# Installation
|
||||
|
@@ -1,3 +1,10 @@
|
||||
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
// All rights reserved. This file is part of cc-lib.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
// additional authors:
|
||||
// Holger Obermaier (NHR@KIT)
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
|
@@ -1,3 +1,14 @@
|
||||
<!--
|
||||
---
|
||||
title: Metric Collectors
|
||||
description: Metric collectors for cc-metric-collector
|
||||
categories: [cc-metric-collector]
|
||||
tags: ['Admin']
|
||||
weight: 2
|
||||
hugo_path: docs/reference/cc-metric-collector/collectors/_index.md
|
||||
---
|
||||
-->
|
||||
|
||||
# CCMetric collectors
|
||||
|
||||
This folder contains the collectors for the cc-metric-collector.
|
||||
@@ -23,7 +34,6 @@ In contrast to the configuration files for sinks and receivers, the collectors c
|
||||
* [`loadavg`](./loadavgMetric.md)
|
||||
* [`netstat`](./netstatMetric.md)
|
||||
* [`ibstat`](./infinibandMetric.md)
|
||||
* [`ibstat_perfquery`](./infinibandPerfQueryMetric.md)
|
||||
* [`tempstat`](./tempMetric.md)
|
||||
* [`lustrestat`](./lustreMetric.md)
|
||||
* [`likwid`](./likwidMetric.md)
|
||||
@@ -33,8 +43,10 @@ In contrast to the configuration files for sinks and receivers, the collectors c
|
||||
* [`topprocs`](./topprocsMetric.md)
|
||||
* [`nfs3stat`](./nfs3Metric.md)
|
||||
* [`nfs4stat`](./nfs4Metric.md)
|
||||
* [`nfsiostat`](./nfsiostatMetric.md)
|
||||
* [`cpufreq`](./cpufreqMetric.md)
|
||||
* [`cpufreq_cpuinfo`](./cpufreqCpuinfoMetric.md)
|
||||
* [`schedstat`](./schedstatMetric.md)
|
||||
* [`numastats`](./numastatsMetric.md)
|
||||
* [`gpfs`](./gpfsMetric.md)
|
||||
* [`beegfs_meta`](./beegfsmetaMetric.md)
|
||||
@@ -51,7 +63,7 @@ A collector reads data from any source, parses it to metrics and submits these m
|
||||
* `Name() string`: Return the name of the collector
|
||||
* `Init(config json.RawMessage) error`: Initializes the collector using the given collector-specific config in JSON. Check if needed files/commands exist, ...
|
||||
* `Initialized() bool`: Check if a collector is successfully initialized
|
||||
* `Read(duration time.Duration, output chan ccMetric.CCMetric)`: Read, parse and submit data to the `output` channel as [`CCMetric`](../internal/ccMetric/README.md). If the collector has to measure anything for some duration, use the provided function argument `duration`.
|
||||
* `Read(duration time.Duration, output chan ccMessage.CCMessage)`: Read, parse and submit data to the `output` channel as [`CCMessage`](https://github.com/ClusterCockpit/cc-lib/blob/main/ccMessage/README.md). If the collector has to measure anything for some duration, use the provided function argument `duration`.
|
||||
* `Close()`: Closes down the collector.
|
||||
|
||||
It is recommended to call `setup()` in the `Init()` function.
|
||||
|
@@ -1,3 +1,10 @@
|
||||
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
// All rights reserved. This file is part of cc-lib.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
// additional authors:
|
||||
// Holger Obermaier (NHR@KIT)
|
||||
|
||||
package collectors
|
||||
|
||||
import (
|
||||
|
@@ -1,5 +1,17 @@
|
||||
<!--
|
||||
---
|
||||
title: BeeGFS metadata metric collector
|
||||
description: Collect metadata clientstats for `BeeGFS on Demand`
|
||||
categories: [cc-metric-collector]
|
||||
tags: ['Admin']
|
||||
weight: 2
|
||||
hugo_path: docs/reference/cc-metric-collector/collectors/beegfsmeta.md
|
||||
---
|
||||
-->
|
||||
|
||||
|
||||
## `BeeGFS on Demand` collector
|
||||
This Collector is to collect BeeGFS on Demand (BeeOND) metadata clientstats.
|
||||
This Collector is to collect `BeeGFS on Demand` (BeeOND) metadata clientstats.
|
||||
|
||||
```json
|
||||
"beegfs_meta": {
|
||||
@@ -72,4 +84,4 @@ Available Metrics:
|
||||
* setXA
|
||||
* mirror
|
||||
|
||||
The collector adds a `filesystem` tag to all metrics
|
||||
The collector adds a `filesystem` tag to all metrics
|
||||
|
@@ -1,3 +1,10 @@
|
||||
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
// All rights reserved. This file is part of cc-lib.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
// additional authors:
|
||||
// Holger Obermaier (NHR@KIT)
|
||||
|
||||
package collectors
|
||||
|
||||
import (
|
||||
|
@@ -1,3 +1,14 @@
|
||||
<!--
|
||||
---
|
||||
title: "BeeGFS on Demand metric collector"
|
||||
description: Collect performance metrics for BeeGFS filesystems
|
||||
categories: [cc-metric-collector]
|
||||
tags: ['Admin']
|
||||
weight: 2
|
||||
hugo_path: docs/reference/cc-metric-collector/collectors/beegfsstorage.md
|
||||
---
|
||||
-->
|
||||
|
||||
## `BeeGFS on Demand` collector
|
||||
This Collector is to collect BeeGFS on Demand (BeeOND) storage stats.
|
||||
|
||||
@@ -52,4 +63,4 @@ Available Metrics:
|
||||
* "unlnk"
|
||||
|
||||
|
||||
The collector adds a `filesystem` tag to all metrics
|
||||
The collector adds a `filesystem` tag to all metrics
|
||||
|
@@ -1,3 +1,10 @@
|
||||
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
// All rights reserved. This file is part of cc-lib.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
// additional authors:
|
||||
// Holger Obermaier (NHR@KIT)
|
||||
|
||||
package collectors
|
||||
|
||||
import (
|
||||
|
@@ -1,3 +1,10 @@
|
||||
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
// All rights reserved. This file is part of cc-lib.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
// additional authors:
|
||||
// Holger Obermaier (NHR@KIT)
|
||||
|
||||
package collectors
|
||||
|
||||
import (
|
||||
|
@@ -1,3 +1,14 @@
|
||||
<!--
|
||||
---
|
||||
title: CPU frequency metric collector through cpuinfo
|
||||
description: Collect the CPU frequency from `/proc/cpuinfo`
|
||||
categories: [cc-metric-collector]
|
||||
tags: ['Admin']
|
||||
weight: 2
|
||||
hugo_path: docs/reference/cc-metric-collector/collectors/cpufreq_cpuinfo.md
|
||||
---
|
||||
-->
|
||||
|
||||
## `cpufreq_cpuinfo` collector
|
||||
|
||||
```json
|
||||
|
@@ -1,3 +1,10 @@
|
||||
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
// All rights reserved. This file is part of cc-lib.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
// additional authors:
|
||||
// Holger Obermaier (NHR@KIT)
|
||||
|
||||
package collectors
|
||||
|
||||
import (
|
||||
|
@@ -1,3 +1,14 @@
|
||||
<!--
|
||||
---
|
||||
title: CPU frequency metric collector through sysfs
|
||||
description: Collect the CPU frequency metrics from `/sys/.../cpu/.../cpufreq`
|
||||
categories: [cc-metric-collector]
|
||||
tags: ['Admin']
|
||||
weight: 2
|
||||
hugo_path: docs/reference/cc-metric-collector/collectors/cpufreq.md
|
||||
---
|
||||
-->
|
||||
|
||||
## `cpufreq` collector
|
||||
|
||||
```json
|
||||
|
@@ -1,3 +1,10 @@
|
||||
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
// All rights reserved. This file is part of cc-lib.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
// additional authors:
|
||||
// Holger Obermaier (NHR@KIT)
|
||||
|
||||
package collectors
|
||||
|
||||
import (
|
||||
|
@@ -1,3 +1,14 @@
|
||||
<!--
|
||||
---
|
||||
title: CPU usage metric collector
|
||||
description: Collect CPU metrics from `/proc/stat`
|
||||
categories: [cc-metric-collector]
|
||||
tags: ['Admin']
|
||||
weight: 2
|
||||
hugo_path: docs/reference/cc-metric-collector/collectors/cpustat.md
|
||||
---
|
||||
-->
|
||||
|
||||
|
||||
## `cpustat` collector
|
||||
|
||||
@@ -24,4 +35,4 @@ Metrics:
|
||||
* `cpu_guest` with `unit=Percent`
|
||||
* `cpu_guest_nice` with `unit=Percent`
|
||||
* `cpu_used` = `cpu_* - cpu_idle` with `unit=Percent`
|
||||
* `num_cpus`
|
||||
* `num_cpus`
|
||||
|
@@ -1,3 +1,10 @@
|
||||
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
// All rights reserved. This file is part of cc-lib.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
// additional authors:
|
||||
// Holger Obermaier (NHR@KIT)
|
||||
|
||||
package collectors
|
||||
|
||||
import (
|
||||
|
@@ -1,3 +1,13 @@
|
||||
<!--
|
||||
---
|
||||
title: CustomCommand metric collector
|
||||
description: Collect messages from custom command or files
|
||||
categories: [cc-metric-collector]
|
||||
tags: ['Admin']
|
||||
weight: 2
|
||||
hugo_path: docs/reference/cc-metric-collector/collectors/customcmd.md
|
||||
---
|
||||
-->
|
||||
|
||||
## `customcmd` collector
|
||||
|
||||
|
@@ -1,3 +1,10 @@
|
||||
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
// All rights reserved. This file is part of cc-lib.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
// additional authors:
|
||||
// Holger Obermaier (NHR@KIT)
|
||||
|
||||
package collectors
|
||||
|
||||
import (
|
||||
|
@@ -1,3 +1,13 @@
|
||||
<!--
|
||||
---
|
||||
title: Disk usage statistics metric collector
|
||||
description: Collect metrics for various filesystems from `/proc/self/mounts`
|
||||
categories: [cc-metric-collector]
|
||||
tags: ['Admin']
|
||||
weight: 2
|
||||
hugo_path: docs/reference/cc-metric-collector/collectors/diskstat.md
|
||||
---
|
||||
-->
|
||||
|
||||
## `diskstat` collector
|
||||
|
||||
|
@@ -1,3 +1,10 @@
|
||||
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
// All rights reserved. This file is part of cc-lib.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
// additional authors:
|
||||
// Holger Obermaier (NHR@KIT)
|
||||
|
||||
package collectors
|
||||
|
||||
import (
|
||||
|
@@ -1,7 +1,18 @@
|
||||
<!--
|
||||
---
|
||||
title: GPFS collector
|
||||
description: Collect infos about GPFS filesystems
|
||||
categories: [cc-metric-collector]
|
||||
tags: ['Admin']
|
||||
weight: 2
|
||||
hugo_path: docs/reference/cc-metric-collector/collectors/gpfs.md
|
||||
---
|
||||
-->
|
||||
|
||||
## `gpfs` collector
|
||||
|
||||
```json
|
||||
"ibstat": {
|
||||
"gpfs": {
|
||||
"mmpmon_path": "/path/to/mmpmon",
|
||||
"exclude_filesystem": [
|
||||
"fs1"
|
||||
|
@@ -1,3 +1,10 @@
|
||||
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
// All rights reserved. This file is part of cc-lib.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
// additional authors:
|
||||
// Holger Obermaier (NHR@KIT)
|
||||
|
||||
package collectors
|
||||
|
||||
import (
|
||||
|
@@ -1,3 +1,13 @@
|
||||
<!--
|
||||
---
|
||||
title: InfiniBand Metric collector
|
||||
description: Collect metrics for InfiniBand devices
|
||||
categories: [cc-metric-collector]
|
||||
tags: ['Admin']
|
||||
weight: 2
|
||||
hugo_path: docs/reference/cc-metric-collector/collectors/infiniband.md
|
||||
---
|
||||
-->
|
||||
|
||||
## `ibstat` collector
|
||||
|
||||
|
@@ -1,3 +1,10 @@
|
||||
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
// All rights reserved. This file is part of cc-lib.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
// additional authors:
|
||||
// Holger Obermaier (NHR@KIT)
|
||||
|
||||
package collectors
|
||||
|
||||
import (
|
||||
|
@@ -1,3 +1,13 @@
|
||||
<!--
|
||||
---
|
||||
title: IOStat Metric collector
|
||||
description: Collect metrics from `/proc/diskstats`
|
||||
categories: [cc-metric-collector]
|
||||
tags: ['Admin']
|
||||
weight: 2
|
||||
hugo_path: docs/reference/cc-metric-collector/collectors/iostat.md
|
||||
---
|
||||
-->
|
||||
|
||||
## `iostat` collector
|
||||
|
||||
|
@@ -1,3 +1,10 @@
|
||||
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
// All rights reserved. This file is part of cc-lib.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
// additional authors:
|
||||
// Holger Obermaier (NHR@KIT)
|
||||
|
||||
package collectors
|
||||
|
||||
import (
|
||||
|
@@ -1,3 +1,13 @@
|
||||
<!--
|
||||
---
|
||||
title: IPMI Metric collector
|
||||
description: Collect metrics using ipmitool or ipmi-sensors
|
||||
categories: [cc-metric-collector]
|
||||
tags: ['Admin']
|
||||
weight: 2
|
||||
hugo_path: docs/reference/cc-metric-collector/collectors/ipmi.md
|
||||
---
|
||||
-->
|
||||
|
||||
## `ipmistat` collector
|
||||
|
||||
|
@@ -1,3 +1,10 @@
|
||||
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
// All rights reserved. This file is part of cc-lib.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
// additional authors:
|
||||
// Holger Obermaier (NHR@KIT)
|
||||
|
||||
package collectors
|
||||
|
||||
/*
|
||||
@@ -190,12 +197,8 @@ func getBaseFreq() float64 {
|
||||
}
|
||||
|
||||
if math.IsNaN(freq) {
|
||||
C.power_init(0)
|
||||
info := C.get_powerInfo()
|
||||
if float64(info.baseFrequency) != 0 {
|
||||
freq = float64(info.baseFrequency)
|
||||
}
|
||||
C.power_finalize()
|
||||
C.timer_init()
|
||||
freq = float64(C.timer_getCycleClock()) / 1e3
|
||||
}
|
||||
return freq * 1e3
|
||||
}
|
||||
|
@@ -1,3 +1,13 @@
|
||||
<!--
|
||||
---
|
||||
title: LIKWID collector
|
||||
description: Collect hardware performance events and metrics using LIKWID
|
||||
categories: [cc-metric-collector]
|
||||
tags: ['Admin']
|
||||
weight: 2
|
||||
hugo_path: docs/reference/cc-metric-collector/collectors/likwid.md
|
||||
---
|
||||
-->
|
||||
|
||||
## `likwid` collector
|
||||
|
||||
|
@@ -1,3 +1,10 @@
|
||||
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
// All rights reserved. This file is part of cc-lib.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
// additional authors:
|
||||
// Holger Obermaier (NHR@KIT)
|
||||
|
||||
package collectors
|
||||
|
||||
import (
|
||||
|
@@ -1,3 +1,14 @@
|
||||
<!--
|
||||
---
|
||||
title: Load average metric collector
|
||||
description: Collect metrics from `/proc/loadavg`
|
||||
categories: [cc-metric-collector]
|
||||
tags: ['Admin']
|
||||
weight: 2
|
||||
hugo_path: docs/reference/cc-metric-collector/collectors/loadavg.md
|
||||
---
|
||||
-->
|
||||
|
||||
|
||||
## `loadavg` collector
|
||||
|
||||
|
@@ -1,3 +1,10 @@
|
||||
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
// All rights reserved. This file is part of cc-lib.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
// additional authors:
|
||||
// Holger Obermaier (NHR@KIT)
|
||||
|
||||
package collectors
|
||||
|
||||
import (
|
||||
|
@@ -1,3 +1,14 @@
|
||||
<!--
|
||||
---
|
||||
title: Lustre filesystem metric collector
|
||||
description: Collect metrics for Lustre filesystems
|
||||
categories: [cc-metric-collector]
|
||||
tags: ['Admin']
|
||||
weight: 2
|
||||
hugo_path: docs/reference/cc-metric-collector/collectors/lustre.md
|
||||
---
|
||||
-->
|
||||
|
||||
|
||||
## `lustrestat` collector
|
||||
|
||||
@@ -43,4 +54,4 @@ Metrics:
|
||||
* `lustre_statfs_diff` (if `send_diff_values == true`)
|
||||
* `lustre_inode_permission_diff` (if `send_diff_values == true`)
|
||||
|
||||
This collector adds an `device` tag.
|
||||
This collector adds an `device` tag.
|
||||
|
@@ -1,3 +1,10 @@
|
||||
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
// All rights reserved. This file is part of cc-lib.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
// additional authors:
|
||||
// Holger Obermaier (NHR@KIT)
|
||||
|
||||
package collectors
|
||||
|
||||
import (
|
||||
|
@@ -1,3 +1,14 @@
|
||||
<!--
|
||||
---
|
||||
title: Memory statistics metric collector
|
||||
description: Collect metrics from `/proc/meminfo`
|
||||
categories: [cc-metric-collector]
|
||||
tags: ['Admin']
|
||||
weight: 2
|
||||
hugo_path: docs/reference/cc-metric-collector/collectors/memstat.md
|
||||
---
|
||||
-->
|
||||
|
||||
|
||||
## `memstat` collector
|
||||
|
||||
|
@@ -1,3 +1,10 @@
|
||||
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
// All rights reserved. This file is part of cc-lib.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
// additional authors:
|
||||
// Holger Obermaier (NHR@KIT)
|
||||
|
||||
package collectors
|
||||
|
||||
import (
|
||||
|
@@ -1,3 +1,10 @@
|
||||
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
// All rights reserved. This file is part of cc-lib.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
// additional authors:
|
||||
// Holger Obermaier (NHR@KIT)
|
||||
|
||||
package collectors
|
||||
|
||||
import (
|
||||
|
@@ -1,3 +1,13 @@
|
||||
<!--
|
||||
---
|
||||
title: Network device metric collector
|
||||
description: Collect metrics for network devices through procfs
|
||||
categories: [cc-metric-collector]
|
||||
tags: ['Admin']
|
||||
weight: 2
|
||||
hugo_path: docs/reference/cc-metric-collector/collectors/netstat.md
|
||||
---
|
||||
-->
|
||||
|
||||
## `netstat` collector
|
||||
|
||||
@@ -28,4 +38,4 @@ Metrics:
|
||||
* `net_pkts_in_bw` (`unit=packets/sec` if `send_derived_values == true`)
|
||||
* `net_pkts_out_bw` (`unit=packets/sec` if `send_derived_values == true`)
|
||||
|
||||
The device name is added as tag `stype=network,stype-id=<device>`.
|
||||
The device name is added as tag `stype=network,stype-id=<device>`.
|
||||
|
@@ -1,3 +1,14 @@
|
||||
<!--
|
||||
---
|
||||
title: NFS network filesystem (v3) metric collector
|
||||
description: Collect metrics for NFS network filesystems in version 3
|
||||
categories: [cc-metric-collector]
|
||||
tags: ['Admin']
|
||||
weight: 2
|
||||
hugo_path: docs/reference/cc-metric-collector/collectors/nfs3.md
|
||||
---
|
||||
-->
|
||||
|
||||
|
||||
## `nfs3stat` collector
|
||||
|
||||
|
@@ -1,3 +1,14 @@
|
||||
<!--
|
||||
---
|
||||
title: NFS network filesystem (v4) metric collector
|
||||
description: Collect metrics for NFS network filesystems in version 4
|
||||
categories: [cc-metric-collector]
|
||||
tags: ['Admin']
|
||||
weight: 2
|
||||
hugo_path: docs/reference/cc-metric-collector/collectors/nfs4.md
|
||||
---
|
||||
-->
|
||||
|
||||
|
||||
## `nfs4stat` collector
|
||||
|
||||
|
@@ -1,3 +1,10 @@
|
||||
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
// All rights reserved. This file is part of cc-lib.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
// additional authors:
|
||||
// Holger Obermaier (NHR@KIT)
|
||||
|
||||
package collectors
|
||||
|
||||
import (
|
||||
|
@@ -1,3 +1,10 @@
|
||||
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
// All rights reserved. This file is part of cc-lib.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
// additional authors:
|
||||
// Holger Obermaier (NHR@KIT)
|
||||
|
||||
package collectors
|
||||
|
||||
import (
|
||||
@@ -171,7 +178,7 @@ func (m *NfsIOStatCollector) Read(interval time.Duration, output chan lp.CCMessa
|
||||
}
|
||||
}
|
||||
if !found {
|
||||
m.data[mntpoint] = nil
|
||||
delete(m.data, mntpoint)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@@ -1,3 +1,14 @@
|
||||
<!--
|
||||
---
|
||||
title: NFS network filesystem metrics from procfs
|
||||
description: Collect NFS network filesystem metrics for mounts from `/proc/self/mountstats`
|
||||
categories: [cc-metric-collector]
|
||||
tags: ['Admin']
|
||||
weight: 2
|
||||
hugo_path: docs/reference/cc-metric-collector/collectors/nfsio.md
|
||||
---
|
||||
-->
|
||||
|
||||
## `nfsiostat` collector
|
||||
|
||||
```json
|
||||
|
@@ -78,6 +78,14 @@ func (m *NUMAStatsCollector) Init(config json.RawMessage) error {
|
||||
"group": "NUMA",
|
||||
}
|
||||
|
||||
m.config.SendAbsoluteValues = true
|
||||
if len(config) > 0 {
|
||||
err := json.Unmarshal(config, &m.config)
|
||||
if err != nil {
|
||||
return fmt.Errorf("unable to unmarshal numastat configuration: %s", err.Error())
|
||||
}
|
||||
}
|
||||
|
||||
// Loop for all NUMA node directories
|
||||
base := "/sys/devices/system/node/node"
|
||||
globPattern := base + "[0-9]*"
|
||||
@@ -145,11 +153,11 @@ func (m *NUMAStatsCollector) Read(interval time.Duration, output chan lp.CCMessa
|
||||
}
|
||||
|
||||
if m.config.SendAbsoluteValues {
|
||||
msg, err := lp.NewMessage(
|
||||
msg, err := lp.NewMetric(
|
||||
"numastats_"+key,
|
||||
t.tagSet,
|
||||
m.meta,
|
||||
map[string]interface{}{"value": value},
|
||||
value,
|
||||
now,
|
||||
)
|
||||
if err == nil {
|
||||
@@ -161,11 +169,11 @@ func (m *NUMAStatsCollector) Read(interval time.Duration, output chan lp.CCMessa
|
||||
prev, ok := t.previousValues[key]
|
||||
if ok {
|
||||
rate := float64(value-prev) / timeDiff
|
||||
msg, err := lp.NewMessage(
|
||||
msg, err := lp.NewMetric(
|
||||
"numastats_"+key+"_rate",
|
||||
t.tagSet,
|
||||
m.meta,
|
||||
map[string]interface{}{"value": rate},
|
||||
rate,
|
||||
now,
|
||||
)
|
||||
if err == nil {
|
||||
|
@@ -1,3 +1,13 @@
|
||||
<!--
|
||||
---
|
||||
title: NUMAStat collector
|
||||
description: Collect infos about NUMA domains
|
||||
categories: [cc-metric-collector]
|
||||
tags: ['Admin']
|
||||
weight: 2
|
||||
hugo_path: docs/reference/cc-metric-collector/collectors/numastat.md
|
||||
---
|
||||
-->
|
||||
|
||||
## `numastat` collector
|
||||
|
||||
|
@@ -1,3 +1,10 @@
|
||||
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
// All rights reserved. This file is part of cc-lib.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
// additional authors:
|
||||
// Holger Obermaier (NHR@KIT)
|
||||
|
||||
package collectors
|
||||
|
||||
import (
|
||||
@@ -27,10 +34,12 @@ type NvidiaCollectorConfig struct {
|
||||
}
|
||||
|
||||
type NvidiaCollectorDevice struct {
|
||||
device nvml.Device
|
||||
excludeMetrics map[string]bool
|
||||
tags map[string]string
|
||||
meta map[string]string
|
||||
device nvml.Device
|
||||
excludeMetrics map[string]bool
|
||||
tags map[string]string
|
||||
meta map[string]string
|
||||
lastEnergyReading uint64
|
||||
lastEnergyTimestamp time.Time
|
||||
}
|
||||
|
||||
type NvidiaCollector struct {
|
||||
@@ -149,6 +158,8 @@ func (m *NvidiaCollector) Init(config json.RawMessage) error {
|
||||
|
||||
// Add device handle
|
||||
g.device = device
|
||||
g.lastEnergyReading = 0
|
||||
g.lastEnergyTimestamp = time.Now()
|
||||
|
||||
// Add tags
|
||||
g.tags = map[string]string{
|
||||
@@ -206,7 +217,7 @@ func (m *NvidiaCollector) Init(config json.RawMessage) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func readMemoryInfo(device NvidiaCollectorDevice, output chan lp.CCMessage) error {
|
||||
func readMemoryInfo(device *NvidiaCollectorDevice, output chan lp.CCMessage) error {
|
||||
if !device.excludeMetrics["nv_fb_mem_total"] || !device.excludeMetrics["nv_fb_mem_used"] || !device.excludeMetrics["nv_fb_mem_reserved"] {
|
||||
var total uint64
|
||||
var used uint64
|
||||
@@ -250,7 +261,7 @@ func readMemoryInfo(device NvidiaCollectorDevice, output chan lp.CCMessage) erro
|
||||
return nil
|
||||
}
|
||||
|
||||
func readBarMemoryInfo(device NvidiaCollectorDevice, output chan lp.CCMessage) error {
|
||||
func readBarMemoryInfo(device *NvidiaCollectorDevice, output chan lp.CCMessage) error {
|
||||
if !device.excludeMetrics["nv_bar1_mem_total"] || !device.excludeMetrics["nv_bar1_mem_used"] {
|
||||
meminfo, ret := nvml.DeviceGetBAR1MemoryInfo(device.device)
|
||||
if ret != nvml.SUCCESS {
|
||||
@@ -277,7 +288,7 @@ func readBarMemoryInfo(device NvidiaCollectorDevice, output chan lp.CCMessage) e
|
||||
return nil
|
||||
}
|
||||
|
||||
func readUtilization(device NvidiaCollectorDevice, output chan lp.CCMessage) error {
|
||||
func readUtilization(device *NvidiaCollectorDevice, output chan lp.CCMessage) error {
|
||||
isMig, ret := nvml.DeviceIsMigDeviceHandle(device.device)
|
||||
if ret != nvml.SUCCESS {
|
||||
err := errors.New(nvml.ErrorString(ret))
|
||||
@@ -319,7 +330,7 @@ func readUtilization(device NvidiaCollectorDevice, output chan lp.CCMessage) err
|
||||
return nil
|
||||
}
|
||||
|
||||
func readTemp(device NvidiaCollectorDevice, output chan lp.CCMessage) error {
|
||||
func readTemp(device *NvidiaCollectorDevice, output chan lp.CCMessage) error {
|
||||
if !device.excludeMetrics["nv_temp"] {
|
||||
// Retrieves the current temperature readings for the device, in degrees C.
|
||||
//
|
||||
@@ -338,7 +349,7 @@ func readTemp(device NvidiaCollectorDevice, output chan lp.CCMessage) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func readFan(device NvidiaCollectorDevice, output chan lp.CCMessage) error {
|
||||
func readFan(device *NvidiaCollectorDevice, output chan lp.CCMessage) error {
|
||||
if !device.excludeMetrics["nv_fan"] {
|
||||
// Retrieves the intended operating speed of the device's fan.
|
||||
//
|
||||
@@ -361,7 +372,7 @@ func readFan(device NvidiaCollectorDevice, output chan lp.CCMessage) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
// func readFans(device NvidiaCollectorDevice, output chan lp.CCMessage) error {
|
||||
// func readFans(device *NvidiaCollectorDevice, output chan lp.CCMessage) error {
|
||||
// if !device.excludeMetrics["nv_fan"] {
|
||||
// numFans, ret := nvml.DeviceGetNumFans(device.device)
|
||||
// if ret == nvml.SUCCESS {
|
||||
@@ -382,7 +393,7 @@ func readFan(device NvidiaCollectorDevice, output chan lp.CCMessage) error {
|
||||
// return nil
|
||||
// }
|
||||
|
||||
func readEccMode(device NvidiaCollectorDevice, output chan lp.CCMessage) error {
|
||||
func readEccMode(device *NvidiaCollectorDevice, output chan lp.CCMessage) error {
|
||||
if !device.excludeMetrics["nv_ecc_mode"] {
|
||||
// Retrieves the current and pending ECC modes for the device.
|
||||
//
|
||||
@@ -416,7 +427,7 @@ func readEccMode(device NvidiaCollectorDevice, output chan lp.CCMessage) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func readPerfState(device NvidiaCollectorDevice, output chan lp.CCMessage) error {
|
||||
func readPerfState(device *NvidiaCollectorDevice, output chan lp.CCMessage) error {
|
||||
if !device.excludeMetrics["nv_perf_state"] {
|
||||
// Retrieves the current performance state for the device.
|
||||
//
|
||||
@@ -436,13 +447,16 @@ func readPerfState(device NvidiaCollectorDevice, output chan lp.CCMessage) error
|
||||
return nil
|
||||
}
|
||||
|
||||
func readPowerUsage(device NvidiaCollectorDevice, output chan lp.CCMessage) error {
|
||||
func readPowerUsage(device *NvidiaCollectorDevice, output chan lp.CCMessage) error {
|
||||
if !device.excludeMetrics["nv_power_usage"] {
|
||||
// Retrieves power usage for this GPU in milliwatts and its associated circuitry (e.g. memory)
|
||||
//
|
||||
// On Fermi and Kepler GPUs the reading is accurate to within +/- 5% of current power draw.
|
||||
// On Ampere (except GA100) or newer GPUs, the API returns power averaged over 1 sec interval.
|
||||
// On GA100 and older architectures, instantaneous power is returned.
|
||||
//
|
||||
// It is only available if power management mode is supported
|
||||
// It is only available if power management mode is supported.
|
||||
|
||||
mode, ret := nvml.DeviceGetPowerManagementMode(device.device)
|
||||
if ret != nvml.SUCCESS {
|
||||
return nil
|
||||
@@ -461,7 +475,54 @@ func readPowerUsage(device NvidiaCollectorDevice, output chan lp.CCMessage) erro
|
||||
return nil
|
||||
}
|
||||
|
||||
func readClocks(device NvidiaCollectorDevice, output chan lp.CCMessage) error {
|
||||
func readEnergyConsumption(device *NvidiaCollectorDevice, output chan lp.CCMessage) error {
|
||||
// Retrieves total energy consumption for this GPU in millijoules (mJ) since the driver was last reloaded
|
||||
|
||||
// For Volta or newer fully supported devices.
|
||||
if (!device.excludeMetrics["nv_energy"]) && (!device.excludeMetrics["nv_energy_abs"]) && (!device.excludeMetrics["nv_average_power"]) {
|
||||
now := time.Now()
|
||||
mode, ret := nvml.DeviceGetPowerManagementMode(device.device)
|
||||
if ret != nvml.SUCCESS {
|
||||
return nil
|
||||
}
|
||||
if mode == nvml.FEATURE_ENABLED {
|
||||
energy, ret := nvml.DeviceGetTotalEnergyConsumption(device.device)
|
||||
if ret == nvml.SUCCESS {
|
||||
if device.lastEnergyReading != 0 {
|
||||
if !device.excludeMetrics["nv_energy"] {
|
||||
y, err := lp.NewMetric("nv_energy", device.tags, device.meta, (energy-device.lastEnergyReading)/1000, now)
|
||||
if err == nil {
|
||||
y.AddMeta("unit", "Joules")
|
||||
output <- y
|
||||
}
|
||||
}
|
||||
if !device.excludeMetrics["nv_average_power"] {
|
||||
|
||||
energyDiff := (energy - device.lastEnergyReading) / 1000
|
||||
timeDiff := now.Sub(device.lastEnergyTimestamp)
|
||||
y, err := lp.NewMetric("nv_average_power", device.tags, device.meta, energyDiff/uint64(timeDiff.Seconds()), now)
|
||||
if err == nil {
|
||||
y.AddMeta("unit", "watts")
|
||||
output <- y
|
||||
}
|
||||
}
|
||||
}
|
||||
if !device.excludeMetrics["nv_energy_abs"] {
|
||||
y, err := lp.NewMetric("nv_energy_abs", device.tags, device.meta, energy/1000, now)
|
||||
if err == nil {
|
||||
y.AddMeta("unit", "Joules")
|
||||
output <- y
|
||||
}
|
||||
}
|
||||
device.lastEnergyReading = energy
|
||||
device.lastEnergyTimestamp = time.Now()
|
||||
}
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func readClocks(device *NvidiaCollectorDevice, output chan lp.CCMessage) error {
|
||||
// Retrieves the current clock speeds for the device.
|
||||
//
|
||||
// Available clock information:
|
||||
@@ -513,7 +574,7 @@ func readClocks(device NvidiaCollectorDevice, output chan lp.CCMessage) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func readMaxClocks(device NvidiaCollectorDevice, output chan lp.CCMessage) error {
|
||||
func readMaxClocks(device *NvidiaCollectorDevice, output chan lp.CCMessage) error {
|
||||
// Retrieves the maximum clock speeds for the device.
|
||||
//
|
||||
// Available clock information:
|
||||
@@ -571,7 +632,7 @@ func readMaxClocks(device NvidiaCollectorDevice, output chan lp.CCMessage) error
|
||||
return nil
|
||||
}
|
||||
|
||||
func readEccErrors(device NvidiaCollectorDevice, output chan lp.CCMessage) error {
|
||||
func readEccErrors(device *NvidiaCollectorDevice, output chan lp.CCMessage) error {
|
||||
if !device.excludeMetrics["nv_ecc_uncorrected_error"] {
|
||||
// Retrieves the total ECC error counts for the device.
|
||||
//
|
||||
@@ -602,7 +663,7 @@ func readEccErrors(device NvidiaCollectorDevice, output chan lp.CCMessage) error
|
||||
return nil
|
||||
}
|
||||
|
||||
func readPowerLimit(device NvidiaCollectorDevice, output chan lp.CCMessage) error {
|
||||
func readPowerLimit(device *NvidiaCollectorDevice, output chan lp.CCMessage) error {
|
||||
if !device.excludeMetrics["nv_power_max_limit"] {
|
||||
// Retrieves the power management limit associated with this device.
|
||||
//
|
||||
@@ -622,7 +683,7 @@ func readPowerLimit(device NvidiaCollectorDevice, output chan lp.CCMessage) erro
|
||||
return nil
|
||||
}
|
||||
|
||||
func readEncUtilization(device NvidiaCollectorDevice, output chan lp.CCMessage) error {
|
||||
func readEncUtilization(device *NvidiaCollectorDevice, output chan lp.CCMessage) error {
|
||||
isMig, ret := nvml.DeviceIsMigDeviceHandle(device.device)
|
||||
if ret != nvml.SUCCESS {
|
||||
err := errors.New(nvml.ErrorString(ret))
|
||||
@@ -649,7 +710,7 @@ func readEncUtilization(device NvidiaCollectorDevice, output chan lp.CCMessage)
|
||||
return nil
|
||||
}
|
||||
|
||||
func readDecUtilization(device NvidiaCollectorDevice, output chan lp.CCMessage) error {
|
||||
func readDecUtilization(device *NvidiaCollectorDevice, output chan lp.CCMessage) error {
|
||||
isMig, ret := nvml.DeviceIsMigDeviceHandle(device.device)
|
||||
if ret != nvml.SUCCESS {
|
||||
err := errors.New(nvml.ErrorString(ret))
|
||||
@@ -676,7 +737,7 @@ func readDecUtilization(device NvidiaCollectorDevice, output chan lp.CCMessage)
|
||||
return nil
|
||||
}
|
||||
|
||||
func readRemappedRows(device NvidiaCollectorDevice, output chan lp.CCMessage) error {
|
||||
func readRemappedRows(device *NvidiaCollectorDevice, output chan lp.CCMessage) error {
|
||||
if !device.excludeMetrics["nv_remapped_rows_corrected"] ||
|
||||
!device.excludeMetrics["nv_remapped_rows_uncorrected"] ||
|
||||
!device.excludeMetrics["nv_remapped_rows_pending"] ||
|
||||
@@ -729,7 +790,7 @@ func readRemappedRows(device NvidiaCollectorDevice, output chan lp.CCMessage) er
|
||||
return nil
|
||||
}
|
||||
|
||||
func readProcessCounts(device NvidiaCollectorDevice, output chan lp.CCMessage) error {
|
||||
func readProcessCounts(device *NvidiaCollectorDevice, output chan lp.CCMessage) error {
|
||||
if !device.excludeMetrics["nv_compute_processes"] {
|
||||
// Get information about processes with a compute context on a device
|
||||
//
|
||||
@@ -821,7 +882,7 @@ func readProcessCounts(device NvidiaCollectorDevice, output chan lp.CCMessage) e
|
||||
return nil
|
||||
}
|
||||
|
||||
func readViolationStats(device NvidiaCollectorDevice, output chan lp.CCMessage) error {
|
||||
func readViolationStats(device *NvidiaCollectorDevice, output chan lp.CCMessage) error {
|
||||
var violTime nvml.ViolationTime
|
||||
var ret nvml.Return
|
||||
|
||||
@@ -935,7 +996,7 @@ func readViolationStats(device NvidiaCollectorDevice, output chan lp.CCMessage)
|
||||
return nil
|
||||
}
|
||||
|
||||
func readNVLinkStats(device NvidiaCollectorDevice, output chan lp.CCMessage) error {
|
||||
func readNVLinkStats(device *NvidiaCollectorDevice, output chan lp.CCMessage) error {
|
||||
// Retrieves the specified error counter value
|
||||
// Please refer to \a nvmlNvLinkErrorCounter_t for error counters that are available
|
||||
//
|
||||
@@ -1070,7 +1131,7 @@ func (m *NvidiaCollector) Read(interval time.Duration, output chan lp.CCMessage)
|
||||
return
|
||||
}
|
||||
|
||||
readAll := func(device NvidiaCollectorDevice, output chan lp.CCMessage) {
|
||||
readAll := func(device *NvidiaCollectorDevice, output chan lp.CCMessage) {
|
||||
name, ret := nvml.DeviceGetName(device.device)
|
||||
if ret != nvml.SUCCESS {
|
||||
name = "NoName"
|
||||
@@ -1110,6 +1171,11 @@ func (m *NvidiaCollector) Read(interval time.Duration, output chan lp.CCMessage)
|
||||
cclog.ComponentDebug(m.name, "readPowerUsage for device", name, "failed")
|
||||
}
|
||||
|
||||
err = readEnergyConsumption(device, output)
|
||||
if err != nil {
|
||||
cclog.ComponentDebug(m.name, "readEnergyConsumption for device", name, "failed")
|
||||
}
|
||||
|
||||
err = readClocks(device, output)
|
||||
if err != nil {
|
||||
cclog.ComponentDebug(m.name, "readClocks for device", name, "failed")
|
||||
@@ -1169,7 +1235,7 @@ func (m *NvidiaCollector) Read(interval time.Duration, output chan lp.CCMessage)
|
||||
// Actual read loop over all attached Nvidia GPUs
|
||||
for i := 0; i < m.num_gpus; i++ {
|
||||
|
||||
readAll(m.gpus[i], output)
|
||||
readAll(&m.gpus[i], output)
|
||||
|
||||
// Iterate over all MIG devices if any
|
||||
if m.config.ProcessMigDevices {
|
||||
@@ -1243,7 +1309,7 @@ func (m *NvidiaCollector) Read(interval time.Duration, output chan lp.CCMessage)
|
||||
}
|
||||
}
|
||||
|
||||
readAll(migDevice, output)
|
||||
readAll(&migDevice, output)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@@ -1,3 +1,13 @@
|
||||
<!--
|
||||
---
|
||||
title: "Nvidia NVML metric collector"
|
||||
description: Collect metrics for Nvidia GPUs using the NVML
|
||||
categories: [cc-metric-collector]
|
||||
tags: ['Admin']
|
||||
weight: 2
|
||||
hugo_path: docs/reference/cc-metric-collector/collectors/nvidia.md
|
||||
---
|
||||
-->
|
||||
|
||||
## `nvidia` collector
|
||||
|
||||
@@ -72,5 +82,8 @@ Metrics:
|
||||
* `nv_nvlink_ecc_errors`
|
||||
* `nv_nvlink_replay_errors`
|
||||
* `nv_nvlink_recovery_errors`
|
||||
* `nv_energy`
|
||||
* `nv_energy_abs`
|
||||
* `nv_average_power`
|
||||
|
||||
Some metrics add the additional sub type tag (`stype`) like the `nv_nvlink_*` metrics set `stype=nvlink,stype-id=<link_number>`.
|
||||
Some metrics add the additional sub type tag (`stype`) like the `nv_nvlink_*` metrics set `stype=nvlink,stype-id=<link_number>`.
|
||||
|
@@ -1,3 +1,10 @@
|
||||
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
// All rights reserved. This file is part of cc-lib.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
// additional authors:
|
||||
// Holger Obermaier (NHR@KIT)
|
||||
|
||||
package collectors
|
||||
|
||||
import (
|
||||
|
@@ -1,3 +1,14 @@
|
||||
<!--
|
||||
---
|
||||
title: RAPL metric collector
|
||||
description: Collect energy data through the RAPL sysfs interface
|
||||
categories: [cc-metric-collector]
|
||||
tags: ['Admin']
|
||||
weight: 2
|
||||
hugo_path: docs/reference/cc-metric-collector/collectors/rapl.md
|
||||
---
|
||||
-->
|
||||
|
||||
## `rapl` collector
|
||||
|
||||
This collector reads running average power limit (RAPL) monitoring attributes to compute average power consumption metrics. See <https://www.kernel.org/doc/html/latest/power/powercap/powercap.html#monitoring-attributes>.
|
||||
|
@@ -1,3 +1,10 @@
|
||||
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
// All rights reserved. This file is part of cc-lib.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
// additional authors:
|
||||
// Holger Obermaier (NHR@KIT)
|
||||
|
||||
package collectors
|
||||
|
||||
import (
|
||||
|
@@ -1,3 +1,14 @@
|
||||
<!--
|
||||
---
|
||||
title: "ROCm SMI metric collector"
|
||||
description: Collect metrics for AMD GPUs using the SMI library
|
||||
categories: [cc-metric-collector]
|
||||
tags: ['Admin']
|
||||
weight: 2
|
||||
hugo_path: docs/reference/cc-metric-collector/collectors/rocmsmi.md
|
||||
---
|
||||
-->
|
||||
|
||||
|
||||
## `rocm_smi` collector
|
||||
|
||||
|
@@ -1,3 +1,10 @@
|
||||
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
// All rights reserved. This file is part of cc-lib.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
// additional authors:
|
||||
// Holger Obermaier (NHR@KIT)
|
||||
|
||||
package collectors
|
||||
|
||||
import (
|
||||
|
@@ -1,3 +1,10 @@
|
||||
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
// All rights reserved. This file is part of cc-lib.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
// additional authors:
|
||||
// Holger Obermaier (NHR@KIT)
|
||||
|
||||
package collectors
|
||||
|
||||
import (
|
||||
|
@@ -1,3 +1,10 @@
|
||||
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
// All rights reserved. This file is part of cc-lib.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
// additional authors:
|
||||
// Holger Obermaier (NHR@KIT)
|
||||
|
||||
package collectors
|
||||
|
||||
import (
|
||||
|
@@ -1,3 +1,13 @@
|
||||
<!--
|
||||
---
|
||||
title: SchedStat Metric collector
|
||||
description: Collect metrics from `/proc/schedstat`
|
||||
categories: [cc-metric-collector]
|
||||
tags: ['Admin']
|
||||
weight: 2
|
||||
hugo_path: docs/reference/cc-metric-collector/collectors/schedstat.md
|
||||
---
|
||||
-->
|
||||
|
||||
## `schedstat` collector
|
||||
```json
|
||||
@@ -8,4 +18,4 @@
|
||||
The `schedstat` collector reads data from /proc/schedstat and calculates a load value, separated by hwthread. This might be useful to detect bad cpu pinning on shared nodes etc.
|
||||
|
||||
Metric:
|
||||
* `cpu_load_core`
|
||||
* `cpu_load_core`
|
||||
|
@@ -1,3 +1,10 @@
|
||||
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
// All rights reserved. This file is part of cc-lib.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
// additional authors:
|
||||
// Holger Obermaier (NHR@KIT)
|
||||
|
||||
package collectors
|
||||
|
||||
import (
|
||||
|
@@ -1,3 +1,14 @@
|
||||
<!--
|
||||
---
|
||||
title: Self-monitoring metric collector
|
||||
description: Collect metrics from the execution of cc-metric-collector itself
|
||||
categories: [cc-metric-collector]
|
||||
tags: ['Admin']
|
||||
weight: 2
|
||||
hugo_path: docs/reference/cc-metric-collector/collectors/self.md
|
||||
---
|
||||
-->
|
||||
|
||||
## `self` collector
|
||||
|
||||
```json
|
||||
|
@@ -1,3 +1,10 @@
|
||||
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
// All rights reserved. This file is part of cc-lib.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
// additional authors:
|
||||
// Holger Obermaier (NHR@KIT)
|
||||
|
||||
package collectors
|
||||
|
||||
import (
|
||||
|
@@ -1,3 +1,14 @@
|
||||
<!--
|
||||
---
|
||||
title: Temperature metric collector
|
||||
description: Collect thermal metrics from `/sys/class/hwmon/*`
|
||||
categories: [cc-metric-collector]
|
||||
tags: ['Admin']
|
||||
weight: 2
|
||||
hugo_path: docs/reference/cc-metric-collector/collectors/temp.md
|
||||
---
|
||||
-->
|
||||
|
||||
|
||||
## `tempstat` collector
|
||||
|
||||
|
@@ -1,3 +1,10 @@
|
||||
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
// All rights reserved. This file is part of cc-lib.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
// additional authors:
|
||||
// Holger Obermaier (NHR@KIT)
|
||||
|
||||
package collectors
|
||||
|
||||
import (
|
||||
|
@@ -1,3 +1,15 @@
|
||||
<!--
|
||||
---
|
||||
title: TopProcs collector
|
||||
description: Collect infos about most CPU-consuming processes
|
||||
categories: [cc-metric-collector]
|
||||
tags: ['Admin']
|
||||
weight: 2
|
||||
hugo_path: docs/reference/cc-metric-collector/collectors/topprocs.md
|
||||
---
|
||||
-->
|
||||
|
||||
|
||||
|
||||
## `topprocs` collector
|
||||
|
||||
|
@@ -1,3 +1,14 @@
|
||||
<!--
|
||||
---
|
||||
title: Metric Aggregator
|
||||
description: Subsystem for evaluating expressions on metrics (deprecated)
|
||||
categories: [cc-metric-collector]
|
||||
tags: ['Developer']
|
||||
weight: 1
|
||||
hugo_path: docs/reference/cc-metric-collector/internal/metricaggregator/_index.md
|
||||
---
|
||||
-->
|
||||
|
||||
# The MetricAggregator
|
||||
|
||||
In some cases, further combination of metrics or raw values is required. For that strings like `foo + 1` with runtime dependent `foo` need to be evaluated. The MetricAggregator relies on the [`gval`](https://github.com/PaesslerAG/gval) Golang package to perform all expression evaluation. The `gval` package provides the basic arithmetic operations but the MetricAggregator defines additional ones.
|
||||
@@ -35,4 +46,4 @@ The MetricAggregator provides these functions additional to the `Full` language
|
||||
## Limitations
|
||||
|
||||
- Since the metrics are written in JSON files which do not allow `""` without proper escaping inside of JSON strings, you have to use `''` for strings.
|
||||
- Since `\` is interpreted by JSON as escape character, it cannot be used in metrics. But it is required to write regular expressions. So instead of `/`, use `%` and the MetricAggregator replaces them after reading the JSON file.
|
||||
- Since `\` is interpreted by JSON as escape character, it cannot be used in metrics. But it is required to write regular expressions. So instead of `\`, use `%` and the MetricAggregator replaces them after reading the JSON file.
|
||||
|
@@ -1,3 +1,10 @@
|
||||
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
// All rights reserved. This file is part of cc-lib.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
// additional authors:
|
||||
// Holger Obermaier (NHR@KIT)
|
||||
|
||||
package metricAggregator
|
||||
|
||||
import (
|
||||
|
@@ -1,3 +1,10 @@
|
||||
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
// All rights reserved. This file is part of cc-lib.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
// additional authors:
|
||||
// Holger Obermaier (NHR@KIT)
|
||||
|
||||
package metricAggregator
|
||||
|
||||
import (
|
||||
|
@@ -1,11 +1,22 @@
|
||||
<!--
|
||||
---
|
||||
title: Message Router
|
||||
description: Routing component inside cc-metric-collector
|
||||
categories: [cc-metric-collector]
|
||||
tags: ['Developer']
|
||||
weight: 1
|
||||
hugo_path: docs/reference/cc-metric-collector/internal/metricrouter/_index.md
|
||||
---
|
||||
-->
|
||||
|
||||
# CC Metric Router
|
||||
|
||||
The CCMetric router sits in between the collectors and the sinks and can be used to add and remove tags to/from traversing [CCMessages](https://pkg.go.dev/github.com/ClusterCockpit/cc-energy-manager@v0.0.0-20240919152819-92a17f2da4f7/pkg/cc-message.
|
||||
The CCMetric router sits in between the collectors and the sinks and can be used to add and remove tags to/from traversing [CCMessages](https://pkg.go.dev/github.com/ClusterCockpit/cc-lib/ccMessage).
|
||||
|
||||
|
||||
# Configuration
|
||||
|
||||
**Note**: Use the [message processor configuration](../../pkg/messageProcessor/README.md) with option `process_messages`.
|
||||
**Note**: Use the [message processor configuration](https://github.com/ClusterCockpit/cc-lib/blob/main/messageProcessor/README.md) with option `process_messages`.
|
||||
|
||||
```json
|
||||
{
|
||||
@@ -69,7 +80,7 @@ The CCMetric router sits in between the collectors and the sinks and can be used
|
||||
|
||||
There are three main options `add_tags`, `delete_tags` and `interval_timestamp`. `add_tags` and `delete_tags` are lists consisting of dicts with `key`, `value` and `if`. The `value` can be omitted in the `delete_tags` part as it only uses the `key` for removal. The `interval_timestamp` setting means that a unique timestamp is applied to all metrics traversing the router during an interval.
|
||||
|
||||
**Note**: Use the [message processor configuration](../../pkg/messageProcessor/README.md) (option `process_messages`) instead of `add_tags`, `delete_tags`, `drop_metrics`, `drop_metrics_if`, `rename_metrics`, `normalize_units` and `change_unit_prefix`. These options are deprecated and will be removed in future versions. Until then, they are added to the message processor.
|
||||
**Note**: Use the [message processor configuration](https://github.com/ClusterCockpit/cc-lib/blob/main/messageProcessor/README.md) (option `process_messages`) instead of `add_tags`, `delete_tags`, `drop_metrics`, `drop_metrics_if`, `rename_metrics`, `normalize_units` and `change_unit_prefix`. These options are deprecated and will be removed in future versions. Until then, they are added to the message processor.
|
||||
|
||||
# Processing order in the router
|
||||
|
||||
@@ -225,13 +236,13 @@ __deprecated__
|
||||
|
||||
|
||||
The cc-metric-collector tries to read the data from the system as it is reported. If available, it tries to read the metric unit from the system as well (e.g. from `/proc/meminfo`). The problem is that, depending on the source, the metric units are named differently. Just think about `byte`, `Byte`, `B`, `bytes`, ...
|
||||
The [cc-units](https://github.com/ClusterCockpit/cc-units) package provides us a normalization option to use the same metric unit name for all metrics. It this option is set to true, all `unit` meta tags are normalized.
|
||||
The [cc-units](https://github.com/ClusterCockpit/cc-lib/ccUnits) package provides a normalization option to use the same metric unit name for all metrics. If this option is set to true, all `unit` meta tags are normalized.
|
||||
|
||||
## The `change_unit_prefix` section
|
||||
|
||||
__deprecated__
|
||||
|
||||
It is often the case that metrics are reported by the system using a rather outdated unit prefix (like `/proc/meminfo` still uses kByte despite current memory sizes are in the GByte range). If you want to change the prefix of a unit, you can do that with the help of [cc-units](https://github.com/ClusterCockpit/cc-units). The setting works on the metric name and requires the new prefix for the metric. The cc-units package determines the scaling factor.
|
||||
It is often the case that metrics are reported by the system using a rather outdated unit prefix (for example, `/proc/meminfo` still uses kByte although current memory sizes are in the GByte range). If you want to change the prefix of a unit, you can do that with the help of [cc-units](https://github.com/ClusterCockpit/cc-lib/ccUnits). The setting works on the metric name and requires the new prefix for the metric. The cc-units package determines the scaling factor.
|
||||
|
||||
# Aggregate metric values of the current interval with the `interval_aggregates` option
|
||||
|
||||
@@ -263,7 +274,7 @@ The above configuration, collects all metric values for metrics evaluating `if`
|
||||
If you are not interested in the input metrics `sub_metric_%d+` at all, you can add the same condition used here to the `drop_metrics_if` section to drop them.
|
||||
|
||||
Use cases for `interval_aggregates`:
|
||||
- Combine multiple metrics of the a collector to a new one like the [MemstatCollector](../../collectors/memstatMetric.md) does it for `mem_used`)):
|
||||
- Combine multiple metrics of a collector into a new one, like the [MemstatCollector](../../collectors/memstatMetric.md) does for `mem_used`:
|
||||
```json
|
||||
{
|
||||
"name" : "mem_used",
|
||||
|
@@ -1,3 +1,10 @@
|
||||
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
// All rights reserved. This file is part of cc-lib.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
// additional authors:
|
||||
// Holger Obermaier (NHR@KIT)
|
||||
|
||||
package metricRouter
|
||||
|
||||
import (
|
||||
|
@@ -1,3 +1,10 @@
|
||||
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
// All rights reserved. This file is part of cc-lib.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
// additional authors:
|
||||
// Holger Obermaier (NHR@KIT)
|
||||
|
||||
package metricRouter
|
||||
|
||||
import (
|
||||
|
@@ -1,3 +1,10 @@
|
||||
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
// All rights reserved. This file is part of cc-lib.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
// additional authors:
|
||||
// Holger Obermaier (NHR@KIT)
|
||||
|
||||
package ccTopology
|
||||
|
||||
import (
|
||||
|
@@ -1,125 +0,0 @@
|
||||
package hostlist
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"regexp"
|
||||
"sort"
|
||||
"strconv"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// reHostlistRanges matches a bracketed, comma separated list of numbers and
// number ranges, e.g. "[01-04,06,09]". It is compiled once at package
// initialization instead of on every call to Expand.
var reHostlistRanges = regexp.MustCompile(
	"[[]" +
		"([[:digit:]]+,|[[:digit:]]+-[[:digit:]]+,)*" +
		"([[:digit:]]+|[[:digit:]]+-[[:digit:]]+)" +
		"[]]")

// reHostlistEntry matches one host list entry at the start of the input:
// a prefix of DNS characters, an optional bracketed range list and an
// optional suffix of DNS characters.
var reHostlistEntry = regexp.MustCompile(
	"^([a-zA-Z0-9-]+)" + "([[][0-9,-]+[]])?" + "([a-zA-Z0-9-]+)?")

// Expand expands a host list expression such as "n[01-04,06],m1" into the
// individual host names.
//
// Entries are separated by commas and/or spaces. Each entry consists of a
// prefix, an optional bracketed list of numbers and number ranges, and an
// optional suffix. Numbers of a range are zero padded to the width of the
// range start only if range start and range end are written with the same
// number of digits.
//
// The returned slice is sorted lexicographically and contains no duplicates.
// For an input without any entries a nil slice and a nil error are returned.
// If the input is not a valid host list, a range start is greater than its
// end, or a range limit does not fit into 64 bits, a nil slice and a non-nil
// error are returned.
func Expand(in string) (result []string, err error) {

	// Remove all leading delimiters from the input
	in = strings.TrimLeft(in, ", ")

	for len(in) > 0 {
		v := reHostlistEntry.FindStringSubmatch(in)
		if v == nil {
			return nil, fmt.Errorf("not a hostlist: %s", in)
		}

		// Remove matched part and the following delimiters from the input
		in = strings.TrimLeft(in[len(v[0]):], ", ")

		// matched prefix, range and suffix
		hlPrefix, hlRanges, hlSuffix := v[1], v[2], v[3]

		// Single node without ranges
		if hlRanges == "" {
			result = append(result, hlPrefix)
			continue
		}

		// Node with ranges
		if !reHostlistRanges.MatchString(hlRanges) {
			return nil, fmt.Errorf("not a hostlist range: %s", hlRanges)
		}

		// Remove braces and split host ranges at ,
		for _, hlRange := range strings.Split(hlRanges[1:len(hlRanges)-1], ",") {

			// Split host range at -
			rangeStart, rangeEnd, isRange := strings.Cut(hlRange, "-")

			// Range is only a single number: keep it exactly as written
			if !isRange {
				result = append(result, hlPrefix+rangeStart+hlSuffix)
				continue
			}

			// Range has a start and an end. The regular expression only
			// guarantees digits, so numbers that overflow 64 bits must be
			// rejected here instead of being silently clamped.
			iStart, errStart := strconv.ParseUint(rangeStart, 10, 64)
			if errStart != nil {
				return nil, fmt.Errorf("invalid range start in %s: %w", hlRange, errStart)
			}
			iEnd, errEnd := strconv.ParseUint(rangeEnd, 10, 64)
			if errEnd != nil {
				return nil, fmt.Errorf("invalid range end in %s: %w", hlRange, errEnd)
			}
			if iStart > iEnd {
				return nil, fmt.Errorf("single range start is greater than end: %s", hlRange)
			}

			// Create print format string for range numbers.
			// Padding is only applied if start and end have the same width.
			formatString := "%d"
			if len(rangeStart) == len(rangeEnd) {
				formatString = "%0" + fmt.Sprint(len(rangeStart)) + "d"
			}
			formatString = hlPrefix + formatString + hlSuffix

			// Add nodes from this range. The loop terminates on equality
			// instead of "i <= iEnd" so that it cannot wrap around and run
			// forever when iEnd is the largest uint64 value.
			for i := iStart; ; i++ {
				result = append(result, fmt.Sprintf(formatString, i))
				if i == iEnd {
					break
				}
			}
		}
	}

	if len(result) == 0 {
		return result, nil
	}

	// sort
	sort.Strings(result)

	// uniq: compact in place, keeping the first of each run of equal names
	uniq := result[:1]
	for _, host := range result[1:] {
		if host != uniq[len(uniq)-1] {
			uniq = append(uniq, host)
		}
	}

	return uniq, nil
}
|
@@ -1,126 +0,0 @@
|
||||
package hostlist
|
||||
|
||||
import (
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestExpand(t *testing.T) {
|
||||
|
||||
// Compare two slices of strings
|
||||
equal := func(a, b []string) bool {
|
||||
if len(a) != len(b) {
|
||||
return false
|
||||
}
|
||||
for i, v := range a {
|
||||
if v != b[i] {
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
type testDefinition struct {
|
||||
input string
|
||||
resultExpected []string
|
||||
errorExpected bool
|
||||
}
|
||||
|
||||
expandTests := []testDefinition{
|
||||
{
|
||||
// Single node
|
||||
input: "n1",
|
||||
resultExpected: []string{"n1"},
|
||||
errorExpected: false,
|
||||
},
|
||||
{
|
||||
// Single node, duplicated
|
||||
input: "n1,n1",
|
||||
resultExpected: []string{"n1"},
|
||||
errorExpected: false,
|
||||
},
|
||||
{
|
||||
// Single node with padding
|
||||
input: "n[01]",
|
||||
resultExpected: []string{"n01"},
|
||||
errorExpected: false,
|
||||
},
|
||||
{
|
||||
// Single node with suffix
|
||||
input: "n[01]-p",
|
||||
resultExpected: []string{"n01-p"},
|
||||
errorExpected: false,
|
||||
},
|
||||
{
|
||||
// Multiple nodes with a single range
|
||||
input: "n[1-2]",
|
||||
resultExpected: []string{"n1", "n2"},
|
||||
errorExpected: false,
|
||||
},
|
||||
{
|
||||
// Multiple nodes with a single range and a single index
|
||||
input: "n[1-2,3]",
|
||||
resultExpected: []string{"n1", "n2", "n3"},
|
||||
errorExpected: false,
|
||||
},
|
||||
{
|
||||
// Multiple nodes with different prefixes
|
||||
input: "n[1-2],m[1,2]",
|
||||
resultExpected: []string{"m1", "m2", "n1", "n2"},
|
||||
errorExpected: false,
|
||||
},
|
||||
{
|
||||
// Multiple nodes with different suffixes
|
||||
input: "n[1-2]-p,n[1,2]-q",
|
||||
resultExpected: []string{"n1-p", "n1-q", "n2-p", "n2-q"},
|
||||
errorExpected: false,
|
||||
},
|
||||
{
|
||||
// Multiple nodes with and without node ranges
|
||||
input: " n09, n[01-04,06-07,09] , , n10,n04",
|
||||
resultExpected: []string{"n01", "n02", "n03", "n04", "n06", "n07", "n09", "n10"},
|
||||
errorExpected: false,
|
||||
},
|
||||
{
|
||||
// Forbidden DNS character
|
||||
input: "n@",
|
||||
resultExpected: []string{},
|
||||
errorExpected: true,
|
||||
},
|
||||
{
|
||||
// Forbidden range
|
||||
input: "n[1-2-2,3]",
|
||||
resultExpected: []string{},
|
||||
errorExpected: true,
|
||||
},
|
||||
{
|
||||
// Forbidden range limits
|
||||
input: "n[2-1]",
|
||||
resultExpected: []string{},
|
||||
errorExpected: true,
|
||||
},
|
||||
}
|
||||
|
||||
for _, expandTest := range expandTests {
|
||||
result, err := Expand(expandTest.input)
|
||||
|
||||
hasError := err != nil
|
||||
if hasError != expandTest.errorExpected && hasError {
|
||||
t.Errorf("Expand('%s') failed: unexpected error '%v'",
|
||||
expandTest.input, err)
|
||||
continue
|
||||
}
|
||||
if hasError != expandTest.errorExpected && !hasError {
|
||||
t.Errorf("Expand('%s') did not fail as expected: got result '%+v'",
|
||||
expandTest.input, result)
|
||||
continue
|
||||
}
|
||||
if !hasError && !equal(result, expandTest.resultExpected) {
|
||||
t.Errorf("Expand('%s') failed: got result '%+v', expected result '%v'",
|
||||
expandTest.input, result, expandTest.resultExpected)
|
||||
continue
|
||||
}
|
||||
|
||||
t.Logf("Checked hostlist.Expand('%s'): result = '%+v', err = '%v'",
|
||||
expandTest.input, result, err)
|
||||
}
|
||||
}
|
@@ -1,3 +1,14 @@
|
||||
<!--
|
||||
---
|
||||
title: Multi-channel Ticker
|
||||
description: Timer ticker that sends out the tick to multiple channels
|
||||
categories: [cc-metric-collector]
|
||||
tags: ['Developer']
|
||||
weight: 1
|
||||
hugo_path: docs/reference/cc-metric-collector/pkg/multichanticker/_index.md
|
||||
---
|
||||
-->
|
||||
|
||||
# MultiChanTicker
|
||||
|
||||
The idea of this ticker is to multiply the output channels. The original Golang `time.Ticker` provides only a single output channel, so the signal can only be received by a single receiver. This ticker allows adding multiple channels, all of which are notified of each time tick.
|
||||
|
@@ -1,3 +1,10 @@
|
||||
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
// All rights reserved. This file is part of cc-lib.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
// additional authors:
|
||||
// Holger Obermaier (NHR@KIT)
|
||||
|
||||
package multiChanTicker
|
||||
|
||||
import (
|
||||
|
Reference in New Issue
Block a user