mirror of
https://github.com/ClusterCockpit/cc-metric-collector.git
synced 2025-01-23 20:39:05 +01:00
Modularize the whole thing (#16)
* Use channels, add a metric router, split up configuration and use extended version of Influx line protocol internally * Use central timer for collectors and router. Add expressions to router * Add expression to router config * Update entry points * Start with README * Update README for CCMetric * Formatting * Update README.md * Add README for MultiChanTicker * Add README for MultiChanTicker * Update README.md * Add README to metric router * Update main README * Remove SinkEntity type * Update README for sinks * Update go files * Update README for receivers * Update collectors README * Update collectors README * Use seperate page per collector * Fix for tempstat page * Add docs for customcmd collector * Add docs for ipmistat collector * Add docs for topprocs collector * Update customCmdMetric.md * Use seconds when calculating LIKWID metrics * Add IB metrics ib_recv_pkts and ib_xmit_pkts * Drop domain part of host name * Updated to latest stable version of likwid * Define source code dependencies in Makefile * Add GPFS / IBM Spectrum Scale collector * Add vet and staticcheck make targets * Add vet and staticcheck make targets * Avoid go vet warning: struct field tag `json:"..., omitempty"` not compatible with reflect.StructTag.Get: suspicious space in struct tag value struct field tag `json:"...", omitempty` not compatible with reflect.StructTag.Get: key:"value" pairs not separated by spaces * Add sample collector to README.md * Add CPU frequency collector * Avoid staticcheck warning: redundant return statement * Avoid staticcheck warning: unnecessary assignment to the blank identifier * Simplified code * Add CPUFreqCollectorCpuinfo a metric collector to measure the current frequency of the CPUs as obtained from /proc/cpuinfo Only measure on the first hyperthread * Add collector for NFS clients * Move publication of metrics into Flush() for NatsSink * Update GitHub actions * Refactoring * Avoid vet warning: Println arg list ends with redundant newline * Avoid vet warning struct field commands has json tag but is not exported * Avoid vet warning: return copies lock value. * Corrected typo * Refactoring * Add go sources in internal/... * Bad separator in Makefile * Fix Infiniband collector Co-authored-by: Holger Obermaier <40787752+ho-ob@users.noreply.github.com>
This commit is contained in:
parent
222862af32
commit
200af84c54
6
Makefile
6
Makefile
@ -3,7 +3,9 @@ GOSRC_APP := metric-collector.go
|
||||
GOSRC_COLLECTORS := $(wildcard collectors/*.go)
|
||||
GOSRC_SINKS := $(wildcard sinks/*.go)
|
||||
GOSRC_RECEIVERS := $(wildcard receivers/*.go)
|
||||
GOSRC := $(GOSRC_APP) $(GOSRC_COLLECTORS) $(GOSRC_SINKS) $(GOSRC_RECEIVERS)
|
||||
GOSRC_INTERNAL := $(wildcard internal/*/*.go)
|
||||
GOSRC := $(GOSRC_APP) $(GOSRC_COLLECTORS) $(GOSRC_SINKS) $(GOSRC_RECEIVERS) $(GOSRC_INTERNAL)
|
||||
|
||||
|
||||
.PHONY: all
|
||||
all: $(APP)
|
||||
@ -24,6 +26,8 @@ fmt:
|
||||
go fmt $(GOSRC_SINKS)
|
||||
go fmt $(GOSRC_RECEIVERS)
|
||||
go fmt $(GOSRC_APP)
|
||||
@for F in $(GOSRC_INTERNAL); do go fmt $$F; done
|
||||
|
||||
|
||||
# Examine Go source code and reports suspicious constructs
|
||||
.PHONY: vet
|
||||
|
87
README.md
87
README.md
@ -12,79 +12,33 @@ The receiver runs as a go routine side-by-side with the timer loop and asynchron
|
||||
Configuration is implemented using a single json document that is distributed over network and may be persisted as file.
|
||||
Supported metrics are documented [here](https://github.com/ClusterCockpit/cc-specifications/blob/master/metrics/lineprotocol_alternative.md).
|
||||
|
||||
There is a main configuration file with basic settings that point to the other configuration files for the different components.
|
||||
|
||||
``` json
|
||||
{
|
||||
"interval": 3,
|
||||
"duration": 1,
|
||||
"collectors": [
|
||||
"memstat",
|
||||
"likwid",
|
||||
"loadavg",
|
||||
"netstat",
|
||||
"ibstat",
|
||||
"lustrestat",
|
||||
"topprocs",
|
||||
"cpustat",
|
||||
"nvidia"
|
||||
],
|
||||
"sink": {
|
||||
"user": "admin",
|
||||
"password": "12345",
|
||||
"host": "localhost",
|
||||
"port": "8080",
|
||||
"database": "testdb",
|
||||
"organisation": "testorg",
|
||||
"type": "stdout"
|
||||
},
|
||||
"default_tags": {
|
||||
"cluster": "testcluster"
|
||||
},
|
||||
"receiver": {
|
||||
"type": "none",
|
||||
"address": "127.0.0.1",
|
||||
"port": "4222",
|
||||
"database": "testdb"
|
||||
},
|
||||
"collect_config": {
|
||||
"tempstat": {
|
||||
"tag_override": {
|
||||
"hwmon0": {
|
||||
"type": "socket",
|
||||
"type-id": "0"
|
||||
},
|
||||
"hwmon1": {
|
||||
"type": "socket",
|
||||
"type-id": "1"
|
||||
}
|
||||
}
|
||||
},
|
||||
"diskstat": {
|
||||
"exclude_metrics": [
|
||||
"read_ms"
|
||||
]
|
||||
}
|
||||
}
|
||||
"sinks": "sinks.json",
|
||||
"collectors" : "collectors.json",
|
||||
"receivers" : "receivers.json",
|
||||
"router" : "router.json",
|
||||
"interval": 10,
|
||||
"duration": 1
|
||||
}
|
||||
```
|
||||
|
||||
The `interval` defines how often the metrics should be read and send to the sink. The `duration` tells collectors how long one measurement has to take. An example for this is the `likwid` collector which starts the hardware performance counter, waits for `duration` seconds and stops the counters again. If you configure a collector to do two measurments, the `duration` must be at least half the `interval`.
|
||||
The `interval` defines how often the metrics should be read and send to the sink. The `duration` tells collectors how long one measurement has to take. This is important for some collectors, like the `likwid` collector.
|
||||
|
||||
The `collectors` contains all collectors executed collectors. Each collector can be configured in the `collect_config` section. A more detailed list of all collectors and their configuration options can be found in the [README for collectors](./collectors/README.md).
|
||||
See the component READMEs for their configuration:
|
||||
* [`collectors`](./collectors/README.md)
|
||||
* [`sinks`](./sinks/README.md)
|
||||
* [`receivers`](./receivers/README.md)
|
||||
* [`router`](./internal/metricRouter/README.md)
|
||||
|
||||
The `sink` section contains the configuration where the data should be transmitted to. There are currently four sinks supported `influxdb`, `nats`, `http` and `stdout`. See [README for sinks](./sinks/README.md) for more information about the individual sinks and which configuration field they are using.
|
||||
|
||||
In the `default_tags` section, one can define key-value-pairs (only strings) that are added to each sent out metric. This can be useful for cluster names like in the example JSON or information like rank or island for orientation.
|
||||
|
||||
With `receiver`, the collector can be used as a router by receiving metrics and forwarding them to the configured sink. There are currently only types `none` (for no receiver) and `nats`. For more information see the [README in receivers](./receivers/README.md).
|
||||
|
||||
# Installation
|
||||
|
||||
```
|
||||
$ git clone git@github.com:ClusterCockpit/cc-metric-collector.git
|
||||
$ cd cc-metric-collector/collectors
|
||||
$ edit Makefile (for LIKWID collector)
|
||||
$ make (downloads LIKWID, builds it as static library and copies all required files for the collector. Uses sudo in case of own accessdaemon)
|
||||
$ cd ..
|
||||
$ make (downloads LIKWID, builds it as static library with 'direct' accessmode and copies all required files for the collector)
|
||||
$ go get (requires at least golang 1.13)
|
||||
$ go build metric-collector
|
||||
```
|
||||
@ -104,13 +58,6 @@ Usage of metric-collector:
|
||||
Path for PID file (default "/var/run/cc-metric-collector.pid")
|
||||
```
|
||||
|
||||
# Todos
|
||||
|
||||
- [ ] Use only non-blocking APIs for the sinks
|
||||
- [x] Collector specific configuration in global JSON file? Changing the configuration inside the Go code is not user-friendly.
|
||||
- [ ] Mark collectors as 'can-run-in-parallel' and use goroutines for them. There are only a few collectors that should run serially (e.g. LIKWID)
|
||||
- [ ] Configuration option for receivers to add other tags. Additonal flag to tell whether default tags should be added as well.
|
||||
- [ ] CLI option to get help output for collectors, sinks and receivers about their configuration options and metrics
|
||||
|
||||
# Contributing
|
||||
The ClusterCockpit ecosystem is designed to be used by different HPC computing centers. Since configurations and setups differ between the centers, the centers likely have to put some work into the cc-metric-collector to gather all desired metrics.
|
||||
@ -119,5 +66,5 @@ You are free to open an issue to request a collector but we would also be happy
|
||||
|
||||
# Contact
|
||||
|
||||
[Matrix.org ClusterCockpit General chat](https://matrix.to/#/#clustercockpit-dev:matrix.org)
|
||||
[Matrix.org ClusterCockpit Development chat](https://matrix.to/#/#clustercockpit:matrix.org)
|
||||
* [Matrix.org ClusterCockpit General chat](https://matrix.to/#/#clustercockpit-dev:matrix.org)
|
||||
* [Matrix.org ClusterCockpit Development chat](https://matrix.to/#/#clustercockpit:matrix.org)
|
||||
|
15
collectors.json
Normal file
15
collectors.json
Normal file
@ -0,0 +1,15 @@
|
||||
{
|
||||
"tempstat": {
|
||||
"tag_override": {
|
||||
"hwmon0" : {
|
||||
"type" : "socket",
|
||||
"type-id" : "0"
|
||||
},
|
||||
"hwmon1" : {
|
||||
"type" : "socket",
|
||||
"type-id" : "1"
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
@ -1,288 +1,34 @@
|
||||
# CCMetric collectors
|
||||
|
||||
This folder contains the collectors for the cc-metric-collector.
|
||||
|
||||
# `metricCollector.go`
|
||||
The base class/configuration is located in `metricCollector.go`.
|
||||
|
||||
# Collectors
|
||||
|
||||
* `memstatMetric.go`: Reads `/proc/meminfo` to calculate **node** metrics. It also combines values to the metric `mem_used`
|
||||
* `loadavgMetric.go`: Reads `/proc/loadavg` and submits **node** metrics:
|
||||
* `netstatMetric.go`: Reads `/proc/net/dev` and submits for all network devices as the **node** metrics.
|
||||
* `lustreMetric.go`: Reads Lustre's stats files and submits **node** metrics:
|
||||
* `infinibandMetric.go`: Reads InfiniBand metrics. It uses the `perfquery` command to read the **node** metrics but can fallback to sysfs counters in case `perfquery` does not work.
|
||||
* `likwidMetric.go`: Reads hardware performance events using LIKWID. It submits **socket** and **cpu** metrics
|
||||
* `cpustatMetric.go`: Read CPU specific values from `/proc/stat`
|
||||
* `topprocsMetric.go`: Reads the TopX processes by their CPU usage. X is configurable
|
||||
* `nvidiaMetric.go`: Read data about Nvidia GPUs using the NVML library
|
||||
* `tempMetric.go`: Read temperature data from `/sys/class/hwmon/hwmon*`
|
||||
* `ipmiMetric.go`: Collect data from `ipmitool` or as fallback `ipmi-sensors`
|
||||
* `customCmdMetric.go`: Run commands or read files and submit the output (output has to be in InfluxDB line protocol!)
|
||||
|
||||
If any of the collectors cannot be initialized, it is excluded from all further reads. Like if the Lustre stat file is not a valid path, no Lustre specific metrics will be recorded.
|
||||
|
||||
# Collector configuration
|
||||
# Configuration
|
||||
|
||||
```json
|
||||
"collectors": [
|
||||
"tempstat"
|
||||
],
|
||||
"collect_config": {
|
||||
"tempstat": {
|
||||
"tag_override": {
|
||||
"hwmon0" : {
|
||||
"type" : "socket",
|
||||
"type-id" : "0"
|
||||
},
|
||||
"hwmon1" : {
|
||||
"type" : "socket",
|
||||
"type-id" : "1"
|
||||
}
|
||||
}
|
||||
{
|
||||
"collector_type" : {
|
||||
<collector specific configuration>
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
The configuration of the collectors in the main config files consists of two parts: active collectors (`collectors`) and collector configuration (`collect_config`). At startup, all collectors in the `collectors` list is initialized and, if successfully initialized, added to the active collectors for metric retrieval. At initialization the collector-specific configuration from the `collect_config` section is handed over. Each collector has own configuration options, check at the collector-specific section.
|
||||
In contrast to the configuration files for sinks and receivers, the collectors configuration is not a list but a set of dicts. This is required because we didn't manage to partially read the type before loading the remaining configuration. We are eager to change this to the same format.
|
||||
|
||||
## `memstat`
|
||||
# Available collectors
|
||||
|
||||
```json
|
||||
"memstat": {
|
||||
"exclude_metrics": [
|
||||
"mem_used"
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
The `memstat` collector reads data from `/proc/meminfo` and outputs a handful **node** metrics. If a metric is not required, it can be excluded from forwarding it to the sink.
|
||||
|
||||
|
||||
Metrics:
|
||||
* `mem_total`
|
||||
* `mem_sreclaimable`
|
||||
* `mem_slab`
|
||||
* `mem_free`
|
||||
* `mem_buffers`
|
||||
* `mem_cached`
|
||||
* `mem_available`
|
||||
* `mem_shared`
|
||||
* `swap_total`
|
||||
* `swap_free`
|
||||
* `mem_used` = `mem_total` - (`mem_free` + `mem_buffers` + `mem_cached`)
|
||||
|
||||
## `loadavg`
|
||||
```json
|
||||
"loadavg": {
|
||||
"exclude_metrics": [
|
||||
"proc_run"
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
The `loadavg` collector reads data from `/proc/loadavg` and outputs a handful **node** metrics. If a metric is not required, it can be excluded from forwarding it to the sink.
|
||||
|
||||
Metrics:
|
||||
* `load_one`
|
||||
* `load_five`
|
||||
* `load_fifteen`
|
||||
* `proc_run`
|
||||
* `proc_total`
|
||||
|
||||
## `netstat`
|
||||
```json
|
||||
"netstat": {
|
||||
"exclude_devices": [
|
||||
"lo"
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
The `netstat` collector reads data from `/proc/net/dev` and outputs a handful **node** metrics. If a device is not required, it can be excluded from forwarding it to the sink. Commonly the `lo` device should be excluded.
|
||||
|
||||
Metrics:
|
||||
* `bytes_in`
|
||||
* `bytes_out`
|
||||
* `pkts_in`
|
||||
* `pkts_out`
|
||||
|
||||
The device name is added as tag `device`.
|
||||
|
||||
|
||||
## `diskstat`
|
||||
|
||||
```json
|
||||
"diskstat": {
|
||||
"exclude_metrics": [
|
||||
"read_ms"
|
||||
],
|
||||
}
|
||||
```
|
||||
|
||||
The `netstat` collector reads data from `/proc/net/dev` and outputs a handful **node** metrics. If a metric is not required, it can be excluded from forwarding it to the sink.
|
||||
|
||||
Metrics:
|
||||
* `reads`
|
||||
* `reads_merged`
|
||||
* `read_sectors`
|
||||
* `read_ms`
|
||||
* `writes`
|
||||
* `writes_merged`
|
||||
* `writes_sectors`
|
||||
* `writes_ms`
|
||||
* `ioops`
|
||||
* `ioops_ms`
|
||||
* `ioops_weighted_ms`
|
||||
* `discards`
|
||||
* `discards_merged`
|
||||
* `discards_sectors`
|
||||
* `discards_ms`
|
||||
* `flushes`
|
||||
* `flushes_ms`
|
||||
|
||||
|
||||
The device name is added as tag `device`.
|
||||
|
||||
## `cpustat`
|
||||
```json
|
||||
"netstat": {
|
||||
"exclude_metrics": [
|
||||
"cpu_idle"
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
The `cpustat` collector reads data from `/proc/stats` and outputs a handful **node** and **hwthread** metrics. If a metric is not required, it can be excluded from forwarding it to the sink.
|
||||
|
||||
Metrics:
|
||||
* `cpu_user`
|
||||
* `cpu_nice`
|
||||
* `cpu_system`
|
||||
* `cpu_idle`
|
||||
* `cpu_iowait`
|
||||
* `cpu_irq`
|
||||
* `cpu_softirq`
|
||||
* `cpu_steal`
|
||||
* `cpu_guest`
|
||||
* `cpu_guest_nice`
|
||||
|
||||
## `likwid`
|
||||
```json
|
||||
"likwid": {
|
||||
"eventsets": [
|
||||
{
|
||||
"events": {
|
||||
"FIXC1": "ACTUAL_CPU_CLOCK",
|
||||
"FIXC2": "MAX_CPU_CLOCK",
|
||||
"PMC0": "RETIRED_INSTRUCTIONS",
|
||||
"PMC1": "CPU_CLOCKS_UNHALTED",
|
||||
"PMC2": "RETIRED_SSE_AVX_FLOPS_ALL",
|
||||
"PMC3": "MERGE",
|
||||
"DFC0": "DRAM_CHANNEL_0",
|
||||
"DFC1": "DRAM_CHANNEL_1",
|
||||
"DFC2": "DRAM_CHANNEL_2",
|
||||
"DFC3": "DRAM_CHANNEL_3"
|
||||
},
|
||||
"metrics": [
|
||||
{
|
||||
"name": "ipc",
|
||||
"calc": "PMC0/PMC1",
|
||||
"socket_scope": false,
|
||||
"publish": true
|
||||
},
|
||||
{
|
||||
"name": "flops_any",
|
||||
"calc": "0.000001*PMC2/time",
|
||||
"socket_scope": false,
|
||||
"publish": true
|
||||
},
|
||||
{
|
||||
"name": "clock_mhz",
|
||||
"calc": "0.000001*(FIXC1/FIXC2)/inverseClock",
|
||||
"socket_scope": false,
|
||||
"publish": true
|
||||
},
|
||||
{
|
||||
"name": "mem1",
|
||||
"calc": "0.000001*(DFC0+DFC1+DFC2+DFC3)*64.0/time",
|
||||
"socket_scope": true,
|
||||
"publish": false
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"events": {
|
||||
"DFC0": "DRAM_CHANNEL_4",
|
||||
"DFC1": "DRAM_CHANNEL_5",
|
||||
"DFC2": "DRAM_CHANNEL_6",
|
||||
"DFC3": "DRAM_CHANNEL_7",
|
||||
"PWR0": "RAPL_CORE_ENERGY",
|
||||
"PWR1": "RAPL_PKG_ENERGY"
|
||||
},
|
||||
"metrics": [
|
||||
{
|
||||
"name": "pwr_core",
|
||||
"calc": "PWR0/time",
|
||||
"socket_scope": false,
|
||||
"publish": true
|
||||
},
|
||||
{
|
||||
"name": "pwr_pkg",
|
||||
"calc": "PWR1/time",
|
||||
"socket_scope": true,
|
||||
"publish": true
|
||||
},
|
||||
{
|
||||
"name": "mem2",
|
||||
"calc": "0.000001*(DFC0+DFC1+DFC2+DFC3)*64.0/time",
|
||||
"socket_scope": true,
|
||||
"publish": false
|
||||
}
|
||||
]
|
||||
}
|
||||
],
|
||||
"globalmetrics": [
|
||||
{
|
||||
"name": "mem_bw",
|
||||
"calc": "mem1+mem2",
|
||||
"socket_scope": true,
|
||||
"publish": true
|
||||
}
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
_Example config suitable for AMD Zen3_
|
||||
|
||||
The `likwid` collector reads hardware performance counters at a **hwthread** and **socket** level. The configuration looks quite complicated but it is basically copy&paste from [LIKWID's performance groups](https://github.com/RRZE-HPC/likwid/tree/master/groups). The collector made multiple iterations and tried to use the performance groups but it lacked flexibility. The current way of configuration provides most flexibility.
|
||||
|
||||
The logic is as following: There are multiple eventsets, each consisting of a list of counters+events and a list of metrics. If you compare a common performance group with the example setting above, there is not much difference:
|
||||
```
|
||||
EVENTSET -> "events": {
|
||||
FIXC1 ACTUAL_CPU_CLOCK -> "FIXC1": "ACTUAL_CPU_CLOCK",
|
||||
FIXC2 MAX_CPU_CLOCK -> "FIXC2": "MAX_CPU_CLOCK",
|
||||
PMC0 RETIRED_INSTRUCTIONS -> "PMC0" : "RETIRED_INSTRUCTIONS",
|
||||
PMC1 CPU_CLOCKS_UNHALTED -> "PMC1" : "CPU_CLOCKS_UNHALTED",
|
||||
PMC2 RETIRED_SSE_AVX_FLOPS_ALL -> "PMC2": "RETIRED_SSE_AVX_FLOPS_ALL",
|
||||
PMC3 MERGE -> "PMC3": "MERGE",
|
||||
-> }
|
||||
```
|
||||
|
||||
The metrics are following the same procedure:
|
||||
|
||||
```
|
||||
METRICS -> "metrics": [
|
||||
IPC PMC0/PMC1 -> {
|
||||
-> "name" : "IPC",
|
||||
-> "calc" : "PMC0/PMC1",
|
||||
-> "socket_scope": false,
|
||||
-> "publish": true
|
||||
-> }
|
||||
-> ]
|
||||
```
|
||||
|
||||
The `socket_scope` option tells whether it is submitted per socket or per hwthread. If a metric is only used for internal calculations, you can set `publish = false`.
|
||||
|
||||
Since some metrics can only be gathered in multiple measurements (like the memory bandwidth on AMD Zen3 chips), configure multiple eventsets like in the example config and use the `globalmetrics` section to combine them. **Be aware** that the combination might be misleading because the "behavior" of a metric changes over time and the multiple measurements might count different computing phases.
|
||||
* [`cpustat`](./cpustatMetric.md)
|
||||
* [`memstat`](./memstatMetric.md)
|
||||
* [`diskstat`](./diskstatMetric.md)
|
||||
* [`loadavg`](./loadavgMetric.md)
|
||||
* [`netstat`](./netstatMetric.md)
|
||||
* [`ibstat`](./infinibandMetric.md)
|
||||
* [`tempstat`](./tempMetric.md)
|
||||
* [`lustre`](./lustreMetric.md)
|
||||
* [`likwid`](./likwidMetric.md)
|
||||
* [`nvidia`](./nvidiaMetric.md)
|
||||
* [`customcmd`](./customCmdMetric.md)
|
||||
* [`ipmistat`](./ipmiMetric.md)
|
||||
* [`topprocs`](./topprocsMetric.md)
|
||||
|
||||
## Todos
|
||||
|
||||
@ -292,13 +38,15 @@ Since some metrics can only be gathered in multiple measurements (like the memor
|
||||
# Contributing own collectors
|
||||
A collector reads data from any source, parses it to metrics and submits these metrics to the `metric-collector`. A collector provides three function:
|
||||
|
||||
* `Init(config []byte) error`: Initializes the collector using the given collector-specific config in JSON.
|
||||
* `Read(duration time.Duration, out *[]lp.MutableMetric) error`: Read, parse and submit data to the `out` list. If the collector has to measure anything for some duration, use the provided function argument `duration`.
|
||||
* `Name() string`: Return the name of the collector
|
||||
* `Init(config json.RawMessage) error`: Initializes the collector using the given collector-specific config in JSON. Check if needed files/commands exists, ...
|
||||
* `Initialized() bool`: Check if a collector is successfully initialized
|
||||
* `Read(duration time.Duration, output chan ccMetric.CCMetric)`: Read, parse and submit data to the `output` channel as [`CCMetric`](../internal/ccMetric/README.md). If the collector has to measure anything for some duration, use the provided function argument `duration`.
|
||||
* `Close()`: Closes down the collector.
|
||||
|
||||
It is recommanded to call `setup()` in the `Init()` function.
|
||||
|
||||
Finally, the collector needs to be registered in the `metric-collector.go`. There is a list of collectors called `Collectors` which is a map (string -> pointer to collector). Add a new entry with a descriptive name and the new collector.
|
||||
Finally, the collector needs to be registered in the `collectorManager.go`. There is a list of collectors called `AvailableCollectors` which is a map (`collector_type_string` -> `pointer to MetricCollector interface`). Add a new entry with a descriptive name and the new collector.
|
||||
|
||||
## Sample collector
|
||||
|
||||
@ -307,8 +55,9 @@ package collectors
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
lp "github.com/influxdata/line-protocol"
|
||||
"time"
|
||||
|
||||
lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
|
||||
)
|
||||
|
||||
// Struct for the collector-specific JSON config
|
||||
@ -317,11 +66,11 @@ type SampleCollectorConfig struct {
|
||||
}
|
||||
|
||||
type SampleCollector struct {
|
||||
MetricCollector
|
||||
metricCollector
|
||||
config SampleCollectorConfig
|
||||
}
|
||||
|
||||
func (m *SampleCollector) Init(config []byte) error {
|
||||
func (m *SampleCollector) Init(config json.RawMessage) error {
|
||||
m.name = "SampleCollector"
|
||||
m.setup()
|
||||
if len(config) > 0 {
|
||||
@ -330,11 +79,13 @@ func (m *SampleCollector) Init(config []byte) error {
|
||||
return err
|
||||
}
|
||||
}
|
||||
m.meta = map[string]string{"source": m.name, "group": "Sample"}
|
||||
|
||||
m.init = true
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *SampleCollector) Read(interval time.Duration, out *[]lp.MutableMetric) {
|
||||
func (m *SampleCollector) Read(interval time.Duration, output chan lp.CCMetric) {
|
||||
if !m.init {
|
||||
return
|
||||
}
|
||||
@ -342,9 +93,9 @@ func (m *SampleCollector) Read(interval time.Duration, out *[]lp.MutableMetric)
|
||||
tags := map[string]string{"type" : "node"}
|
||||
// Each metric has exactly one field: value !
|
||||
value := map[string]interface{}{"value": int(x)}
|
||||
y, err := lp.New("sample_metric", tags, value, time.Now())
|
||||
y, err := lp.New("sample_metric", tags, m.meta, value, time.Now())
|
||||
if err == nil {
|
||||
*out = append(*out, y)
|
||||
output <- y
|
||||
}
|
||||
}
|
||||
|
||||
|
143
collectors/collectorManager.go
Normal file
143
collectors/collectorManager.go
Normal file
@ -0,0 +1,143 @@
|
||||
package collectors
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"log"
|
||||
"os"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
|
||||
mct "github.com/ClusterCockpit/cc-metric-collector/internal/multiChanTicker"
|
||||
)
|
||||
|
||||
var AvailableCollectors = map[string]MetricCollector{
|
||||
|
||||
"likwid": &LikwidCollector{},
|
||||
"loadavg": &LoadavgCollector{},
|
||||
"memstat": &MemstatCollector{},
|
||||
"netstat": &NetstatCollector{},
|
||||
"ibstat": &InfinibandCollector{},
|
||||
"lustrestat": &LustreCollector{},
|
||||
"cpustat": &CpustatCollector{},
|
||||
"topprocs": &TopProcsCollector{},
|
||||
"nvidia": &NvidiaCollector{},
|
||||
"customcmd": &CustomCmdCollector{},
|
||||
"diskstat": &DiskstatCollector{},
|
||||
"tempstat": &TempCollector{},
|
||||
"ipmistat": &IpmiCollector{},
|
||||
"gpfs": new(GpfsCollector),
|
||||
"cpufreq": new(CPUFreqCollector),
|
||||
"cpufreq_cpuinfo": new(CPUFreqCpuInfoCollector),
|
||||
"nfsstat": new(NfsCollector),
|
||||
}
|
||||
|
||||
type collectorManager struct {
|
||||
collectors []MetricCollector
|
||||
output chan lp.CCMetric
|
||||
done chan bool
|
||||
ticker mct.MultiChanTicker
|
||||
duration time.Duration
|
||||
wg *sync.WaitGroup
|
||||
config map[string]json.RawMessage
|
||||
}
|
||||
|
||||
type CollectorManager interface {
|
||||
Init(ticker mct.MultiChanTicker, duration time.Duration, wg *sync.WaitGroup, collectConfigFile string) error
|
||||
AddOutput(output chan lp.CCMetric)
|
||||
Start()
|
||||
Close()
|
||||
}
|
||||
|
||||
func (cm *collectorManager) Init(ticker mct.MultiChanTicker, duration time.Duration, wg *sync.WaitGroup, collectConfigFile string) error {
|
||||
cm.collectors = make([]MetricCollector, 0)
|
||||
cm.output = nil
|
||||
cm.done = make(chan bool)
|
||||
cm.wg = wg
|
||||
cm.ticker = ticker
|
||||
cm.duration = duration
|
||||
configFile, err := os.Open(collectConfigFile)
|
||||
if err != nil {
|
||||
log.Print(err.Error())
|
||||
return err
|
||||
}
|
||||
defer configFile.Close()
|
||||
jsonParser := json.NewDecoder(configFile)
|
||||
err = jsonParser.Decode(&cm.config)
|
||||
if err != nil {
|
||||
log.Print(err.Error())
|
||||
return err
|
||||
}
|
||||
for k, cfg := range cm.config {
|
||||
log.Print(k, " ", cfg)
|
||||
if _, found := AvailableCollectors[k]; !found {
|
||||
log.Print("[CollectorManager] SKIP unknown collector ", k)
|
||||
continue
|
||||
}
|
||||
c := AvailableCollectors[k]
|
||||
|
||||
err = c.Init(cfg)
|
||||
if err != nil {
|
||||
log.Print("[CollectorManager] Collector ", k, "initialization failed: ", err.Error())
|
||||
continue
|
||||
}
|
||||
cm.collectors = append(cm.collectors, c)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (cm *collectorManager) Start() {
|
||||
cm.wg.Add(1)
|
||||
tick := make(chan time.Time)
|
||||
cm.ticker.AddChannel(tick)
|
||||
go func() {
|
||||
for {
|
||||
CollectorManagerLoop:
|
||||
select {
|
||||
case <-cm.done:
|
||||
for _, c := range cm.collectors {
|
||||
c.Close()
|
||||
}
|
||||
cm.wg.Done()
|
||||
log.Print("[CollectorManager] DONE\n")
|
||||
break CollectorManagerLoop
|
||||
case t := <-tick:
|
||||
for _, c := range cm.collectors {
|
||||
CollectorManagerInputLoop:
|
||||
select {
|
||||
case <-cm.done:
|
||||
for _, c := range cm.collectors {
|
||||
c.Close()
|
||||
}
|
||||
cm.wg.Done()
|
||||
log.Print("[CollectorManager] DONE\n")
|
||||
break CollectorManagerInputLoop
|
||||
default:
|
||||
log.Print("[CollectorManager] ", c.Name(), " ", t)
|
||||
c.Read(cm.duration, cm.output)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
log.Print("[CollectorManager] EXIT\n")
|
||||
}()
|
||||
log.Print("[CollectorManager] STARTED\n")
|
||||
}
|
||||
|
||||
func (cm *collectorManager) AddOutput(output chan lp.CCMetric) {
|
||||
cm.output = output
|
||||
}
|
||||
|
||||
func (cm *collectorManager) Close() {
|
||||
cm.done <- true
|
||||
log.Print("[CollectorManager] CLOSE")
|
||||
}
|
||||
|
||||
func New(ticker mct.MultiChanTicker, duration time.Duration, wg *sync.WaitGroup, collectConfigFile string) (CollectorManager, error) {
|
||||
cm := &collectorManager{}
|
||||
err := cm.Init(ticker, duration, wg, collectConfigFile)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return cm, err
|
||||
}
|
@ -2,14 +2,16 @@ package collectors
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"encoding/json"
|
||||
|
||||
"fmt"
|
||||
"log"
|
||||
"os"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
|
||||
|
||||
lp "github.com/influxdata/line-protocol"
|
||||
)
|
||||
|
||||
//
|
||||
@ -33,12 +35,16 @@ type CPUFreqCpuInfoCollectorTopology struct {
|
||||
}
|
||||
|
||||
type CPUFreqCpuInfoCollector struct {
|
||||
MetricCollector
|
||||
metricCollector
|
||||
topology []CPUFreqCpuInfoCollectorTopology
|
||||
}
|
||||
|
||||
func (m *CPUFreqCpuInfoCollector) Init(config []byte) error {
|
||||
func (m *CPUFreqCpuInfoCollector) Init(config json.RawMessage) error {
|
||||
m.name = "CPUFreqCpuInfoCollector"
|
||||
m.meta = map[string]string{
|
||||
"source": m.name,
|
||||
"group": "cpufreq",
|
||||
}
|
||||
|
||||
const cpuInfoFile = "/proc/cpuinfo"
|
||||
file, err := os.Open(cpuInfoFile)
|
||||
@ -145,7 +151,8 @@ func (m *CPUFreqCpuInfoCollector) Init(config []byte) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *CPUFreqCpuInfoCollector) Read(interval time.Duration, out *[]lp.MutableMetric) {
|
||||
|
||||
func (m *CPUFreqCpuInfoCollector) Read(interval time.Duration, output chan lp.CCMetric) {
|
||||
if !m.init {
|
||||
return
|
||||
}
|
||||
@ -174,9 +181,9 @@ func (m *CPUFreqCpuInfoCollector) Read(interval time.Duration, out *[]lp.Mutable
|
||||
log.Printf("Failed to convert cpu MHz to float: %v", err)
|
||||
return
|
||||
}
|
||||
y, err := lp.New("cpufreq", t.tagSet, map[string]interface{}{"value": value}, now)
|
||||
y, err := lp.New("cpufreq", t.tagSet, m.meta, map[string]interface{}{"value": value}, now)
|
||||
if err == nil {
|
||||
*out = append(*out, y)
|
||||
output <- y
|
||||
}
|
||||
}
|
||||
processorCounter++
|
||||
|
@ -10,8 +10,7 @@ import (
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
lp "github.com/influxdata/line-protocol"
|
||||
lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
@ -56,14 +55,14 @@ type CPUFreqCollectorTopology struct {
|
||||
// See: https://www.kernel.org/doc/html/latest/admin-guide/pm/cpufreq.html
|
||||
//
|
||||
type CPUFreqCollector struct {
|
||||
MetricCollector
|
||||
metricCollector
|
||||
topology []CPUFreqCollectorTopology
|
||||
config struct {
|
||||
ExcludeMetrics []string `json:"exclude_metrics,omitempty"`
|
||||
}
|
||||
}
|
||||
|
||||
func (m *CPUFreqCollector) Init(config []byte) error {
|
||||
func (m *CPUFreqCollector) Init(config json.RawMessage) error {
|
||||
m.name = "CPUFreqCollector"
|
||||
m.setup()
|
||||
if len(config) > 0 {
|
||||
@ -72,6 +71,10 @@ func (m *CPUFreqCollector) Init(config []byte) error {
|
||||
return err
|
||||
}
|
||||
}
|
||||
m.meta = map[string]string{
|
||||
"source": m.name,
|
||||
"group": "CPU Frequency",
|
||||
}
|
||||
|
||||
// Loop for all CPU directories
|
||||
baseDir := "/sys/devices/system/cpu"
|
||||
@ -179,7 +182,7 @@ func (m *CPUFreqCollector) Init(config []byte) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *CPUFreqCollector) Read(interval time.Duration, out *[]lp.MutableMetric) {
|
||||
func (m *CPUFreqCollector) Read(interval time.Duration, output chan lp.CCMetric) {
|
||||
if !m.init {
|
||||
return
|
||||
}
|
||||
@ -205,9 +208,9 @@ func (m *CPUFreqCollector) Read(interval time.Duration, out *[]lp.MutableMetric)
|
||||
continue
|
||||
}
|
||||
|
||||
y, err := lp.New("cpufreq", t.tagSet, map[string]interface{}{"value": cpuFreq}, now)
|
||||
y, err := lp.New("cpufreq", t.tagSet, m.meta, map[string]interface{}{"value": cpuFreq}, now)
|
||||
if err == nil {
|
||||
*out = append(*out, y)
|
||||
output <- y
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -7,8 +7,7 @@ import (
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
lp "github.com/influxdata/line-protocol"
|
||||
lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
|
||||
)
|
||||
|
||||
const CPUSTATFILE = `/proc/stat`
|
||||
@ -18,13 +17,14 @@ type CpustatCollectorConfig struct {
|
||||
}
|
||||
|
||||
type CpustatCollector struct {
|
||||
MetricCollector
|
||||
metricCollector
|
||||
config CpustatCollectorConfig
|
||||
}
|
||||
|
||||
func (m *CpustatCollector) Init(config []byte) error {
|
||||
func (m *CpustatCollector) Init(config json.RawMessage) error {
|
||||
m.name = "CpustatCollector"
|
||||
m.setup()
|
||||
m.meta = map[string]string{"source": m.name, "group": "CPU"}
|
||||
if len(config) > 0 {
|
||||
err := json.Unmarshal(config, &m.config)
|
||||
if err != nil {
|
||||
@ -35,7 +35,7 @@ func (m *CpustatCollector) Init(config []byte) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func ParseStatLine(line string, cpu int, exclude []string, out *[]lp.MutableMetric) {
|
||||
func (c *CpustatCollector) parseStatLine(line string, cpu int, exclude []string, output chan lp.CCMetric) {
|
||||
ls := strings.Fields(line)
|
||||
matches := []string{"", "cpu_user", "cpu_nice", "cpu_system", "cpu_idle", "cpu_iowait", "cpu_irq", "cpu_softirq", "cpu_steal", "cpu_guest", "cpu_guest_nice"}
|
||||
for _, ex := range exclude {
|
||||
@ -52,16 +52,16 @@ func ParseStatLine(line string, cpu int, exclude []string, out *[]lp.MutableMetr
|
||||
if len(m) > 0 {
|
||||
x, err := strconv.ParseInt(ls[i], 0, 64)
|
||||
if err == nil {
|
||||
y, err := lp.New(m, tags, map[string]interface{}{"value": int(x)}, time.Now())
|
||||
y, err := lp.New(m, tags, c.meta, map[string]interface{}{"value": int(x)}, time.Now())
|
||||
if err == nil {
|
||||
*out = append(*out, y)
|
||||
output <- y
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (m *CpustatCollector) Read(interval time.Duration, out *[]lp.MutableMetric) {
|
||||
func (m *CpustatCollector) Read(interval time.Duration, output chan lp.CCMetric) {
|
||||
if !m.init {
|
||||
return
|
||||
}
|
||||
@ -78,11 +78,11 @@ func (m *CpustatCollector) Read(interval time.Duration, out *[]lp.MutableMetric)
|
||||
}
|
||||
ls := strings.Fields(line)
|
||||
if strings.Compare(ls[0], "cpu") == 0 {
|
||||
ParseStatLine(line, -1, m.config.ExcludeMetrics, out)
|
||||
m.parseStatLine(line, -1, m.config.ExcludeMetrics, output)
|
||||
} else if strings.HasPrefix(ls[0], "cpu") {
|
||||
cpustr := strings.TrimLeft(ls[0], "cpu")
|
||||
cpu, _ := strconv.Atoi(cpustr)
|
||||
ParseStatLine(line, cpu, m.config.ExcludeMetrics, out)
|
||||
m.parseStatLine(line, cpu, m.config.ExcludeMetrics, output)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
23
collectors/cpustatMetric.md
Normal file
23
collectors/cpustatMetric.md
Normal file
@ -0,0 +1,23 @@
|
||||
|
||||
## `cpustat` collector
|
||||
```json
|
||||
"netstat": {
|
||||
"exclude_metrics": [
|
||||
"cpu_idle"
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
The `cpustat` collector reads data from `/proc/stats` and outputs a handful **node** and **hwthread** metrics. If a metric is not required, it can be excluded from forwarding it to the sink.
|
||||
|
||||
Metrics:
|
||||
* `cpu_user`
|
||||
* `cpu_nice`
|
||||
* `cpu_system`
|
||||
* `cpu_idle`
|
||||
* `cpu_iowait`
|
||||
* `cpu_irq`
|
||||
* `cpu_softirq`
|
||||
* `cpu_steal`
|
||||
* `cpu_guest`
|
||||
* `cpu_guest_nice`
|
@ -9,7 +9,8 @@ import (
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
lp "github.com/influxdata/line-protocol"
|
||||
lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
|
||||
influx "github.com/influxdata/line-protocol"
|
||||
)
|
||||
|
||||
const CUSTOMCMDPATH = `/home/unrz139/Work/cc-metric-collector/collectors/custom`
|
||||
@ -21,17 +22,18 @@ type CustomCmdCollectorConfig struct {
|
||||
}
|
||||
|
||||
type CustomCmdCollector struct {
|
||||
MetricCollector
|
||||
handler *lp.MetricHandler
|
||||
parser *lp.Parser
|
||||
metricCollector
|
||||
handler *influx.MetricHandler
|
||||
parser *influx.Parser
|
||||
config CustomCmdCollectorConfig
|
||||
commands []string
|
||||
files []string
|
||||
}
|
||||
|
||||
func (m *CustomCmdCollector) Init(config []byte) error {
|
||||
func (m *CustomCmdCollector) Init(config json.RawMessage) error {
|
||||
var err error
|
||||
m.name = "CustomCmdCollector"
|
||||
m.meta = map[string]string{"source": m.name, "group": "Custom"}
|
||||
if len(config) > 0 {
|
||||
err = json.Unmarshal(config, &m.config)
|
||||
if err != nil {
|
||||
@ -61,8 +63,8 @@ func (m *CustomCmdCollector) Init(config []byte) error {
|
||||
if len(m.files) == 0 && len(m.commands) == 0 {
|
||||
return errors.New("No metrics to collect")
|
||||
}
|
||||
m.handler = lp.NewMetricHandler()
|
||||
m.parser = lp.NewParser(m.handler)
|
||||
m.handler = influx.NewMetricHandler()
|
||||
m.parser = influx.NewParser(m.handler)
|
||||
m.parser.SetTimeFunc(DefaultTime)
|
||||
m.init = true
|
||||
return nil
|
||||
@ -72,7 +74,7 @@ var DefaultTime = func() time.Time {
|
||||
return time.Unix(42, 0)
|
||||
}
|
||||
|
||||
func (m *CustomCmdCollector) Read(interval time.Duration, out *[]lp.MutableMetric) {
|
||||
func (m *CustomCmdCollector) Read(interval time.Duration, output chan lp.CCMetric) {
|
||||
if !m.init {
|
||||
return
|
||||
}
|
||||
@ -95,9 +97,9 @@ func (m *CustomCmdCollector) Read(interval time.Duration, out *[]lp.MutableMetri
|
||||
if skip {
|
||||
continue
|
||||
}
|
||||
y, err := lp.New(c.Name(), Tags2Map(c), Fields2Map(c), c.Time())
|
||||
y, err := lp.New(c.Name(), Tags2Map(c), m.meta, Fields2Map(c), c.Time())
|
||||
if err == nil {
|
||||
*out = append(*out, y)
|
||||
output <- y
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -117,9 +119,9 @@ func (m *CustomCmdCollector) Read(interval time.Duration, out *[]lp.MutableMetri
|
||||
if skip {
|
||||
continue
|
||||
}
|
||||
y, err := lp.New(f.Name(), Tags2Map(f), Fields2Map(f), f.Time())
|
||||
y, err := lp.New(f.Name(), Tags2Map(f), m.meta, Fields2Map(f), f.Time())
|
||||
if err == nil {
|
||||
*out = append(*out, y)
|
||||
output <- y
|
||||
}
|
||||
}
|
||||
}
|
||||
|
20
collectors/customCmdMetric.md
Normal file
20
collectors/customCmdMetric.md
Normal file
@ -0,0 +1,20 @@
|
||||
|
||||
## `customcmd` collector
|
||||
|
||||
```json
|
||||
"customcmd": {
|
||||
"exclude_metrics": [
|
||||
"mymetric"
|
||||
],
|
||||
"files" : [
|
||||
"/var/run/myapp.metrics"
|
||||
],
|
||||
"commands" : [
|
||||
"/usr/local/bin/getmetrics.pl"
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
The `customcmd` collector reads data from files and the output of executed commands. The files and commands can output multiple metrics (separated by newline) but the have to be in the [InfluxDB line protocol](https://docs.influxdata.com/influxdb/cloud/reference/syntax/line-protocol/). If a metric is not parsable, it is skipped. If a metric is not required, it can be excluded from forwarding it to the sink.
|
||||
|
||||
|
@ -2,9 +2,7 @@ package collectors
|
||||
|
||||
import (
|
||||
"io/ioutil"
|
||||
|
||||
lp "github.com/influxdata/line-protocol"
|
||||
|
||||
lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
|
||||
// "log"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
@ -21,14 +19,15 @@ type DiskstatCollectorConfig struct {
|
||||
}
|
||||
|
||||
type DiskstatCollector struct {
|
||||
MetricCollector
|
||||
metricCollector
|
||||
matches map[int]string
|
||||
config DiskstatCollectorConfig
|
||||
}
|
||||
|
||||
func (m *DiskstatCollector) Init(config []byte) error {
|
||||
func (m *DiskstatCollector) Init(config json.RawMessage) error {
|
||||
var err error
|
||||
m.name = "DiskstatCollector"
|
||||
m.meta = map[string]string{"source": m.name, "group": "Disk"}
|
||||
m.setup()
|
||||
if len(config) > 0 {
|
||||
err = json.Unmarshal(config, &m.config)
|
||||
@ -73,7 +72,7 @@ func (m *DiskstatCollector) Init(config []byte) error {
|
||||
return err
|
||||
}
|
||||
|
||||
func (m *DiskstatCollector) Read(interval time.Duration, out *[]lp.MutableMetric) {
|
||||
func (m *DiskstatCollector) Read(interval time.Duration, output chan lp.CCMetric) {
|
||||
var lines []string
|
||||
if !m.init {
|
||||
return
|
||||
@ -101,9 +100,9 @@ func (m *DiskstatCollector) Read(interval time.Duration, out *[]lp.MutableMetric
|
||||
if idx < len(f) {
|
||||
x, err := strconv.ParseInt(f[idx], 0, 64)
|
||||
if err == nil {
|
||||
y, err := lp.New(name, tags, map[string]interface{}{"value": int(x)}, time.Now())
|
||||
y, err := lp.New(name, tags, m.meta, map[string]interface{}{"value": int(x)}, time.Now())
|
||||
if err == nil {
|
||||
*out = append(*out, y)
|
||||
output <- y
|
||||
}
|
||||
}
|
||||
}
|
||||
|
34
collectors/diskstatMetric.md
Normal file
34
collectors/diskstatMetric.md
Normal file
@ -0,0 +1,34 @@
|
||||
|
||||
## `diskstat` collector
|
||||
|
||||
```json
|
||||
"diskstat": {
|
||||
"exclude_metrics": [
|
||||
"read_ms"
|
||||
],
|
||||
}
|
||||
```
|
||||
|
||||
The `netstat` collector reads data from `/proc/net/dev` and outputs a handful **node** metrics. If a metric is not required, it can be excluded from forwarding it to the sink.
|
||||
|
||||
Metrics:
|
||||
* `reads`
|
||||
* `reads_merged`
|
||||
* `read_sectors`
|
||||
* `read_ms`
|
||||
* `writes`
|
||||
* `writes_merged`
|
||||
* `writes_sectors`
|
||||
* `writes_ms`
|
||||
* `ioops`
|
||||
* `ioops_ms`
|
||||
* `ioops_weighted_ms`
|
||||
* `discards`
|
||||
* `discards_merged`
|
||||
* `discards_sectors`
|
||||
* `discards_ms`
|
||||
* `flushes`
|
||||
* `flushes_ms`
|
||||
|
||||
The device name is added as tag `device`.
|
||||
|
@ -13,18 +13,20 @@ import (
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
lp "github.com/influxdata/line-protocol"
|
||||
lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
|
||||
)
|
||||
|
||||
type GpfsCollector struct {
|
||||
MetricCollector
|
||||
metricCollector
|
||||
tags map[string]string
|
||||
|
||||
config struct {
|
||||
Mmpmon string `json:"mmpmon"`
|
||||
}
|
||||
}
|
||||
|
||||
func (m *GpfsCollector) Init(config []byte) error {
|
||||
|
||||
func (m *GpfsCollector) Init(config json.RawMessage) error {
|
||||
var err error
|
||||
m.name = "GpfsCollector"
|
||||
m.setup()
|
||||
@ -40,6 +42,14 @@ func (m *GpfsCollector) Init(config []byte) error {
|
||||
return err
|
||||
}
|
||||
}
|
||||
m.meta = map[string]string{
|
||||
"source": m.name,
|
||||
"group": "GPFS",
|
||||
}
|
||||
m.tags = map[string]string{
|
||||
"type": "node",
|
||||
"filesystem": "",
|
||||
}
|
||||
|
||||
// GPFS / IBM Spectrum Scale file system statistics can only be queried by user root
|
||||
user, err := user.Current()
|
||||
@ -60,7 +70,7 @@ func (m *GpfsCollector) Init(config []byte) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *GpfsCollector) Read(interval time.Duration, out *[]lp.MutableMetric) {
|
||||
func (m *GpfsCollector) Read(interval time.Duration, output chan lp.CCMetric) {
|
||||
if !m.init {
|
||||
return
|
||||
}
|
||||
@ -108,6 +118,9 @@ func (m *GpfsCollector) Read(interval time.Duration, out *[]lp.MutableMetric) {
|
||||
continue
|
||||
}
|
||||
|
||||
m.tags["filesystem"] = filesystem
|
||||
|
||||
|
||||
// return code
|
||||
rc, err := strconv.Atoi(key_value["_rc_"])
|
||||
if err != nil {
|
||||
@ -140,17 +153,10 @@ func (m *GpfsCollector) Read(interval time.Duration, out *[]lp.MutableMetric) {
|
||||
key_value["_br_"], err.Error())
|
||||
continue
|
||||
}
|
||||
y, err := lp.New(
|
||||
"gpfs_bytes_read",
|
||||
map[string]string{
|
||||
"filesystem": filesystem,
|
||||
},
|
||||
map[string]interface{}{
|
||||
"value": bytesRead,
|
||||
},
|
||||
timestamp)
|
||||
|
||||
y, err := lp.New("gpfs_bytes_read", m.tags, m.meta, map[string]interface{}{"value": bytesRead}, timestamp)
|
||||
if err == nil {
|
||||
*out = append(*out, y)
|
||||
output <- y
|
||||
}
|
||||
|
||||
// bytes written
|
||||
@ -161,17 +167,10 @@ func (m *GpfsCollector) Read(interval time.Duration, out *[]lp.MutableMetric) {
|
||||
key_value["_bw_"], err.Error())
|
||||
continue
|
||||
}
|
||||
y, err = lp.New(
|
||||
"gpfs_bytes_written",
|
||||
map[string]string{
|
||||
"filesystem": filesystem,
|
||||
},
|
||||
map[string]interface{}{
|
||||
"value": bytesWritten,
|
||||
},
|
||||
timestamp)
|
||||
|
||||
y, err = lp.New("gpfs_bytes_written", m.tags, m.meta, map[string]interface{}{"value": bytesWritten}, timestamp)
|
||||
if err == nil {
|
||||
*out = append(*out, y)
|
||||
output <- y
|
||||
}
|
||||
|
||||
// number of opens
|
||||
@ -182,17 +181,9 @@ func (m *GpfsCollector) Read(interval time.Duration, out *[]lp.MutableMetric) {
|
||||
key_value["_oc_"], err.Error())
|
||||
continue
|
||||
}
|
||||
y, err = lp.New(
|
||||
"gpfs_num_opens",
|
||||
map[string]string{
|
||||
"filesystem": filesystem,
|
||||
},
|
||||
map[string]interface{}{
|
||||
"value": numOpens,
|
||||
},
|
||||
timestamp)
|
||||
y, err = lp.New("gpfs_num_opens", m.tags, m.meta, map[string]interface{}{"value": numOpens}, timestamp)
|
||||
if err == nil {
|
||||
*out = append(*out, y)
|
||||
output <- y
|
||||
}
|
||||
|
||||
// number of closes
|
||||
@ -201,17 +192,9 @@ func (m *GpfsCollector) Read(interval time.Duration, out *[]lp.MutableMetric) {
|
||||
fmt.Fprintf(os.Stderr, "GpfsCollector.Read(): Failed to convert number of closes: %s\n", err.Error())
|
||||
continue
|
||||
}
|
||||
y, err = lp.New(
|
||||
"gpfs_num_closes",
|
||||
map[string]string{
|
||||
"filesystem": filesystem,
|
||||
},
|
||||
map[string]interface{}{
|
||||
"value": numCloses,
|
||||
},
|
||||
timestamp)
|
||||
y, err = lp.New("gpfs_num_closes", m.tags, m.meta, map[string]interface{}{"value": numCloses}, timestamp)
|
||||
if err == nil {
|
||||
*out = append(*out, y)
|
||||
output <- y
|
||||
}
|
||||
|
||||
// number of reads
|
||||
@ -220,17 +203,9 @@ func (m *GpfsCollector) Read(interval time.Duration, out *[]lp.MutableMetric) {
|
||||
fmt.Fprintf(os.Stderr, "GpfsCollector.Read(): Failed to convert number of reads: %s\n", err.Error())
|
||||
continue
|
||||
}
|
||||
y, err = lp.New(
|
||||
"gpfs_num_reads",
|
||||
map[string]string{
|
||||
"filesystem": filesystem,
|
||||
},
|
||||
map[string]interface{}{
|
||||
"value": numReads,
|
||||
},
|
||||
timestamp)
|
||||
y, err = lp.New("gpfs_num_reads", m.tags, m.meta, map[string]interface{}{"value": numReads}, timestamp)
|
||||
if err == nil {
|
||||
*out = append(*out, y)
|
||||
output <- y
|
||||
}
|
||||
|
||||
// number of writes
|
||||
@ -239,17 +214,9 @@ func (m *GpfsCollector) Read(interval time.Duration, out *[]lp.MutableMetric) {
|
||||
fmt.Fprintf(os.Stderr, "GpfsCollector.Read(): Failed to convert number of writes: %s\n", err.Error())
|
||||
continue
|
||||
}
|
||||
y, err = lp.New(
|
||||
"gpfs_num_writes",
|
||||
map[string]string{
|
||||
"filesystem": filesystem,
|
||||
},
|
||||
map[string]interface{}{
|
||||
"value": numWrites,
|
||||
},
|
||||
timestamp)
|
||||
y, err = lp.New("gpfs_num_writes", m.tags, m.meta, map[string]interface{}{"value": numWrites}, timestamp)
|
||||
if err == nil {
|
||||
*out = append(*out, y)
|
||||
output <- y
|
||||
}
|
||||
|
||||
// number of read directories
|
||||
@ -258,17 +225,9 @@ func (m *GpfsCollector) Read(interval time.Duration, out *[]lp.MutableMetric) {
|
||||
fmt.Fprintf(os.Stderr, "GpfsCollector.Read(): Failed to convert number of read directories: %s\n", err.Error())
|
||||
continue
|
||||
}
|
||||
y, err = lp.New(
|
||||
"gpfs_num_readdirs",
|
||||
map[string]string{
|
||||
"filesystem": filesystem,
|
||||
},
|
||||
map[string]interface{}{
|
||||
"value": numReaddirs,
|
||||
},
|
||||
timestamp)
|
||||
y, err = lp.New("gpfs_num_readdirs", m.tags, m.meta, map[string]interface{}{"value": numReaddirs}, timestamp)
|
||||
if err == nil {
|
||||
*out = append(*out, y)
|
||||
output <- y
|
||||
}
|
||||
|
||||
// Number of inode updates
|
||||
@ -277,17 +236,9 @@ func (m *GpfsCollector) Read(interval time.Duration, out *[]lp.MutableMetric) {
|
||||
fmt.Fprintf(os.Stderr, "GpfsCollector.Read(): Failed to convert Number of inode updates: %s\n", err.Error())
|
||||
continue
|
||||
}
|
||||
y, err = lp.New(
|
||||
"gpfs_num_inode_updates",
|
||||
map[string]string{
|
||||
"filesystem": filesystem,
|
||||
},
|
||||
map[string]interface{}{
|
||||
"value": numInodeUpdates,
|
||||
},
|
||||
timestamp)
|
||||
y, err = lp.New("gpfs_num_inode_updates", m.tags, m.meta, map[string]interface{}{"value": numInodeUpdates}, timestamp)
|
||||
if err == nil {
|
||||
*out = append(*out, y)
|
||||
output <- y
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -5,9 +5,7 @@ import (
|
||||
"io/ioutil"
|
||||
"log"
|
||||
"os/exec"
|
||||
|
||||
lp "github.com/influxdata/line-protocol"
|
||||
|
||||
lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
|
||||
// "os"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
@ -28,7 +26,7 @@ type InfinibandCollectorConfig struct {
|
||||
}
|
||||
|
||||
type InfinibandCollector struct {
|
||||
MetricCollector
|
||||
metricCollector
|
||||
tags map[string]string
|
||||
lids map[string]map[string]string
|
||||
config InfinibandCollectorConfig
|
||||
@ -56,11 +54,12 @@ func (m *InfinibandCollector) Help() {
|
||||
fmt.Println("- ib_xmit_pkts")
|
||||
}
|
||||
|
||||
func (m *InfinibandCollector) Init(config []byte) error {
|
||||
func (m *InfinibandCollector) Init(config json.RawMessage) error {
|
||||
var err error
|
||||
m.name = "InfinibandCollector"
|
||||
m.use_perfquery = false
|
||||
m.setup()
|
||||
m.meta = map[string]string{"source": m.name, "group": "Network"}
|
||||
m.tags = map[string]string{"type": "node"}
|
||||
if len(config) > 0 {
|
||||
err = json.Unmarshal(config, &m.config)
|
||||
@ -117,7 +116,7 @@ func (m *InfinibandCollector) Init(config []byte) error {
|
||||
return err
|
||||
}
|
||||
|
||||
func DoPerfQuery(cmd string, dev string, lid string, port string, tags map[string]string, out *[]lp.MutableMetric) error {
|
||||
func (m *InfinibandCollector) doPerfQuery(cmd string, dev string, lid string, port string, tags map[string]string, output chan lp.CCMetric) error {
|
||||
|
||||
args := fmt.Sprintf("-r %s %s 0xf000", lid, port)
|
||||
command := exec.Command(cmd, args)
|
||||
@ -134,9 +133,9 @@ func DoPerfQuery(cmd string, dev string, lid string, port string, tags map[strin
|
||||
lv := strings.Fields(line)
|
||||
v, err := strconv.ParseFloat(lv[1], 64)
|
||||
if err == nil {
|
||||
y, err := lp.New("ib_recv", tags, map[string]interface{}{"value": float64(v)}, time.Now())
|
||||
y, err := lp.New("ib_recv", tags, m.meta, map[string]interface{}{"value": float64(v)}, time.Now())
|
||||
if err == nil {
|
||||
*out = append(*out, y)
|
||||
output <- y
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -144,9 +143,9 @@ func DoPerfQuery(cmd string, dev string, lid string, port string, tags map[strin
|
||||
lv := strings.Fields(line)
|
||||
v, err := strconv.ParseFloat(lv[1], 64)
|
||||
if err == nil {
|
||||
y, err := lp.New("ib_xmit", tags, map[string]interface{}{"value": float64(v)}, time.Now())
|
||||
y, err := lp.New("ib_xmit", tags, m.meta, map[string]interface{}{"value": float64(v)}, time.Now())
|
||||
if err == nil {
|
||||
*out = append(*out, y)
|
||||
output <- y
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -154,9 +153,9 @@ func DoPerfQuery(cmd string, dev string, lid string, port string, tags map[strin
|
||||
lv := strings.Fields(line)
|
||||
v, err := strconv.ParseFloat(lv[1], 64)
|
||||
if err == nil {
|
||||
y, err := lp.New("ib_recv_pkts", tags, map[string]interface{}{"value": float64(v)}, time.Now())
|
||||
y, err := lp.New("ib_recv_pkts", tags, m.meta, map[string]interface{}{"value": float64(v)}, time.Now())
|
||||
if err == nil {
|
||||
*out = append(*out, y)
|
||||
output <- y
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -164,9 +163,29 @@ func DoPerfQuery(cmd string, dev string, lid string, port string, tags map[strin
|
||||
lv := strings.Fields(line)
|
||||
v, err := strconv.ParseFloat(lv[1], 64)
|
||||
if err == nil {
|
||||
y, err := lp.New("ib_xmit_pkts", tags, map[string]interface{}{"value": float64(v)}, time.Now())
|
||||
y, err := lp.New("ib_xmit_pkts", tags, m.meta, map[string]interface{}{"value": float64(v)}, time.Now())
|
||||
if err == nil {
|
||||
*out = append(*out, y)
|
||||
output <- y
|
||||
}
|
||||
}
|
||||
}
|
||||
if strings.HasPrefix(line, "PortRcvPkts") || strings.HasPrefix(line, "RcvPkts") {
|
||||
lv := strings.Fields(line)
|
||||
v, err := strconv.ParseFloat(lv[1], 64)
|
||||
if err == nil {
|
||||
y, err := lp.New("ib_recv_pkts", tags, m.meta, map[string]interface{}{"value": float64(v)}, time.Now())
|
||||
if err == nil {
|
||||
output <- y
|
||||
}
|
||||
}
|
||||
}
|
||||
if strings.HasPrefix(line, "PortXmitPkts") || strings.HasPrefix(line, "XmtPkts") {
|
||||
lv := strings.Fields(line)
|
||||
v, err := strconv.ParseFloat(lv[1], 64)
|
||||
if err == nil {
|
||||
y, err := lp.New("ib_xmit_pkts", tags, m.meta, map[string]interface{}{"value": float64(v)}, time.Now())
|
||||
if err == nil {
|
||||
output <- y
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -174,16 +193,16 @@ func DoPerfQuery(cmd string, dev string, lid string, port string, tags map[strin
|
||||
return nil
|
||||
}
|
||||
|
||||
func DoSysfsRead(dev string, lid string, port string, tags map[string]string, out *[]lp.MutableMetric) error {
|
||||
func (m *InfinibandCollector) doSysfsRead(dev string, lid string, port string, tags map[string]string, output chan lp.CCMetric) error {
|
||||
path := fmt.Sprintf("%s/%s/ports/%s/counters/", string(IBBASEPATH), dev, port)
|
||||
buffer, err := ioutil.ReadFile(fmt.Sprintf("%s/port_rcv_data", path))
|
||||
if err == nil {
|
||||
data := strings.Replace(string(buffer), "\n", "", -1)
|
||||
v, err := strconv.ParseFloat(data, 64)
|
||||
if err == nil {
|
||||
y, err := lp.New("ib_recv", tags, map[string]interface{}{"value": float64(v)}, time.Now())
|
||||
y, err := lp.New("ib_recv", tags, m.meta, map[string]interface{}{"value": float64(v)}, time.Now())
|
||||
if err == nil {
|
||||
*out = append(*out, y)
|
||||
output <- y
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -192,9 +211,9 @@ func DoSysfsRead(dev string, lid string, port string, tags map[string]string, ou
|
||||
data := strings.Replace(string(buffer), "\n", "", -1)
|
||||
v, err := strconv.ParseFloat(data, 64)
|
||||
if err == nil {
|
||||
y, err := lp.New("ib_xmit", tags, map[string]interface{}{"value": float64(v)}, time.Now())
|
||||
y, err := lp.New("ib_xmit", tags, m.meta, map[string]interface{}{"value": float64(v)}, time.Now())
|
||||
if err == nil {
|
||||
*out = append(*out, y)
|
||||
output <- y
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -203,9 +222,9 @@ func DoSysfsRead(dev string, lid string, port string, tags map[string]string, ou
|
||||
data := strings.Replace(string(buffer), "\n", "", -1)
|
||||
v, err := strconv.ParseFloat(data, 64)
|
||||
if err == nil {
|
||||
y, err := lp.New("ib_recv_pkts", tags, map[string]interface{}{"value": float64(v)}, time.Now())
|
||||
y, err := lp.New("ib_recv_pkts", tags, m.meta, map[string]interface{}{"value": float64(v)}, time.Now())
|
||||
if err == nil {
|
||||
*out = append(*out, y)
|
||||
output <- y
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -214,71 +233,29 @@ func DoSysfsRead(dev string, lid string, port string, tags map[string]string, ou
|
||||
data := strings.Replace(string(buffer), "\n", "", -1)
|
||||
v, err := strconv.ParseFloat(data, 64)
|
||||
if err == nil {
|
||||
y, err := lp.New("ib_xmit_pkts", tags, map[string]interface{}{"value": float64(v)}, time.Now())
|
||||
y, err := lp.New("ib_xmit_pkts", tags, m.meta, map[string]interface{}{"value": float64(v)}, time.Now())
|
||||
if err == nil {
|
||||
*out = append(*out, y)
|
||||
output <- y
|
||||
}
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *InfinibandCollector) Read(interval time.Duration, out *[]lp.MutableMetric) {
|
||||
func (m *InfinibandCollector) Read(interval time.Duration, output chan lp.CCMetric) {
|
||||
|
||||
if m.init {
|
||||
for dev, ports := range m.lids {
|
||||
for port, lid := range ports {
|
||||
tags := map[string]string{"type": "node", "device": dev, "port": port}
|
||||
if m.use_perfquery {
|
||||
DoPerfQuery(m.config.PerfQueryPath, dev, lid, port, tags, out)
|
||||
m.doPerfQuery(m.config.PerfQueryPath, dev, lid, port, tags, output)
|
||||
} else {
|
||||
DoSysfsRead(dev, lid, port, tags, out)
|
||||
m.doSysfsRead(dev, lid, port, tags, output)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// buffer, err := ioutil.ReadFile(string(LIDFILE))
|
||||
|
||||
// if err != nil {
|
||||
// log.Print(err)
|
||||
// return
|
||||
// }
|
||||
|
||||
// args := fmt.Sprintf("-r %s 1 0xf000", string(buffer))
|
||||
|
||||
// command := exec.Command(PERFQUERY, args)
|
||||
// command.Wait()
|
||||
// stdout, err := command.Output()
|
||||
// if err != nil {
|
||||
// log.Print(err)
|
||||
// return
|
||||
// }
|
||||
|
||||
// ll := strings.Split(string(stdout), "\n")
|
||||
|
||||
// for _, line := range ll {
|
||||
// if strings.HasPrefix(line, "PortRcvData") || strings.HasPrefix(line, "RcvData") {
|
||||
// lv := strings.Fields(line)
|
||||
// v, err := strconv.ParseFloat(lv[1], 64)
|
||||
// if err == nil {
|
||||
// y, err := lp.New("ib_recv", m.tags, map[string]interface{}{"value": float64(v)}, time.Now())
|
||||
// if err == nil {
|
||||
// *out = append(*out, y)
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
// if strings.HasPrefix(line, "PortXmitData") || strings.HasPrefix(line, "XmtData") {
|
||||
// lv := strings.Fields(line)
|
||||
// v, err := strconv.ParseFloat(lv[1], 64)
|
||||
// if err == nil {
|
||||
// y, err := lp.New("ib_xmit", m.tags, map[string]interface{}{"value": float64(v)}, time.Now())
|
||||
// if err == nil {
|
||||
// *out = append(*out, y)
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
}
|
||||
|
||||
func (m *InfinibandCollector) Close() {
|
||||
|
19
collectors/infinibandMetric.md
Normal file
19
collectors/infinibandMetric.md
Normal file
@ -0,0 +1,19 @@
|
||||
|
||||
## `ibstat` collector
|
||||
|
||||
```json
|
||||
"ibstat": {
|
||||
"perfquery_path" : "<path to perfquery command>",
|
||||
"exclude_devices": [
|
||||
"mlx4"
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
The `ibstat` collector reads either data through the `perfquery` command or the sysfs files below `/sys/class/infiniband/<device>`.
|
||||
|
||||
Metrics:
|
||||
* `ib_recv`
|
||||
* `ib_xmit`
|
||||
|
||||
The collector adds a `device` tag to all metrics
|
@ -9,8 +9,7 @@ import (
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
lp "github.com/influxdata/line-protocol"
|
||||
lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
|
||||
)
|
||||
|
||||
const IPMITOOL_PATH = `/usr/bin/ipmitool`
|
||||
@ -23,15 +22,16 @@ type IpmiCollectorConfig struct {
|
||||
}
|
||||
|
||||
type IpmiCollector struct {
|
||||
MetricCollector
|
||||
metricCollector
|
||||
tags map[string]string
|
||||
matches map[string]string
|
||||
config IpmiCollectorConfig
|
||||
}
|
||||
|
||||
func (m *IpmiCollector) Init(config []byte) error {
|
||||
func (m *IpmiCollector) Init(config json.RawMessage) error {
|
||||
m.name = "IpmiCollector"
|
||||
m.setup()
|
||||
m.meta = map[string]string{"source": m.name, "group": "IPMI"}
|
||||
if len(config) > 0 {
|
||||
err := json.Unmarshal(config, &m.config)
|
||||
if err != nil {
|
||||
@ -53,7 +53,7 @@ func (m *IpmiCollector) Init(config []byte) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func ReadIpmiTool(cmd string, out *[]lp.MutableMetric) {
|
||||
func (m *IpmiCollector) readIpmiTool(cmd string, output chan lp.CCMetric) {
|
||||
command := exec.Command(cmd, "sensor")
|
||||
command.Wait()
|
||||
stdout, err := command.Output()
|
||||
@ -74,24 +74,25 @@ func ReadIpmiTool(cmd string, out *[]lp.MutableMetric) {
|
||||
name := strings.ToLower(strings.Replace(strings.Trim(lv[0], " "), " ", "_", -1))
|
||||
unit := strings.Trim(lv[2], " ")
|
||||
if unit == "Volts" {
|
||||
unit = "V"
|
||||
unit = "Volts"
|
||||
} else if unit == "degrees C" {
|
||||
unit = "C"
|
||||
unit = "degC"
|
||||
} else if unit == "degrees F" {
|
||||
unit = "F"
|
||||
unit = "degF"
|
||||
} else if unit == "Watts" {
|
||||
unit = "W"
|
||||
unit = "Watts"
|
||||
}
|
||||
|
||||
y, err := lp.New(name, map[string]string{"unit": unit, "type": "node"}, map[string]interface{}{"value": v}, time.Now())
|
||||
y, err := lp.New(name, map[string]string{"type": "node"}, m.meta, map[string]interface{}{"value": v}, time.Now())
|
||||
if err == nil {
|
||||
*out = append(*out, y)
|
||||
y.AddMeta("unit", unit)
|
||||
output <- y
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func ReadIpmiSensors(cmd string, out *[]lp.MutableMetric) {
|
||||
func (m *IpmiCollector) readIpmiSensors(cmd string, output chan lp.CCMetric) {
|
||||
|
||||
command := exec.Command(cmd, "--comma-separated-output", "--sdr-cache-recreate")
|
||||
command.Wait()
|
||||
@ -109,25 +110,28 @@ func ReadIpmiSensors(cmd string, out *[]lp.MutableMetric) {
|
||||
v, err := strconv.ParseFloat(lv[3], 64)
|
||||
if err == nil {
|
||||
name := strings.ToLower(strings.Replace(lv[1], " ", "_", -1))
|
||||
y, err := lp.New(name, map[string]string{"unit": lv[4], "type": "node"}, map[string]interface{}{"value": v}, time.Now())
|
||||
y, err := lp.New(name, map[string]string{"type": "node"}, m.meta, map[string]interface{}{"value": v}, time.Now())
|
||||
if err == nil {
|
||||
*out = append(*out, y)
|
||||
if len(lv) > 4 {
|
||||
y.AddMeta("unit", lv[4])
|
||||
}
|
||||
output <- y
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (m *IpmiCollector) Read(interval time.Duration, out *[]lp.MutableMetric) {
|
||||
func (m *IpmiCollector) Read(interval time.Duration, output chan lp.CCMetric) {
|
||||
if len(m.config.IpmitoolPath) > 0 {
|
||||
_, err := os.Stat(m.config.IpmitoolPath)
|
||||
if err == nil {
|
||||
ReadIpmiTool(m.config.IpmitoolPath, out)
|
||||
m.readIpmiTool(m.config.IpmitoolPath, output)
|
||||
}
|
||||
} else if len(m.config.IpmisensorsPath) > 0 {
|
||||
_, err := os.Stat(m.config.IpmisensorsPath)
|
||||
if err == nil {
|
||||
ReadIpmiSensors(m.config.IpmisensorsPath, out)
|
||||
m.readIpmiSensors(m.config.IpmisensorsPath, output)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
16
collectors/ipmiMetric.md
Normal file
16
collectors/ipmiMetric.md
Normal file
@ -0,0 +1,16 @@
|
||||
|
||||
## `ipmistat` collector
|
||||
|
||||
```json
|
||||
"ipmistat": {
|
||||
"ipmitool_path": "/path/to/ipmitool",
|
||||
"ipmisensors_path": "/path/to/ipmi-sensors",
|
||||
}
|
||||
```
|
||||
|
||||
The `ipmistat` collector reads data from `ipmitool` (`ipmitool sensor`) or `ipmi-sensors` (`ipmi-sensors --sdr-cache-recreate --comma-separated-output`).
|
||||
|
||||
The metrics depend on the output of the underlying tools but contain temperature, power and energy metrics.
|
||||
|
||||
|
||||
|
@ -20,16 +20,28 @@ import (
|
||||
"strings"
|
||||
"time"
|
||||
"unsafe"
|
||||
|
||||
lp "github.com/influxdata/line-protocol"
|
||||
lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
|
||||
"gopkg.in/Knetic/govaluate.v2"
|
||||
)
|
||||
|
||||
type MetricScope int
|
||||
|
||||
const (
|
||||
METRIC_SCOPE_HWTHREAD = iota
|
||||
METRIC_SCOPE_SOCKET
|
||||
METRIC_SCOPE_NUMA
|
||||
METRIC_SCOPE_NODE
|
||||
)
|
||||
|
||||
func (ms MetricScope) String() string {
|
||||
return []string{"Head", "Shoulder", "Knee", "Toe"}[ms]
|
||||
}
|
||||
|
||||
type LikwidCollectorMetricConfig struct {
|
||||
Name string `json:"name"`
|
||||
Calc string `json:"calc"`
|
||||
Socket_scope bool `json:"socket_scope"`
|
||||
Publish bool `json:"publish"`
|
||||
Name string `json:"name"`
|
||||
Calc string `json:"calc"`
|
||||
Scope MetricScope `json:"socket_scope"`
|
||||
Publish bool `json:"publish"`
|
||||
}
|
||||
|
||||
type LikwidCollectorEventsetConfig struct {
|
||||
@ -45,7 +57,7 @@ type LikwidCollectorConfig struct {
|
||||
}
|
||||
|
||||
type LikwidCollector struct {
|
||||
MetricCollector
|
||||
metricCollector
|
||||
cpulist []C.int
|
||||
sock2tid map[int]int
|
||||
metrics map[C.int]map[string]int
|
||||
@ -105,7 +117,7 @@ func getSocketCpus() map[C.int]int {
|
||||
return outmap
|
||||
}
|
||||
|
||||
func (m *LikwidCollector) Init(config []byte) error {
|
||||
func (m *LikwidCollector) Init(config json.RawMessage) error {
|
||||
var ret C.int
|
||||
m.name = "LikwidCollector"
|
||||
if len(config) > 0 {
|
||||
@ -115,11 +127,13 @@ func (m *LikwidCollector) Init(config []byte) error {
|
||||
}
|
||||
}
|
||||
m.setup()
|
||||
m.meta = map[string]string{"source": m.name, "group": "PerfCounter"}
|
||||
cpulist := CpuList()
|
||||
m.cpulist = make([]C.int, len(cpulist))
|
||||
slist := getSocketCpus()
|
||||
|
||||
m.sock2tid = make(map[int]int)
|
||||
// m.numa2tid = make(map[int]int)
|
||||
for i, c := range cpulist {
|
||||
m.cpulist[i] = C.int(c)
|
||||
if sid, found := slist[m.cpulist[i]]; found {
|
||||
@ -169,7 +183,7 @@ func (m *LikwidCollector) Init(config []byte) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *LikwidCollector) Read(interval time.Duration, out *[]lp.MutableMetric) {
|
||||
func (m *LikwidCollector) Read(interval time.Duration, output chan lp.CCMetric) {
|
||||
if !m.init {
|
||||
return
|
||||
}
|
||||
@ -246,24 +260,28 @@ func (m *LikwidCollector) Read(interval time.Duration, out *[]lp.MutableMetric)
|
||||
for _, metric := range evset.Metrics {
|
||||
_, skip := stringArrayContains(m.config.ExcludeMetrics, metric.Name)
|
||||
if metric.Publish && !skip {
|
||||
if metric.Socket_scope {
|
||||
if metric.Scope.String() == "socket" {
|
||||
for sid, tid := range m.sock2tid {
|
||||
y, err := lp.New(metric.Name,
|
||||
map[string]string{"type": "socket", "type-id": fmt.Sprintf("%d", int(sid))},
|
||||
map[string]string{"type": "socket",
|
||||
"type-id": fmt.Sprintf("%d", int(sid))},
|
||||
m.meta,
|
||||
map[string]interface{}{"value": m.mresults[i][tid][metric.Name]},
|
||||
time.Now())
|
||||
if err == nil {
|
||||
*out = append(*out, y)
|
||||
output <- y
|
||||
}
|
||||
}
|
||||
} else {
|
||||
} else if metric.Scope.String() == "hwthread" {
|
||||
for tid, cpu := range m.cpulist {
|
||||
y, err := lp.New(metric.Name,
|
||||
map[string]string{"type": "cpu", "type-id": fmt.Sprintf("%d", int(cpu))},
|
||||
map[string]string{"type": "cpu",
|
||||
"type-id": fmt.Sprintf("%d", int(cpu))},
|
||||
m.meta,
|
||||
map[string]interface{}{"value": m.mresults[i][tid][metric.Name]},
|
||||
time.Now())
|
||||
if err == nil {
|
||||
*out = append(*out, y)
|
||||
output <- y
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -273,24 +291,28 @@ func (m *LikwidCollector) Read(interval time.Duration, out *[]lp.MutableMetric)
|
||||
for _, metric := range m.config.Metrics {
|
||||
_, skip := stringArrayContains(m.config.ExcludeMetrics, metric.Name)
|
||||
if metric.Publish && !skip {
|
||||
if metric.Socket_scope {
|
||||
if metric.Scope.String() == "socket" {
|
||||
for sid, tid := range m.sock2tid {
|
||||
y, err := lp.New(metric.Name,
|
||||
map[string]string{"type": "socket", "type-id": fmt.Sprintf("%d", int(sid))},
|
||||
map[string]string{"type": "socket",
|
||||
"type-id": fmt.Sprintf("%d", int(sid))},
|
||||
m.meta,
|
||||
map[string]interface{}{"value": m.gmresults[tid][metric.Name]},
|
||||
time.Now())
|
||||
if err == nil {
|
||||
*out = append(*out, y)
|
||||
output <- y
|
||||
}
|
||||
}
|
||||
} else {
|
||||
for tid, cpu := range m.cpulist {
|
||||
y, err := lp.New(metric.Name,
|
||||
map[string]string{"type": "cpu", "type-id": fmt.Sprintf("%d", int(cpu))},
|
||||
map[string]string{"type": "cpu",
|
||||
"type-id": fmt.Sprintf("%d", int(cpu))},
|
||||
m.meta,
|
||||
map[string]interface{}{"value": m.gmresults[tid][metric.Name]},
|
||||
time.Now())
|
||||
if err == nil {
|
||||
*out = append(*out, y)
|
||||
output <- y
|
||||
}
|
||||
}
|
||||
}
|
||||
|
119
collectors/likwidMetric.md
Normal file
119
collectors/likwidMetric.md
Normal file
@ -0,0 +1,119 @@
|
||||
|
||||
## `likwid` collector
|
||||
```json
|
||||
"likwid": {
|
||||
"eventsets": [
|
||||
{
|
||||
"events": {
|
||||
"FIXC1": "ACTUAL_CPU_CLOCK",
|
||||
"FIXC2": "MAX_CPU_CLOCK",
|
||||
"PMC0": "RETIRED_INSTRUCTIONS",
|
||||
"PMC1": "CPU_CLOCKS_UNHALTED",
|
||||
"PMC2": "RETIRED_SSE_AVX_FLOPS_ALL",
|
||||
"PMC3": "MERGE",
|
||||
"DFC0": "DRAM_CHANNEL_0",
|
||||
"DFC1": "DRAM_CHANNEL_1",
|
||||
"DFC2": "DRAM_CHANNEL_2",
|
||||
"DFC3": "DRAM_CHANNEL_3"
|
||||
},
|
||||
"metrics": [
|
||||
{
|
||||
"name": "ipc",
|
||||
"calc": "PMC0/PMC1",
|
||||
"socket_scope": false,
|
||||
"publish": true
|
||||
},
|
||||
{
|
||||
"name": "flops_any",
|
||||
"calc": "0.000001*PMC2/time",
|
||||
"socket_scope": false,
|
||||
"publish": true
|
||||
},
|
||||
{
|
||||
"name": "clock_mhz",
|
||||
"calc": "0.000001*(FIXC1/FIXC2)/inverseClock",
|
||||
"socket_scope": false,
|
||||
"publish": true
|
||||
},
|
||||
{
|
||||
"name": "mem1",
|
||||
"calc": "0.000001*(DFC0+DFC1+DFC2+DFC3)*64.0/time",
|
||||
"socket_scope": true,
|
||||
"publish": false
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"events": {
|
||||
"DFC0": "DRAM_CHANNEL_4",
|
||||
"DFC1": "DRAM_CHANNEL_5",
|
||||
"DFC2": "DRAM_CHANNEL_6",
|
||||
"DFC3": "DRAM_CHANNEL_7",
|
||||
"PWR0": "RAPL_CORE_ENERGY",
|
||||
"PWR1": "RAPL_PKG_ENERGY"
|
||||
},
|
||||
"metrics": [
|
||||
{
|
||||
"name": "pwr_core",
|
||||
"calc": "PWR0/time",
|
||||
"socket_scope": false,
|
||||
"publish": true
|
||||
},
|
||||
{
|
||||
"name": "pwr_pkg",
|
||||
"calc": "PWR1/time",
|
||||
"socket_scope": true,
|
||||
"publish": true
|
||||
},
|
||||
{
|
||||
"name": "mem2",
|
||||
"calc": "0.000001*(DFC0+DFC1+DFC2+DFC3)*64.0/time",
|
||||
"socket_scope": true,
|
||||
"publish": false
|
||||
}
|
||||
]
|
||||
}
|
||||
],
|
||||
"globalmetrics": [
|
||||
{
|
||||
"name": "mem_bw",
|
||||
"calc": "mem1+mem2",
|
||||
"socket_scope": true,
|
||||
"publish": true
|
||||
}
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
_Example config suitable for AMD Zen3_
|
||||
|
||||
The `likwid` collector reads hardware performance counters at a **hwthread** and **socket** level. The configuration looks quite complicated but it is basically copy&paste from [LIKWID's performance groups](https://github.com/RRZE-HPC/likwid/tree/master/groups). The collector made multiple iterations and tried to use the performance groups but it lacked flexibility. The current way of configuration provides most flexibility.
|
||||
|
||||
The logic is as following: There are multiple eventsets, each consisting of a list of counters+events and a list of metrics. If you compare a common performance group with the example setting above, there is not much difference:
|
||||
```
|
||||
EVENTSET -> "events": {
|
||||
FIXC1 ACTUAL_CPU_CLOCK -> "FIXC1": "ACTUAL_CPU_CLOCK",
|
||||
FIXC2 MAX_CPU_CLOCK -> "FIXC2": "MAX_CPU_CLOCK",
|
||||
PMC0 RETIRED_INSTRUCTIONS -> "PMC0" : "RETIRED_INSTRUCTIONS",
|
||||
PMC1 CPU_CLOCKS_UNHALTED -> "PMC1" : "CPU_CLOCKS_UNHALTED",
|
||||
PMC2 RETIRED_SSE_AVX_FLOPS_ALL -> "PMC2": "RETIRED_SSE_AVX_FLOPS_ALL",
|
||||
PMC3 MERGE -> "PMC3": "MERGE",
|
||||
-> }
|
||||
```
|
||||
|
||||
The metrics are following the same procedure:
|
||||
|
||||
```
|
||||
METRICS -> "metrics": [
|
||||
IPC PMC0/PMC1 -> {
|
||||
-> "name" : "IPC",
|
||||
-> "calc" : "PMC0/PMC1",
|
||||
-> "socket_scope": false,
|
||||
-> "publish": true
|
||||
-> }
|
||||
-> ]
|
||||
```
|
||||
|
||||
The `socket_scope` option tells whether it is submitted per socket or per hwthread. If a metric is only used for internal calculations, you can set `publish = false`.
|
||||
|
||||
Since some metrics can only be gathered in multiple measurements (like the memory bandwidth on AMD Zen3 chips), configure multiple eventsets like in the example config and use the `globalmetrics` section to combine them. **Be aware** that the combination might be misleading because the "behavior" of a metric changes over time and the multiple measurements might count different computing phases.
|
@ -6,8 +6,7 @@ import (
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
lp "github.com/influxdata/line-protocol"
|
||||
lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
|
||||
)
|
||||
|
||||
const LOADAVGFILE = `/proc/loadavg`
|
||||
@ -17,14 +16,14 @@ type LoadavgCollectorConfig struct {
|
||||
}
|
||||
|
||||
type LoadavgCollector struct {
|
||||
MetricCollector
|
||||
metricCollector
|
||||
tags map[string]string
|
||||
load_matches []string
|
||||
proc_matches []string
|
||||
config LoadavgCollectorConfig
|
||||
}
|
||||
|
||||
func (m *LoadavgCollector) Init(config []byte) error {
|
||||
func (m *LoadavgCollector) Init(config json.RawMessage) error {
|
||||
m.name = "LoadavgCollector"
|
||||
m.setup()
|
||||
if len(config) > 0 {
|
||||
@ -33,6 +32,7 @@ func (m *LoadavgCollector) Init(config []byte) error {
|
||||
return err
|
||||
}
|
||||
}
|
||||
m.meta = map[string]string{"source": m.name, "group": "LOAD"}
|
||||
m.tags = map[string]string{"type": "node"}
|
||||
m.load_matches = []string{"load_one", "load_five", "load_fifteen"}
|
||||
m.proc_matches = []string{"proc_run", "proc_total"}
|
||||
@ -40,7 +40,7 @@ func (m *LoadavgCollector) Init(config []byte) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *LoadavgCollector) Read(interval time.Duration, out *[]lp.MutableMetric) {
|
||||
func (m *LoadavgCollector) Read(interval time.Duration, output chan lp.CCMetric) {
|
||||
var skip bool
|
||||
if !m.init {
|
||||
return
|
||||
@ -56,9 +56,9 @@ func (m *LoadavgCollector) Read(interval time.Duration, out *[]lp.MutableMetric)
|
||||
x, err := strconv.ParseFloat(ls[i], 64)
|
||||
if err == nil {
|
||||
_, skip = stringArrayContains(m.config.ExcludeMetrics, name)
|
||||
y, err := lp.New(name, m.tags, map[string]interface{}{"value": float64(x)}, time.Now())
|
||||
y, err := lp.New(name, m.tags, m.meta, map[string]interface{}{"value": float64(x)}, time.Now())
|
||||
if err == nil && !skip {
|
||||
*out = append(*out, y)
|
||||
output <- y
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -67,9 +67,9 @@ func (m *LoadavgCollector) Read(interval time.Duration, out *[]lp.MutableMetric)
|
||||
x, err := strconv.ParseFloat(lv[i], 64)
|
||||
if err == nil {
|
||||
_, skip = stringArrayContains(m.config.ExcludeMetrics, name)
|
||||
y, err := lp.New(name, m.tags, map[string]interface{}{"value": float64(x)}, time.Now())
|
||||
y, err := lp.New(name, m.tags, m.meta, map[string]interface{}{"value": float64(x)}, time.Now())
|
||||
if err == nil && !skip {
|
||||
*out = append(*out, y)
|
||||
output <- y
|
||||
}
|
||||
}
|
||||
}
|
||||
|
19
collectors/loadavgMetric.md
Normal file
19
collectors/loadavgMetric.md
Normal file
@ -0,0 +1,19 @@
|
||||
|
||||
## `loadavg` collector
|
||||
|
||||
```json
|
||||
"loadavg": {
|
||||
"exclude_metrics": [
|
||||
"proc_run"
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
The `loadavg` collector reads data from `/proc/loadavg` and outputs a handful **node** metrics. If a metric is not required, it can be excluded from forwarding it to the sink.
|
||||
|
||||
Metrics:
|
||||
* `load_one`
|
||||
* `load_five`
|
||||
* `load_fifteen`
|
||||
* `proc_run`
|
||||
* `proc_total`
|
@ -8,8 +8,7 @@ import (
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
lp "github.com/influxdata/line-protocol"
|
||||
lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
|
||||
)
|
||||
|
||||
const LUSTREFILE = `/proc/fs/lustre/llite/lnec-XXXXXX/stats`
|
||||
@ -20,14 +19,14 @@ type LustreCollectorConfig struct {
|
||||
}
|
||||
|
||||
type LustreCollector struct {
|
||||
MetricCollector
|
||||
metricCollector
|
||||
tags map[string]string
|
||||
matches map[string]map[string]int
|
||||
devices []string
|
||||
config LustreCollectorConfig
|
||||
}
|
||||
|
||||
func (m *LustreCollector) Init(config []byte) error {
|
||||
func (m *LustreCollector) Init(config json.RawMessage) error {
|
||||
var err error
|
||||
m.name = "LustreCollector"
|
||||
if len(config) > 0 {
|
||||
@ -38,6 +37,7 @@ func (m *LustreCollector) Init(config []byte) error {
|
||||
}
|
||||
m.setup()
|
||||
m.tags = map[string]string{"type": "node"}
|
||||
m.meta = map[string]string{"source": m.name, "group": "Lustre"}
|
||||
m.matches = map[string]map[string]int{"read_bytes": {"read_bytes": 6, "read_requests": 1},
|
||||
"write_bytes": {"write_bytes": 6, "write_requests": 1},
|
||||
"open": {"open": 1},
|
||||
@ -64,7 +64,7 @@ func (m *LustreCollector) Init(config []byte) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *LustreCollector) Read(interval time.Duration, out *[]lp.MutableMetric) {
|
||||
func (m *LustreCollector) Read(interval time.Duration, output chan lp.CCMetric) {
|
||||
if !m.init {
|
||||
return
|
||||
}
|
||||
@ -88,9 +88,12 @@ func (m *LustreCollector) Read(interval time.Duration, out *[]lp.MutableMetric)
|
||||
}
|
||||
x, err := strconv.ParseInt(lf[idx], 0, 64)
|
||||
if err == nil {
|
||||
y, err := lp.New(name, m.tags, map[string]interface{}{"value": x}, time.Now())
|
||||
y, err := lp.New(name, m.tags, m.meta, map[string]interface{}{"value": x}, time.Now())
|
||||
if err == nil {
|
||||
*out = append(*out, y)
|
||||
if strings.Contains(name, "byte") {
|
||||
y.AddMeta("unit", "Byte")
|
||||
}
|
||||
output <- y
|
||||
}
|
||||
}
|
||||
}
|
||||
|
29
collectors/lustreMetric.md
Normal file
29
collectors/lustreMetric.md
Normal file
@ -0,0 +1,29 @@
|
||||
|
||||
## `lustrestat` collector
|
||||
|
||||
```json
|
||||
"lustrestat": {
|
||||
"procfiles" : [
|
||||
"/proc/fs/lustre/llite/lnec-XXXXXX/stats"
|
||||
],
|
||||
"exclude_metrics": [
|
||||
"setattr",
|
||||
"getattr"
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
The `lustrestat` collector reads from the procfs stat files for Lustre like `/proc/fs/lustre/llite/lnec-XXXXXX/stats`.
|
||||
|
||||
Metrics:
|
||||
* `read_bytes`
|
||||
* `read_requests`
|
||||
* `write_bytes`
|
||||
* `write_requests`
|
||||
* `open`
|
||||
* `close`
|
||||
* `getattr`
|
||||
* `setattr`
|
||||
* `statfs`
|
||||
* `inode_permission`
|
||||
|
@ -9,8 +9,7 @@ import (
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
lp "github.com/influxdata/line-protocol"
|
||||
lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
|
||||
)
|
||||
|
||||
const MEMSTATFILE = `/proc/meminfo`
|
||||
@ -20,14 +19,14 @@ type MemstatCollectorConfig struct {
|
||||
}
|
||||
|
||||
type MemstatCollector struct {
|
||||
MetricCollector
|
||||
metricCollector
|
||||
stats map[string]int64
|
||||
tags map[string]string
|
||||
matches map[string]string
|
||||
config MemstatCollectorConfig
|
||||
}
|
||||
|
||||
func (m *MemstatCollector) Init(config []byte) error {
|
||||
func (m *MemstatCollector) Init(config json.RawMessage) error {
|
||||
var err error
|
||||
m.name = "MemstatCollector"
|
||||
if len(config) > 0 {
|
||||
@ -36,6 +35,7 @@ func (m *MemstatCollector) Init(config []byte) error {
|
||||
return err
|
||||
}
|
||||
}
|
||||
m.meta = map[string]string{"source": m.name, "group": "Memory", "unit": "kByte"}
|
||||
m.stats = make(map[string]int64)
|
||||
m.matches = make(map[string]string)
|
||||
m.tags = map[string]string{"type": "node"}
|
||||
@ -65,7 +65,7 @@ func (m *MemstatCollector) Init(config []byte) error {
|
||||
return err
|
||||
}
|
||||
|
||||
func (m *MemstatCollector) Read(interval time.Duration, out *[]lp.MutableMetric) {
|
||||
func (m *MemstatCollector) Read(interval time.Duration, output chan lp.CCMetric) {
|
||||
if !m.init {
|
||||
return
|
||||
}
|
||||
@ -97,9 +97,9 @@ func (m *MemstatCollector) Read(interval time.Duration, out *[]lp.MutableMetric)
|
||||
log.Print(err)
|
||||
continue
|
||||
}
|
||||
y, err := lp.New(name, m.tags, map[string]interface{}{"value": int(float64(m.stats[match]) * 1.0e-3)}, time.Now())
|
||||
y, err := lp.New(name, m.tags, m.meta, map[string]interface{}{"value": int(float64(m.stats[match]) * 1.0e-3)}, time.Now())
|
||||
if err == nil {
|
||||
*out = append(*out, y)
|
||||
output <- y
|
||||
}
|
||||
}
|
||||
|
||||
@ -108,18 +108,18 @@ func (m *MemstatCollector) Read(interval time.Duration, out *[]lp.MutableMetric)
|
||||
if _, cached := m.stats[`Cached`]; cached {
|
||||
memUsed := m.stats[`MemTotal`] - (m.stats[`MemFree`] + m.stats[`Buffers`] + m.stats[`Cached`])
|
||||
_, skip := stringArrayContains(m.config.ExcludeMetrics, "mem_used")
|
||||
y, err := lp.New("mem_used", m.tags, map[string]interface{}{"value": int(float64(memUsed) * 1.0e-3)}, time.Now())
|
||||
y, err := lp.New("mem_used", m.tags, m.meta, map[string]interface{}{"value": int(float64(memUsed) * 1.0e-3)}, time.Now())
|
||||
if err == nil && !skip {
|
||||
*out = append(*out, y)
|
||||
output <- y
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if _, found := m.stats[`MemShared`]; found {
|
||||
_, skip := stringArrayContains(m.config.ExcludeMetrics, "mem_shared")
|
||||
y, err := lp.New("mem_shared", m.tags, map[string]interface{}{"value": int(float64(m.stats[`MemShared`]) * 1.0e-3)}, time.Now())
|
||||
y, err := lp.New("mem_shared", m.tags, m.meta, map[string]interface{}{"value": int(float64(m.stats[`MemShared`]) * 1.0e-3)}, time.Now())
|
||||
if err == nil && !skip {
|
||||
*out = append(*out, y)
|
||||
output <- y
|
||||
}
|
||||
}
|
||||
}
|
||||
|
27
collectors/memstatMetric.md
Normal file
27
collectors/memstatMetric.md
Normal file
@ -0,0 +1,27 @@
|
||||
|
||||
## `memstat` collector
|
||||
|
||||
```json
|
||||
"memstat": {
|
||||
"exclude_metrics": [
|
||||
"mem_used"
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
The `memstat` collector reads data from `/proc/meminfo` and outputs a handful **node** metrics. If a metric is not required, it can be excluded from forwarding it to the sink.
|
||||
|
||||
|
||||
Metrics:
|
||||
* `mem_total`
|
||||
* `mem_sreclaimable`
|
||||
* `mem_slab`
|
||||
* `mem_free`
|
||||
* `mem_buffers`
|
||||
* `mem_cached`
|
||||
* `mem_available`
|
||||
* `mem_shared`
|
||||
* `swap_total`
|
||||
* `swap_free`
|
||||
* `mem_used` = `mem_total` - (`mem_free` + `mem_buffers` + `mem_cached`)
|
||||
|
@ -1,8 +1,10 @@
|
||||
package collectors
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"errors"
|
||||
lp "github.com/influxdata/line-protocol"
|
||||
lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
|
||||
influx "github.com/influxdata/line-protocol"
|
||||
"io/ioutil"
|
||||
"log"
|
||||
"strconv"
|
||||
@ -10,28 +12,30 @@ import (
|
||||
"time"
|
||||
)
|
||||
|
||||
type MetricGetter interface {
|
||||
type MetricCollector interface {
|
||||
Name() string
|
||||
Init(config []byte) error
|
||||
Init(config json.RawMessage) error
|
||||
Initialized() bool
|
||||
Read(time.Duration, *[]lp.MutableMetric)
|
||||
Read(duration time.Duration, output chan lp.CCMetric)
|
||||
Close()
|
||||
}
|
||||
|
||||
type MetricCollector struct {
|
||||
name string
|
||||
init bool
|
||||
type metricCollector struct {
|
||||
output chan lp.CCMetric
|
||||
name string
|
||||
init bool
|
||||
meta map[string]string
|
||||
}
|
||||
|
||||
func (c *MetricCollector) Name() string {
|
||||
func (c *metricCollector) Name() string {
|
||||
return c.name
|
||||
}
|
||||
|
||||
func (c *MetricCollector) setup() error {
|
||||
func (c *metricCollector) setup() error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (c *MetricCollector) Initialized() bool {
|
||||
func (c *metricCollector) Initialized() bool {
|
||||
return c.init == true
|
||||
}
|
||||
|
||||
@ -103,7 +107,7 @@ func CpuList() []int {
|
||||
return cpulist
|
||||
}
|
||||
|
||||
func Tags2Map(metric lp.Metric) map[string]string {
|
||||
func Tags2Map(metric influx.Metric) map[string]string {
|
||||
tags := make(map[string]string)
|
||||
for _, t := range metric.TagList() {
|
||||
tags[t.Key] = t.Value
|
||||
@ -111,7 +115,7 @@ func Tags2Map(metric lp.Metric) map[string]string {
|
||||
return tags
|
||||
}
|
||||
|
||||
func Fields2Map(metric lp.Metric) map[string]interface{} {
|
||||
func Fields2Map(metric influx.Metric) map[string]interface{} {
|
||||
fields := make(map[string]interface{})
|
||||
for _, f := range metric.FieldList() {
|
||||
fields[f.Key] = f.Value
|
||||
|
@ -7,8 +7,7 @@ import (
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
lp "github.com/influxdata/line-protocol"
|
||||
lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
|
||||
)
|
||||
|
||||
const NETSTATFILE = `/proc/net/dev`
|
||||
@ -18,14 +17,15 @@ type NetstatCollectorConfig struct {
|
||||
}
|
||||
|
||||
type NetstatCollector struct {
|
||||
MetricCollector
|
||||
metricCollector
|
||||
config NetstatCollectorConfig
|
||||
matches map[int]string
|
||||
}
|
||||
|
||||
func (m *NetstatCollector) Init(config []byte) error {
|
||||
func (m *NetstatCollector) Init(config json.RawMessage) error {
|
||||
m.name = "NetstatCollector"
|
||||
m.setup()
|
||||
m.meta = map[string]string{"source": m.name, "group": "Memory"}
|
||||
m.matches = map[int]string{
|
||||
1: "bytes_in",
|
||||
9: "bytes_out",
|
||||
@ -46,7 +46,7 @@ func (m *NetstatCollector) Init(config []byte) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *NetstatCollector) Read(interval time.Duration, out *[]lp.MutableMetric) {
|
||||
func (m *NetstatCollector) Read(interval time.Duration, output chan lp.CCMetric) {
|
||||
data, err := ioutil.ReadFile(string(NETSTATFILE))
|
||||
if err != nil {
|
||||
log.Print(err.Error())
|
||||
@ -73,9 +73,15 @@ func (m *NetstatCollector) Read(interval time.Duration, out *[]lp.MutableMetric)
|
||||
for i, name := range m.matches {
|
||||
v, err := strconv.ParseInt(f[i], 10, 0)
|
||||
if err == nil {
|
||||
y, err := lp.New(name, tags, map[string]interface{}{"value": int(float64(v) * 1.0e-3)}, time.Now())
|
||||
y, err := lp.New(name, tags, m.meta, map[string]interface{}{"value": int(float64(v) * 1.0e-3)}, time.Now())
|
||||
if err == nil {
|
||||
*out = append(*out, y)
|
||||
switch {
|
||||
case strings.Contains(name, "byte"):
|
||||
y.AddMeta("unit", "Byte")
|
||||
case strings.Contains(name, "pkt"):
|
||||
y.AddMeta("unit", "Packets")
|
||||
}
|
||||
output <- y
|
||||
}
|
||||
}
|
||||
}
|
||||
|
21
collectors/netstatMetric.md
Normal file
21
collectors/netstatMetric.md
Normal file
@ -0,0 +1,21 @@
|
||||
|
||||
## `netstat` collector
|
||||
|
||||
```json
|
||||
"netstat": {
|
||||
"exclude_devices": [
|
||||
"lo"
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
The `netstat` collector reads data from `/proc/net/dev` and outputs a handful **node** metrics. If a device is not required, it can be excluded from forwarding it to the sink. Commonly the `lo` device should be excluded.
|
||||
|
||||
Metrics:
|
||||
* `bytes_in`
|
||||
* `bytes_out`
|
||||
* `pkts_in`
|
||||
* `pkts_out`
|
||||
|
||||
The device name is added as tag `device`.
|
||||
|
147
collectors/nfsMetric.go
Normal file
147
collectors/nfsMetric.go
Normal file
@ -0,0 +1,147 @@
|
||||
package collectors
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"log"
|
||||
|
||||
// "os"
|
||||
"os/exec"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
|
||||
)
|
||||
|
||||
type NfsCollectorData struct {
|
||||
current int64
|
||||
last int64
|
||||
}
|
||||
|
||||
type NfsCollector struct {
|
||||
metricCollector
|
||||
tags map[string]string
|
||||
config struct {
|
||||
Nfsutils string `json:"nfsutils"`
|
||||
ExcludeMetrics []string `json:"exclude_metrics,omitempty"`
|
||||
}
|
||||
data map[string]map[string]NfsCollectorData
|
||||
}
|
||||
|
||||
func (m *NfsCollector) initStats() error {
|
||||
cmd := exec.Command(m.config.Nfsutils, "-l")
|
||||
cmd.Wait()
|
||||
buffer, err := cmd.Output()
|
||||
if err == nil {
|
||||
for _, line := range strings.Split(string(buffer), "\n") {
|
||||
lf := strings.Fields(line)
|
||||
if len(lf) != 5 {
|
||||
continue
|
||||
}
|
||||
if _, exist := m.data[lf[1]]; !exist {
|
||||
m.data[lf[1]] = make(map[string]NfsCollectorData)
|
||||
}
|
||||
name := strings.Trim(lf[3], ":")
|
||||
if _, exist := m.data[lf[1]][name]; !exist {
|
||||
value, err := strconv.ParseInt(lf[4], 0, 64)
|
||||
if err == nil {
|
||||
x := m.data[lf[1]][name]
|
||||
x.current = value
|
||||
x.last = 0
|
||||
m.data[lf[1]][name] = x
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
func (m *NfsCollector) updateStats() error {
|
||||
cmd := exec.Command(m.config.Nfsutils, "-l")
|
||||
cmd.Wait()
|
||||
buffer, err := cmd.Output()
|
||||
if err == nil {
|
||||
for _, line := range strings.Split(string(buffer), "\n") {
|
||||
lf := strings.Fields(line)
|
||||
if len(lf) != 5 {
|
||||
continue
|
||||
}
|
||||
if _, exist := m.data[lf[1]]; !exist {
|
||||
m.data[lf[1]] = make(map[string]NfsCollectorData)
|
||||
}
|
||||
name := strings.Trim(lf[3], ":")
|
||||
if _, exist := m.data[lf[1]][name]; exist {
|
||||
value, err := strconv.ParseInt(lf[4], 0, 64)
|
||||
if err == nil {
|
||||
x := m.data[lf[1]][name]
|
||||
x.last = x.current
|
||||
x.current = value
|
||||
m.data[lf[1]][name] = x
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
func (m *NfsCollector) Init(config json.RawMessage) error {
|
||||
var err error
|
||||
m.name = "NfsCollector"
|
||||
m.setup()
|
||||
|
||||
// Set default mmpmon binary
|
||||
m.config.Nfsutils = "/usr/sbin/nfsstat"
|
||||
|
||||
// Read JSON configuration
|
||||
if len(config) > 0 {
|
||||
err = json.Unmarshal(config, &m.config)
|
||||
if err != nil {
|
||||
log.Print(err.Error())
|
||||
return err
|
||||
}
|
||||
}
|
||||
m.meta = map[string]string{
|
||||
"source": m.name,
|
||||
"group": "NFS",
|
||||
}
|
||||
m.tags = map[string]string{
|
||||
"type": "node",
|
||||
}
|
||||
// Check if mmpmon is in executable search path
|
||||
_, err = exec.LookPath(m.config.Nfsutils)
|
||||
if err != nil {
|
||||
return fmt.Errorf("NfsCollector.Init(): Failed to find nfsstat binary '%s': %v", m.config.Nfsutils, err)
|
||||
}
|
||||
m.data = make(map[string]map[string]NfsCollectorData)
|
||||
m.initStats()
|
||||
m.init = true
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *NfsCollector) Read(interval time.Duration, output chan lp.CCMetric) {
|
||||
if !m.init {
|
||||
return
|
||||
}
|
||||
timestamp := time.Now()
|
||||
|
||||
m.updateStats()
|
||||
|
||||
for version, metrics := range m.data {
|
||||
for name, data := range metrics {
|
||||
if _, skip := stringArrayContains(m.config.ExcludeMetrics, name); skip {
|
||||
continue
|
||||
}
|
||||
value := data.current - data.last
|
||||
y, err := lp.New(fmt.Sprintf("nfs_%s", name), m.tags, m.meta, map[string]interface{}{"value": value}, timestamp)
|
||||
if err == nil {
|
||||
y.AddMeta("version", version)
|
||||
output <- y
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (m *NfsCollector) Close() {
|
||||
m.init = false
|
||||
}
|
@ -6,9 +6,8 @@ import (
|
||||
"fmt"
|
||||
"log"
|
||||
"time"
|
||||
|
||||
lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
|
||||
"github.com/NVIDIA/go-nvml/pkg/nvml"
|
||||
lp "github.com/influxdata/line-protocol"
|
||||
)
|
||||
|
||||
type NvidiaCollectorConfig struct {
|
||||
@ -17,7 +16,7 @@ type NvidiaCollectorConfig struct {
|
||||
}
|
||||
|
||||
type NvidiaCollector struct {
|
||||
MetricCollector
|
||||
metricCollector
|
||||
num_gpus int
|
||||
config NvidiaCollectorConfig
|
||||
}
|
||||
@ -29,10 +28,11 @@ func (m *NvidiaCollector) CatchPanic() {
|
||||
}
|
||||
}
|
||||
|
||||
func (m *NvidiaCollector) Init(config []byte) error {
|
||||
func (m *NvidiaCollector) Init(config json.RawMessage) error {
|
||||
var err error
|
||||
m.name = "NvidiaCollector"
|
||||
m.setup()
|
||||
m.meta = map[string]string{"source": m.name, "group": "Nvidia"}
|
||||
if len(config) > 0 {
|
||||
err = json.Unmarshal(config, &m.config)
|
||||
if err != nil {
|
||||
@ -55,7 +55,7 @@ func (m *NvidiaCollector) Init(config []byte) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *NvidiaCollector) Read(interval time.Duration, out *[]lp.MutableMetric) {
|
||||
func (m *NvidiaCollector) Read(interval time.Duration, output chan lp.CCMetric) {
|
||||
if !m.init {
|
||||
return
|
||||
}
|
||||
@ -74,14 +74,14 @@ func (m *NvidiaCollector) Read(interval time.Duration, out *[]lp.MutableMetric)
|
||||
util, ret := nvml.DeviceGetUtilizationRates(device)
|
||||
if ret == nvml.SUCCESS {
|
||||
_, skip = stringArrayContains(m.config.ExcludeMetrics, "util")
|
||||
y, err := lp.New("util", tags, map[string]interface{}{"value": float64(util.Gpu)}, time.Now())
|
||||
y, err := lp.New("util", tags, m.meta, map[string]interface{}{"value": float64(util.Gpu)}, time.Now())
|
||||
if err == nil && !skip {
|
||||
*out = append(*out, y)
|
||||
output <- y
|
||||
}
|
||||
_, skip = stringArrayContains(m.config.ExcludeMetrics, "mem_util")
|
||||
y, err = lp.New("mem_util", tags, map[string]interface{}{"value": float64(util.Memory)}, time.Now())
|
||||
y, err = lp.New("mem_util", tags, m.meta, map[string]interface{}{"value": float64(util.Memory)}, time.Now())
|
||||
if err == nil && !skip {
|
||||
*out = append(*out, y)
|
||||
output <- y
|
||||
}
|
||||
}
|
||||
|
||||
@ -89,174 +89,177 @@ func (m *NvidiaCollector) Read(interval time.Duration, out *[]lp.MutableMetric)
|
||||
if ret == nvml.SUCCESS {
|
||||
t := float64(meminfo.Total) / (1024 * 1024)
|
||||
_, skip = stringArrayContains(m.config.ExcludeMetrics, "mem_total")
|
||||
y, err := lp.New("mem_total", tags, map[string]interface{}{"value": t}, time.Now())
|
||||
y, err := lp.New("mem_total", tags, m.meta, map[string]interface{}{"value": t}, time.Now())
|
||||
if err == nil && !skip {
|
||||
*out = append(*out, y)
|
||||
y.AddMeta("unit", "MByte")
|
||||
output <- y
|
||||
}
|
||||
f := float64(meminfo.Used) / (1024 * 1024)
|
||||
_, skip = stringArrayContains(m.config.ExcludeMetrics, "fb_memory")
|
||||
y, err = lp.New("fb_memory", tags, map[string]interface{}{"value": f}, time.Now())
|
||||
y, err = lp.New("fb_memory", tags, m.meta, map[string]interface{}{"value": f}, time.Now())
|
||||
if err == nil && !skip {
|
||||
*out = append(*out, y)
|
||||
y.AddMeta("unit", "MByte")
|
||||
output <- y
|
||||
}
|
||||
}
|
||||
|
||||
temp, ret := nvml.DeviceGetTemperature(device, nvml.TEMPERATURE_GPU)
|
||||
if ret == nvml.SUCCESS {
|
||||
_, skip = stringArrayContains(m.config.ExcludeMetrics, "temp")
|
||||
y, err := lp.New("temp", tags, map[string]interface{}{"value": float64(temp)}, time.Now())
|
||||
y, err := lp.New("temp", tags, m.meta, map[string]interface{}{"value": float64(temp)}, time.Now())
|
||||
if err == nil && !skip {
|
||||
*out = append(*out, y)
|
||||
y.AddMeta("unit", "degC")
|
||||
output <- y
|
||||
}
|
||||
}
|
||||
|
||||
fan, ret := nvml.DeviceGetFanSpeed(device)
|
||||
if ret == nvml.SUCCESS {
|
||||
_, skip = stringArrayContains(m.config.ExcludeMetrics, "fan")
|
||||
y, err := lp.New("fan", tags, map[string]interface{}{"value": float64(fan)}, time.Now())
|
||||
y, err := lp.New("fan", tags, m.meta, map[string]interface{}{"value": float64(fan)}, time.Now())
|
||||
if err == nil && !skip {
|
||||
*out = append(*out, y)
|
||||
output <- y
|
||||
}
|
||||
}
|
||||
|
||||
_, ecc_pend, ret := nvml.DeviceGetEccMode(device)
|
||||
if ret == nvml.SUCCESS {
|
||||
var y lp.MutableMetric
|
||||
var y lp.CCMetric
|
||||
var err error
|
||||
switch ecc_pend {
|
||||
case nvml.FEATURE_DISABLED:
|
||||
y, err = lp.New("ecc_mode", tags, map[string]interface{}{"value": string("OFF")}, time.Now())
|
||||
y, err = lp.New("ecc_mode", tags, m.meta, map[string]interface{}{"value": string("OFF")}, time.Now())
|
||||
case nvml.FEATURE_ENABLED:
|
||||
y, err = lp.New("ecc_mode", tags, map[string]interface{}{"value": string("ON")}, time.Now())
|
||||
y, err = lp.New("ecc_mode", tags, m.meta, map[string]interface{}{"value": string("ON")}, time.Now())
|
||||
default:
|
||||
y, err = lp.New("ecc_mode", tags, map[string]interface{}{"value": string("UNKNOWN")}, time.Now())
|
||||
y, err = lp.New("ecc_mode", tags, m.meta, map[string]interface{}{"value": string("UNKNOWN")}, time.Now())
|
||||
}
|
||||
_, skip = stringArrayContains(m.config.ExcludeMetrics, "ecc_mode")
|
||||
if err == nil && !skip {
|
||||
*out = append(*out, y)
|
||||
output <- y
|
||||
}
|
||||
} else if ret == nvml.ERROR_NOT_SUPPORTED {
|
||||
_, skip = stringArrayContains(m.config.ExcludeMetrics, "ecc_mode")
|
||||
y, err := lp.New("ecc_mode", tags, map[string]interface{}{"value": string("N/A")}, time.Now())
|
||||
y, err := lp.New("ecc_mode", tags, m.meta, map[string]interface{}{"value": string("N/A")}, time.Now())
|
||||
if err == nil && !skip {
|
||||
*out = append(*out, y)
|
||||
output <- y
|
||||
}
|
||||
}
|
||||
|
||||
pstate, ret := nvml.DeviceGetPerformanceState(device)
|
||||
if ret == nvml.SUCCESS {
|
||||
_, skip = stringArrayContains(m.config.ExcludeMetrics, "perf_state")
|
||||
y, err := lp.New("perf_state", tags, map[string]interface{}{"value": fmt.Sprintf("P%d", int(pstate))}, time.Now())
|
||||
y, err := lp.New("perf_state", tags, m.meta, map[string]interface{}{"value": fmt.Sprintf("P%d", int(pstate))}, time.Now())
|
||||
if err == nil && !skip {
|
||||
*out = append(*out, y)
|
||||
output <- y
|
||||
}
|
||||
}
|
||||
|
||||
power, ret := nvml.DeviceGetPowerUsage(device)
|
||||
if ret == nvml.SUCCESS {
|
||||
_, skip = stringArrayContains(m.config.ExcludeMetrics, "power_usage_report")
|
||||
y, err := lp.New("power_usage_report", tags, map[string]interface{}{"value": float64(power) / 1000}, time.Now())
|
||||
y, err := lp.New("power_usage_report", tags, m.meta, map[string]interface{}{"value": float64(power) / 1000}, time.Now())
|
||||
if err == nil && !skip {
|
||||
*out = append(*out, y)
|
||||
output <- y
|
||||
}
|
||||
}
|
||||
|
||||
gclk, ret := nvml.DeviceGetClockInfo(device, nvml.CLOCK_GRAPHICS)
|
||||
if ret == nvml.SUCCESS {
|
||||
_, skip = stringArrayContains(m.config.ExcludeMetrics, "graphics_clock_report")
|
||||
y, err := lp.New("graphics_clock_report", tags, map[string]interface{}{"value": float64(gclk)}, time.Now())
|
||||
y, err := lp.New("graphics_clock_report", tags, m.meta, map[string]interface{}{"value": float64(gclk)}, time.Now())
|
||||
if err == nil && !skip {
|
||||
*out = append(*out, y)
|
||||
output <- y
|
||||
}
|
||||
}
|
||||
|
||||
smclk, ret := nvml.DeviceGetClockInfo(device, nvml.CLOCK_SM)
|
||||
if ret == nvml.SUCCESS {
|
||||
_, skip = stringArrayContains(m.config.ExcludeMetrics, "sm_clock_report")
|
||||
y, err := lp.New("sm_clock_report", tags, map[string]interface{}{"value": float64(smclk)}, time.Now())
|
||||
y, err := lp.New("sm_clock_report", tags, m.meta, map[string]interface{}{"value": float64(smclk)}, time.Now())
|
||||
if err == nil && !skip {
|
||||
*out = append(*out, y)
|
||||
output <- y
|
||||
}
|
||||
}
|
||||
|
||||
memclk, ret := nvml.DeviceGetClockInfo(device, nvml.CLOCK_MEM)
|
||||
if ret == nvml.SUCCESS {
|
||||
_, skip = stringArrayContains(m.config.ExcludeMetrics, "mem_clock_report")
|
||||
y, err := lp.New("mem_clock_report", tags, map[string]interface{}{"value": float64(memclk)}, time.Now())
|
||||
y, err := lp.New("mem_clock_report", tags, m.meta, map[string]interface{}{"value": float64(memclk)}, time.Now())
|
||||
if err == nil && !skip {
|
||||
*out = append(*out, y)
|
||||
output <- y
|
||||
}
|
||||
}
|
||||
|
||||
max_gclk, ret := nvml.DeviceGetMaxClockInfo(device, nvml.CLOCK_GRAPHICS)
|
||||
if ret == nvml.SUCCESS {
|
||||
_, skip = stringArrayContains(m.config.ExcludeMetrics, "max_graphics_clock")
|
||||
y, err := lp.New("max_graphics_clock", tags, map[string]interface{}{"value": float64(max_gclk)}, time.Now())
|
||||
y, err := lp.New("max_graphics_clock", tags, m.meta, map[string]interface{}{"value": float64(max_gclk)}, time.Now())
|
||||
if err == nil && !skip {
|
||||
*out = append(*out, y)
|
||||
output <- y
|
||||
}
|
||||
}
|
||||
|
||||
max_smclk, ret := nvml.DeviceGetClockInfo(device, nvml.CLOCK_SM)
|
||||
if ret == nvml.SUCCESS {
|
||||
_, skip = stringArrayContains(m.config.ExcludeMetrics, "max_sm_clock")
|
||||
y, err := lp.New("max_sm_clock", tags, map[string]interface{}{"value": float64(max_smclk)}, time.Now())
|
||||
y, err := lp.New("max_sm_clock", tags, m.meta, map[string]interface{}{"value": float64(max_smclk)}, time.Now())
|
||||
if err == nil && !skip {
|
||||
*out = append(*out, y)
|
||||
output <- y
|
||||
}
|
||||
}
|
||||
|
||||
max_memclk, ret := nvml.DeviceGetClockInfo(device, nvml.CLOCK_MEM)
|
||||
if ret == nvml.SUCCESS {
|
||||
_, skip = stringArrayContains(m.config.ExcludeMetrics, "max_mem_clock")
|
||||
y, err := lp.New("max_mem_clock", tags, map[string]interface{}{"value": float64(max_memclk)}, time.Now())
|
||||
y, err := lp.New("max_mem_clock", tags, m.meta, map[string]interface{}{"value": float64(max_memclk)}, time.Now())
|
||||
if err == nil && !skip {
|
||||
*out = append(*out, y)
|
||||
output <- y
|
||||
}
|
||||
}
|
||||
|
||||
ecc_db, ret := nvml.DeviceGetTotalEccErrors(device, 1, 1)
|
||||
if ret == nvml.SUCCESS {
|
||||
_, skip = stringArrayContains(m.config.ExcludeMetrics, "ecc_db_error")
|
||||
y, err := lp.New("ecc_db_error", tags, map[string]interface{}{"value": float64(ecc_db)}, time.Now())
|
||||
y, err := lp.New("ecc_db_error", tags, m.meta, map[string]interface{}{"value": float64(ecc_db)}, time.Now())
|
||||
if err == nil && !skip {
|
||||
*out = append(*out, y)
|
||||
output <- y
|
||||
}
|
||||
}
|
||||
|
||||
ecc_sb, ret := nvml.DeviceGetTotalEccErrors(device, 0, 1)
|
||||
if ret == nvml.SUCCESS {
|
||||
_, skip = stringArrayContains(m.config.ExcludeMetrics, "ecc_sb_error")
|
||||
y, err := lp.New("ecc_sb_error", tags, map[string]interface{}{"value": float64(ecc_sb)}, time.Now())
|
||||
y, err := lp.New("ecc_sb_error", tags, m.meta, map[string]interface{}{"value": float64(ecc_sb)}, time.Now())
|
||||
if err == nil && !skip {
|
||||
*out = append(*out, y)
|
||||
output <- y
|
||||
}
|
||||
}
|
||||
|
||||
pwr_limit, ret := nvml.DeviceGetPowerManagementLimit(device)
|
||||
if ret == nvml.SUCCESS {
|
||||
_, skip = stringArrayContains(m.config.ExcludeMetrics, "power_man_limit")
|
||||
y, err := lp.New("power_man_limit", tags, map[string]interface{}{"value": float64(pwr_limit)}, time.Now())
|
||||
y, err := lp.New("power_man_limit", tags, m.meta, map[string]interface{}{"value": float64(pwr_limit)}, time.Now())
|
||||
if err == nil && !skip {
|
||||
*out = append(*out, y)
|
||||
output <- y
|
||||
}
|
||||
}
|
||||
|
||||
enc_util, _, ret := nvml.DeviceGetEncoderUtilization(device)
|
||||
if ret == nvml.SUCCESS {
|
||||
_, skip = stringArrayContains(m.config.ExcludeMetrics, "encoder_util")
|
||||
y, err := lp.New("encoder_util", tags, map[string]interface{}{"value": float64(enc_util)}, time.Now())
|
||||
y, err := lp.New("encoder_util", tags, m.meta, map[string]interface{}{"value": float64(enc_util)}, time.Now())
|
||||
if err == nil && !skip {
|
||||
*out = append(*out, y)
|
||||
output <- y
|
||||
}
|
||||
}
|
||||
|
||||
dec_util, _, ret := nvml.DeviceGetDecoderUtilization(device)
|
||||
if ret == nvml.SUCCESS {
|
||||
_, skip = stringArrayContains(m.config.ExcludeMetrics, "decoder_util")
|
||||
y, err := lp.New("decoder_util", tags, map[string]interface{}{"value": float64(dec_util)}, time.Now())
|
||||
y, err := lp.New("decoder_util", tags, m.meta, map[string]interface{}{"value": float64(dec_util)}, time.Now())
|
||||
if err == nil && !skip {
|
||||
*out = append(*out, y)
|
||||
output <- y
|
||||
}
|
||||
}
|
||||
}
|
||||
|
40
collectors/nvidiaMetric.md
Normal file
40
collectors/nvidiaMetric.md
Normal file
@ -0,0 +1,40 @@
|
||||
|
||||
## `nvidia` collector
|
||||
|
||||
```json
|
||||
"lustrestat": {
|
||||
"exclude_devices" : [
|
||||
"0","1"
|
||||
],
|
||||
"exclude_metrics": [
|
||||
"fb_memory",
|
||||
"fan"
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
Metrics:
|
||||
* `util`
|
||||
* `mem_util`
|
||||
* `mem_total`
|
||||
* `fb_memory`
|
||||
* `temp`
|
||||
* `fan`
|
||||
* `ecc_mode`
|
||||
* `perf_state`
|
||||
* `power_usage_report`
|
||||
* `graphics_clock_report`
|
||||
* `sm_clock_report`
|
||||
* `mem_clock_report`
|
||||
* `max_graphics_clock`
|
||||
* `max_sm_clock`
|
||||
* `max_mem_clock`
|
||||
* `ecc_db_error`
|
||||
* `ecc_sb_error`
|
||||
* `power_man_limit`
|
||||
* `encoder_util`
|
||||
* `decoder_util`
|
||||
|
||||
It uses a separate `type` in the metrics. The output metric looks like this:
|
||||
`<name>,type=accelerator,type-id=<nvidia-gpu-id> value=<metric value> <timestamp>`
|
||||
|
@ -4,13 +4,13 @@ import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"log"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
lp "github.com/influxdata/line-protocol"
|
||||
lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
|
||||
)
|
||||
|
||||
const HWMON_PATH = `/sys/class/hwmon`
|
||||
@ -21,20 +21,21 @@ type TempCollectorConfig struct {
|
||||
}
|
||||
|
||||
type TempCollector struct {
|
||||
MetricCollector
|
||||
metricCollector
|
||||
config TempCollectorConfig
|
||||
}
|
||||
|
||||
func (m *TempCollector) Init(config []byte) error {
|
||||
func (m *TempCollector) Init(config json.RawMessage) error {
|
||||
m.name = "TempCollector"
|
||||
m.setup()
|
||||
m.init = true
|
||||
m.meta = map[string]string{"source": m.name, "group": "IPMI", "unit": "degC"}
|
||||
if len(config) > 0 {
|
||||
err := json.Unmarshal(config, &m.config)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
m.init = true
|
||||
return nil
|
||||
}
|
||||
|
||||
@ -74,7 +75,7 @@ func get_hwmon_sensors() (map[string]map[string]string, error) {
|
||||
return sensors, nil
|
||||
}
|
||||
|
||||
func (m *TempCollector) Read(interval time.Duration, out *[]lp.MutableMetric) {
|
||||
func (m *TempCollector) Read(interval time.Duration, output chan lp.CCMetric) {
|
||||
|
||||
sensors, err := get_hwmon_sensors()
|
||||
if err != nil {
|
||||
@ -89,15 +90,20 @@ func (m *TempCollector) Read(interval time.Duration, out *[]lp.MutableMetric) {
|
||||
break
|
||||
}
|
||||
}
|
||||
mname := strings.Replace(name, " ", "_", -1)
|
||||
if !strings.Contains(mname, "temp") {
|
||||
mname = fmt.Sprintf("temp_%s", mname)
|
||||
}
|
||||
buffer, err := ioutil.ReadFile(string(file))
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
x, err := strconv.ParseInt(strings.Replace(string(buffer), "\n", "", -1), 0, 64)
|
||||
if err == nil {
|
||||
y, err := lp.New(strings.ToLower(name), tags, map[string]interface{}{"value": float64(x) / 1000}, time.Now())
|
||||
y, err := lp.New(strings.ToLower(mname), tags, m.meta, map[string]interface{}{"value": int(float64(x) / 1000)}, time.Now())
|
||||
if err == nil {
|
||||
*out = append(*out, y)
|
||||
log.Print("[", m.name, "] ", y)
|
||||
output <- y
|
||||
}
|
||||
}
|
||||
}
|
||||
|
22
collectors/tempMetric.md
Normal file
22
collectors/tempMetric.md
Normal file
@ -0,0 +1,22 @@
|
||||
|
||||
## `tempstat` collector
|
||||
|
||||
```json
|
||||
"tempstat": {
|
||||
"tag_override" : {
|
||||
"<device like hwmon1>" : {
|
||||
"type" : "socket",
|
||||
"type-id" : "0"
|
||||
}
|
||||
},
|
||||
"exclude_metrics": [
|
||||
"metric1",
|
||||
"metric2"
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
The `tempstat` collector reads the data from `/sys/class/hwmon/<device>/tempX_{input,label}`
|
||||
|
||||
Metrics:
|
||||
* `temp_*`: The metric name is taken from the `label` files.
|
@ -8,8 +8,7 @@ import (
|
||||
"os/exec"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
lp "github.com/influxdata/line-protocol"
|
||||
lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
|
||||
)
|
||||
|
||||
const MAX_NUM_PROCS = 10
|
||||
@ -20,15 +19,16 @@ type TopProcsCollectorConfig struct {
|
||||
}
|
||||
|
||||
type TopProcsCollector struct {
|
||||
MetricCollector
|
||||
metricCollector
|
||||
tags map[string]string
|
||||
config TopProcsCollectorConfig
|
||||
}
|
||||
|
||||
func (m *TopProcsCollector) Init(config []byte) error {
|
||||
func (m *TopProcsCollector) Init(config json.RawMessage) error {
|
||||
var err error
|
||||
m.name = "TopProcsCollector"
|
||||
m.tags = map[string]string{"type": "node"}
|
||||
m.meta = map[string]string{"source": m.name, "group": "TopProcs"}
|
||||
if len(config) > 0 {
|
||||
err = json.Unmarshal(config, &m.config)
|
||||
if err != nil {
|
||||
@ -51,7 +51,7 @@ func (m *TopProcsCollector) Init(config []byte) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *TopProcsCollector) Read(interval time.Duration, out *[]lp.MutableMetric) {
|
||||
func (m *TopProcsCollector) Read(interval time.Duration, output chan lp.CCMetric) {
|
||||
if !m.init {
|
||||
return
|
||||
}
|
||||
@ -66,9 +66,9 @@ func (m *TopProcsCollector) Read(interval time.Duration, out *[]lp.MutableMetric
|
||||
lines := strings.Split(string(stdout), "\n")
|
||||
for i := 1; i < m.config.Num_procs+1; i++ {
|
||||
name := fmt.Sprintf("topproc%d", i)
|
||||
y, err := lp.New(name, m.tags, map[string]interface{}{"value": string(lines[i])}, time.Now())
|
||||
y, err := lp.New(name, m.tags, m.meta, map[string]interface{}{"value": string(lines[i])}, time.Now())
|
||||
if err == nil {
|
||||
*out = append(*out, y)
|
||||
output <- y
|
||||
}
|
||||
}
|
||||
}
|
||||
|
15
collectors/topprocsMetric.md
Normal file
15
collectors/topprocsMetric.md
Normal file
@ -0,0 +1,15 @@
|
||||
|
||||
## `topprocs` collector
|
||||
|
||||
```json
|
||||
"topprocs": {
|
||||
"num_procs": 5
|
||||
}
|
||||
```
|
||||
|
||||
The `topprocs` collector reads the TopX processes (sorted by CPU utilization, `ps -Ao comm --sort=-pcpu`).
|
||||
|
||||
In contrast to most other collectors, the metric value is a `string`.
|
||||
|
||||
|
||||
|
40
config.json
40
config.json
@ -1,36 +1,8 @@
|
||||
{
|
||||
"sink": {
|
||||
"user": "testuser",
|
||||
"password": "testpass",
|
||||
"host": "127.0.0.1",
|
||||
"port": "9090",
|
||||
"database": "testdb",
|
||||
"organization": "testorg",
|
||||
"type": "stdout"
|
||||
},
|
||||
"interval": 3,
|
||||
"duration": 1,
|
||||
"collectors": [
|
||||
"tempstat"
|
||||
],
|
||||
"default_tags": {
|
||||
"cluster": "testcluster"
|
||||
},
|
||||
"receiver": {
|
||||
"type": "none"
|
||||
},
|
||||
"collect_config": {
|
||||
"tempstat": {
|
||||
"tag_override": {
|
||||
"hwmon0" : {
|
||||
"type" : "socket",
|
||||
"type-id" : "0"
|
||||
},
|
||||
"hwmon1" : {
|
||||
"type" : "socket",
|
||||
"type-id" : "1"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
"sinks": "sinks.json",
|
||||
"collectors" : "collectors.json",
|
||||
"receivers" : "receivers.json",
|
||||
"router" : "router.json",
|
||||
"interval": 10,
|
||||
"duration": 1
|
||||
}
|
||||
|
3
go.mod
3
go.mod
@ -3,10 +3,11 @@ module github.com/ClusterCockpit/cc-metric-collector
|
||||
go 1.16
|
||||
|
||||
require (
|
||||
github.com/NVIDIA/go-nvml v0.11.1-0 // indirect
|
||||
github.com/NVIDIA/go-nvml v0.11.1-0
|
||||
github.com/influxdata/influxdb-client-go/v2 v2.2.2
|
||||
github.com/influxdata/line-protocol v0.0.0-20210311194329-9aa0e372d097
|
||||
github.com/nats-io/nats.go v1.10.0
|
||||
github.com/nats-io/nkeys v0.1.4 // indirect
|
||||
github.com/prometheus/client_golang v1.10.0 // indirect
|
||||
gopkg.in/Knetic/govaluate.v2 v2.3.0
|
||||
)
|
||||
|
2
go.sum
2
go.sum
@ -421,6 +421,8 @@ google.golang.org/protobuf v1.20.1-0.20200309200217-e05f789c0967/go.mod h1:A+miE
|
||||
google.golang.org/protobuf v1.21.0/go.mod h1:47Nbq4nVaFHyn7ilMalzfO3qCViNmqZ2kzikPIcrTAo=
|
||||
google.golang.org/protobuf v1.23.0 h1:4MY060fB1DLGMB/7MBTLnwQUY6+F09GEiz6SsrNqyzM=
|
||||
google.golang.org/protobuf v1.23.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU=
|
||||
gopkg.in/Knetic/govaluate.v2 v2.3.0 h1:naJVc9CZlWA8rC8f5mvECJD7jreTrn7FvGXjBthkHJQ=
|
||||
gopkg.in/Knetic/govaluate.v2 v2.3.0/go.mod h1:NW0gr10J8s7aNghEg6uhdxiEaBvc0+8VgJjVViHUKp4=
|
||||
gopkg.in/alecthomas/kingpin.v2 v2.2.6/go.mod h1:FMv+mEhP44yOT+4EoQTLFTRgOQ1FBLkstjWtayDeSgw=
|
||||
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
|
||||
gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
|
||||
|
32
internal/ccMetric/README.md
Normal file
32
internal/ccMetric/README.md
Normal file
@ -0,0 +1,32 @@
|
||||
# ClusterCockpit metrics
|
||||
|
||||
As described in the [ClusterCockpit specifications](https://github.com/ClusterCockpit/cc-specifications), the whole ClusterCockpit stack uses metrics in the InfluxDB line protocol format. This is also the input and output format for the ClusterCockpit Metric Collector but internally it uses an extended format while processing, named CCMetric.
|
||||
|
||||
It is basically a copy of the [InfluxDB line protocol](https://github.com/influxdata/line-protocol) `MutableMetric` interface with one extension. Besides the tags and fields, it contains a list of meta information (re-using the `Tag` structure of the original protocol):
|
||||
|
||||
```golang
|
||||
type ccMetric struct {
|
||||
name string // same as
|
||||
tags []*influx.Tag // original
|
||||
fields []*influx.Field // Influx
|
||||
tm time.Time // line-protocol
|
||||
meta []*influx.Tag
|
||||
}
|
||||
|
||||
type CCMetric interface {
|
||||
influx.MutableMetric // the same functions as defined by influx.MutableMetric
|
||||
RemoveTag(key string) // this is not published by the original influx.MutableMetric
|
||||
Meta() map[string]string
|
||||
MetaList() []*inlux.Tag
|
||||
AddMeta(key, value string)
|
||||
HasMeta(key string) bool
|
||||
GetMeta(key string) (string, bool)
|
||||
RemoveMeta(key string)
|
||||
}
|
||||
```
|
||||
|
||||
The `CCMetric` interface provides the same functions as the `MutableMetric` like `{Add, Remove, Has}{Tag, Field}` and additionally provides `{Add, Remove, Has}Meta`.
|
||||
|
||||
The InfluxDB protocol creates a new metric with `influx.New(name, tags, fields, time)` while CCMetric uses `ccMetric.New(name, tags, meta, fields, time)` where `tags` and `meta` are both of type `map[string]string`.
|
||||
|
||||
You can copy a CCMetric with `FromMetric(other CCMetric) CCMetric`. If you get an `influx.Metric` from a function, like the line protocol parser, you can use `FromInfluxMetric(other influx.Metric) CCMetric` to get a CCMetric out of it (see `NatsReceiver` for an example).
|
374
internal/ccMetric/ccMetric.go
Normal file
374
internal/ccMetric/ccMetric.go
Normal file
@ -0,0 +1,374 @@
|
||||
package ccmetric
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
lp "github.com/influxdata/line-protocol" // MIT license
|
||||
"sort"
|
||||
"time"
|
||||
)
|
||||
|
||||
// Most functions are derived from github.com/influxdata/line-protocol/metric.go
|
||||
// The metric type is extended with an extra meta information list re-using the Tag
|
||||
// type.
|
||||
|
||||
type ccMetric struct {
|
||||
name string
|
||||
tags []*lp.Tag
|
||||
fields []*lp.Field
|
||||
tm time.Time
|
||||
meta []*lp.Tag
|
||||
}
|
||||
|
||||
type CCMetric interface {
|
||||
lp.MutableMetric
|
||||
AddMeta(key, value string)
|
||||
MetaList() []*lp.Tag
|
||||
RemoveTag(key string)
|
||||
}
|
||||
|
||||
func (m *ccMetric) Meta() map[string]string {
|
||||
meta := make(map[string]string, len(m.meta))
|
||||
for _, m := range m.meta {
|
||||
meta[m.Key] = m.Value
|
||||
}
|
||||
return meta
|
||||
}
|
||||
|
||||
func (m *ccMetric) MetaList() []*lp.Tag {
|
||||
return m.meta
|
||||
}
|
||||
|
||||
func (m *ccMetric) String() string {
|
||||
return fmt.Sprintf("%s %v %v %v %d", m.name, m.Tags(), m.Meta(), m.Fields(), m.tm.UnixNano())
|
||||
}
|
||||
|
||||
func (m *ccMetric) Name() string {
|
||||
return m.name
|
||||
}
|
||||
|
||||
func (m *ccMetric) Tags() map[string]string {
|
||||
tags := make(map[string]string, len(m.tags))
|
||||
for _, tag := range m.tags {
|
||||
tags[tag.Key] = tag.Value
|
||||
}
|
||||
return tags
|
||||
}
|
||||
|
||||
func (m *ccMetric) TagList() []*lp.Tag {
|
||||
return m.tags
|
||||
}
|
||||
|
||||
func (m *ccMetric) Fields() map[string]interface{} {
|
||||
fields := make(map[string]interface{}, len(m.fields))
|
||||
for _, field := range m.fields {
|
||||
fields[field.Key] = field.Value
|
||||
}
|
||||
|
||||
return fields
|
||||
}
|
||||
|
||||
func (m *ccMetric) FieldList() []*lp.Field {
|
||||
return m.fields
|
||||
}
|
||||
|
||||
func (m *ccMetric) Time() time.Time {
|
||||
return m.tm
|
||||
}
|
||||
|
||||
func (m *ccMetric) SetTime(t time.Time) {
|
||||
m.tm = t
|
||||
}
|
||||
|
||||
func (m *ccMetric) HasTag(key string) bool {
|
||||
for _, tag := range m.tags {
|
||||
if tag.Key == key {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func (m *ccMetric) GetTag(key string) (string, bool) {
|
||||
for _, tag := range m.tags {
|
||||
if tag.Key == key {
|
||||
return tag.Value, true
|
||||
}
|
||||
}
|
||||
return "", false
|
||||
}
|
||||
|
||||
func (m *ccMetric) RemoveTag(key string) {
|
||||
for i, tag := range m.tags {
|
||||
if tag.Key == key {
|
||||
copy(m.tags[i:], m.tags[i+1:])
|
||||
m.tags[len(m.tags)-1] = nil
|
||||
m.tags = m.tags[:len(m.tags)-1]
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (m *ccMetric) AddTag(key, value string) {
|
||||
for i, tag := range m.tags {
|
||||
if key > tag.Key {
|
||||
continue
|
||||
}
|
||||
|
||||
if key == tag.Key {
|
||||
tag.Value = value
|
||||
return
|
||||
}
|
||||
|
||||
m.tags = append(m.tags, nil)
|
||||
copy(m.tags[i+1:], m.tags[i:])
|
||||
m.tags[i] = &lp.Tag{Key: key, Value: value}
|
||||
return
|
||||
}
|
||||
|
||||
m.tags = append(m.tags, &lp.Tag{Key: key, Value: value})
|
||||
}
|
||||
|
||||
func (m *ccMetric) HasMeta(key string) bool {
|
||||
for _, tag := range m.meta {
|
||||
if tag.Key == key {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func (m *ccMetric) GetMeta(key string) (string, bool) {
|
||||
for _, tag := range m.meta {
|
||||
if tag.Key == key {
|
||||
return tag.Value, true
|
||||
}
|
||||
}
|
||||
return "", false
|
||||
}
|
||||
|
||||
func (m *ccMetric) RemoveMeta(key string) {
|
||||
for i, tag := range m.meta {
|
||||
if tag.Key == key {
|
||||
copy(m.meta[i:], m.meta[i+1:])
|
||||
m.meta[len(m.meta)-1] = nil
|
||||
m.meta = m.meta[:len(m.meta)-1]
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (m *ccMetric) AddMeta(key, value string) {
|
||||
for i, tag := range m.meta {
|
||||
if key > tag.Key {
|
||||
continue
|
||||
}
|
||||
|
||||
if key == tag.Key {
|
||||
tag.Value = value
|
||||
return
|
||||
}
|
||||
|
||||
m.meta = append(m.meta, nil)
|
||||
copy(m.meta[i+1:], m.meta[i:])
|
||||
m.meta[i] = &lp.Tag{Key: key, Value: value}
|
||||
return
|
||||
}
|
||||
|
||||
m.meta = append(m.meta, &lp.Tag{Key: key, Value: value})
|
||||
}
|
||||
|
||||
func (m *ccMetric) AddField(key string, value interface{}) {
|
||||
for i, field := range m.fields {
|
||||
if key == field.Key {
|
||||
m.fields[i] = &lp.Field{Key: key, Value: convertField(value)}
|
||||
return
|
||||
}
|
||||
}
|
||||
m.fields = append(m.fields, &lp.Field{Key: key, Value: convertField(value)})
|
||||
}
|
||||
|
||||
func New(
|
||||
name string,
|
||||
tags map[string]string,
|
||||
meta map[string]string,
|
||||
fields map[string]interface{},
|
||||
tm time.Time,
|
||||
) (CCMetric, error) {
|
||||
m := &ccMetric{
|
||||
name: name,
|
||||
tags: nil,
|
||||
fields: nil,
|
||||
tm: tm,
|
||||
meta: nil,
|
||||
}
|
||||
|
||||
if len(tags) > 0 {
|
||||
m.tags = make([]*lp.Tag, 0, len(tags))
|
||||
for k, v := range tags {
|
||||
m.tags = append(m.tags,
|
||||
&lp.Tag{Key: k, Value: v})
|
||||
}
|
||||
sort.Slice(m.tags, func(i, j int) bool { return m.tags[i].Key < m.tags[j].Key })
|
||||
}
|
||||
|
||||
if len(meta) > 0 {
|
||||
m.meta = make([]*lp.Tag, 0, len(meta))
|
||||
for k, v := range meta {
|
||||
m.meta = append(m.meta,
|
||||
&lp.Tag{Key: k, Value: v})
|
||||
}
|
||||
sort.Slice(m.meta, func(i, j int) bool { return m.meta[i].Key < m.meta[j].Key })
|
||||
}
|
||||
|
||||
if len(fields) > 0 {
|
||||
m.fields = make([]*lp.Field, 0, len(fields))
|
||||
for k, v := range fields {
|
||||
v := convertField(v)
|
||||
if v == nil {
|
||||
continue
|
||||
}
|
||||
m.AddField(k, v)
|
||||
}
|
||||
}
|
||||
|
||||
return m, nil
|
||||
}
|
||||
|
||||
func FromMetric(other CCMetric) CCMetric {
|
||||
m := &ccMetric{
|
||||
name: other.Name(),
|
||||
tags: make([]*lp.Tag, len(other.TagList())),
|
||||
fields: make([]*lp.Field, len(other.FieldList())),
|
||||
meta: make([]*lp.Tag, len(other.MetaList())),
|
||||
tm: other.Time(),
|
||||
}
|
||||
|
||||
for i, tag := range other.TagList() {
|
||||
m.tags[i] = &lp.Tag{Key: tag.Key, Value: tag.Value}
|
||||
}
|
||||
for i, s := range other.MetaList() {
|
||||
m.meta[i] = &lp.Tag{Key: s.Key, Value: s.Value}
|
||||
}
|
||||
|
||||
for i, field := range other.FieldList() {
|
||||
m.fields[i] = &lp.Field{Key: field.Key, Value: field.Value}
|
||||
}
|
||||
return m
|
||||
}
|
||||
|
||||
func FromInfluxMetric(other lp.Metric) CCMetric {
|
||||
m := &ccMetric{
|
||||
name: other.Name(),
|
||||
tags: make([]*lp.Tag, len(other.TagList())),
|
||||
fields: make([]*lp.Field, len(other.FieldList())),
|
||||
meta: make([]*lp.Tag, 0),
|
||||
tm: other.Time(),
|
||||
}
|
||||
|
||||
for i, tag := range other.TagList() {
|
||||
m.tags[i] = &lp.Tag{Key: tag.Key, Value: tag.Value}
|
||||
}
|
||||
|
||||
for i, field := range other.FieldList() {
|
||||
m.fields[i] = &lp.Field{Key: field.Key, Value: field.Value}
|
||||
}
|
||||
return m
|
||||
}
|
||||
|
||||
func convertField(v interface{}) interface{} {
|
||||
switch v := v.(type) {
|
||||
case float64:
|
||||
return v
|
||||
case int64:
|
||||
return v
|
||||
case string:
|
||||
return v
|
||||
case bool:
|
||||
return v
|
||||
case int:
|
||||
return int64(v)
|
||||
case uint:
|
||||
return uint64(v)
|
||||
case uint64:
|
||||
return uint64(v)
|
||||
case []byte:
|
||||
return string(v)
|
||||
case int32:
|
||||
return int64(v)
|
||||
case int16:
|
||||
return int64(v)
|
||||
case int8:
|
||||
return int64(v)
|
||||
case uint32:
|
||||
return uint64(v)
|
||||
case uint16:
|
||||
return uint64(v)
|
||||
case uint8:
|
||||
return uint64(v)
|
||||
case float32:
|
||||
return float64(v)
|
||||
case *float64:
|
||||
if v != nil {
|
||||
return *v
|
||||
}
|
||||
case *int64:
|
||||
if v != nil {
|
||||
return *v
|
||||
}
|
||||
case *string:
|
||||
if v != nil {
|
||||
return *v
|
||||
}
|
||||
case *bool:
|
||||
if v != nil {
|
||||
return *v
|
||||
}
|
||||
case *int:
|
||||
if v != nil {
|
||||
return int64(*v)
|
||||
}
|
||||
case *uint:
|
||||
if v != nil {
|
||||
return uint64(*v)
|
||||
}
|
||||
case *uint64:
|
||||
if v != nil {
|
||||
return uint64(*v)
|
||||
}
|
||||
case *[]byte:
|
||||
if v != nil {
|
||||
return string(*v)
|
||||
}
|
||||
case *int32:
|
||||
if v != nil {
|
||||
return int64(*v)
|
||||
}
|
||||
case *int16:
|
||||
if v != nil {
|
||||
return int64(*v)
|
||||
}
|
||||
case *int8:
|
||||
if v != nil {
|
||||
return int64(*v)
|
||||
}
|
||||
case *uint32:
|
||||
if v != nil {
|
||||
return uint64(*v)
|
||||
}
|
||||
case *uint16:
|
||||
if v != nil {
|
||||
return uint64(*v)
|
||||
}
|
||||
case *uint8:
|
||||
if v != nil {
|
||||
return uint64(*v)
|
||||
}
|
||||
case *float32:
|
||||
if v != nil {
|
||||
return float64(*v)
|
||||
}
|
||||
default:
|
||||
return nil
|
||||
}
|
||||
return nil
|
||||
}
|
50
internal/metricRouter/README.md
Normal file
50
internal/metricRouter/README.md
Normal file
@ -0,0 +1,50 @@
|
||||
# CC Metric Router
|
||||
|
||||
The CCMetric router sits in between the collectors and the sinks and can be used to add and remove tags to/from traversing [CCMetrics](../ccMetric/README.md).
|
||||
|
||||
# Configuration
|
||||
|
||||
```json
|
||||
{
|
||||
"add_tags" : [
|
||||
{
|
||||
"key" : "cluster",
|
||||
"value" : "testcluster",
|
||||
"if" : "*"
|
||||
},
|
||||
{
|
||||
"key" : "test",
|
||||
"value" : "testing",
|
||||
"if" : "name == 'temp_package_id_0'"
|
||||
}
|
||||
],
|
||||
"delete_tags" : [
|
||||
{
|
||||
"key" : "unit",
|
||||
"value" : "*",
|
||||
"if" : "*"
|
||||
}
|
||||
],
|
||||
"interval_timestamp" : true
|
||||
}
|
||||
```
|
||||
|
||||
There are three main options `add_tags`, `delete_tags` and `interval_timestamp`. `add_tags` and `delete_tags` are lists consisting of dicts with `key`, `value` and `if`. The `value` can be omitted in the `delete_tags` part as it only uses the `key` for removal. The `interval_timestamp` setting means that a unique timestamp is applied to all metrics traversing the router during an interval.
|
||||
|
||||
# Conditional manipulation of tags
|
||||
|
||||
The `if` setting allows conditional testing of a single metric like in the example:
|
||||
|
||||
```json
|
||||
{
|
||||
"key" : "test",
|
||||
"value" : "testing",
|
||||
"if" : "name == 'temp_package_id_0'"
|
||||
}
|
||||
```
|
||||
|
||||
If the CCMetric name is equal to 'temp_package_id_0', it adds an additional tag `test=testing` to the metric.
|
||||
|
||||
In order to match all metrics, you can use `*`, so in order to add a flag per default, like the `cluster=testcluster` tag in the example.
|
||||
|
||||
|
208
internal/metricRouter/metricRouter.go
Normal file
208
internal/metricRouter/metricRouter.go
Normal file
@ -0,0 +1,208 @@
|
||||
package metricRouter
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"log"
|
||||
"os"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
|
||||
mct "github.com/ClusterCockpit/cc-metric-collector/internal/multiChanTicker"
|
||||
"gopkg.in/Knetic/govaluate.v2"
|
||||
)
|
||||
|
||||
type metricRouterTagConfig struct {
|
||||
Key string `json:"key"`
|
||||
Value string `json:"value"`
|
||||
Condition string `json:"if"`
|
||||
}
|
||||
|
||||
type metricRouterConfig struct {
|
||||
AddTags []metricRouterTagConfig `json:"add_tags"`
|
||||
DelTags []metricRouterTagConfig `json:"delete_tags"`
|
||||
IntervalStamp bool `json:"interval_timestamp"`
|
||||
}
|
||||
|
||||
type metricRouter struct {
|
||||
inputs []chan lp.CCMetric
|
||||
outputs []chan lp.CCMetric
|
||||
done chan bool
|
||||
wg *sync.WaitGroup
|
||||
timestamp time.Time
|
||||
ticker mct.MultiChanTicker
|
||||
config metricRouterConfig
|
||||
}
|
||||
|
||||
type MetricRouter interface {
|
||||
Init(ticker mct.MultiChanTicker, wg *sync.WaitGroup, routerConfigFile string) error
|
||||
AddInput(input chan lp.CCMetric)
|
||||
AddOutput(output chan lp.CCMetric)
|
||||
Start()
|
||||
Close()
|
||||
}
|
||||
|
||||
func (r *metricRouter) Init(ticker mct.MultiChanTicker, wg *sync.WaitGroup, routerConfigFile string) error {
|
||||
r.inputs = make([]chan lp.CCMetric, 0)
|
||||
r.outputs = make([]chan lp.CCMetric, 0)
|
||||
r.done = make(chan bool)
|
||||
r.wg = wg
|
||||
r.ticker = ticker
|
||||
configFile, err := os.Open(routerConfigFile)
|
||||
if err != nil {
|
||||
log.Print(err.Error())
|
||||
return err
|
||||
}
|
||||
defer configFile.Close()
|
||||
jsonParser := json.NewDecoder(configFile)
|
||||
err = jsonParser.Decode(&r.config)
|
||||
if err != nil {
|
||||
log.Print(err.Error())
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (r *metricRouter) StartTimer() {
|
||||
m := make(chan time.Time)
|
||||
r.ticker.AddChannel(m)
|
||||
go func() {
|
||||
for {
|
||||
select {
|
||||
case t := <-m:
|
||||
r.timestamp = t
|
||||
}
|
||||
}
|
||||
}()
|
||||
}
|
||||
|
||||
func (r *metricRouter) EvalCondition(Cond string, point lp.CCMetric) (bool, error) {
|
||||
expression, err := govaluate.NewEvaluableExpression(Cond)
|
||||
if err != nil {
|
||||
log.Print(Cond, " = ", err.Error())
|
||||
return false, err
|
||||
}
|
||||
params := make(map[string]interface{})
|
||||
params["name"] = point.Name()
|
||||
for _, t := range point.TagList() {
|
||||
params[t.Key] = t.Value
|
||||
}
|
||||
for _, m := range point.MetaList() {
|
||||
params[m.Key] = m.Value
|
||||
}
|
||||
for _, f := range point.FieldList() {
|
||||
params[f.Key] = f.Value
|
||||
}
|
||||
params["timestamp"] = point.Time()
|
||||
|
||||
result, err := expression.Evaluate(params)
|
||||
if err != nil {
|
||||
log.Print(Cond, " = ", err.Error())
|
||||
return false, err
|
||||
}
|
||||
return bool(result.(bool)), err
|
||||
}
|
||||
|
||||
func (r *metricRouter) DoAddTags(point lp.CCMetric) {
|
||||
for _, m := range r.config.AddTags {
|
||||
var conditionMatches bool
|
||||
|
||||
if m.Condition == "*" {
|
||||
conditionMatches = true
|
||||
} else {
|
||||
var err error
|
||||
conditionMatches, err = r.EvalCondition(m.Condition, point)
|
||||
if err != nil {
|
||||
log.Print(err.Error())
|
||||
conditionMatches = false
|
||||
}
|
||||
}
|
||||
if conditionMatches {
|
||||
point.AddTag(m.Key, m.Value)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (r *metricRouter) DoDelTags(point lp.CCMetric) {
|
||||
for _, m := range r.config.DelTags {
|
||||
var conditionMatches bool
|
||||
|
||||
if m.Condition == "*" {
|
||||
conditionMatches = true
|
||||
} else {
|
||||
var err error
|
||||
conditionMatches, err = r.EvalCondition(m.Condition, point)
|
||||
if err != nil {
|
||||
log.Print(err.Error())
|
||||
conditionMatches = false
|
||||
}
|
||||
}
|
||||
if conditionMatches {
|
||||
point.RemoveTag(m.Key)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (r *metricRouter) Start() {
|
||||
r.wg.Add(1)
|
||||
r.timestamp = time.Now()
|
||||
if r.config.IntervalStamp {
|
||||
r.StartTimer()
|
||||
}
|
||||
go func() {
|
||||
for {
|
||||
RouterLoop:
|
||||
select {
|
||||
case <-r.done:
|
||||
log.Print("[MetricRouter] DONE\n")
|
||||
r.wg.Done()
|
||||
break RouterLoop
|
||||
default:
|
||||
for _, c := range r.inputs {
|
||||
RouterInputLoop:
|
||||
select {
|
||||
case <-r.done:
|
||||
log.Print("[MetricRouter] DONE\n")
|
||||
r.wg.Done()
|
||||
break RouterInputLoop
|
||||
case p := <-c:
|
||||
log.Print("[MetricRouter] FORWARD ", p)
|
||||
r.DoAddTags(p)
|
||||
r.DoDelTags(p)
|
||||
if r.config.IntervalStamp {
|
||||
p.SetTime(r.timestamp)
|
||||
}
|
||||
for _, o := range r.outputs {
|
||||
o <- p
|
||||
}
|
||||
default:
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
log.Print("[MetricRouter] EXIT\n")
|
||||
}()
|
||||
log.Print("[MetricRouter] STARTED\n")
|
||||
}
|
||||
|
||||
func (r *metricRouter) AddInput(input chan lp.CCMetric) {
|
||||
r.inputs = append(r.inputs, input)
|
||||
}
|
||||
|
||||
func (r *metricRouter) AddOutput(output chan lp.CCMetric) {
|
||||
r.outputs = append(r.outputs, output)
|
||||
}
|
||||
|
||||
func (r *metricRouter) Close() {
|
||||
r.done <- true
|
||||
log.Print("[MetricRouter] CLOSE\n")
|
||||
}
|
||||
|
||||
func New(ticker mct.MultiChanTicker, wg *sync.WaitGroup, routerConfigFile string) (MetricRouter, error) {
|
||||
r := new(metricRouter)
|
||||
err := r.Init(ticker, wg, routerConfigFile)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return r, err
|
||||
}
|
37
internal/multiChanTicker/README.md
Normal file
37
internal/multiChanTicker/README.md
Normal file
@ -0,0 +1,37 @@
|
||||
# MultiChanTicker
|
||||
|
||||
The idea of this ticker is to multiply the output channels. The original Golang `time.Ticker` provides only a single output channel, so the signal can only be received by a single other class. This ticker allows to add multiple channels which get all notified about the time tick.
|
||||
|
||||
```golang
|
||||
type MultiChanTicker interface {
|
||||
Init(duration time.Duration)
|
||||
AddChannel(chan time.Time)
|
||||
}
|
||||
```
|
||||
|
||||
The MultiChanTicker is created similarly to the common `time.Ticker`:
|
||||
|
||||
```golang
|
||||
NewTicker(duration time.Duration) MultiChanTicker
|
||||
```
|
||||
|
||||
Afterwards, you can add channels:
|
||||
|
||||
```golang
|
||||
t := MultiChanTicker(duration)
|
||||
c1 := make(chan time.Time)
|
||||
c2 := make(chan time.Time)
|
||||
t.AddChannel(c1)
|
||||
t.AddChannel(c2)
|
||||
|
||||
for {
|
||||
select {
|
||||
case t1 := <- c1:
|
||||
log.Print(t1)
|
||||
case t2 := <- c2:
|
||||
log.Print(t2)
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
The result should be the same `time.Time` output in both channels, notified "simultaneously".
|
39
internal/multiChanTicker/multiChanTicker.go
Normal file
39
internal/multiChanTicker/multiChanTicker.go
Normal file
@ -0,0 +1,39 @@
|
||||
package multiChanTicker
|
||||
|
||||
import (
|
||||
"time"
|
||||
)
|
||||
|
||||
type multiChanTicker struct {
|
||||
ticker *time.Ticker
|
||||
channels []chan time.Time
|
||||
}
|
||||
|
||||
type MultiChanTicker interface {
|
||||
Init(duration time.Duration)
|
||||
AddChannel(chan time.Time)
|
||||
}
|
||||
|
||||
func (t *multiChanTicker) Init(duration time.Duration) {
|
||||
t.ticker = time.NewTicker(duration)
|
||||
go func() {
|
||||
for {
|
||||
select {
|
||||
case ts := <-t.ticker.C:
|
||||
for _, c := range t.channels {
|
||||
c <- ts
|
||||
}
|
||||
}
|
||||
}
|
||||
}()
|
||||
}
|
||||
|
||||
func (t *multiChanTicker) AddChannel(channel chan time.Time) {
|
||||
t.channels = append(t.channels, channel)
|
||||
}
|
||||
|
||||
func NewTicker(duration time.Duration) MultiChanTicker {
|
||||
t := &multiChanTicker{}
|
||||
t.Init(duration)
|
||||
return t
|
||||
}
|
@ -8,60 +8,32 @@ import (
|
||||
"os"
|
||||
"os/signal"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/ClusterCockpit/cc-metric-collector/collectors"
|
||||
"github.com/ClusterCockpit/cc-metric-collector/receivers"
|
||||
"github.com/ClusterCockpit/cc-metric-collector/sinks"
|
||||
lp "github.com/influxdata/line-protocol"
|
||||
|
||||
// "strings"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
|
||||
lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
|
||||
mr "github.com/ClusterCockpit/cc-metric-collector/internal/metricRouter"
|
||||
mct "github.com/ClusterCockpit/cc-metric-collector/internal/multiChanTicker"
|
||||
)
|
||||
|
||||
// List of provided collectors. Which collector should be run can be
|
||||
// configured at 'collectors' list in 'config.json'.
|
||||
var Collectors = map[string]collectors.MetricGetter{
|
||||
"likwid": &collectors.LikwidCollector{},
|
||||
"loadavg": &collectors.LoadavgCollector{},
|
||||
"memstat": &collectors.MemstatCollector{},
|
||||
"netstat": &collectors.NetstatCollector{},
|
||||
"ibstat": &collectors.InfinibandCollector{},
|
||||
"lustrestat": &collectors.LustreCollector{},
|
||||
"cpustat": &collectors.CpustatCollector{},
|
||||
"topprocs": &collectors.TopProcsCollector{},
|
||||
"nvidia": &collectors.NvidiaCollector{},
|
||||
"customcmd": &collectors.CustomCmdCollector{},
|
||||
"diskstat": &collectors.DiskstatCollector{},
|
||||
"tempstat": &collectors.TempCollector{},
|
||||
"ipmistat": &collectors.IpmiCollector{},
|
||||
"gpfs": new(collectors.GpfsCollector),
|
||||
"cpufreq": new(collectors.CPUFreqCollector),
|
||||
"cpufreq_cpuinfo": new(collectors.CPUFreqCpuInfoCollector),
|
||||
type CentralConfigFile struct {
|
||||
Interval int `json:"interval"`
|
||||
Duration int `json:"duration"`
|
||||
Pidfile string `json:"pidfile,omitempty"`
|
||||
CollectorConfigFile string `json:"collectors"`
|
||||
RouterConfigFile string `json:"router"`
|
||||
SinkConfigFile string `json:"sinks"`
|
||||
ReceiverConfigFile string `json:"receivers,omitempty"`
|
||||
}
|
||||
|
||||
var Sinks = map[string]sinks.SinkFuncs{
|
||||
"influxdb": &sinks.InfluxSink{},
|
||||
"stdout": &sinks.StdoutSink{},
|
||||
"nats": &sinks.NatsSink{},
|
||||
"http": &sinks.HttpSink{},
|
||||
}
|
||||
|
||||
var Receivers = map[string]receivers.ReceiverFuncs{
|
||||
"nats": &receivers.NatsReceiver{},
|
||||
}
|
||||
|
||||
// Structure of the configuration file
|
||||
type GlobalConfig struct {
|
||||
Sink sinks.SinkConfig `json:"sink"`
|
||||
Interval int `json:"interval"`
|
||||
Duration int `json:"duration"`
|
||||
Collectors []string `json:"collectors"`
|
||||
Receiver receivers.ReceiverConfig `json:"receiver"`
|
||||
DefTags map[string]string `json:"default_tags"`
|
||||
CollectConfigs map[string]json.RawMessage `json:"collect_config"`
|
||||
}
|
||||
|
||||
// Load JSON configuration file
|
||||
func LoadConfiguration(file string, config *GlobalConfig) error {
|
||||
func LoadCentralConfiguration(file string, config *CentralConfigFile) error {
|
||||
configFile, err := os.Open(file)
|
||||
defer configFile.Close()
|
||||
if err != nil {
|
||||
@ -73,6 +45,56 @@ func LoadConfiguration(file string, config *GlobalConfig) error {
|
||||
return err
|
||||
}
|
||||
|
||||
type RuntimeConfig struct {
|
||||
Hostname string
|
||||
Interval time.Duration
|
||||
Duration time.Duration
|
||||
CliArgs map[string]string
|
||||
ConfigFile CentralConfigFile
|
||||
|
||||
Router mr.MetricRouter
|
||||
CollectManager collectors.CollectorManager
|
||||
SinkManager sinks.SinkManager
|
||||
ReceiveManager receivers.ReceiveManager
|
||||
Ticker mct.MultiChanTicker
|
||||
|
||||
Channels []chan lp.CCMetric
|
||||
Sync sync.WaitGroup
|
||||
}
|
||||
|
||||
func prepare_runcfg() RuntimeConfig {
|
||||
return RuntimeConfig{
|
||||
Router: nil,
|
||||
CollectManager: nil,
|
||||
SinkManager: nil,
|
||||
ReceiveManager: nil,
|
||||
}
|
||||
}
|
||||
|
||||
//// Structure of the configuration file
|
||||
//type GlobalConfig struct {
|
||||
// Sink sinks.SinkConfig `json:"sink"`
|
||||
// Interval int `json:"interval"`
|
||||
// Duration int `json:"duration"`
|
||||
// Collectors []string `json:"collectors"`
|
||||
// Receiver receivers.ReceiverConfig `json:"receiver"`
|
||||
// DefTags map[string]string `json:"default_tags"`
|
||||
// CollectConfigs map[string]json.RawMessage `json:"collect_config"`
|
||||
//}
|
||||
|
||||
//// Load JSON configuration file
|
||||
//func LoadConfiguration(file string, config *GlobalConfig) error {
|
||||
// configFile, err := os.Open(file)
|
||||
// defer configFile.Close()
|
||||
// if err != nil {
|
||||
// fmt.Println(err.Error())
|
||||
// return err
|
||||
// }
|
||||
// jsonParser := json.NewDecoder(configFile)
|
||||
// err = jsonParser.Decode(config)
|
||||
// return err
|
||||
//}
|
||||
|
||||
func ReadCli() map[string]string {
|
||||
var m map[string]string
|
||||
cfg := flag.String("config", "./config.json", "Path to configuration file")
|
||||
@ -92,228 +114,168 @@ func ReadCli() map[string]string {
|
||||
return m
|
||||
}
|
||||
|
||||
func SetLogging(logfile string) error {
|
||||
var file *os.File
|
||||
var err error
|
||||
if logfile != "stderr" {
|
||||
file, err = os.OpenFile(logfile, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0600)
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
return err
|
||||
}
|
||||
} else {
|
||||
file = os.Stderr
|
||||
}
|
||||
log.SetOutput(file)
|
||||
return nil
|
||||
}
|
||||
//func SetLogging(logfile string) error {
|
||||
// var file *os.File
|
||||
// var err error
|
||||
// if logfile != "stderr" {
|
||||
// file, err = os.OpenFile(logfile, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0600)
|
||||
// if err != nil {
|
||||
// log.Fatal(err)
|
||||
// return err
|
||||
// }
|
||||
// } else {
|
||||
// file = os.Stderr
|
||||
// }
|
||||
// log.SetOutput(file)
|
||||
// return nil
|
||||
//}
|
||||
|
||||
func CreatePidfile(pidfile string) error {
|
||||
file, err := os.OpenFile(pidfile, os.O_CREATE|os.O_RDWR, 0600)
|
||||
if err != nil {
|
||||
log.Print(err)
|
||||
return err
|
||||
}
|
||||
file.Write([]byte(fmt.Sprintf("%d", os.Getpid())))
|
||||
file.Close()
|
||||
return nil
|
||||
}
|
||||
//func CreatePidfile(pidfile string) error {
|
||||
// file, err := os.OpenFile(pidfile, os.O_CREATE|os.O_RDWR, 0600)
|
||||
// if err != nil {
|
||||
// log.Print(err)
|
||||
// return err
|
||||
// }
|
||||
// file.Write([]byte(fmt.Sprintf("%d", os.Getpid())))
|
||||
// file.Close()
|
||||
// return nil
|
||||
//}
|
||||
|
||||
func RemovePidfile(pidfile string) error {
|
||||
info, err := os.Stat(pidfile)
|
||||
if !os.IsNotExist(err) && !info.IsDir() {
|
||||
os.Remove(pidfile)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
//func RemovePidfile(pidfile string) error {
|
||||
// info, err := os.Stat(pidfile)
|
||||
// if !os.IsNotExist(err) && !info.IsDir() {
|
||||
// os.Remove(pidfile)
|
||||
// }
|
||||
// return nil
|
||||
//}
|
||||
|
||||
// General shutdown function that gets executed in case of interrupt or graceful shutdown
|
||||
func shutdown(wg *sync.WaitGroup, collectors []string, sink sinks.SinkFuncs, recv receivers.ReceiverFuncs, pidfile string) {
|
||||
func shutdown(config *RuntimeConfig) {
|
||||
log.Print("Shutdown...")
|
||||
for _, c := range collectors {
|
||||
col := Collectors[c]
|
||||
log.Print("Stop ", col.Name())
|
||||
col.Close()
|
||||
if config.CollectManager != nil {
|
||||
log.Print("Shutdown CollectManager...")
|
||||
config.CollectManager.Close()
|
||||
}
|
||||
time.Sleep(1 * time.Second)
|
||||
if recv != nil {
|
||||
recv.Close()
|
||||
if config.ReceiveManager != nil {
|
||||
log.Print("Shutdown ReceiveManager...")
|
||||
config.ReceiveManager.Close()
|
||||
}
|
||||
sink.Close()
|
||||
RemovePidfile(pidfile)
|
||||
wg.Done()
|
||||
if config.Router != nil {
|
||||
log.Print("Shutdown Router...")
|
||||
config.Router.Close()
|
||||
}
|
||||
if config.SinkManager != nil {
|
||||
log.Print("Shutdown SinkManager...")
|
||||
config.SinkManager.Close()
|
||||
}
|
||||
|
||||
// pidfile := config.ConfigFile.Pidfile
|
||||
// RemovePidfile(pidfile)
|
||||
// pidfile = config.CliArgs["pidfile"]
|
||||
// RemovePidfile(pidfile)
|
||||
config.Sync.Done()
|
||||
}
|
||||
|
||||
// Register an interrupt handler for Ctrl+C and similar. At signal,
|
||||
// all collectors are closed
|
||||
func prepare_shutdown(wg *sync.WaitGroup, config *GlobalConfig, sink sinks.SinkFuncs, recv receivers.ReceiverFuncs, pidfile string) {
|
||||
func prepare_shutdown(config *RuntimeConfig) {
|
||||
sigs := make(chan os.Signal, 1)
|
||||
signal.Notify(sigs, os.Interrupt)
|
||||
|
||||
go func(wg *sync.WaitGroup) {
|
||||
go func(config *RuntimeConfig) {
|
||||
<-sigs
|
||||
log.Print("Shutdown...")
|
||||
shutdown(wg, config.Collectors, sink, recv, pidfile)
|
||||
}(wg)
|
||||
shutdown(config)
|
||||
}(config)
|
||||
}
|
||||
|
||||
func main() {
|
||||
var config GlobalConfig
|
||||
var wg sync.WaitGroup
|
||||
var recv receivers.ReceiverFuncs = nil
|
||||
var use_recv bool
|
||||
use_recv = false
|
||||
wg.Add(1)
|
||||
host, err := os.Hostname()
|
||||
if err != nil {
|
||||
log.Print(err)
|
||||
return
|
||||
}
|
||||
// Drop domain part of host name
|
||||
host = strings.SplitN(host, `.`, 2)[0]
|
||||
clicfg := ReadCli()
|
||||
err = CreatePidfile(clicfg["pidfile"])
|
||||
err = SetLogging(clicfg["logfile"])
|
||||
if err != nil {
|
||||
log.Print("Error setting up logging system to ", clicfg["logfile"], " on ", host)
|
||||
return
|
||||
}
|
||||
var err error
|
||||
use_recv := false
|
||||
|
||||
rcfg := prepare_runcfg()
|
||||
rcfg.CliArgs = ReadCli()
|
||||
|
||||
// Load and check configuration
|
||||
err = LoadConfiguration(clicfg["configfile"], &config)
|
||||
err = LoadCentralConfiguration(rcfg.CliArgs["configfile"], &rcfg.ConfigFile)
|
||||
if err != nil {
|
||||
log.Print("Error reading configuration file ", clicfg["configfile"])
|
||||
log.Print("Error reading configuration file ", rcfg.CliArgs["configfile"])
|
||||
log.Print(err.Error())
|
||||
return
|
||||
}
|
||||
if config.Interval <= 0 || time.Duration(config.Interval)*time.Second <= 0 {
|
||||
if rcfg.ConfigFile.Interval <= 0 || time.Duration(rcfg.ConfigFile.Interval)*time.Second <= 0 {
|
||||
log.Print("Configuration value 'interval' must be greater than zero")
|
||||
return
|
||||
}
|
||||
if config.Duration <= 0 {
|
||||
rcfg.Interval = time.Duration(rcfg.ConfigFile.Interval) * time.Second
|
||||
if rcfg.ConfigFile.Duration <= 0 || time.Duration(rcfg.ConfigFile.Duration)*time.Second <= 0 {
|
||||
log.Print("Configuration value 'duration' must be greater than zero")
|
||||
return
|
||||
}
|
||||
if len(config.Collectors) == 0 {
|
||||
var keys []string
|
||||
for k := range Collectors {
|
||||
keys = append(keys, k)
|
||||
}
|
||||
log.Print("Configuration value 'collectors' does not contain any collector. Available: ", strings.Join(keys, ", "))
|
||||
return
|
||||
}
|
||||
for _, name := range config.Collectors {
|
||||
if _, found := Collectors[name]; !found {
|
||||
log.Print("Invalid collector '", name, "' in configuration")
|
||||
return
|
||||
}
|
||||
}
|
||||
if _, found := Sinks[config.Sink.Type]; !found {
|
||||
log.Print("Invalid sink type '", config.Sink.Type, "' in configuration")
|
||||
return
|
||||
}
|
||||
// Setup sink
|
||||
sink := Sinks[config.Sink.Type]
|
||||
err = sink.Init(config.Sink)
|
||||
rcfg.Duration = time.Duration(rcfg.ConfigFile.Duration) * time.Second
|
||||
|
||||
rcfg.Hostname, err = os.Hostname()
|
||||
if err != nil {
|
||||
log.Print(err)
|
||||
log.Print(err.Error())
|
||||
return
|
||||
}
|
||||
// Setup receiver
|
||||
if len(config.Receiver.Type) > 0 && config.Receiver.Type != "none" {
|
||||
if _, found := Receivers[config.Receiver.Type]; !found {
|
||||
log.Print("Invalid receiver type '", config.Receiver.Type, "' in configuration")
|
||||
return
|
||||
} else {
|
||||
recv = Receivers[config.Receiver.Type]
|
||||
err = recv.Init(config.Receiver, sink)
|
||||
if err == nil {
|
||||
use_recv = true
|
||||
} else {
|
||||
log.Print(err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Register interrupt handler
|
||||
prepare_shutdown(&wg, &config, sink, recv, clicfg["pidfile"])
|
||||
|
||||
// Initialize all collectors
|
||||
tmp := make([]string, 0)
|
||||
for _, c := range config.Collectors {
|
||||
col := Collectors[c]
|
||||
conf, found := config.CollectConfigs[c]
|
||||
if !found {
|
||||
conf = json.RawMessage("")
|
||||
}
|
||||
err = col.Init([]byte(conf))
|
||||
// Drop domain part of host name
|
||||
rcfg.Hostname = strings.SplitN(rcfg.Hostname, `.`, 2)[0]
|
||||
// err = CreatePidfile(rcfg.CliArgs["pidfile"])
|
||||
// err = SetLogging(rcfg.CliArgs["logfile"])
|
||||
// if err != nil {
|
||||
// log.Print("Error setting up logging system to ", rcfg.CliArgs["logfile"], " on ", rcfg.Hostname)
|
||||
// return
|
||||
// }
|
||||
rcfg.Ticker = mct.NewTicker(rcfg.Interval)
|
||||
if len(rcfg.ConfigFile.RouterConfigFile) > 0 {
|
||||
rcfg.Router, err = mr.New(rcfg.Ticker, &rcfg.Sync, rcfg.ConfigFile.RouterConfigFile)
|
||||
if err != nil {
|
||||
log.Print("SKIP ", col.Name(), " (", err.Error(), ")")
|
||||
} else if !col.Initialized() {
|
||||
log.Print("SKIP ", col.Name(), " (Not initialized)")
|
||||
} else {
|
||||
log.Print("Start ", col.Name())
|
||||
tmp = append(tmp, c)
|
||||
log.Print(err.Error())
|
||||
return
|
||||
}
|
||||
}
|
||||
config.Collectors = tmp
|
||||
config.DefTags["hostname"] = host
|
||||
|
||||
// Setup up ticker loop
|
||||
if clicfg["once"] != "true" {
|
||||
log.Print("Running loop every ", time.Duration(config.Interval)*time.Second)
|
||||
} else {
|
||||
log.Print("Running loop only once")
|
||||
if len(rcfg.ConfigFile.SinkConfigFile) > 0 {
|
||||
rcfg.SinkManager, err = sinks.New(&rcfg.Sync, rcfg.ConfigFile.SinkConfigFile)
|
||||
if err != nil {
|
||||
log.Print(err.Error())
|
||||
return
|
||||
}
|
||||
RouterToSinksChannel := make(chan lp.CCMetric)
|
||||
rcfg.SinkManager.AddInput(RouterToSinksChannel)
|
||||
rcfg.Router.AddOutput(RouterToSinksChannel)
|
||||
}
|
||||
ticker := time.NewTicker(time.Duration(config.Interval) * time.Second)
|
||||
done := make(chan bool)
|
||||
if len(rcfg.ConfigFile.CollectorConfigFile) > 0 {
|
||||
rcfg.CollectManager, err = collectors.New(rcfg.Ticker, rcfg.Duration, &rcfg.Sync, rcfg.ConfigFile.CollectorConfigFile)
|
||||
if err != nil {
|
||||
log.Print(err.Error())
|
||||
return
|
||||
}
|
||||
CollectToRouterChannel := make(chan lp.CCMetric)
|
||||
rcfg.CollectManager.AddOutput(CollectToRouterChannel)
|
||||
rcfg.Router.AddInput(CollectToRouterChannel)
|
||||
}
|
||||
if len(rcfg.ConfigFile.ReceiverConfigFile) > 0 {
|
||||
rcfg.ReceiveManager, err = receivers.New(&rcfg.Sync, rcfg.ConfigFile.ReceiverConfigFile)
|
||||
if err != nil {
|
||||
log.Print(err.Error())
|
||||
return
|
||||
}
|
||||
ReceiveToRouterChannel := make(chan lp.CCMetric)
|
||||
rcfg.ReceiveManager.AddOutput(ReceiveToRouterChannel)
|
||||
rcfg.Router.AddInput(ReceiveToRouterChannel)
|
||||
use_recv = true
|
||||
}
|
||||
prepare_shutdown(&rcfg)
|
||||
rcfg.Sync.Add(1)
|
||||
rcfg.Router.Start()
|
||||
rcfg.SinkManager.Start()
|
||||
rcfg.CollectManager.Start()
|
||||
|
||||
// Storage for all node metrics
|
||||
tmpPoints := make([]lp.MutableMetric, 0)
|
||||
|
||||
// Start receiver
|
||||
if use_recv {
|
||||
recv.Start()
|
||||
rcfg.ReceiveManager.Start()
|
||||
}
|
||||
|
||||
go func() {
|
||||
for {
|
||||
select {
|
||||
case <-done:
|
||||
return
|
||||
case t := <-ticker.C:
|
||||
|
||||
// Read all collectors are sort the results in the right
|
||||
// storage locations
|
||||
for _, c := range config.Collectors {
|
||||
col := Collectors[c]
|
||||
col.Read(time.Duration(config.Duration)*time.Second, &tmpPoints)
|
||||
|
||||
for {
|
||||
if len(tmpPoints) == 0 {
|
||||
break
|
||||
}
|
||||
p := tmpPoints[0]
|
||||
for k, v := range config.DefTags {
|
||||
p.AddTag(k, v)
|
||||
p.SetTime(t)
|
||||
}
|
||||
sink.Write(p)
|
||||
tmpPoints = tmpPoints[1:]
|
||||
}
|
||||
}
|
||||
|
||||
if err := sink.Flush(); err != nil {
|
||||
log.Printf("sink error: %s\n", err)
|
||||
}
|
||||
if clicfg["once"] == "true" {
|
||||
shutdown(&wg, config.Collectors, sink, recv, clicfg["pidfile"])
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
}()
|
||||
|
||||
// Wait until receiving an interrupt
|
||||
wg.Wait()
|
||||
rcfg.Sync.Wait()
|
||||
}
|
||||
|
8
receivers.json
Normal file
8
receivers.json
Normal file
@ -0,0 +1,8 @@
|
||||
[
|
||||
{
|
||||
"type": "nats",
|
||||
"address": "nats://my-url",
|
||||
"port" : "4222",
|
||||
"database": "testcluster"
|
||||
}
|
||||
]
|
@ -1,35 +1,44 @@
|
||||
This folder contains the receivers for the cc-metric-collector.
|
||||
# CCMetric receivers
|
||||
|
||||
# `metricReceiver.go`
|
||||
The base class/configuration is located in `metricReceiver.go`.
|
||||
This folder contains the ReceiveManager and receiver implementations for the cc-metric-collector.
|
||||
|
||||
# Receivers
|
||||
* `natsReceiver.go`: Receives metrics from the Nats transport system in Influx line protocol encoding. The database name is used as subscription subject for the NATS messages. It uses https://github.com/nats-io/nats.go
|
||||
# Configuration
|
||||
|
||||
# Installation
|
||||
Nothing to do, all receivers are pure Go code
|
||||
|
||||
# Receiver configuration
|
||||
The configuration file for the receivers is a list of configurations. The `type` field in each specifies which receiver to initialize.
|
||||
|
||||
```json
|
||||
"receiver": {
|
||||
[
|
||||
{
|
||||
"type": "nats",
|
||||
"address": "nats://my-url"
|
||||
"address": "nats://my-url",
|
||||
"port" : "4222",
|
||||
"database": "testcluster"
|
||||
},
|
||||
}
|
||||
]
|
||||
```
|
||||
|
||||
## `nats`
|
||||
|
||||
The receiver connects to `address` and `port` and subscribes itself for all messages with topic `database`. The default port is `4222`.
|
||||
## Type `nats`
|
||||
|
||||
```json
|
||||
{
|
||||
"type": "nats",
|
||||
"address": "<nats-URI or hostname>",
|
||||
"port" : "<portnumber>",
|
||||
"database": "<subscribe topic>"
|
||||
}
|
||||
```
|
||||
|
||||
The `nats` receiver subscribes to the topic `database` and listens on `address` and `port` for metrics in the InfluxDB line protocol.
|
||||
|
||||
# Contributing own receivers
|
||||
A receiver contains three functions and is derived from the type `Receiver` (in `metricReceiver.go`):
|
||||
* `Init(config ReceiverConfig) error`
|
||||
* `Start() error`
|
||||
* `Close()`
|
||||
* `Name() string`
|
||||
* `SetSink(sink chan ccMetric.CCMetric)`
|
||||
|
||||
The data structures should be set up in `Init()` like opening a file or server connection. The `Start()` function should either start a go routine or issue some other asynchronous mechanism for receiving metrics. The `Close()` function should tear down anything created in `Init()`.
|
||||
|
||||
Finally, the receiver needs to be registered in the `metric-collector.go`. There is a list of receivers called `Receivers` which is a map (string -> pointer to receiver). Add a new entry with a descriptive name and the new receiver.
|
||||
Finally, the receiver needs to be registered in the `receiveManager.go`. There is a list of receivers called `AvailableReceivers` which is a map (`receiver_type_string` -> `pointer to Receiver interface`). Add a new entry with a descriptive name and the new receiver.
|
||||
|
@ -2,30 +2,41 @@ package receivers
|
||||
|
||||
import (
|
||||
// "time"
|
||||
s "github.com/ClusterCockpit/cc-metric-collector/sinks"
|
||||
lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
|
||||
influx "github.com/influxdata/line-protocol"
|
||||
)
|
||||
|
||||
type ReceiverConfig struct {
|
||||
Addr string `json:"address"`
|
||||
Port string `json:"port"`
|
||||
Database string `json:"database"`
|
||||
Type string `json:"type"`
|
||||
Addr string `json:"address"`
|
||||
Port string `json:"port"`
|
||||
Database string `json:"database"`
|
||||
Organization string `json:"organization,omitempty"`
|
||||
Type string `json:"type"`
|
||||
}
|
||||
|
||||
type Receiver struct {
|
||||
type receiver struct {
|
||||
name string
|
||||
addr string
|
||||
port string
|
||||
database string
|
||||
organization string
|
||||
sink s.SinkFuncs
|
||||
sink chan lp.CCMetric
|
||||
}
|
||||
|
||||
type ReceiverFuncs interface {
|
||||
Init(config ReceiverConfig, sink s.SinkFuncs) error
|
||||
type Receiver interface {
|
||||
Init(config ReceiverConfig) error
|
||||
Start()
|
||||
Close()
|
||||
Name() string
|
||||
SetSink(sink chan lp.CCMetric)
|
||||
}
|
||||
|
||||
func (r *receiver) Name() string {
|
||||
return r.name
|
||||
}
|
||||
|
||||
func (r *receiver) SetSink(sink chan lp.CCMetric) {
|
||||
r.sink = sink
|
||||
}
|
||||
|
||||
func Tags2Map(metric influx.Metric) map[string]string {
|
||||
|
@ -2,56 +2,68 @@ package receivers
|
||||
|
||||
import (
|
||||
"errors"
|
||||
s "github.com/ClusterCockpit/cc-metric-collector/sinks"
|
||||
lp "github.com/influxdata/line-protocol"
|
||||
"fmt"
|
||||
lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
|
||||
influx "github.com/influxdata/line-protocol"
|
||||
nats "github.com/nats-io/nats.go"
|
||||
"log"
|
||||
"time"
|
||||
)
|
||||
|
||||
type NatsReceiverConfig struct {
|
||||
Addr string `json:"address"`
|
||||
Port string `json:"port"`
|
||||
Database string `json:"database"`
|
||||
}
|
||||
|
||||
type NatsReceiver struct {
|
||||
Receiver
|
||||
receiver
|
||||
nc *nats.Conn
|
||||
handler *lp.MetricHandler
|
||||
parser *lp.Parser
|
||||
handler *influx.MetricHandler
|
||||
parser *influx.Parser
|
||||
meta map[string]string
|
||||
config ReceiverConfig
|
||||
}
|
||||
|
||||
var DefaultTime = func() time.Time {
|
||||
return time.Unix(42, 0)
|
||||
}
|
||||
|
||||
func (r *NatsReceiver) Init(config ReceiverConfig, sink s.SinkFuncs) error {
|
||||
if len(config.Addr) == 0 ||
|
||||
len(config.Port) == 0 ||
|
||||
len(config.Database) == 0 {
|
||||
func (r *NatsReceiver) Init(config ReceiverConfig) error {
|
||||
r.name = "NatsReceiver"
|
||||
r.config = config
|
||||
if len(r.config.Addr) == 0 ||
|
||||
len(r.config.Port) == 0 ||
|
||||
len(r.config.Database) == 0 {
|
||||
return errors.New("Not all configuration variables set required by NatsReceiver")
|
||||
}
|
||||
r.addr = config.Addr
|
||||
r.meta = map[string]string{"source": r.name}
|
||||
r.addr = r.config.Addr
|
||||
if len(r.addr) == 0 {
|
||||
r.addr = nats.DefaultURL
|
||||
}
|
||||
r.port = config.Port
|
||||
r.port = r.config.Port
|
||||
if len(r.port) == 0 {
|
||||
r.port = "4222"
|
||||
}
|
||||
log.Print("Init NATS Receiver")
|
||||
nc, err := nats.Connect(r.addr)
|
||||
log.Print("[NatsReceiver] INIT")
|
||||
uri := fmt.Sprintf("%s:%s", r.addr, r.port)
|
||||
nc, err := nats.Connect(uri)
|
||||
if err == nil {
|
||||
r.database = config.Database
|
||||
r.sink = sink
|
||||
r.database = r.config.Database
|
||||
r.nc = nc
|
||||
} else {
|
||||
log.Print(err)
|
||||
r.nc = nil
|
||||
return err
|
||||
}
|
||||
r.handler = lp.NewMetricHandler()
|
||||
r.parser = lp.NewParser(r.handler)
|
||||
r.handler = influx.NewMetricHandler()
|
||||
r.parser = influx.NewParser(r.handler)
|
||||
r.parser.SetTimeFunc(DefaultTime)
|
||||
return err
|
||||
}
|
||||
|
||||
func (r *NatsReceiver) Start() {
|
||||
log.Print("Start NATS Receiver")
|
||||
log.Print("[NatsReceiver] START")
|
||||
r.nc.Subscribe(r.database, r._NatsReceive)
|
||||
}
|
||||
|
||||
@ -59,9 +71,13 @@ func (r *NatsReceiver) _NatsReceive(m *nats.Msg) {
|
||||
metrics, err := r.parser.Parse(m.Data)
|
||||
if err == nil {
|
||||
for _, m := range metrics {
|
||||
y, err := lp.New(m.Name(), Tags2Map(m), Fields2Map(m), m.Time())
|
||||
if err == nil {
|
||||
r.sink.Write(y)
|
||||
y := lp.FromInfluxMetric(m)
|
||||
for k, v := range r.meta {
|
||||
y.AddMeta(k, v)
|
||||
}
|
||||
//y, err := lp.New(m.Name(), Tags2Map(m), r.meta, Fields2Map(m), m.Time())
|
||||
if r.sink != nil {
|
||||
r.sink <- y
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -69,7 +85,7 @@ func (r *NatsReceiver) _NatsReceive(m *nats.Msg) {
|
||||
|
||||
func (r *NatsReceiver) Close() {
|
||||
if r.nc != nil {
|
||||
log.Print("Close NATS Receiver")
|
||||
log.Print("[NatsReceiver] CLOSE")
|
||||
r.nc.Close()
|
||||
}
|
||||
}
|
||||
|
153
receivers/receiveManager.go
Normal file
153
receivers/receiveManager.go
Normal file
@ -0,0 +1,153 @@
|
||||
package receivers
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
|
||||
"log"
|
||||
"os"
|
||||
"sync"
|
||||
)
|
||||
|
||||
var AvailableReceivers = map[string]Receiver{
|
||||
"nats": &NatsReceiver{},
|
||||
}
|
||||
|
||||
type receiveManager struct {
|
||||
inputs []Receiver
|
||||
output chan lp.CCMetric
|
||||
done chan bool
|
||||
wg *sync.WaitGroup
|
||||
config []ReceiverConfig
|
||||
}
|
||||
|
||||
type ReceiveManager interface {
|
||||
Init(wg *sync.WaitGroup, receiverConfigFile string) error
|
||||
AddInput(rawConfig json.RawMessage) error
|
||||
AddOutput(output chan lp.CCMetric)
|
||||
Start()
|
||||
Close()
|
||||
}
|
||||
|
||||
func (rm *receiveManager) Init(wg *sync.WaitGroup, receiverConfigFile string) error {
|
||||
rm.inputs = make([]Receiver, 0)
|
||||
rm.output = nil
|
||||
rm.done = make(chan bool)
|
||||
rm.wg = wg
|
||||
rm.config = make([]ReceiverConfig, 0)
|
||||
configFile, err := os.Open(receiverConfigFile)
|
||||
if err != nil {
|
||||
log.Print(err.Error())
|
||||
return err
|
||||
}
|
||||
defer configFile.Close()
|
||||
jsonParser := json.NewDecoder(configFile)
|
||||
var rawConfigs []json.RawMessage
|
||||
err = jsonParser.Decode(&rawConfigs)
|
||||
if err != nil {
|
||||
log.Print(err.Error())
|
||||
return err
|
||||
}
|
||||
for _, raw := range rawConfigs {
|
||||
log.Print("[ReceiveManager] ", string(raw))
|
||||
rm.AddInput(raw)
|
||||
// if _, found := AvailableReceivers[k.Type]; !found {
|
||||
// log.Print("[ReceiveManager] SKIP Config specifies unknown receiver 'type': ", k.Type)
|
||||
// continue
|
||||
// }
|
||||
// r := AvailableReceivers[k.Type]
|
||||
// err = r.Init(k)
|
||||
// if err != nil {
|
||||
// log.Print("[ReceiveManager] SKIP Receiver ", k.Type, " cannot be initialized: ", err.Error())
|
||||
// continue
|
||||
// }
|
||||
// rm.inputs = append(rm.inputs, r)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (rm *receiveManager) Start() {
|
||||
rm.wg.Add(1)
|
||||
|
||||
for _, r := range rm.inputs {
|
||||
log.Print("[ReceiveManager] START ", r.Name())
|
||||
r.Start()
|
||||
}
|
||||
log.Print("[ReceiveManager] STARTED\n")
|
||||
// go func() {
|
||||
// for {
|
||||
//ReceiveManagerLoop:
|
||||
// select {
|
||||
// case <- rm.done:
|
||||
// log.Print("ReceiveManager done\n")
|
||||
// rm.wg.Done()
|
||||
// break ReceiveManagerLoop
|
||||
// default:
|
||||
// for _, c := range rm.inputs {
|
||||
//ReceiveManagerInputLoop:
|
||||
// select {
|
||||
// case <- rm.done:
|
||||
// log.Print("ReceiveManager done\n")
|
||||
// rm.wg.Done()
|
||||
// break ReceiveManagerInputLoop
|
||||
// case p := <- c:
|
||||
// log.Print("ReceiveManager: ", p)
|
||||
// rm.output <- p
|
||||
// default:
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
// }()
|
||||
// for _, r := range rm.inputs {
|
||||
// r.Close()
|
||||
// }
|
||||
}
|
||||
|
||||
func (rm *receiveManager) AddInput(rawConfig json.RawMessage) error {
|
||||
var config ReceiverConfig
|
||||
err := json.Unmarshal(rawConfig, &config)
|
||||
if err != nil {
|
||||
log.Print("[ReceiveManager] SKIP ", config.Type, " JSON config error: ", err.Error())
|
||||
log.Print(err.Error())
|
||||
return err
|
||||
}
|
||||
if _, found := AvailableReceivers[config.Type]; !found {
|
||||
log.Print("[ReceiveManager] SKIP ", config.Type, " unknown receiver: ", err.Error())
|
||||
return err
|
||||
}
|
||||
r := AvailableReceivers[config.Type]
|
||||
err = r.Init(config)
|
||||
if err != nil {
|
||||
log.Print("[ReceiveManager] SKIP ", r.Name(), " initialization failed: ", err.Error())
|
||||
return err
|
||||
}
|
||||
rm.inputs = append(rm.inputs, r)
|
||||
rm.config = append(rm.config, config)
|
||||
return nil
|
||||
}
|
||||
|
||||
func (rm *receiveManager) AddOutput(output chan lp.CCMetric) {
|
||||
rm.output = output
|
||||
for _, r := range rm.inputs {
|
||||
r.SetSink(rm.output)
|
||||
}
|
||||
}
|
||||
|
||||
func (rm *receiveManager) Close() {
|
||||
for _, r := range rm.inputs {
|
||||
log.Print("[ReceiveManager] CLOSE ", r.Name())
|
||||
r.Close()
|
||||
}
|
||||
rm.wg.Done()
|
||||
log.Print("[ReceiveManager] CLOSE\n")
|
||||
log.Print("[ReceiveManager] EXIT\n")
|
||||
}
|
||||
|
||||
func New(wg *sync.WaitGroup, receiverConfigFile string) (ReceiveManager, error) {
|
||||
r := &receiveManager{}
|
||||
err := r.Init(wg, receiverConfigFile)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return r, err
|
||||
}
|
22
router.json
Normal file
22
router.json
Normal file
@ -0,0 +1,22 @@
|
||||
{
|
||||
"add_tags" : [
|
||||
{
|
||||
"key" : "cluster",
|
||||
"value" : "testcluster",
|
||||
"if" : "*"
|
||||
},
|
||||
{
|
||||
"key" : "test",
|
||||
"value" : "testing",
|
||||
"if" : "name == 'temp_package_id_0'"
|
||||
}
|
||||
],
|
||||
"delete_tags" : [
|
||||
{
|
||||
"key" : "unit",
|
||||
"value" : "*",
|
||||
"if" : "*"
|
||||
}
|
||||
],
|
||||
"interval_timestamp" : true
|
||||
}
|
6
sinks.json
Normal file
6
sinks.json
Normal file
@ -0,0 +1,6 @@
|
||||
[
|
||||
{
|
||||
"type" : "stdout",
|
||||
"meta_as_tags" : true
|
||||
}
|
||||
]
|
126
sinks/README.md
126
sinks/README.md
@ -1,65 +1,99 @@
|
||||
This folder contains the sinks for the cc-metric-collector.
|
||||
# CCMetric sinks
|
||||
|
||||
# `metricSink.go`
|
||||
The base class/configuration is located in `metricSink.go`.
|
||||
This folder contains the SinkManager and sink implementations for the cc-metric-collector.
|
||||
|
||||
# Sinks
|
||||
* `stdoutSink.go`: Writes all metrics to `stdout` in InfluxDB line protocol. The sink does not use https://github.com/influxdata/line-protocol to reduce the executed code for debugging
|
||||
* `influxSink.go`: Writes all metrics to an InfluxDB database instance using a blocking writer. It uses https://github.com/influxdata/influxdb-client-go . Configuration for the server, port, ssl, password, database name and organisation are in the global configuration file. The 'password' is used for the token and the 'database' for the bucket. It uses the v2 API of Influx.
|
||||
* `natsSink.go`: Sends all metrics to an NATS server using the InfluxDB line protocol as encoding. It uses https://github.com/nats-io/nats.go . Configuration for the server, port, user, password and database name are in the global configuration file. The database name is used as subject for the NATS messages.
|
||||
* `httpSink.go`: Sends all metrics to an HTTP endpoint `http://<host>:<port>/<database>` using a POST request. The body of the request will consist of lines in the InfluxDB line protocol. In case password is specified, that password is used as a JWT in the 'Authorization' header.
|
||||
# Configuration
|
||||
|
||||
# Installation
|
||||
Nothing to do, all sinks are pure Go code
|
||||
|
||||
# Sink configuration
|
||||
The configuration file for the sinks is a list of configurations. The `type` field in each specifies which sink to initialize.
|
||||
|
||||
```json
|
||||
"sink": {
|
||||
"user": "testuser",
|
||||
"password": "testpass",
|
||||
"host": "127.0.0.1",
|
||||
"port": "9090",
|
||||
"database": "testdb",
|
||||
"organization": "testorg",
|
||||
"ssl": false
|
||||
"type": "stdout"
|
||||
[
|
||||
{
|
||||
"type" : "stdout",
|
||||
"meta_as_tags" : false
|
||||
},
|
||||
{
|
||||
"type" : "http",
|
||||
"host" : "localhost",
|
||||
"port" : "4123",
|
||||
"database" : "ccmetric",
|
||||
"password" : "<jwt token>"
|
||||
}
|
||||
]
|
||||
```
|
||||
|
||||
## `stdout`
|
||||
When configuring `type = stdout`, all metrics are printed to stdout. No further configuration is required or touched, so you can leave your other-sink-config in there and just change the `type` for debugging purposes
|
||||
This example initializes two sinks, the `stdout` sink printing all metrics to the STDOUT and the `http` sink with the given `host`, `port`, `database` and `password`.
|
||||
|
||||
## `influxdb`
|
||||
The InfluxDB sink uses blocking write operations to write to an InfluxDB database using the v2 API. It uses the following configuration options:
|
||||
* `host`: Hostname of the database instance
|
||||
* `port`: Portnumber (as string) of the database
|
||||
* `database`: Name of the database, called 'bucket' in InfluxDB v2
|
||||
* `organization`: The InfluxDB v2 API uses organizations to separate database instances running on the same host
|
||||
* `ssl`: Boolean to activate SSL/TLS
|
||||
* `user`: Although the v2 API uses API keys instead of username and password, this field can be used if the sink should authentificate with `username:password`. If you want to use an API key, leave this field empty.
|
||||
* `password`: API key for the InfluxDB v2 API or password if `user` is set
|
||||
If `meta_as_tags` is set, all meta information attached to CCMetric are printed out as tags.
|
||||
|
||||
## `nats`
|
||||
* `host`: Hostname of the NATS server
|
||||
* `port`: Portnumber (as string) of the NATS server
|
||||
* `user`: Username for authentification in the NATS transport system
|
||||
* `password`: Password for authentification in the NATS transport system
|
||||
## Type `stdout`
|
||||
|
||||
```json
|
||||
{
|
||||
"type" : "stdout",
|
||||
"meta_as_tags" : <true|false>
|
||||
}
|
||||
```
|
||||
|
||||
The `stdout` sink dumps all metrics to the STDOUT.
|
||||
|
||||
## Type `http`
|
||||
|
||||
```json
|
||||
{
|
||||
"type" : "http",
|
||||
"host" : "<hostname>",
|
||||
"port" : "<portnumber>",
|
||||
"database" : "<database name>",
|
||||
"password" : "<jwt token>",
|
||||
"meta_as_tags" : <true|false>
|
||||
}
|
||||
```
|
||||
The sink uses POST requests to send metrics to `http://<host>:<port>/<database>` using the JWT token as a JWT in the 'Authorization' header.
|
||||
|
||||
## Type `nats`
|
||||
|
||||
```json
|
||||
{
|
||||
"type" : "nats",
|
||||
"host" : "<hostname>",
|
||||
"port" : "<portnumber>",
|
||||
"user" : "<username>",
|
||||
"password" : "<password>",
|
||||
"database" : "<database name>"
|
||||
"meta_as_tags" : <true|false>
|
||||
}
|
||||
```
|
||||
|
||||
This sink publishes the CCMetric in a NATS environment using `host`, `port`, `user` and `password` for connecting. The metrics are published using the topic `database`.
|
||||
|
||||
## Type `influxdb`
|
||||
|
||||
```json
|
||||
{
|
||||
"type" : "influxdb",
|
||||
"host" : "<hostname>",
|
||||
"port" : "<portnumber>",
|
||||
"user" : "<username>",
|
||||
"password" : "<password or API key>",
|
||||
"database" : "<database name>"
|
||||
"organization": "<InfluxDB v2 organization>",
|
||||
"ssl" : <true|false>,
|
||||
"meta_as_tags" : <true|false>
|
||||
}
|
||||
```
|
||||
|
||||
This sink submits the CCMetrics to an InfluxDB time-series database. It uses `host`, `port` and `ssl` for connecting. For authentification, it uses either `user:password` if `user` is set and only `password` as API key. The `organization` and `database` are used for writing to the correct database.
|
||||
|
||||
## `http`
|
||||
* `host`: Hostname of the HTTP server
|
||||
* `port`: Portnumber (as string) of the HTTP server
|
||||
* `database`: Endpoint to write to. HTTP POST requests are performed on `http://<host>:<port>/<database>`
|
||||
* `password`: JSON Web token used for authentification
|
||||
|
||||
|
||||
# Contributing own sinks
|
||||
A sink contains three functions and is derived from the type `Sink` (in `metricSink.go`):
|
||||
A sink contains three functions and is derived from the type `Sink`:
|
||||
* `Init(config SinkConfig) error`
|
||||
* `Write(measurement string, tags map[string]string, fields map[string]interface{}, t time.Time) error`
|
||||
* `Write(point CCMetric) error`
|
||||
* `Flush() error`
|
||||
* `Close()`
|
||||
|
||||
The data structures should be set up in `Init()` like opening a file or server connection. The `Write()` function takes a measurement, tags, fields and a timestamp and writes/sends the data. For non-blocking sinks, the `Flush()` method tells the sink to drain its internal buffers. The `Close()` function should tear down anything created in `Init()`.
|
||||
The data structures should be set up in `Init()` like opening a file or server connection. The `Write()` function writes/sends the data. For non-blocking sinks, the `Flush()` method tells the sink to drain its internal buffers. The `Close()` function should tear down anything created in `Init()`.
|
||||
|
||||
Finally, the sink needs to be registered in the `metric-collector.go`. There is a list of sinks called `Sinks` which is a map (sink_type_string -> pointer to sink). Add a new entry with a descriptive name and the new sink.
|
||||
Finally, the sink needs to be registered in the `sinkManager.go`. There is a list of sinks called `AvailableSinks` which is a map (`sink_type_string` -> `pointer to sink interface`). Add a new entry with a descriptive name and the new sink.
|
||||
|
@ -7,19 +7,21 @@ import (
|
||||
"net/http"
|
||||
"time"
|
||||
|
||||
lp "github.com/influxdata/line-protocol"
|
||||
lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
|
||||
influx "github.com/influxdata/line-protocol"
|
||||
)
|
||||
|
||||
type HttpSink struct {
|
||||
Sink
|
||||
sink
|
||||
client *http.Client
|
||||
url, jwt string
|
||||
encoder *lp.Encoder
|
||||
encoder *influx.Encoder
|
||||
buffer *bytes.Buffer
|
||||
}
|
||||
|
||||
func (s *HttpSink) Init(config SinkConfig) error {
|
||||
if len(config.Host) == 0 || len(config.Port) == 0 {
|
||||
func (s *HttpSink) Init(config sinkConfig) error {
|
||||
s.name = "HttpSink"
|
||||
if len(config.Host) == 0 || len(config.Port) == 0 || len(config.Database) == 0 {
|
||||
return errors.New("`host`, `port` and `database` config options required for TCP sink")
|
||||
}
|
||||
|
||||
@ -28,13 +30,13 @@ func (s *HttpSink) Init(config SinkConfig) error {
|
||||
s.port = config.Port
|
||||
s.jwt = config.Password
|
||||
s.buffer = &bytes.Buffer{}
|
||||
s.encoder = lp.NewEncoder(s.buffer)
|
||||
s.encoder = influx.NewEncoder(s.buffer)
|
||||
s.encoder.SetPrecision(time.Second)
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *HttpSink) Write(point lp.MutableMetric) error {
|
||||
func (s *HttpSink) Write(point lp.CCMetric) error {
|
||||
_, err := s.encoder.Encode(point)
|
||||
return err
|
||||
}
|
||||
|
@ -5,15 +5,14 @@ import (
|
||||
"crypto/tls"
|
||||
"errors"
|
||||
"fmt"
|
||||
|
||||
lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
|
||||
influxdb2 "github.com/influxdata/influxdb-client-go/v2"
|
||||
influxdb2Api "github.com/influxdata/influxdb-client-go/v2/api"
|
||||
lp "github.com/influxdata/line-protocol"
|
||||
"log"
|
||||
)
|
||||
|
||||
type InfluxSink struct {
|
||||
Sink
|
||||
sink
|
||||
client influxdb2.Client
|
||||
writeApi influxdb2Api.WriteAPIBlocking
|
||||
retPolicy string
|
||||
@ -39,7 +38,8 @@ func (s *InfluxSink) connect() error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *InfluxSink) Init(config SinkConfig) error {
|
||||
func (s *InfluxSink) Init(config sinkConfig) error {
|
||||
s.name = "InfluxSink"
|
||||
if len(config.Host) == 0 ||
|
||||
len(config.Port) == 0 ||
|
||||
len(config.Database) == 0 ||
|
||||
@ -54,15 +54,21 @@ func (s *InfluxSink) Init(config SinkConfig) error {
|
||||
s.user = config.User
|
||||
s.password = config.Password
|
||||
s.ssl = config.SSL
|
||||
s.meta_as_tags = config.MetaAsTags
|
||||
return s.connect()
|
||||
}
|
||||
|
||||
func (s *InfluxSink) Write(point lp.MutableMetric) error {
|
||||
func (s *InfluxSink) Write(point lp.CCMetric) error {
|
||||
tags := map[string]string{}
|
||||
fields := map[string]interface{}{}
|
||||
for _, t := range point.TagList() {
|
||||
tags[t.Key] = t.Value
|
||||
}
|
||||
if s.meta_as_tags {
|
||||
for _, m := range point.MetaList() {
|
||||
tags[m.Key] = m.Value
|
||||
}
|
||||
}
|
||||
for _, f := range point.FieldList() {
|
||||
fields[f.Key] = f.Value
|
||||
}
|
||||
|
@ -2,21 +2,22 @@ package sinks
|
||||
|
||||
import (
|
||||
// "time"
|
||||
lp "github.com/influxdata/line-protocol"
|
||||
lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
|
||||
)
|
||||
|
||||
type SinkConfig struct {
|
||||
Host string `json:"host"`
|
||||
Port string `json:"port"`
|
||||
Database string `json:"database"`
|
||||
User string `json:"user"`
|
||||
Password string `json:"password"`
|
||||
Organization string `json:"organization"`
|
||||
type sinkConfig struct {
|
||||
Type string `json:"type"`
|
||||
SSL bool `json:"ssl"`
|
||||
Host string `json:"host,omitempty"`
|
||||
Port string `json:"port,omitempty"`
|
||||
Database string `json:"database,omitempty"`
|
||||
User string `json:"user,omitempty"`
|
||||
Password string `json:"password,omitempty"`
|
||||
Organization string `json:"organization,omitempty"`
|
||||
SSL bool `json:"ssl,omitempty"`
|
||||
MetaAsTags bool `json:"meta_as_tags,omitempty"`
|
||||
}
|
||||
|
||||
type Sink struct {
|
||||
type sink struct {
|
||||
host string
|
||||
port string
|
||||
user string
|
||||
@ -24,11 +25,18 @@ type Sink struct {
|
||||
database string
|
||||
organization string
|
||||
ssl bool
|
||||
meta_as_tags bool
|
||||
name string
|
||||
}
|
||||
|
||||
type SinkFuncs interface {
|
||||
Init(config SinkConfig) error
|
||||
Write(point lp.MutableMetric) error
|
||||
type Sink interface {
|
||||
Init(config sinkConfig) error
|
||||
Write(point lp.CCMetric) error
|
||||
Flush() error
|
||||
Close()
|
||||
Name() string
|
||||
}
|
||||
|
||||
func (s *sink) Name() string {
|
||||
return s.name
|
||||
}
|
||||
|
@ -4,16 +4,17 @@ import (
|
||||
"bytes"
|
||||
"errors"
|
||||
"fmt"
|
||||
lp "github.com/influxdata/line-protocol"
|
||||
lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
|
||||
influx "github.com/influxdata/line-protocol"
|
||||
nats "github.com/nats-io/nats.go"
|
||||
"log"
|
||||
"time"
|
||||
)
|
||||
|
||||
type NatsSink struct {
|
||||
Sink
|
||||
sink
|
||||
client *nats.Conn
|
||||
encoder *lp.Encoder
|
||||
encoder *influx.Encoder
|
||||
buffer *bytes.Buffer
|
||||
}
|
||||
|
||||
@ -31,7 +32,8 @@ func (s *NatsSink) connect() error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *NatsSink) Init(config SinkConfig) error {
|
||||
func (s *NatsSink) Init(config sinkConfig) error {
|
||||
s.name = "NatsSink"
|
||||
if len(config.Host) == 0 ||
|
||||
len(config.Port) == 0 ||
|
||||
len(config.Database) == 0 {
|
||||
@ -46,40 +48,31 @@ func (s *NatsSink) Init(config SinkConfig) error {
|
||||
// Setup Influx line protocol
|
||||
s.buffer = &bytes.Buffer{}
|
||||
s.buffer.Grow(1025)
|
||||
s.encoder = lp.NewEncoder(s.buffer)
|
||||
s.encoder = influx.NewEncoder(s.buffer)
|
||||
s.encoder.SetPrecision(time.Second)
|
||||
s.encoder.SetMaxLineBytes(1024)
|
||||
// Setup infos for connection
|
||||
return s.connect()
|
||||
}
|
||||
|
||||
func (s *NatsSink) Write(point lp.MutableMetric) error {
|
||||
func (s *NatsSink) Write(point lp.CCMetric) error {
|
||||
if s.client != nil {
|
||||
// var tags map[string]string
|
||||
// var fields map[string]interface{}
|
||||
// for _, t := range point.TagList() {
|
||||
// tags[t.Key] = t.Value
|
||||
// }
|
||||
// for _, f := range point.FieldList() {
|
||||
// fields[f.Key] = f.Value
|
||||
// }
|
||||
// m, err := protocol.New(point.Name(), tags, fields, point.Time())
|
||||
// if err != nil {
|
||||
// log.Print(err)
|
||||
// return err
|
||||
// }
|
||||
_, err := s.encoder.Encode(point)
|
||||
if err != nil {
|
||||
log.Print(err)
|
||||
return err
|
||||
}
|
||||
s.client.Publish(s.database, s.buffer.Bytes())
|
||||
s.buffer.Reset()
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *NatsSink) Flush() error {
|
||||
if s.client != nil {
|
||||
if err := s.client.Publish(s.database, s.buffer.Bytes()); err != nil {
|
||||
return err
|
||||
}
|
||||
s.buffer.Reset()
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
|
141
sinks/sinkManager.go
Normal file
141
sinks/sinkManager.go
Normal file
@ -0,0 +1,141 @@
|
||||
package sinks
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"log"
|
||||
"os"
|
||||
"sync"
|
||||
|
||||
lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
|
||||
)
|
||||
|
||||
var AvailableSinks = map[string]Sink{
|
||||
"influxdb": &InfluxSink{},
|
||||
"stdout": &StdoutSink{},
|
||||
"nats": &NatsSink{},
|
||||
"http": &HttpSink{},
|
||||
}
|
||||
|
||||
type sinkManager struct {
|
||||
input chan lp.CCMetric
|
||||
outputs []Sink
|
||||
done chan bool
|
||||
wg *sync.WaitGroup
|
||||
config []sinkConfig
|
||||
}
|
||||
|
||||
type SinkManager interface {
|
||||
Init(wg *sync.WaitGroup, sinkConfigFile string) error
|
||||
AddInput(input chan lp.CCMetric)
|
||||
AddOutput(config json.RawMessage) error
|
||||
Start()
|
||||
Close()
|
||||
}
|
||||
|
||||
func (sm *sinkManager) Init(wg *sync.WaitGroup, sinkConfigFile string) error {
|
||||
sm.input = nil
|
||||
sm.outputs = make([]Sink, 0)
|
||||
sm.done = make(chan bool)
|
||||
sm.wg = wg
|
||||
sm.config = make([]sinkConfig, 0)
|
||||
if len(sinkConfigFile) > 0 {
|
||||
configFile, err := os.Open(sinkConfigFile)
|
||||
if err != nil {
|
||||
log.Print("[SinkManager] ", err.Error())
|
||||
return err
|
||||
}
|
||||
defer configFile.Close()
|
||||
jsonParser := json.NewDecoder(configFile)
|
||||
var rawConfigs []json.RawMessage
|
||||
err = jsonParser.Decode(&rawConfigs)
|
||||
if err != nil {
|
||||
log.Print("[SinkManager] ", err.Error())
|
||||
return err
|
||||
}
|
||||
for _, raw := range rawConfigs {
|
||||
err = sm.AddOutput(raw)
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (sm *sinkManager) Start() {
|
||||
sm.wg.Add(1)
|
||||
batchcount := 20
|
||||
go func() {
|
||||
for {
|
||||
SinkManagerLoop:
|
||||
select {
|
||||
case <-sm.done:
|
||||
for _, s := range sm.outputs {
|
||||
s.Close()
|
||||
}
|
||||
log.Print("[SinkManager] DONE\n")
|
||||
sm.wg.Done()
|
||||
break SinkManagerLoop
|
||||
case p := <-sm.input:
|
||||
log.Print("[SinkManager] WRITE ", p)
|
||||
for _, s := range sm.outputs {
|
||||
s.Write(p)
|
||||
}
|
||||
if batchcount == 0 {
|
||||
log.Print("[SinkManager] FLUSH")
|
||||
for _, s := range sm.outputs {
|
||||
s.Flush()
|
||||
}
|
||||
batchcount = 20
|
||||
}
|
||||
batchcount--
|
||||
default:
|
||||
}
|
||||
}
|
||||
log.Print("[SinkManager] EXIT\n")
|
||||
}()
|
||||
log.Print("[SinkManager] STARTED\n")
|
||||
}
|
||||
|
||||
func (sm *sinkManager) AddInput(input chan lp.CCMetric) {
|
||||
sm.input = input
|
||||
}
|
||||
|
||||
func (sm *sinkManager) AddOutput(rawConfig json.RawMessage) error {
|
||||
var err error
|
||||
var config sinkConfig
|
||||
if len(rawConfig) > 3 {
|
||||
err = json.Unmarshal(rawConfig, &config)
|
||||
if err != nil {
|
||||
log.Print("[SinkManager] SKIP ", config.Type, " JSON config error: ", err.Error())
|
||||
return err
|
||||
}
|
||||
}
|
||||
if _, found := AvailableSinks[config.Type]; !found {
|
||||
log.Print("[SinkManager] SKIP ", config.Type, " unknown sink: ", err.Error())
|
||||
return err
|
||||
}
|
||||
s := AvailableSinks[config.Type]
|
||||
err = s.Init(config)
|
||||
if err != nil {
|
||||
log.Print("[SinkManager] SKIP ", s.Name(), " initialization failed: ", err.Error())
|
||||
return err
|
||||
}
|
||||
sm.outputs = append(sm.outputs, s)
|
||||
sm.config = append(sm.config, config)
|
||||
return nil
|
||||
}
|
||||
|
||||
func (sm *sinkManager) Close() {
|
||||
sm.done <- true
|
||||
log.Print("[SinkManager] CLOSE")
|
||||
}
|
||||
|
||||
func New(wg *sync.WaitGroup, sinkConfigFile string) (SinkManager, error) {
|
||||
sm := &sinkManager{}
|
||||
err := sm.Init(wg, sinkConfigFile)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return sm, err
|
||||
}
|
@ -6,23 +6,30 @@ import (
|
||||
"strings"
|
||||
|
||||
// "time"
|
||||
lp "github.com/influxdata/line-protocol"
|
||||
lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
|
||||
)
|
||||
|
||||
type StdoutSink struct {
|
||||
Sink
|
||||
sink
|
||||
}
|
||||
|
||||
func (s *StdoutSink) Init(config SinkConfig) error {
|
||||
func (s *StdoutSink) Init(config sinkConfig) error {
|
||||
s.name = "StdoutSink"
|
||||
s.meta_as_tags = config.MetaAsTags
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *StdoutSink) Write(point lp.MutableMetric) error {
|
||||
func (s *StdoutSink) Write(point lp.CCMetric) error {
|
||||
var tagsstr []string
|
||||
var fieldstr []string
|
||||
for _, t := range point.TagList() {
|
||||
tagsstr = append(tagsstr, fmt.Sprintf("%s=%s", t.Key, t.Value))
|
||||
}
|
||||
if s.meta_as_tags {
|
||||
for _, m := range point.MetaList() {
|
||||
tagsstr = append(tagsstr, fmt.Sprintf("%s=%s", m.Key, m.Value))
|
||||
}
|
||||
}
|
||||
for _, f := range point.FieldList() {
|
||||
switch f.Value.(type) {
|
||||
case float64:
|
||||
|
Loading…
Reference in New Issue
Block a user