cc-metric-collector/collectors/memstatMetric.go

237 lines
5.8 KiB
Go
Raw Normal View History

package collectors
import (
2022-02-24 18:27:05 +01:00
"bufio"
2021-11-25 15:11:39 +01:00
"encoding/json"
"errors"
2021-10-04 15:23:43 +02:00
"fmt"
2022-02-24 18:27:05 +01:00
"os"
"path/filepath"
"regexp"
"strconv"
"strings"
"time"
2022-01-26 15:54:49 +01:00
2022-02-24 18:27:05 +01:00
cclog "github.com/ClusterCockpit/cc-metric-collector/internal/ccLogger"
Modularize the whole thing (#16) * Use channels, add a metric router, split up configuration and use extended version of Influx line protocol internally * Use central timer for collectors and router. Add expressions to router * Add expression to router config * Update entry points * Start with README * Update README for CCMetric * Formatting * Update README.md * Add README for MultiChanTicker * Add README for MultiChanTicker * Update README.md * Add README to metric router * Update main README * Remove SinkEntity type * Update README for sinks * Update go files * Update README for receivers * Update collectors README * Update collectors README * Use seperate page per collector * Fix for tempstat page * Add docs for customcmd collector * Add docs for ipmistat collector * Add docs for topprocs collector * Update customCmdMetric.md * Use seconds when calculating LIKWID metrics * Add IB metrics ib_recv_pkts and ib_xmit_pkts * Drop domain part of host name * Updated to latest stable version of likwid * Define source code dependencies in Makefile * Add GPFS / IBM Spectrum Scale collector * Add vet and staticcheck make targets * Add vet and staticcheck make targets * Avoid go vet warning: struct field tag `json:"..., omitempty"` not compatible with reflect.StructTag.Get: suspicious space in struct tag value struct field tag `json:"...", omitempty` not compatible with reflect.StructTag.Get: key:"value" pairs not separated by spaces * Add sample collector to README.md * Add CPU frequency collector * Avoid staticcheck warning: redundant return statement * Avoid staticcheck warning: unnecessary assignment to the blank identifier * Simplified code * Add CPUFreqCollectorCpuinfo a metric collector to measure the current frequency of the CPUs as obtained from /proc/cpuinfo Only measure on the first hyperthread * Add collector for NFS clients * Move publication of metrics into Flush() for NatsSink * Update GitHub actions * Refactoring * Avoid vet warning: Println arg list ends 
with redundant newline * Avoid vet warning struct field commands has json tag but is not exported * Avoid vet warning: return copies lock value. * Corrected typo * Refactoring * Add go sources in internal/... * Bad separator in Makefile * Fix Infiniband collector Co-authored-by: Holger Obermaier <40787752+ho-ob@users.noreply.github.com>
2022-01-25 15:37:43 +01:00
lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
2022-04-02 16:05:52 +02:00
stats "github.com/ClusterCockpit/cc-metric-collector/internal/metricRouter"
)
2022-02-24 18:27:05 +01:00
// Locations of the kernel files the memstat collector reads.
const (
	// MEMSTATFILE is the node-wide meminfo file.
	MEMSTATFILE = "/proc/meminfo"
	// NUMA_MEMSTAT_BASE is the directory under which Init looks for
	// per-NUMA-node "node<N>/meminfo" files.
	NUMA_MEMSTAT_BASE = "/sys/devices/system/node"
)
// MemstatCollectorConfig holds the options of the memstat collector as they
// are decoded from the collector's JSON configuration.
type MemstatCollectorConfig struct {
	// ExcludeMetrics lists metrics that Init leaves out of the set of
	// metrics to collect.
	ExcludeMetrics []string `json:"exclude_metrics"`
	// NodeStats enables reading the node-wide MEMSTATFILE. Init sets it to
	// true before decoding the configuration.
	NodeStats bool `json:"node_stats,omitempty"`
	// NumaStats enables reading the per-NUMA-node meminfo files below
	// NUMA_MEMSTAT_BASE. Init sets it to false before decoding the
	// configuration.
	NumaStats bool `json:"numa_stats,omitempty"`
}
// MemstatCollectorNode describes one per-NUMA-node meminfo source.
type MemstatCollectorNode struct {
	// file is the path of the node's meminfo file.
	file string
	// tags are attached to every metric read from file.
	tags map[string]string
}
type MemstatCollector struct {
Modularize the whole thing (#16) * Use channels, add a metric router, split up configuration and use extended version of Influx line protocol internally * Use central timer for collectors and router. Add expressions to router * Add expression to router config * Update entry points * Start with README * Update README for CCMetric * Formatting * Update README.md * Add README for MultiChanTicker * Add README for MultiChanTicker * Update README.md * Add README to metric router * Update main README * Remove SinkEntity type * Update README for sinks * Update go files * Update README for receivers * Update collectors README * Update collectors README * Use seperate page per collector * Fix for tempstat page * Add docs for customcmd collector * Add docs for ipmistat collector * Add docs for topprocs collector * Update customCmdMetric.md * Use seconds when calculating LIKWID metrics * Add IB metrics ib_recv_pkts and ib_xmit_pkts * Drop domain part of host name * Updated to latest stable version of likwid * Define source code dependencies in Makefile * Add GPFS / IBM Spectrum Scale collector * Add vet and staticcheck make targets * Add vet and staticcheck make targets * Avoid go vet warning: struct field tag `json:"..., omitempty"` not compatible with reflect.StructTag.Get: suspicious space in struct tag value struct field tag `json:"...", omitempty` not compatible with reflect.StructTag.Get: key:"value" pairs not separated by spaces * Add sample collector to README.md * Add CPU frequency collector * Avoid staticcheck warning: redundant return statement * Avoid staticcheck warning: unnecessary assignment to the blank identifier * Simplified code * Add CPUFreqCollectorCpuinfo a metric collector to measure the current frequency of the CPUs as obtained from /proc/cpuinfo Only measure on the first hyperthread * Add collector for NFS clients * Move publication of metrics into Flush() for NatsSink * Update GitHub actions * Refactoring * Avoid vet warning: Println arg list ends 
with redundant newline * Avoid vet warning struct field commands has json tag but is not exported * Avoid vet warning: return copies lock value. * Corrected typo * Refactoring * Add go sources in internal/... * Bad separator in Makefile * Fix Infiniband collector Co-authored-by: Holger Obermaier <40787752+ho-ob@users.noreply.github.com>
2022-01-25 15:37:43 +01:00
metricCollector
2022-04-02 16:05:52 +02:00
stats map[string]int64
tags map[string]string
matches map[string]string
config MemstatCollectorConfig
nodefiles map[int]MemstatCollectorNode
sendMemUsed bool
statsProcessedMetrics int64
2022-02-24 18:27:05 +01:00
}
// MemstatStats is a single entry parsed from a meminfo-style file.
type MemstatStats struct {
	// value is the numeric value of the entry.
	value float64
	// unit is the unit string that followed the value in the file.
	unit string
}
func getStats(filename string) map[string]MemstatStats {
stats := make(map[string]MemstatStats)
2022-02-24 18:27:05 +01:00
file, err := os.Open(filename)
if err != nil {
cclog.Error(err.Error())
}
defer file.Close()
scanner := bufio.NewScanner(file)
for scanner.Scan() {
line := scanner.Text()
linefields := strings.Fields(line)
if len(linefields) == 3 {
v, err := strconv.ParseFloat(linefields[1], 64)
if err == nil {
stats[strings.Trim(linefields[0], ":")] = MemstatStats{
value: v,
unit: linefields[2],
}
2022-02-24 18:27:05 +01:00
}
} else if len(linefields) == 5 {
v, err := strconv.ParseFloat(linefields[3], 64)
if err == nil {
stats[strings.Trim(linefields[0], ":")] = MemstatStats{
value: v,
unit: linefields[4],
}
2022-02-24 18:27:05 +01:00
}
}
}
return stats
}
Modularize the whole thing (#16) * Use channels, add a metric router, split up configuration and use extended version of Influx line protocol internally * Use central timer for collectors and router. Add expressions to router * Add expression to router config * Update entry points * Start with README * Update README for CCMetric * Formatting * Update README.md * Add README for MultiChanTicker * Add README for MultiChanTicker * Update README.md * Add README to metric router * Update main README * Remove SinkEntity type * Update README for sinks * Update go files * Update README for receivers * Update collectors README * Update collectors README * Use seperate page per collector * Fix for tempstat page * Add docs for customcmd collector * Add docs for ipmistat collector * Add docs for topprocs collector * Update customCmdMetric.md * Use seconds when calculating LIKWID metrics * Add IB metrics ib_recv_pkts and ib_xmit_pkts * Drop domain part of host name * Updated to latest stable version of likwid * Define source code dependencies in Makefile * Add GPFS / IBM Spectrum Scale collector * Add vet and staticcheck make targets * Add vet and staticcheck make targets * Avoid go vet warning: struct field tag `json:"..., omitempty"` not compatible with reflect.StructTag.Get: suspicious space in struct tag value struct field tag `json:"...", omitempty` not compatible with reflect.StructTag.Get: key:"value" pairs not separated by spaces * Add sample collector to README.md * Add CPU frequency collector * Avoid staticcheck warning: redundant return statement * Avoid staticcheck warning: unnecessary assignment to the blank identifier * Simplified code * Add CPUFreqCollectorCpuinfo a metric collector to measure the current frequency of the CPUs as obtained from /proc/cpuinfo Only measure on the first hyperthread * Add collector for NFS clients * Move publication of metrics into Flush() for NatsSink * Update GitHub actions * Refactoring * Avoid vet warning: Println arg list ends 
with redundant newline * Avoid vet warning struct field commands has json tag but is not exported * Avoid vet warning: return copies lock value. * Corrected typo * Refactoring * Add go sources in internal/... * Bad separator in Makefile * Fix Infiniband collector Co-authored-by: Holger Obermaier <40787752+ho-ob@users.noreply.github.com>
2022-01-25 15:37:43 +01:00
func (m *MemstatCollector) Init(config json.RawMessage) error {
2021-11-25 15:11:39 +01:00
var err error
2021-03-25 17:47:08 +01:00
m.name = "MemstatCollector"
2022-02-24 18:27:05 +01:00
m.config.NodeStats = true
m.config.NumaStats = false
if len(config) > 0 {
2021-11-25 15:11:39 +01:00
err = json.Unmarshal(config, &m.config)
if err != nil {
return err
}
}
m.meta = map[string]string{"source": m.name, "group": "Memory"}
2021-10-04 15:23:43 +02:00
m.stats = make(map[string]int64)
m.matches = make(map[string]string)
2021-10-04 15:23:43 +02:00
m.tags = map[string]string{"type": "node"}
2022-02-24 18:27:05 +01:00
matches := map[string]string{
"MemTotal": "mem_total",
2021-10-04 15:23:43 +02:00
"SwapTotal": "swap_total",
"SReclaimable": "mem_sreclaimable",
"Slab": "mem_slab",
"MemFree": "mem_free",
"Buffers": "mem_buffers",
"Cached": "mem_cached",
"MemAvailable": "mem_available",
2022-02-24 18:27:05 +01:00
"SwapFree": "swap_free",
"MemShared": "mem_shared",
}
for k, v := range matches {
2021-11-25 15:11:39 +01:00
_, skip := stringArrayContains(m.config.ExcludeMetrics, k)
if !skip {
m.matches[k] = v
}
}
m.sendMemUsed = false
if _, skip := stringArrayContains(m.config.ExcludeMetrics, "mem_used"); !skip {
m.sendMemUsed = true
}
if len(m.matches) == 0 {
2022-02-24 18:27:05 +01:00
return errors.New("no metrics to collect")
}
m.setup()
2022-02-24 18:27:05 +01:00
if m.config.NodeStats {
if stats := getStats(MEMSTATFILE); len(stats) == 0 {
return fmt.Errorf("cannot read data from file %s", MEMSTATFILE)
}
}
2022-02-24 18:27:05 +01:00
if m.config.NumaStats {
globPattern := filepath.Join(NUMA_MEMSTAT_BASE, "node[0-9]*", "meminfo")
regex := regexp.MustCompile(filepath.Join(NUMA_MEMSTAT_BASE, "node(\\d+)", "meminfo"))
files, err := filepath.Glob(globPattern)
if err == nil {
m.nodefiles = make(map[int]MemstatCollectorNode)
for _, f := range files {
if stats := getStats(f); len(stats) == 0 {
return fmt.Errorf("cannot read data from file %s", f)
}
rematch := regex.FindStringSubmatch(f)
if len(rematch) == 2 {
id, err := strconv.Atoi(rematch[1])
if err == nil {
f := MemstatCollectorNode{
file: f,
tags: map[string]string{
"type": "memoryDomain",
"type-id": fmt.Sprintf("%d", id),
},
}
m.nodefiles[id] = f
}
}
}
}
}
2022-04-02 16:05:52 +02:00
m.statsProcessedMetrics = 0
2022-02-24 18:27:05 +01:00
m.init = true
return err
}
2022-02-24 18:27:05 +01:00
func (m *MemstatCollector) Read(interval time.Duration, output chan lp.CCMetric) {
if !m.init {
return
}
sendStats := func(stats map[string]MemstatStats, tags map[string]string) {
2022-02-24 18:27:05 +01:00
for match, name := range m.matches {
var value float64 = 0
var unit string = ""
2022-02-24 18:27:05 +01:00
if v, ok := stats[match]; ok {
value = v.value
if len(v.unit) > 0 {
unit = v.unit
}
2022-02-24 18:27:05 +01:00
}
y, err := lp.New(name, tags, m.meta, map[string]interface{}{"value": value}, time.Now())
2022-02-24 18:27:05 +01:00
if err == nil {
if len(unit) > 0 {
y.AddMeta("unit", unit)
}
2022-04-02 16:05:52 +02:00
m.statsProcessedMetrics++
2022-02-24 18:27:05 +01:00
output <- y
}
2021-10-04 15:23:43 +02:00
}
if m.sendMemUsed {
memUsed := 0.0
unit := ""
if totalVal, total := stats["MemTotal"]; total {
if freeVal, free := stats["MemFree"]; free {
if bufVal, buffers := stats["Buffers"]; buffers {
if cacheVal, cached := stats["Cached"]; cached {
memUsed = totalVal.value - (freeVal.value + bufVal.value + cacheVal.value)
if len(totalVal.unit) > 0 {
unit = totalVal.unit
} else if len(freeVal.unit) > 0 {
unit = freeVal.unit
} else if len(bufVal.unit) > 0 {
unit = bufVal.unit
} else if len(cacheVal.unit) > 0 {
unit = cacheVal.unit
}
2022-02-24 18:27:05 +01:00
}
}
2021-10-04 15:23:43 +02:00
}
}
y, err := lp.New("mem_used", tags, m.meta, map[string]interface{}{"value": memUsed}, time.Now())
if err == nil {
if len(unit) > 0 {
y.AddMeta("unit", unit)
}
2022-04-02 16:05:52 +02:00
m.statsProcessedMetrics++
output <- y
}
2021-10-04 15:23:43 +02:00
}
}
2022-02-24 18:27:05 +01:00
if m.config.NodeStats {
nodestats := getStats(MEMSTATFILE)
sendStats(nodestats, m.tags)
}
if m.config.NumaStats {
for _, nodeConf := range m.nodefiles {
stats := getStats(nodeConf.file)
sendStats(stats, nodeConf.tags)
2021-10-04 15:23:43 +02:00
}
}
2022-04-02 16:05:52 +02:00
stats.ComponentStatInt(m.name, "collected_metrics", m.statsProcessedMetrics)
}
func (m *MemstatCollector) Close() {
2021-10-04 15:47:03 +02:00
m.init = false
}