mirror of
https://github.com/ClusterCockpit/cc-metric-collector.git
synced 2025-04-19 19:21:41 +02:00
Include NUMA node specific memory stats in memstat collector
This commit is contained in:
parent
66b9a25a88
commit
d0dea36a63
@ -5,39 +5,49 @@ import (
|
|||||||
"errors"
|
"errors"
|
||||||
"fmt"
|
"fmt"
|
||||||
"io/ioutil"
|
"io/ioutil"
|
||||||
"log"
|
"path/filepath"
|
||||||
|
"regexp"
|
||||||
"strconv"
|
"strconv"
|
||||||
"strings"
|
"strings"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
|
cclog "github.com/ClusterCockpit/cc-metric-collector/internal/ccLogger"
|
||||||
lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
|
lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
|
||||||
)
|
)
|
||||||
|
|
||||||
const MEMSTATFILE = `/proc/meminfo`
|
const MEMSTATFILE = `/proc/meminfo`
|
||||||
|
const NUMADIR = `/sys/devices/system/node`
|
||||||
|
|
||||||
type MemstatCollectorConfig struct {
|
type MemstatCollectorConfig struct {
|
||||||
ExcludeMetrics []string `json:"exclude_metrics"`
|
ExcludeMetrics []string `json:"exclude_metrics,omitempty"`
|
||||||
|
NodeStats bool `json:"node_stats,omitempty"`
|
||||||
|
NumaStats bool `json:"numa_stats,omitempty"`
|
||||||
}
|
}
|
||||||
|
|
||||||
type MemstatCollector struct {
|
type MemstatCollector struct {
|
||||||
metricCollector
|
metricCollector
|
||||||
stats map[string]int64
|
|
||||||
tags map[string]string
|
tags map[string]string
|
||||||
matches map[string]string
|
matches map[string]string
|
||||||
config MemstatCollectorConfig
|
config MemstatCollectorConfig
|
||||||
|
numafiles map[int]string
|
||||||
}
|
}
|
||||||
|
|
||||||
func (m *MemstatCollector) Init(config json.RawMessage) error {
|
func (m *MemstatCollector) Init(config json.RawMessage) error {
|
||||||
var err error
|
var err error
|
||||||
m.name = "MemstatCollector"
|
m.name = "MemstatCollector"
|
||||||
|
m.config.NodeStats = true
|
||||||
|
m.config.NumaStats = false
|
||||||
if len(config) > 0 {
|
if len(config) > 0 {
|
||||||
err = json.Unmarshal(config, &m.config)
|
err = json.Unmarshal(config, &m.config)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
if (!m.config.NodeStats) && (!m.config.NumaStats) {
|
||||||
|
return errors.New("either node_stats or numa_stats needs to be true")
|
||||||
|
}
|
||||||
m.meta = map[string]string{"source": m.name, "group": "Memory", "unit": "kByte"}
|
m.meta = map[string]string{"source": m.name, "group": "Memory", "unit": "kByte"}
|
||||||
m.stats = make(map[string]int64)
|
m.numafiles = make(map[int]string)
|
||||||
m.matches = make(map[string]string)
|
m.matches = make(map[string]string)
|
||||||
m.tags = map[string]string{"type": "node"}
|
m.tags = map[string]string{"type": "node"}
|
||||||
matches := map[string]string{`MemTotal`: "mem_total",
|
matches := map[string]string{`MemTotal`: "mem_total",
|
||||||
@ -56,72 +66,164 @@ func (m *MemstatCollector) Init(config json.RawMessage) error {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
if len(m.matches) == 0 {
|
if len(m.matches) == 0 {
|
||||||
return errors.New("No metrics to collect")
|
return errors.New("no metrics to collect")
|
||||||
}
|
}
|
||||||
m.setup()
|
m.setup()
|
||||||
_, err = ioutil.ReadFile(string(MEMSTATFILE))
|
sysInit := false
|
||||||
|
numaInit := false
|
||||||
|
if m.config.NodeStats {
|
||||||
|
_, err := ioutil.ReadFile(string(MEMSTATFILE))
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
sysInit = true
|
||||||
|
}
|
||||||
|
if m.config.NumaStats {
|
||||||
|
globPattern := filepath.Join(NUMADIR, "node*", "meminfo")
|
||||||
|
regex := regexp.MustCompile(`node(\d+)`)
|
||||||
|
numafiles, err := filepath.Glob(globPattern)
|
||||||
if err == nil {
|
if err == nil {
|
||||||
|
for _, f := range numafiles {
|
||||||
|
_, err := ioutil.ReadFile(f)
|
||||||
|
if err != nil {
|
||||||
|
cclog.ComponentError(m.name, "Skipping NUMA meminfo file:", f)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
splitPath := strings.Split(f, "/")
|
||||||
|
if regex.MatchString(splitPath[5]) {
|
||||||
|
rematch := regex.FindStringSubmatch(splitPath[5])
|
||||||
|
if len(rematch) == 2 {
|
||||||
|
nodeid, err := strconv.Atoi(rematch[1])
|
||||||
|
if err == nil {
|
||||||
|
m.numafiles[nodeid] = f
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if len(m.numafiles) > 0 {
|
||||||
|
numaInit = true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if sysInit || numaInit {
|
||||||
m.init = true
|
m.init = true
|
||||||
}
|
}
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func readMemstatRaw(filename string, re string, translate map[string]string) map[string]int64 {
|
||||||
|
stats := make(map[string]int64)
|
||||||
|
regex, err := regexp.Compile(re)
|
||||||
|
if err != nil {
|
||||||
|
return stats
|
||||||
|
}
|
||||||
|
buffer, err := ioutil.ReadFile(filename)
|
||||||
|
if err != nil {
|
||||||
|
return stats
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, line := range strings.Split(string(buffer), "\n") {
|
||||||
|
if regex.MatchString(line) {
|
||||||
|
matches := regex.FindStringSubmatch(line)
|
||||||
|
// FindStringSubmatch returns full match in index 0
|
||||||
|
if len(matches) == 3 {
|
||||||
|
name := string(matches[1])
|
||||||
|
if _, ok := translate[name]; ok {
|
||||||
|
v, err := strconv.ParseInt(string(matches[2]), 0, 64)
|
||||||
|
if err == nil {
|
||||||
|
stats[name] = v
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if _, exists := stats[`MemTotal`]; !exists {
|
||||||
|
return make(map[string]int64)
|
||||||
|
}
|
||||||
|
return stats
|
||||||
|
}
|
||||||
|
|
||||||
|
func readMemstatFile(translate map[string]string) map[string]int64 {
|
||||||
|
return readMemstatRaw(string(MEMSTATFILE), `^([\w\(\)]+):\s*(\d+)`, translate)
|
||||||
|
}
|
||||||
|
|
||||||
|
func readNumaMemstatFile(filename string, translate map[string]string) map[string]int64 {
|
||||||
|
return readMemstatRaw(filename, `^Node\s+\d+\s+([\w\(\)]+):\s*(\d+)`, translate)
|
||||||
|
}
|
||||||
|
|
||||||
|
func sendMatches(stats map[string]int64, matches map[string]string, tags map[string]string, meta map[string]string, output chan lp.CCMetric) {
|
||||||
|
for raw, name := range matches {
|
||||||
|
if value, ok := stats[raw]; ok {
|
||||||
|
y, err := lp.New(name, tags, meta, map[string]interface{}{"value": int(float64(value) * 1.0e-3)}, time.Now())
|
||||||
|
if err == nil {
|
||||||
|
output <- y
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func sendMemUsed(stats map[string]int64, tags map[string]string, meta map[string]string, output chan lp.CCMetric) {
|
||||||
|
if _, free := stats[`MemFree`]; free {
|
||||||
|
if _, buffers := stats[`Buffers`]; buffers {
|
||||||
|
if _, cached := stats[`Cached`]; cached {
|
||||||
|
memUsed := stats[`MemTotal`] - (stats[`MemFree`] + stats[`Buffers`] + stats[`Cached`])
|
||||||
|
y, err := lp.New("mem_used", tags, meta, map[string]interface{}{"value": int(float64(memUsed) * 1.0e-3)}, time.Now())
|
||||||
|
if err == nil {
|
||||||
|
output <- y
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func sendMemShared(stats map[string]int64, tags map[string]string, meta map[string]string, output chan lp.CCMetric) {
|
||||||
|
if _, found := stats[`MemShared`]; found {
|
||||||
|
y, err := lp.New("mem_shared", tags, meta, map[string]interface{}{"value": int(float64(stats[`MemShared`]) * 1.0e-3)}, time.Now())
|
||||||
|
if err == nil {
|
||||||
|
output <- y
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func (m *MemstatCollector) Read(interval time.Duration, output chan lp.CCMetric) {
|
func (m *MemstatCollector) Read(interval time.Duration, output chan lp.CCMetric) {
|
||||||
if !m.init {
|
if !m.init {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
buffer, err := ioutil.ReadFile(string(MEMSTATFILE))
|
if m.config.NodeStats {
|
||||||
if err != nil {
|
cclog.ComponentDebug(m.name, "Read", string(MEMSTATFILE))
|
||||||
log.Print(err)
|
stats := readMemstatFile(m.matches)
|
||||||
return
|
sendMatches(stats, m.matches, m.tags, m.meta, output)
|
||||||
|
if _, skip := stringArrayContains(m.config.ExcludeMetrics, "mem_used"); !skip {
|
||||||
|
sendMemUsed(stats, m.tags, m.meta, output)
|
||||||
|
}
|
||||||
|
if _, skip := stringArrayContains(m.config.ExcludeMetrics, "mem_shared"); !skip {
|
||||||
|
sendMemShared(stats, m.tags, m.meta, output)
|
||||||
}
|
}
|
||||||
|
|
||||||
ll := strings.Split(string(buffer), "\n")
|
|
||||||
for _, line := range ll {
|
|
||||||
ls := strings.Split(line, `:`)
|
|
||||||
if len(ls) > 1 {
|
|
||||||
lv := strings.Fields(ls[1])
|
|
||||||
m.stats[ls[0]], err = strconv.ParseInt(lv[0], 0, 64)
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if _, exists := m.stats[`MemTotal`]; !exists {
|
if m.config.NumaStats {
|
||||||
err = errors.New("Parse error")
|
tags := make(map[string]string)
|
||||||
log.Print(err)
|
for k, v := range m.tags {
|
||||||
return
|
tags[k] = v
|
||||||
}
|
}
|
||||||
|
tags["type"] = "memoryDomain"
|
||||||
|
|
||||||
for match, name := range m.matches {
|
for nodeid, file := range m.numafiles {
|
||||||
if _, exists := m.stats[match]; !exists {
|
cclog.ComponentDebug(m.name, "Read", file)
|
||||||
err = fmt.Errorf("Parse error for %s : %s", match, name)
|
tags["type-id"] = fmt.Sprintf("%d", nodeid)
|
||||||
log.Print(err)
|
stats := readNumaMemstatFile(file, m.matches)
|
||||||
continue
|
cclog.ComponentDebug(m.name, stats)
|
||||||
|
sendMatches(stats, m.matches, tags, m.meta, output)
|
||||||
|
if _, skip := stringArrayContains(m.config.ExcludeMetrics, "mem_used"); !skip {
|
||||||
|
sendMemUsed(stats, tags, m.meta, output)
|
||||||
}
|
}
|
||||||
y, err := lp.New(name, m.tags, m.meta, map[string]interface{}{"value": int(float64(m.stats[match]) * 1.0e-3)}, time.Now())
|
if _, skip := stringArrayContains(m.config.ExcludeMetrics, "mem_shared"); !skip {
|
||||||
if err == nil {
|
sendMemShared(stats, tags, m.meta, output)
|
||||||
output <- y
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if _, free := m.stats[`MemFree`]; free {
|
|
||||||
if _, buffers := m.stats[`Buffers`]; buffers {
|
|
||||||
if _, cached := m.stats[`Cached`]; cached {
|
|
||||||
memUsed := m.stats[`MemTotal`] - (m.stats[`MemFree`] + m.stats[`Buffers`] + m.stats[`Cached`])
|
|
||||||
_, skip := stringArrayContains(m.config.ExcludeMetrics, "mem_used")
|
|
||||||
y, err := lp.New("mem_used", m.tags, m.meta, map[string]interface{}{"value": int(float64(memUsed) * 1.0e-3)}, time.Now())
|
|
||||||
if err == nil && !skip {
|
|
||||||
output <- y
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if _, found := m.stats[`MemShared`]; found {
|
|
||||||
_, skip := stringArrayContains(m.config.ExcludeMetrics, "mem_shared")
|
|
||||||
y, err := lp.New("mem_shared", m.tags, m.meta, map[string]interface{}{"value": int(float64(m.stats[`MemShared`]) * 1.0e-3)}, time.Now())
|
|
||||||
if err == nil && !skip {
|
|
||||||
output <- y
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -3,13 +3,20 @@
|
|||||||
|
|
||||||
```json
|
```json
|
||||||
"memstat": {
|
"memstat": {
|
||||||
|
"node_stats" : true,
|
||||||
|
"numa_stats" : false,
|
||||||
"exclude_metrics": [
|
"exclude_metrics": [
|
||||||
"mem_used"
|
"mem_used"
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
```
|
```
|
||||||
|
|
||||||
The `memstat` collector reads data from `/proc/meminfo` and outputs a handful **node** metrics. If a metric is not required, it can be excluded from forwarding it to the sink.
|
By default, the `memstat` collector reads data from `/proc/meminfo` and outputs a handful of **node** metrics. This can be deactivated with the `node_stats` option.
|
||||||
|
|
||||||
|
Additionally, the `memstat` collector can read the NUMA-node-specific files `/sys/devices/system/node/node*/meminfo` and output their contents as **memoryDomain** metrics. This can be activated or deactivated with the `numa_stats` option.
|
||||||
|
|
||||||
|
If a metric is not required, it can be excluded from being forwarded to the sink. An excluded metric is dropped for both the system-wide memory stats and the NUMA-node-specific memory stats. If you want to drop only one of the two variants, use the [MetricRouter](../internal/metricRouter/README.md) with a drop condition such as:
|
||||||
|
`name == '<metric_that_should_be_dropped>' && type == 'node'` to keep the NUMA node specific `<metric_that_should_be_dropped>` while dropping the system-wide one.
|
||||||
|
|
||||||
|
|
||||||
Metrics:
|
Metrics:
|
||||||
|
Loading…
x
Reference in New Issue
Block a user