From d0dea36a632e0c59ab5fd9259ad4f1d665d92d51 Mon Sep 17 00:00:00 2001 From: Thomas Roehl Date: Fri, 4 Feb 2022 15:46:23 +0100 Subject: [PATCH] Include NUMA node specific memory stats in memstat collector --- collectors/memstatMetric.go | 212 ++++++++++++++++++++++++++---------- collectors/memstatMetric.md | 9 +- 2 files changed, 165 insertions(+), 56 deletions(-) diff --git a/collectors/memstatMetric.go b/collectors/memstatMetric.go index b6ef855..a356dc6 100644 --- a/collectors/memstatMetric.go +++ b/collectors/memstatMetric.go @@ -5,39 +5,49 @@ import ( "errors" "fmt" "io/ioutil" - "log" + "path/filepath" + "regexp" "strconv" "strings" "time" + cclog "github.com/ClusterCockpit/cc-metric-collector/internal/ccLogger" lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric" ) const MEMSTATFILE = `/proc/meminfo` +const NUMADIR = `/sys/devices/system/node` type MemstatCollectorConfig struct { - ExcludeMetrics []string `json:"exclude_metrics"` + ExcludeMetrics []string `json:"exclude_metrics,omitempty"` + NodeStats bool `json:"node_stats,omitempty"` + NumaStats bool `json:"numa_stats,omitempty"` } type MemstatCollector struct { metricCollector - stats map[string]int64 - tags map[string]string - matches map[string]string - config MemstatCollectorConfig + tags map[string]string + matches map[string]string + config MemstatCollectorConfig + numafiles map[int]string } func (m *MemstatCollector) Init(config json.RawMessage) error { var err error m.name = "MemstatCollector" + m.config.NodeStats = true + m.config.NumaStats = false if len(config) > 0 { err = json.Unmarshal(config, &m.config) if err != nil { return err } } + if (!m.config.NodeStats) && (!m.config.NumaStats) { + return errors.New("either node_stats or numa_stats needs to be true") + } m.meta = map[string]string{"source": m.name, "group": "Memory", "unit": "kByte"} - m.stats = make(map[string]int64) + m.numafiles = make(map[int]string) m.matches = make(map[string]string) m.tags = 
map[string]string{"type": "node"} matches := map[string]string{`MemTotal`: "mem_total", @@ -56,71 +66,163 @@ func (m *MemstatCollector) Init(config json.RawMessage) error { } } if len(m.matches) == 0 { - return errors.New("No metrics to collect") + return errors.New("no metrics to collect") } m.setup() - _, err = ioutil.ReadFile(string(MEMSTATFILE)) - if err == nil { + sysInit := false + numaInit := false + if m.config.NodeStats { + _, err := ioutil.ReadFile(string(MEMSTATFILE)) + if err != nil { + return err + } + sysInit = true + } + if m.config.NumaStats { + globPattern := filepath.Join(NUMADIR, "node*", "meminfo") + regex := regexp.MustCompile(`node(\d+)`) + numafiles, err := filepath.Glob(globPattern) + if err == nil { + for _, f := range numafiles { + _, err := ioutil.ReadFile(f) + if err != nil { + cclog.ComponentError(m.name, "Skipping NUMA meminfo file:", f) + continue + } + splitPath := strings.Split(f, "/") + if regex.MatchString(splitPath[5]) { + rematch := regex.FindStringSubmatch(splitPath[5]) + if len(rematch) == 2 { + nodeid, err := strconv.Atoi(rematch[1]) + if err == nil { + m.numafiles[nodeid] = f + } + } + } + + } + } + if len(m.numafiles) > 0 { + numaInit = true + } + } + if sysInit || numaInit { m.init = true } return err } +func readMemstatRaw(filename string, re string, translate map[string]string) map[string]int64 { + stats := make(map[string]int64) + regex, err := regexp.Compile(re) + if err != nil { + return stats + } + buffer, err := ioutil.ReadFile(filename) + if err != nil { + return stats + } + + for _, line := range strings.Split(string(buffer), "\n") { + if regex.MatchString(line) { + matches := regex.FindStringSubmatch(line) + // FindStringSubmatch returns full match in index 0 + if len(matches) == 3 { + name := string(matches[1]) + if _, ok := translate[name]; ok { + v, err := strconv.ParseInt(string(matches[2]), 0, 64) + if err == nil { + stats[name] = v + } + } + } + } + } + if _, exists := stats[`MemTotal`]; !exists { + return 
make(map[string]int64) + } + return stats +} + +func readMemstatFile(translate map[string]string) map[string]int64 { + return readMemstatRaw(string(MEMSTATFILE), `^([\w\(\)]+):\s*(\d+)`, translate) +} + +func readNumaMemstatFile(filename string, translate map[string]string) map[string]int64 { + return readMemstatRaw(filename, `^Node\s+\d+\s+([\w\(\)]+):\s*(\d+)`, translate) +} + +func sendMatches(stats map[string]int64, matches map[string]string, tags map[string]string, meta map[string]string, output chan lp.CCMetric) { + for raw, name := range matches { + if value, ok := stats[raw]; ok { + y, err := lp.New(name, tags, meta, map[string]interface{}{"value": int(float64(value) * 1.0e-3)}, time.Now()) + if err == nil { + output <- y + } + } + } +} + +func sendMemUsed(stats map[string]int64, tags map[string]string, meta map[string]string, output chan lp.CCMetric) { + if _, free := stats[`MemFree`]; free { + if _, buffers := stats[`Buffers`]; buffers { + if _, cached := stats[`Cached`]; cached { + memUsed := stats[`MemTotal`] - (stats[`MemFree`] + stats[`Buffers`] + stats[`Cached`]) + y, err := lp.New("mem_used", tags, meta, map[string]interface{}{"value": int(float64(memUsed) * 1.0e-3)}, time.Now()) + if err == nil { + output <- y + } + } + } + } +} + +func sendMemShared(stats map[string]int64, tags map[string]string, meta map[string]string, output chan lp.CCMetric) { + if _, found := stats[`MemShared`]; found { + y, err := lp.New("mem_shared", tags, meta, map[string]interface{}{"value": int(float64(stats[`MemShared`]) * 1.0e-3)}, time.Now()) + if err == nil { + output <- y + } + } +} + func (m *MemstatCollector) Read(interval time.Duration, output chan lp.CCMetric) { if !m.init { return } - buffer, err := ioutil.ReadFile(string(MEMSTATFILE)) - if err != nil { - log.Print(err) - return - } - - ll := strings.Split(string(buffer), "\n") - for _, line := range ll { - ls := strings.Split(line, `:`) - if len(ls) > 1 { - lv := strings.Fields(ls[1]) - m.stats[ls[0]], err = 
strconv.ParseInt(lv[0], 0, 64) + if m.config.NodeStats { + cclog.ComponentDebug(m.name, "Read", string(MEMSTATFILE)) + stats := readMemstatFile(m.matches) + sendMatches(stats, m.matches, m.tags, m.meta, output) + if _, skip := stringArrayContains(m.config.ExcludeMetrics, "mem_used"); !skip { + sendMemUsed(stats, m.tags, m.meta, output) } - } - - if _, exists := m.stats[`MemTotal`]; !exists { - err = errors.New("Parse error") - log.Print(err) - return - } - - for match, name := range m.matches { - if _, exists := m.stats[match]; !exists { - err = fmt.Errorf("Parse error for %s : %s", match, name) - log.Print(err) - continue - } - y, err := lp.New(name, m.tags, m.meta, map[string]interface{}{"value": int(float64(m.stats[match]) * 1.0e-3)}, time.Now()) - if err == nil { - output <- y + if _, skip := stringArrayContains(m.config.ExcludeMetrics, "mem_shared"); !skip { + sendMemShared(stats, m.tags, m.meta, output) } + } - if _, free := m.stats[`MemFree`]; free { - if _, buffers := m.stats[`Buffers`]; buffers { - if _, cached := m.stats[`Cached`]; cached { - memUsed := m.stats[`MemTotal`] - (m.stats[`MemFree`] + m.stats[`Buffers`] + m.stats[`Cached`]) - _, skip := stringArrayContains(m.config.ExcludeMetrics, "mem_used") - y, err := lp.New("mem_used", m.tags, m.meta, map[string]interface{}{"value": int(float64(memUsed) * 1.0e-3)}, time.Now()) - if err == nil && !skip { - output <- y - } + if m.config.NumaStats { + tags := make(map[string]string) + for k, v := range m.tags { + tags[k] = v + } + tags["type"] = "memoryDomain" + + for nodeid, file := range m.numafiles { + cclog.ComponentDebug(m.name, "Read", file) + tags["type-id"] = fmt.Sprintf("%d", nodeid) + stats := readNumaMemstatFile(file, m.matches) + cclog.ComponentDebug(m.name, stats) + sendMatches(stats, m.matches, tags, m.meta, output) + if _, skip := stringArrayContains(m.config.ExcludeMetrics, "mem_used"); !skip { + sendMemUsed(stats, tags, m.meta, output) + } + if _, skip := 
stringArrayContains(m.config.ExcludeMetrics, "mem_shared"); !skip { + sendMemShared(stats, tags, m.meta, output) } - } - } - if _, found := m.stats[`MemShared`]; found { - _, skip := stringArrayContains(m.config.ExcludeMetrics, "mem_shared") - y, err := lp.New("mem_shared", m.tags, m.meta, map[string]interface{}{"value": int(float64(m.stats[`MemShared`]) * 1.0e-3)}, time.Now()) - if err == nil && !skip { - output <- y } } } diff --git a/collectors/memstatMetric.md b/collectors/memstatMetric.md index 4b7b8c7..b3dd68b 100644 --- a/collectors/memstatMetric.md +++ b/collectors/memstatMetric.md @@ -3,13 +3,20 @@ ```json "memstat": { + "node_stats" : true, + "numa_stats" : false, "exclude_metrics": [ "mem_used" ] } ``` -The `memstat` collector reads data from `/proc/meminfo` and outputs a handful **node** metrics. If a metric is not required, it can be excluded from forwarding it to the sink. +By default, the `memstat` collector reads data from `/proc/meminfo` and outputs a handful of **node** metrics. This can be deactivated by the `node_stats` option. + +Additionally, the `memstat` collector can read the NUMA node specific `/sys/devices/system/node/node*/meminfo` and output them as **memoryDomain** metrics. This can be activated or deactivated with the `numa_stats` option. + +If a metric is not required, it can be excluded from forwarding it to the sink. This includes the metric for system-wide memory stats as well as NUMA node specific memory stats. If you want to filter only specific metrics, use the [MetricRouter](../internal/metricRouter/README.md) with something like: +`name == '<metric_name>' && type == 'node'` to keep the NUMA node specific `<metric_name>` while dropping the system-wide one. Metrics: