mirror of
				https://github.com/ClusterCockpit/cc-metric-collector.git
				synced 2025-11-04 02:35:07 +01:00 
			
		
		
		
	Add NUMA metric collector.
This commit is contained in:
		@@ -71,6 +71,11 @@ type SampleCollector struct {
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func (m *SampleCollector) Init(config json.RawMessage) error {
 | 
			
		||||
    // Check if already initialized
 | 
			
		||||
    if m.init {
 | 
			
		||||
        return nil
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    m.name = "SampleCollector"
 | 
			
		||||
    m.setup()
 | 
			
		||||
    if len(config) > 0 {
 | 
			
		||||
 
 | 
			
		||||
@@ -32,6 +32,7 @@ var AvailableCollectors = map[string]MetricCollector{
 | 
			
		||||
	"cpufreq":          new(CPUFreqCollector),
 | 
			
		||||
	"cpufreq_cpuinfo":  new(CPUFreqCpuInfoCollector),
 | 
			
		||||
	"nfsstat":          new(NfsCollector),
 | 
			
		||||
	"numastats":        new(NUMAStatsCollector),
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// Metric collector manager data structure
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										126
									
								
								collectors/numastatsMetric.go
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										126
									
								
								collectors/numastatsMetric.go
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,126 @@
 | 
			
		||||
package collectors
 | 
			
		||||
 | 
			
		||||
import (
 | 
			
		||||
	"bufio"
 | 
			
		||||
	"encoding/json"
 | 
			
		||||
	"fmt"
 | 
			
		||||
	"log"
 | 
			
		||||
	"os"
 | 
			
		||||
	"path/filepath"
 | 
			
		||||
	"strconv"
 | 
			
		||||
	"strings"
 | 
			
		||||
	"time"
 | 
			
		||||
 | 
			
		||||
	lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
//
 | 
			
		||||
// Numa policy hit/miss statistics
 | 
			
		||||
//
 | 
			
		||||
// numa_hit:
 | 
			
		||||
//   A process wanted to allocate memory from this node, and succeeded.
 | 
			
		||||
// numa_miss:
 | 
			
		||||
//   A process wanted to allocate memory from another node,
 | 
			
		||||
//   but ended up with memory from this node.
 | 
			
		||||
// numa_foreign:
 | 
			
		||||
//   A process wanted to allocate on this node,
 | 
			
		||||
//   but ended up with memory from another node.
 | 
			
		||||
// local_node:
 | 
			
		||||
//   A process ran on this node's CPU,
 | 
			
		||||
//   and got memory from this node.
 | 
			
		||||
// other_node:
 | 
			
		||||
//   A process ran on a different node's CPU
 | 
			
		||||
//   and got memory from this node.
 | 
			
		||||
// interleave_hit:
 | 
			
		||||
//   Interleaving wanted to allocate from this node
 | 
			
		||||
//   and succeeded.
 | 
			
		||||
//
 | 
			
		||||
// See: https://www.kernel.org/doc/html/latest/admin-guide/numastat.html
 | 
			
		||||
//
 | 
			
		||||
// NUMAStatsCollectorTopolgy describes one NUMA domain as seen by the
// collector: where its statistics are read from and how the resulting
// metrics are tagged.
type NUMAStatsCollectorTopolgy struct {
	// file is the path of the domain's numastat file.
	file string
	// tagSet holds the tags attached to every metric read from file.
	tagSet map[string]string
}
 | 
			
		||||
 | 
			
		||||
type NUMAStatsCollector struct {
 | 
			
		||||
	metricCollector
 | 
			
		||||
	topology []NUMAStatsCollectorTopolgy
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func (m *NUMAStatsCollector) Init(config json.RawMessage) error {
 | 
			
		||||
	// Check if already initialized
 | 
			
		||||
	if m.init {
 | 
			
		||||
		return nil
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	m.name = "NUMAStatsCollector"
 | 
			
		||||
	m.setup()
 | 
			
		||||
	m.meta = map[string]string{
 | 
			
		||||
		"source": m.name,
 | 
			
		||||
		"group":  "NUMA",
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	// Loop for all NUMA node directories
 | 
			
		||||
	baseDir := "/sys/devices/system/node"
 | 
			
		||||
	globPattern := filepath.Join(baseDir, "node[0-9]*")
 | 
			
		||||
	dirs, err := filepath.Glob(globPattern)
 | 
			
		||||
	if err != nil {
 | 
			
		||||
		return fmt.Errorf("unable to glob files with pattern %s", globPattern)
 | 
			
		||||
	}
 | 
			
		||||
	if dirs == nil {
 | 
			
		||||
		return fmt.Errorf("unable to find any files with pattern %s", globPattern)
 | 
			
		||||
	}
 | 
			
		||||
	m.topology = make([]NUMAStatsCollectorTopolgy, 0, len(dirs))
 | 
			
		||||
	for _, dir := range dirs {
 | 
			
		||||
		node := strings.TrimPrefix(dir, "/sys/devices/system/node/node")
 | 
			
		||||
		file := filepath.Join(dir, "numastat")
 | 
			
		||||
		m.topology = append(m.topology,
 | 
			
		||||
			NUMAStatsCollectorTopolgy{
 | 
			
		||||
				file:   file,
 | 
			
		||||
				tagSet: map[string]string{"domain": node},
 | 
			
		||||
			})
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	m.init = true
 | 
			
		||||
	return nil
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func (m *NUMAStatsCollector) Read(interval time.Duration, output chan lp.CCMetric) {
 | 
			
		||||
	if !m.init {
 | 
			
		||||
		return
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	for i := range m.topology {
 | 
			
		||||
		// Loop for all NUMA domains
 | 
			
		||||
		t := &m.topology[i]
 | 
			
		||||
 | 
			
		||||
		now := time.Now()
 | 
			
		||||
		file, err := os.Open(t.file)
 | 
			
		||||
		if err != nil {
 | 
			
		||||
			return
 | 
			
		||||
		}
 | 
			
		||||
		scanner := bufio.NewScanner(file)
 | 
			
		||||
		for scanner.Scan() {
 | 
			
		||||
			split := strings.Fields(scanner.Text())
 | 
			
		||||
			if len(split) != 2 {
 | 
			
		||||
				continue
 | 
			
		||||
			}
 | 
			
		||||
			key := split[0]
 | 
			
		||||
			value, err := strconv.ParseInt(split[1], 10, 64)
 | 
			
		||||
			if err != nil {
 | 
			
		||||
				log.Printf("failed to convert %s='%s' to int64: %v", key, split[1], err)
 | 
			
		||||
				continue
 | 
			
		||||
			}
 | 
			
		||||
			y, err := lp.New("numastats_"+key, t.tagSet, m.meta, map[string]interface{}{"value": value}, now)
 | 
			
		||||
			if err == nil {
 | 
			
		||||
				output <- y
 | 
			
		||||
			}
 | 
			
		||||
		}
 | 
			
		||||
 | 
			
		||||
		file.Close()
 | 
			
		||||
	}
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func (m *NUMAStatsCollector) Close() {
 | 
			
		||||
	m.init = false
 | 
			
		||||
}
 | 
			
		||||
		Reference in New Issue
	
	Block a user