mirror of
				https://github.com/ClusterCockpit/cc-metric-collector.git
				synced 2025-11-04 10:45:06 +01:00 
			
		
		
		
	Add a collector to read SNMP endpoints
This commit is contained in:
		@@ -40,6 +40,7 @@ var AvailableCollectors = map[string]MetricCollector{
 | 
			
		||||
	"rocm_smi":        new(RocmSmiCollector),
 | 
			
		||||
	"self":            new(SelfCollector),
 | 
			
		||||
	"schedstat":       new(SchedstatCollector),
 | 
			
		||||
	"snmpstat":        new(SNMPCollector),
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// Metric collector manager data structure
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										202
									
								
								collectors/snmpMetric.go
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										202
									
								
								collectors/snmpMetric.go
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,202 @@
 | 
			
		||||
package collectors
 | 
			
		||||
 | 
			
		||||
import (
 | 
			
		||||
	"encoding/json"
 | 
			
		||||
	"fmt"
 | 
			
		||||
	"regexp"
 | 
			
		||||
	"strings"
 | 
			
		||||
	"time"
 | 
			
		||||
 | 
			
		||||
	cclog "github.com/ClusterCockpit/cc-metric-collector/pkg/ccLogger"
 | 
			
		||||
	lp "github.com/ClusterCockpit/cc-metric-collector/pkg/ccMetric"
 | 
			
		||||
	"github.com/gosnmp/gosnmp"
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
// SNMPCollectorTargetConfig is the JSON description of one SNMP endpoint
// that the collector queries.
type SNMPCollectorTargetConfig struct {
	// Hostname is the address of the device to query.
	Hostname string `json:"hostname"`
	// Port is the UDP port of the SNMP agent; values <= 0 select the default 161.
	Port int `json:"port,omitempty"`
	// Community is the SNMP community string; empty selects "public".
	Community string `json:"community"`
	// Timeout is the request timeout in seconds; values <= 0 select 1 second.
	Timeout int `json:"timeout"`
}
 | 
			
		||||
 | 
			
		||||
// SNMPCollectorMetricConfig is the JSON description of one metric that is
// read from every configured target.
type SNMPCollectorMetricConfig struct {
	// Name is either a literal metric name or an OID whose value is used as name.
	Name string `json:"name"`
	// Value is the OID that delivers the metric value.
	Value string `json:"value"`
	// Unit is optional: empty, a literal unit string, or an OID delivering the unit.
	Unit string `json:"unit,omitempty"`
}
 | 
			
		||||
 | 
			
		||||
// These are the fields we read from the JSON configuration
 | 
			
		||||
type SNMPCollectorConfig struct {
 | 
			
		||||
	Targets []SNMPCollectorTargetConfig `json:"targets"`
 | 
			
		||||
	Metrics []SNMPCollectorMetricConfig `json:"metrics"`
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// This contains all variables we need during execution and the variables
 | 
			
		||||
// defined by metricCollector (name, init, ...)
 | 
			
		||||
type SNMPCollector struct {
 | 
			
		||||
	metricCollector
 | 
			
		||||
	config SNMPCollectorConfig // the configuration structure
 | 
			
		||||
	meta   map[string]string   // default meta information
 | 
			
		||||
	tags   map[string]string   // default tags
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// oidPattern matches a dotted-decimal OID without a leading dot: a root arc
// of 0, 1 or 2, a second arc in the range 0-39 and any number of further
// numeric arcs. It is compiled once because validOid is called repeatedly for
// every metric on every read.
var oidPattern = regexp.MustCompile(`^[012]\.(?:[0-9]|[1-3][0-9])(\.\d+)*$`)

// validOid reports whether oid is a syntactically valid dotted-decimal object
// identifier such as "1.3.6.1.2.1.1.4.0". Empty strings, strings with a
// leading or trailing dot and plain names are rejected.
func validOid(oid string) bool {
	return oidPattern.MatchString(oid)
}
 | 
			
		||||
 | 
			
		||||
// Init initializes the snmp collector
 | 
			
		||||
// Called once by the collector manager
 | 
			
		||||
// All tags, meta data tags and metrics that do not change over the runtime should be set here
 | 
			
		||||
func (m *SNMPCollector) Init(config json.RawMessage) error {
 | 
			
		||||
	var err error = nil
 | 
			
		||||
	// Always set the name early in Init() to use it in cclog.Component* functions
 | 
			
		||||
	m.name = "SNMPCollector"
 | 
			
		||||
	// This is for later use, also call it early
 | 
			
		||||
	m.setup()
 | 
			
		||||
	// Tell whether the collector should be run in parallel with others (reading files, ...)
 | 
			
		||||
	// or it should be run serially, mostly for collectors actually doing measurements
 | 
			
		||||
	// because they should not measure the execution of the other collectors
 | 
			
		||||
	m.parallel = true
 | 
			
		||||
	// Define meta information sent with each metric
 | 
			
		||||
	// (Can also be dynamic or this is the basic set with extension through AddMeta())
 | 
			
		||||
	m.meta = map[string]string{"source": m.name, "group": "SNMP"}
 | 
			
		||||
	// Define tags sent with each metric
 | 
			
		||||
	// The 'type' tag is always needed, it defines the granularity of the metric
 | 
			
		||||
	// node -> whole system
 | 
			
		||||
	// socket -> CPU socket (requires socket ID as 'type-id' tag)
 | 
			
		||||
	// die -> CPU die (requires CPU die ID as 'type-id' tag)
 | 
			
		||||
	// memoryDomain -> NUMA domain (requires NUMA domain ID as 'type-id' tag)
 | 
			
		||||
	// llc -> Last level cache (requires last level cache ID as 'type-id' tag)
 | 
			
		||||
	// core -> single CPU core that may consist of multiple hardware threads (SMT) (requires core ID as 'type-id' tag)
 | 
			
		||||
	// hwthtread -> single CPU hardware thread (requires hardware thread ID as 'type-id' tag)
 | 
			
		||||
	// accelerator -> A accelerator device like GPU or FPGA (requires an accelerator ID as 'type-id' tag)
 | 
			
		||||
	m.tags = map[string]string{"type": "node"}
 | 
			
		||||
	// Read in the JSON configuration
 | 
			
		||||
	if len(config) > 0 {
 | 
			
		||||
		err = json.Unmarshal(config, &m.config)
 | 
			
		||||
		if err != nil {
 | 
			
		||||
			cclog.ComponentError(m.name, "Error reading config:", err.Error())
 | 
			
		||||
			return err
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	if len(m.config.Targets) == 0 {
 | 
			
		||||
		err = fmt.Errorf("no targets configured, exiting")
 | 
			
		||||
		cclog.ComponentError(m.name, err.Error())
 | 
			
		||||
		return err
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	if len(m.config.Metrics) == 0 {
 | 
			
		||||
		err = fmt.Errorf("no metrics configured, exiting")
 | 
			
		||||
		cclog.ComponentError(m.name, err.Error())
 | 
			
		||||
		return err
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	// Set this flag only if everything is initialized properly, all required files exist, ...
 | 
			
		||||
	m.init = true
 | 
			
		||||
	return err
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// Read collects all metrics belonging to the snmp collector
 | 
			
		||||
// and sends them through the output channel to the collector manager
 | 
			
		||||
func (m *SNMPCollector) Read(interval time.Duration, output chan lp.CCMetric) {
 | 
			
		||||
	// Create a snmp metric
 | 
			
		||||
	timestamp := time.Now()
 | 
			
		||||
 | 
			
		||||
	for _, target := range m.config.Targets {
 | 
			
		||||
		port := uint16(161)
 | 
			
		||||
		if target.Port > 0 {
 | 
			
		||||
			port = uint16(target.Port)
 | 
			
		||||
		}
 | 
			
		||||
		comm := "public"
 | 
			
		||||
		if len(target.Community) > 0 {
 | 
			
		||||
			comm = target.Community
 | 
			
		||||
		}
 | 
			
		||||
		timeout := 1
 | 
			
		||||
		if target.Timeout > 0 {
 | 
			
		||||
			timeout = target.Timeout
 | 
			
		||||
		}
 | 
			
		||||
		params := &gosnmp.GoSNMP{
 | 
			
		||||
			Target:    target.Hostname,
 | 
			
		||||
			Port:      port,
 | 
			
		||||
			Community: comm,
 | 
			
		||||
			Version:   gosnmp.Version2c,
 | 
			
		||||
			Timeout:   time.Duration(timeout) * time.Second,
 | 
			
		||||
		}
 | 
			
		||||
		err := params.Connect()
 | 
			
		||||
		if err != nil {
 | 
			
		||||
			cclog.ComponentError(m.name, err.Error())
 | 
			
		||||
			continue
 | 
			
		||||
		}
 | 
			
		||||
		for _, metric := range m.config.Metrics {
 | 
			
		||||
			if !validOid(metric.Value) {
 | 
			
		||||
				continue
 | 
			
		||||
			}
 | 
			
		||||
			oids := []string{}
 | 
			
		||||
			name := gosnmp.SnmpPDU{
 | 
			
		||||
				Value: metric.Name,
 | 
			
		||||
				Name:  metric.Name,
 | 
			
		||||
			}
 | 
			
		||||
			nameidx := -1
 | 
			
		||||
			value := gosnmp.SnmpPDU{
 | 
			
		||||
				Value: 0,
 | 
			
		||||
				Name:  metric.Value,
 | 
			
		||||
			}
 | 
			
		||||
			valueidx := -1
 | 
			
		||||
			unit := gosnmp.SnmpPDU{
 | 
			
		||||
				Value: metric.Unit,
 | 
			
		||||
				Name:  metric.Unit,
 | 
			
		||||
			}
 | 
			
		||||
			unitidx := -1
 | 
			
		||||
			if validOid(metric.Name) {
 | 
			
		||||
				oids = append(oids, metric.Name)
 | 
			
		||||
				nameidx = 0
 | 
			
		||||
			}
 | 
			
		||||
			if validOid(metric.Value) {
 | 
			
		||||
				oids = append(oids, metric.Value)
 | 
			
		||||
				valueidx = 1
 | 
			
		||||
			}
 | 
			
		||||
			if len(metric.Unit) > 0 && validOid(metric.Unit) {
 | 
			
		||||
				oids = append(oids, metric.Unit)
 | 
			
		||||
				unitidx = 2
 | 
			
		||||
			}
 | 
			
		||||
			result, err := gosnmp.Default.Get(oids)
 | 
			
		||||
			if err != nil {
 | 
			
		||||
				cclog.ComponentError(m.name, "failed to get data for OIDs %s", strings.Join(oids, ","))
 | 
			
		||||
				continue
 | 
			
		||||
			}
 | 
			
		||||
			if nameidx >= 0 && len(result.Variables) > nameidx {
 | 
			
		||||
				name = result.Variables[nameidx]
 | 
			
		||||
			}
 | 
			
		||||
			if valueidx >= 0 && len(result.Variables) > valueidx {
 | 
			
		||||
				value = result.Variables[valueidx]
 | 
			
		||||
			}
 | 
			
		||||
			if unitidx >= 0 && len(result.Variables) > unitidx {
 | 
			
		||||
				unit = result.Variables[unitidx]
 | 
			
		||||
			}
 | 
			
		||||
			if len(result.Variables) > 2 {
 | 
			
		||||
				unit = result.Variables[2]
 | 
			
		||||
			}
 | 
			
		||||
			y, err := lp.New(name.Value.(string), m.tags, m.meta, map[string]interface{}{"value": value.Value}, timestamp)
 | 
			
		||||
			if err == nil {
 | 
			
		||||
				// Send it to output channel
 | 
			
		||||
				if len(unit.Name) > 0 && unit.Value != nil {
 | 
			
		||||
					y.AddMeta("unit", unit.Value.(string))
 | 
			
		||||
				}
 | 
			
		||||
				output <- y
 | 
			
		||||
			}
 | 
			
		||||
		}
 | 
			
		||||
		params.Conn.Close()
 | 
			
		||||
	}
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// Close metric collector: close network connection, close files, close libraries, ...
 | 
			
		||||
// Called once by the collector manager
 | 
			
		||||
func (m *SNMPCollector) Close() {
 | 
			
		||||
	// Unset flag
 | 
			
		||||
	m.init = false
 | 
			
		||||
}
 | 
			
		||||
							
								
								
									
										43
									
								
								collectors/snmpMetric.md
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										43
									
								
								collectors/snmpMetric.md
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,43 @@
 | 
			
		||||
 | 
			
		||||
## `snmpstat` collector
 | 
			
		||||
 | 
			
		||||
```json
 | 
			
		||||
  "snmpstat": {
 | 
			
		||||
    "targets" : [{
 | 
			
		||||
        "hostname" : "host1.example.com",
 | 
			
		||||
        "port" : 161,
 | 
			
		||||
        "community": "public",
 | 
			
		||||
        "timeout" : 1
 | 
			
		||||
    }],
 | 
			
		||||
    "metrics" : [
 | 
			
		||||
        {
 | 
			
		||||
            "name": "sensor1",
 | 
			
		||||
            "value": "1.3.6.1.2.1.1.4.0",
 | 
			
		||||
            "unit": "1.3.6.1.2.1.1.7.0"
 | 
			
		||||
        },
 | 
			
		||||
        {
 | 
			
		||||
            "name": "1.3.6.1.2.1.1.2.0",
 | 
			
		||||
            "value": "1.3.6.1.2.1.1.4.0",
 | 
			
		||||
            "unit": "mb/s"
 | 
			
		||||
        }
 | 
			
		||||
    ]
 | 
			
		||||
  }
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
The `snmpstat` collector uses [gosnmp](https://github.com/gosnmp/gosnmp) to read metrics from network-attached devices.
 | 
			
		||||
 | 
			
		||||
The configuration of SNMP is quite extensive due to its flexibility. For the collector, the configuration is split into two parts:
 | 
			
		||||
 | 
			
		||||
### Target configuration
 | 
			
		||||
 | 
			
		||||
Each network-attached device that should be queried is configured as a target. A target consists of:
 | 
			
		||||
- `hostname`
 | 
			
		||||
- `port` (default 161)
 | 
			
		||||
- `community` (default 'public')
 | 
			
		||||
- `timeout` in seconds (default 1 for 1 second)
 | 
			
		||||
 | 
			
		||||
### Metric configuration
 | 
			
		||||
- `name` can be an OID or a user-given string
 | 
			
		||||
- `value` has to be an OID
 | 
			
		||||
- `unit` can be empty, an OID or a user-given string
 | 
			
		||||
 | 
			
		||||
		Reference in New Issue
	
	Block a user