mirror of
				https://github.com/ClusterCockpit/cc-metric-collector.git
				synced 2025-10-31 00:55:06 +01:00 
			
		
		
		
	* Cleanup: Remove unused code * Use Golang duration parser for 'interval' and 'duration' in main config * Update handling of LIKWID headers. Download only if not already present in the system. Fixes #73 * Units with cc-units (#64) * Add option to normalize units with cc-unit * Add unit conversion to router * Add option to change unit prefix in the router * Add to MetricRouter README * Add order of operations in router to README * Use second add_tags/del_tags only if metric gets renamed * Skip disks in DiskstatCollector that have size=0 * Check readability of sensor files in TempCollector * Fix for --once option * Rename `cpu` type to `hwthread` (#69) * Rename 'cpu' type to 'hwthread' to avoid naming clashes with MetricStore and CC-Webfrontend * Collectors in parallel (#74) * Provide info to CollectorManager whether the collector can be executed in parallel with others * Split serial and parallel collectors. Read in parallel first * Update NvidiaCollector with new metrics, MIG and NvLink support (#75) * CC topology module update (#76) * Rename CPU to hardware thread, write some comments * Do renaming in other parts * Remove CpuList and SocketList function from metricCollector. 
Available in ccTopology * Option to use MIG UUID as subtype-id in NvidiaCollector * Option to use MIG slice name as subtype-id in NvidiaCollector * MetricRouter: Fix JSON in README * Fix for Github Action to really use the selected version * Remove Ganglia installation in runonce Action and add Go 1.18 * Fix daemon options in init script * Add separate go.mod files to use it with deprecated 1.16 * Minor updates for Makefiles * fix string comparison * AMD ROCm SMI collector (#77) * Add collector for AMD ROCm SMI metrics * Fix import path * Fix imports * Remove Board Number * store GPU index explicitly * Remove board number from description * Use http instead of ftp to download likwid * Fix serial number in rocmCollector * Improved http sink (#78) * automatic flush in NatsSink * tweak default options of HttpSink * shorter cirt. section and retries for HttpSink * fix error handling * Remove file added by mistake. * Use http instead of ftp to download likwid * Fix serial number in rocmCollector Co-authored-by: Thomas Roehl <thomas.roehl@fau.de> Co-authored-by: Holger Obermaier <40787752+ho-ob@users.noreply.github.com> Co-authored-by: Lou <lou.knauer@gmx.de>
		
			
				
	
	
		
			221 lines
		
	
	
		
			5.2 KiB
		
	
	
	
		
			Go
		
	
	
	
	
	
			
		
		
	
	
			221 lines
		
	
	
		
			5.2 KiB
		
	
	
	
		
			Go
		
	
	
	
	
	
| package collectors
 | |
| 
 | |
| import (
 | |
| 	"bufio"
 | |
| 	"encoding/json"
 | |
| 	"errors"
 | |
| 	"os"
 | |
| 	"strconv"
 | |
| 	"strings"
 | |
| 	"time"
 | |
| 
 | |
| 	cclog "github.com/ClusterCockpit/cc-metric-collector/internal/ccLogger"
 | |
| 	lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
 | |
| )
 | |
| 
 | |
// NETSTATFILE is the path of the file the collector reads the per-device
// network counters from.
const NETSTATFILE = "/proc/net/dev"
 | |
| 
 | |
// NetstatCollectorConfig holds the JSON configuration of the NetstatCollector.
type NetstatCollectorConfig struct {
	// IncludeDevices lists the network devices whose counters are reported.
	// Devices that are not listed here are ignored.
	IncludeDevices []string `json:"include_devices"`
	// SendAbsoluteValues enables sending the raw counter values.
	// Init sets it to true before the configuration is parsed.
	SendAbsoluteValues bool `json:"send_abs_values"`
	// SendDerivedValues enables sending per-second rates computed from two
	// consecutive counter readings (metric name suffix "_bw").
	// Init sets it to false before the configuration is parsed.
	SendDerivedValues bool `json:"send_derived_values"`
}
 | |
| 
 | |
// NetstatCollectorMetric describes one counter of one network device.
type NetstatCollectorMetric struct {
	// name is the metric name used for the absolute value; the derived
	// rate is sent as name + "_bw".
	name string
	// index is the column of the counter within a device line, counting
	// the interface name as column 0.
	index int
	// tags are attached to both the absolute and the derived metric.
	tags map[string]string
	// meta is the meta information attached to the absolute metric.
	meta map[string]string
	// meta_rates is the meta information attached to the derived rate metric.
	meta_rates map[string]string
	// lastValue is the counter value seen by the previous read; it is
	// initialised to -1 to mark that no previous reading exists yet.
	lastValue int64
}
 | |
| 
 | |
| type NetstatCollector struct {
 | |
| 	metricCollector
 | |
| 	config        NetstatCollectorConfig
 | |
| 	matches       map[string][]NetstatCollectorMetric
 | |
| 	lastTimestamp time.Time
 | |
| }
 | |
| 
 | |
| func (m *NetstatCollector) Init(config json.RawMessage) error {
 | |
| 	m.name = "NetstatCollector"
 | |
| 	m.parallel = true
 | |
| 	m.setup()
 | |
| 	m.lastTimestamp = time.Now()
 | |
| 
 | |
| 	const (
 | |
| 		fieldInterface = iota
 | |
| 		fieldReceiveBytes
 | |
| 		fieldReceivePackets
 | |
| 		fieldReceiveErrs
 | |
| 		fieldReceiveDrop
 | |
| 		fieldReceiveFifo
 | |
| 		fieldReceiveFrame
 | |
| 		fieldReceiveCompressed
 | |
| 		fieldReceiveMulticast
 | |
| 		fieldTransmitBytes
 | |
| 		fieldTransmitPackets
 | |
| 		fieldTransmitErrs
 | |
| 		fieldTransmitDrop
 | |
| 		fieldTransmitFifo
 | |
| 		fieldTransmitColls
 | |
| 		fieldTransmitCarrier
 | |
| 		fieldTransmitCompressed
 | |
| 	)
 | |
| 
 | |
| 	m.matches = make(map[string][]NetstatCollectorMetric)
 | |
| 
 | |
| 	// Set default configuration,
 | |
| 	m.config.SendAbsoluteValues = true
 | |
| 	m.config.SendDerivedValues = false
 | |
| 	// Read configuration file, allow overwriting default config
 | |
| 	if len(config) > 0 {
 | |
| 		err := json.Unmarshal(config, &m.config)
 | |
| 		if err != nil {
 | |
| 			cclog.ComponentError(m.name, "Error reading config:", err.Error())
 | |
| 			return err
 | |
| 		}
 | |
| 	}
 | |
| 
 | |
| 	// Check access to net statistic file
 | |
| 	file, err := os.Open(NETSTATFILE)
 | |
| 	if err != nil {
 | |
| 		cclog.ComponentError(m.name, err.Error())
 | |
| 		return err
 | |
| 	}
 | |
| 	defer file.Close()
 | |
| 
 | |
| 	scanner := bufio.NewScanner(file)
 | |
| 	for scanner.Scan() {
 | |
| 		l := scanner.Text()
 | |
| 
 | |
| 		// Skip lines with no net device entry
 | |
| 		if !strings.Contains(l, ":") {
 | |
| 			continue
 | |
| 		}
 | |
| 
 | |
| 		// Split line into fields
 | |
| 		f := strings.Fields(l)
 | |
| 
 | |
| 		// Get net device entry
 | |
| 		dev := strings.Trim(f[0], ": ")
 | |
| 
 | |
| 		// Check if device is a included device
 | |
| 		if _, ok := stringArrayContains(m.config.IncludeDevices, dev); ok {
 | |
| 			tags := map[string]string{"device": dev, "type": "node"}
 | |
| 			meta_unit_byte := map[string]string{"source": m.name, "group": "Network", "unit": "bytes"}
 | |
| 			meta_unit_byte_per_sec := map[string]string{"source": m.name, "group": "Network", "unit": "bytes/sec"}
 | |
| 			meta_unit_pkts := map[string]string{"source": m.name, "group": "Network", "unit": "packets"}
 | |
| 			meta_unit_pkts_per_sec := map[string]string{"source": m.name, "group": "Network", "unit": "packets/sec"}
 | |
| 
 | |
| 			m.matches[dev] = []NetstatCollectorMetric{
 | |
| 				{
 | |
| 					name:       "net_bytes_in",
 | |
| 					index:      fieldReceiveBytes,
 | |
| 					lastValue:  -1,
 | |
| 					tags:       tags,
 | |
| 					meta:       meta_unit_byte,
 | |
| 					meta_rates: meta_unit_byte_per_sec,
 | |
| 				},
 | |
| 				{
 | |
| 					name:       "net_pkts_in",
 | |
| 					index:      fieldReceivePackets,
 | |
| 					lastValue:  -1,
 | |
| 					tags:       tags,
 | |
| 					meta:       meta_unit_pkts,
 | |
| 					meta_rates: meta_unit_pkts_per_sec,
 | |
| 				},
 | |
| 				{
 | |
| 					name:       "net_bytes_out",
 | |
| 					index:      fieldTransmitBytes,
 | |
| 					lastValue:  -1,
 | |
| 					tags:       tags,
 | |
| 					meta:       meta_unit_byte,
 | |
| 					meta_rates: meta_unit_byte_per_sec,
 | |
| 				},
 | |
| 				{
 | |
| 					name:       "net_pkts_out",
 | |
| 					index:      fieldTransmitPackets,
 | |
| 					lastValue:  -1,
 | |
| 					tags:       tags,
 | |
| 					meta:       meta_unit_pkts,
 | |
| 					meta_rates: meta_unit_pkts_per_sec,
 | |
| 				},
 | |
| 			}
 | |
| 		}
 | |
| 
 | |
| 	}
 | |
| 
 | |
| 	if len(m.matches) == 0 {
 | |
| 		return errors.New("no devices to collector metrics found")
 | |
| 	}
 | |
| 	m.init = true
 | |
| 	return nil
 | |
| }
 | |
| 
 | |
| func (m *NetstatCollector) Read(interval time.Duration, output chan lp.CCMetric) {
 | |
| 	if !m.init {
 | |
| 		return
 | |
| 	}
 | |
| 	// Current time stamp
 | |
| 	now := time.Now()
 | |
| 	// time difference to last time stamp
 | |
| 	timeDiff := now.Sub(m.lastTimestamp).Seconds()
 | |
| 	// Save current timestamp
 | |
| 	m.lastTimestamp = now
 | |
| 
 | |
| 	file, err := os.Open(string(NETSTATFILE))
 | |
| 	if err != nil {
 | |
| 		cclog.ComponentError(m.name, err.Error())
 | |
| 		return
 | |
| 	}
 | |
| 	defer file.Close()
 | |
| 
 | |
| 	scanner := bufio.NewScanner(file)
 | |
| 	for scanner.Scan() {
 | |
| 		l := scanner.Text()
 | |
| 
 | |
| 		// Skip lines with no net device entry
 | |
| 		if !strings.Contains(l, ":") {
 | |
| 			continue
 | |
| 		}
 | |
| 
 | |
| 		// Split line into fields
 | |
| 		f := strings.Fields(l)
 | |
| 
 | |
| 		// Get net device entry
 | |
| 		dev := strings.Trim(f[0], ":")
 | |
| 
 | |
| 		// Check if device is a included device
 | |
| 		if devmetrics, ok := m.matches[dev]; ok {
 | |
| 			for i := range devmetrics {
 | |
| 				metric := &devmetrics[i]
 | |
| 
 | |
| 				// Read value
 | |
| 				v, err := strconv.ParseInt(f[metric.index], 10, 64)
 | |
| 				if err != nil {
 | |
| 					continue
 | |
| 				}
 | |
| 				if m.config.SendAbsoluteValues {
 | |
| 					if y, err := lp.New(metric.name, metric.tags, metric.meta, map[string]interface{}{"value": v}, now); err == nil {
 | |
| 						output <- y
 | |
| 					}
 | |
| 				}
 | |
| 				if m.config.SendDerivedValues {
 | |
| 					if metric.lastValue >= 0 {
 | |
| 						rate := float64(v-metric.lastValue) / timeDiff
 | |
| 						if y, err := lp.New(metric.name+"_bw", metric.tags, metric.meta_rates, map[string]interface{}{"value": rate}, now); err == nil {
 | |
| 							output <- y
 | |
| 						}
 | |
| 					}
 | |
| 					metric.lastValue = v
 | |
| 				}
 | |
| 			}
 | |
| 		}
 | |
| 	}
 | |
| }
 | |
| 
 | |
| func (m *NetstatCollector) Close() {
 | |
| 	m.init = false
 | |
| }
 |