mirror of
				https://github.com/ClusterCockpit/cc-metric-collector.git
				synced 2025-11-03 18:25:07 +01:00 
			
		
		
		
	Moved as much work as possible to Init()
This commit is contained in:
		@@ -2,12 +2,12 @@ package collectors
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
import (
 | 
					import (
 | 
				
			||||||
	"fmt"
 | 
						"fmt"
 | 
				
			||||||
	"io/ioutil"
 | 
						"os"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
 | 
						lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
 | 
				
			||||||
 | 
						"golang.org/x/sys/unix"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	"encoding/json"
 | 
						"encoding/json"
 | 
				
			||||||
	"errors"
 | 
					 | 
				
			||||||
	"path/filepath"
 | 
						"path/filepath"
 | 
				
			||||||
	"strconv"
 | 
						"strconv"
 | 
				
			||||||
	"strings"
 | 
						"strings"
 | 
				
			||||||
@@ -16,13 +16,20 @@ import (
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
const IB_BASEPATH = `/sys/class/infiniband/`
 | 
					const IB_BASEPATH = `/sys/class/infiniband/`
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					type InfinibandCollectorInfo struct {
 | 
				
			||||||
 | 
						LID              string            // IB local Identifier (LID)
 | 
				
			||||||
 | 
						device           string            // IB device
 | 
				
			||||||
 | 
						port             string            // IB device port
 | 
				
			||||||
 | 
						portCounterFiles map[string]string // mapping counter name -> file
 | 
				
			||||||
 | 
						tagSet           map[string]string // corresponding tag list
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
type InfinibandCollector struct {
 | 
					type InfinibandCollector struct {
 | 
				
			||||||
	metricCollector
 | 
						metricCollector
 | 
				
			||||||
	tags   map[string]string
 | 
					 | 
				
			||||||
	lids   map[string]map[string]string
 | 
					 | 
				
			||||||
	config struct {
 | 
						config struct {
 | 
				
			||||||
		ExcludeDevices []string `json:"exclude_devices,omitempty"`
 | 
							ExcludeDevices []string `json:"exclude_devices,omitempty"` // IB device to exclude e.g. mlx5_0
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 | 
						info []InfinibandCollectorInfo
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
func (m *InfinibandCollector) Help() {
 | 
					func (m *InfinibandCollector) Help() {
 | 
				
			||||||
@@ -43,102 +50,116 @@ func (m *InfinibandCollector) Help() {
 | 
				
			|||||||
	fmt.Println("- ib_xmit_pkts")
 | 
						fmt.Println("- ib_xmit_pkts")
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// Init initializes the Infiniband collector by walking through files below IB_BASEPATH
 | 
				
			||||||
func (m *InfinibandCollector) Init(config json.RawMessage) error {
 | 
					func (m *InfinibandCollector) Init(config json.RawMessage) error {
 | 
				
			||||||
	var err error
 | 
						var err error
 | 
				
			||||||
	m.name = "InfinibandCollector"
 | 
						m.name = "InfinibandCollector"
 | 
				
			||||||
	m.setup()
 | 
						m.setup()
 | 
				
			||||||
	m.meta = map[string]string{"source": m.name, "group": "Network"}
 | 
						m.meta = map[string]string{
 | 
				
			||||||
	m.tags = map[string]string{"type": "node"}
 | 
							"source": m.name,
 | 
				
			||||||
 | 
							"group":  "Network",
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
	if len(config) > 0 {
 | 
						if len(config) > 0 {
 | 
				
			||||||
		err = json.Unmarshal(config, &m.config)
 | 
							err = json.Unmarshal(config, &m.config)
 | 
				
			||||||
		if err != nil {
 | 
							if err != nil {
 | 
				
			||||||
			return err
 | 
								return err
 | 
				
			||||||
		}
 | 
							}
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
	m.lids = make(map[string]map[string]string)
 | 
					
 | 
				
			||||||
	p := fmt.Sprintf("%s/*/ports/*/lid", string(IB_BASEPATH))
 | 
						// Loop for all InfiniBand directories
 | 
				
			||||||
	files, err := filepath.Glob(p)
 | 
						globPattern := filepath.Join(IB_BASEPATH, "*", "ports", "*")
 | 
				
			||||||
	for _, f := range files {
 | 
						ibDirs, err := filepath.Glob(globPattern)
 | 
				
			||||||
		lid, err := ioutil.ReadFile(f)
 | 
						if err != nil {
 | 
				
			||||||
		if err == nil {
 | 
							return fmt.Errorf("Unable to glob files with pattern %s: %v", globPattern, err)
 | 
				
			||||||
			plist := strings.Split(strings.Replace(f, string(IB_BASEPATH), "", -1), "/")
 | 
						}
 | 
				
			||||||
 | 
						if ibDirs == nil {
 | 
				
			||||||
 | 
							return fmt.Errorf("Unable to find any directories with pattern %s", globPattern)
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						for _, path := range ibDirs {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							// Skip, when no LID is assigned
 | 
				
			||||||
 | 
							LID, ok := readOneLine(path + "/lid")
 | 
				
			||||||
 | 
							if !ok || LID == "0x0" {
 | 
				
			||||||
 | 
								continue
 | 
				
			||||||
 | 
							}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							// Get device and port component
 | 
				
			||||||
 | 
							pathSplit := strings.Split(path, string(os.PathSeparator))
 | 
				
			||||||
 | 
							device := pathSplit[4]
 | 
				
			||||||
 | 
							port := pathSplit[6]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							// Skip excluded devices
 | 
				
			||||||
		skip := false
 | 
							skip := false
 | 
				
			||||||
			for _, d := range m.config.ExcludeDevices {
 | 
							for _, excludedDevice := range m.config.ExcludeDevices {
 | 
				
			||||||
				if d == plist[0] {
 | 
								if excludedDevice == device {
 | 
				
			||||||
				skip = true
 | 
									skip = true
 | 
				
			||||||
 | 
									break
 | 
				
			||||||
			}
 | 
								}
 | 
				
			||||||
		}
 | 
							}
 | 
				
			||||||
			if !skip {
 | 
							if skip {
 | 
				
			||||||
				m.lids[plist[0]] = make(map[string]string)
 | 
								continue
 | 
				
			||||||
				m.lids[plist[0]][plist[2]] = string(lid)
 | 
					 | 
				
			||||||
		}
 | 
							}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							// Check access to counter files
 | 
				
			||||||
 | 
							countersDir := filepath.Join(path, "counters")
 | 
				
			||||||
 | 
							portCounterFiles := map[string]string{
 | 
				
			||||||
 | 
								"ib_recv":      filepath.Join(countersDir, "port_rcv_data"),
 | 
				
			||||||
 | 
								"ib_xmit":      filepath.Join(countersDir, "port_xmit_data"),
 | 
				
			||||||
 | 
								"ib_recv_pkts": filepath.Join(countersDir, "port_rcv_packets"),
 | 
				
			||||||
 | 
								"ib_xmit_pkts": filepath.Join(countersDir, "port_xmit_packets"),
 | 
				
			||||||
 | 
							}
 | 
				
			||||||
 | 
							for _, counterFile := range portCounterFiles {
 | 
				
			||||||
 | 
								err := unix.Access(counterFile, unix.R_OK)
 | 
				
			||||||
 | 
								if err != nil {
 | 
				
			||||||
 | 
									return fmt.Errorf("Unable to access %s: %v", counterFile, err)
 | 
				
			||||||
			}
 | 
								}
 | 
				
			||||||
		}
 | 
							}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	if len(m.lids) == 0 {
 | 
							m.info = append(m.info,
 | 
				
			||||||
		return errors.New("No usable IB devices")
 | 
								InfinibandCollectorInfo{
 | 
				
			||||||
 | 
									LID:              LID,
 | 
				
			||||||
 | 
									device:           device,
 | 
				
			||||||
 | 
									port:             port,
 | 
				
			||||||
 | 
									portCounterFiles: portCounterFiles,
 | 
				
			||||||
 | 
									tagSet: map[string]string{
 | 
				
			||||||
 | 
										"type":   "node",
 | 
				
			||||||
 | 
										"device": device,
 | 
				
			||||||
 | 
										"port":   port,
 | 
				
			||||||
 | 
										"lid":    LID,
 | 
				
			||||||
 | 
									},
 | 
				
			||||||
 | 
								})
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						if len(m.info) == 0 {
 | 
				
			||||||
 | 
							return fmt.Errorf("Found no IB devices")
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	m.init = true
 | 
						m.init = true
 | 
				
			||||||
	return nil
 | 
						return nil
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// Read reads Infiniband counter files below IB_BASEPATH
 | 
				
			||||||
func (m *InfinibandCollector) Read(interval time.Duration, output chan lp.CCMetric) {
 | 
					func (m *InfinibandCollector) Read(interval time.Duration, output chan lp.CCMetric) {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	if m.init {
 | 
						// Check if already initialized
 | 
				
			||||||
		for dev, ports := range m.lids {
 | 
						if !m.init {
 | 
				
			||||||
			for port, lid := range ports {
 | 
							return
 | 
				
			||||||
				tags := map[string]string{
 | 
						}
 | 
				
			||||||
					"type":   "node",
 | 
					
 | 
				
			||||||
					"device": dev,
 | 
						now := time.Now()
 | 
				
			||||||
					"port":   port,
 | 
						for i := range m.info {
 | 
				
			||||||
					"lid":    lid}
 | 
					
 | 
				
			||||||
				path := fmt.Sprintf("%s/%s/ports/%s/counters/", string(IB_BASEPATH), dev, port)
 | 
							// device info
 | 
				
			||||||
				buffer, err := ioutil.ReadFile(fmt.Sprintf("%s/port_rcv_data", path))
 | 
							info := &m.info[i]
 | 
				
			||||||
				if err == nil {
 | 
							for counterName, counterFile := range info.portCounterFiles {
 | 
				
			||||||
					data := strings.Replace(string(buffer), "\n", "", -1)
 | 
								if data, ok := readOneLine(counterFile); ok {
 | 
				
			||||||
					v, err := strconv.ParseFloat(data, 64)
 | 
									if v, err := strconv.ParseInt(data, 10, 64); err == nil {
 | 
				
			||||||
					if err == nil {
 | 
										if y, err := lp.New(counterName, info.tagSet, m.meta, map[string]interface{}{"value": v}, now); err == nil {
 | 
				
			||||||
						y, err := lp.New("ib_recv", tags, m.meta, map[string]interface{}{"value": float64(v)}, time.Now())
 | 
					 | 
				
			||||||
						if err == nil {
 | 
					 | 
				
			||||||
						output <- y
 | 
											output <- y
 | 
				
			||||||
					}
 | 
										}
 | 
				
			||||||
				}
 | 
									}
 | 
				
			||||||
			}
 | 
								}
 | 
				
			||||||
				buffer, err = ioutil.ReadFile(fmt.Sprintf("%s/port_xmit_data", path))
 | 
					 | 
				
			||||||
				if err == nil {
 | 
					 | 
				
			||||||
					data := strings.Replace(string(buffer), "\n", "", -1)
 | 
					 | 
				
			||||||
					v, err := strconv.ParseFloat(data, 64)
 | 
					 | 
				
			||||||
					if err == nil {
 | 
					 | 
				
			||||||
						y, err := lp.New("ib_xmit", tags, m.meta, map[string]interface{}{"value": float64(v)}, time.Now())
 | 
					 | 
				
			||||||
						if err == nil {
 | 
					 | 
				
			||||||
							output <- y
 | 
					 | 
				
			||||||
						}
 | 
					 | 
				
			||||||
					}
 | 
					 | 
				
			||||||
				}
 | 
					 | 
				
			||||||
				buffer, err = ioutil.ReadFile(fmt.Sprintf("%s/port_rcv_packets", path))
 | 
					 | 
				
			||||||
				if err == nil {
 | 
					 | 
				
			||||||
					data := strings.Replace(string(buffer), "\n", "", -1)
 | 
					 | 
				
			||||||
					v, err := strconv.ParseFloat(data, 64)
 | 
					 | 
				
			||||||
					if err == nil {
 | 
					 | 
				
			||||||
						y, err := lp.New("ib_recv_pkts", tags, m.meta, map[string]interface{}{"value": float64(v)}, time.Now())
 | 
					 | 
				
			||||||
						if err == nil {
 | 
					 | 
				
			||||||
							output <- y
 | 
					 | 
				
			||||||
						}
 | 
					 | 
				
			||||||
					}
 | 
					 | 
				
			||||||
				}
 | 
					 | 
				
			||||||
				buffer, err = ioutil.ReadFile(fmt.Sprintf("%s/port_xmit_packets", path))
 | 
					 | 
				
			||||||
				if err == nil {
 | 
					 | 
				
			||||||
					data := strings.Replace(string(buffer), "\n", "", -1)
 | 
					 | 
				
			||||||
					v, err := strconv.ParseFloat(data, 64)
 | 
					 | 
				
			||||||
					if err == nil {
 | 
					 | 
				
			||||||
						y, err := lp.New("ib_xmit_pkts", tags, m.meta, map[string]interface{}{"value": float64(v)}, time.Now())
 | 
					 | 
				
			||||||
						if err == nil {
 | 
					 | 
				
			||||||
							output <- y
 | 
					 | 
				
			||||||
						}
 | 
					 | 
				
			||||||
					}
 | 
					 | 
				
			||||||
				}
 | 
					 | 
				
			||||||
			}
 | 
					 | 
				
			||||||
		}
 | 
							}
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user