mirror of
				https://github.com/ClusterCockpit/cc-metric-collector.git
				synced 2025-10-30 16:45:07 +01:00 
			
		
		
		
	
		
			
				
	
	
		
			170 lines
		
	
	
		
			4.4 KiB
		
	
	
	
		
			Go
		
	
	
	
	
	
			
		
		
	
	
			170 lines
		
	
	
		
			4.4 KiB
		
	
	
	
		
			Go
		
	
	
	
	
	
| package collectors
 | |
| 
 | |
| import (
 | |
| 	"fmt"
 | |
| 	"os"
 | |
| 
 | |
| 	lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
 | |
| 	"golang.org/x/sys/unix"
 | |
| 
 | |
| 	"encoding/json"
 | |
| 	"path/filepath"
 | |
| 	"strconv"
 | |
| 	"strings"
 | |
| 	"time"
 | |
| )
 | |
| 
 | |
// IB_BASEPATH is the directory below which the collector searches for
// InfiniBand devices; Init globs <IB_BASEPATH>/<device>/ports/<port>.
const IB_BASEPATH = `/sys/class/infiniband/`
 | |
| 
 | |
// InfinibandCollectorInfo describes one InfiniBand device port that was
// registered by Init and is read on every call to Read.
type InfinibandCollectorInfo struct {
	LID              string            // IB local Identifier (LID)
	device           string            // IB device
	port             string            // IB device port
	portCounterFiles map[string]string // mapping counter name -> file
	tagSet           map[string]string // corresponding tag list
}
 | |
| 
 | |
// InfinibandCollector reads InfiniBand port counters from the files
// found below IB_BASEPATH and emits one metric per counter and port.
type InfinibandCollector struct {
	metricCollector
	// config holds the options unmarshalled from the JSON passed to Init
	config struct {
		ExcludeDevices []string `json:"exclude_devices,omitempty"` // IB device to exclude e.g. mlx5_0
	}
	// info lists the ports registered by Init
	info []InfinibandCollectorInfo
}
 | |
| 
 | |
| func (m *InfinibandCollector) Help() {
 | |
| 	fmt.Println("This collector includes all devices that can be found below ", IB_BASEPATH)
 | |
| 	fmt.Println("and where any of the ports provides a 'lid' file (glob ", IB_BASEPATH, "/<dev>/ports/<port>/lid).")
 | |
| 	fmt.Println("The devices can be filtered with the 'exclude_devices' option in the configuration.")
 | |
| 	fmt.Println("For each found LIDs the collector calls the 'perfquery' command")
 | |
| 	fmt.Println("")
 | |
| 	fmt.Println("Full configuration object:")
 | |
| 	fmt.Println("\"ibstat\" : {")
 | |
| 	fmt.Println("  \"exclude_devices\" : [\"dev1\"]")
 | |
| 	fmt.Println("}")
 | |
| 	fmt.Println("")
 | |
| 	fmt.Println("Metrics:")
 | |
| 	fmt.Println("- ib_recv")
 | |
| 	fmt.Println("- ib_xmit")
 | |
| 	fmt.Println("- ib_recv_pkts")
 | |
| 	fmt.Println("- ib_xmit_pkts")
 | |
| }
 | |
| 
 | |
| // Init initializes the Infiniband collector by walking through files below IB_BASEPATH
 | |
| func (m *InfinibandCollector) Init(config json.RawMessage) error {
 | |
| 	var err error
 | |
| 	m.name = "InfinibandCollector"
 | |
| 	m.setup()
 | |
| 	m.meta = map[string]string{
 | |
| 		"source": m.name,
 | |
| 		"group":  "Network",
 | |
| 	}
 | |
| 	if len(config) > 0 {
 | |
| 		err = json.Unmarshal(config, &m.config)
 | |
| 		if err != nil {
 | |
| 			return err
 | |
| 		}
 | |
| 	}
 | |
| 
 | |
| 	// Loop for all InfiniBand directories
 | |
| 	globPattern := filepath.Join(IB_BASEPATH, "*", "ports", "*")
 | |
| 	ibDirs, err := filepath.Glob(globPattern)
 | |
| 	if err != nil {
 | |
| 		return fmt.Errorf("Unable to glob files with pattern %s: %v", globPattern, err)
 | |
| 	}
 | |
| 	if ibDirs == nil {
 | |
| 		return fmt.Errorf("Unable to find any directories with pattern %s", globPattern)
 | |
| 	}
 | |
| 
 | |
| 	for _, path := range ibDirs {
 | |
| 
 | |
| 		// Skip, when no LID is assigned
 | |
| 		LID, ok := readOneLine(path + "/lid")
 | |
| 		if !ok || LID == "0x0" {
 | |
| 			continue
 | |
| 		}
 | |
| 
 | |
| 		// Get device and port component
 | |
| 		pathSplit := strings.Split(path, string(os.PathSeparator))
 | |
| 		device := pathSplit[4]
 | |
| 		port := pathSplit[6]
 | |
| 
 | |
| 		// Skip excluded devices
 | |
| 		skip := false
 | |
| 		for _, excludedDevice := range m.config.ExcludeDevices {
 | |
| 			if excludedDevice == device {
 | |
| 				skip = true
 | |
| 				break
 | |
| 			}
 | |
| 		}
 | |
| 		if skip {
 | |
| 			continue
 | |
| 		}
 | |
| 
 | |
| 		// Check access to counter files
 | |
| 		countersDir := filepath.Join(path, "counters")
 | |
| 		portCounterFiles := map[string]string{
 | |
| 			"ib_recv":      filepath.Join(countersDir, "port_rcv_data"),
 | |
| 			"ib_xmit":      filepath.Join(countersDir, "port_xmit_data"),
 | |
| 			"ib_recv_pkts": filepath.Join(countersDir, "port_rcv_packets"),
 | |
| 			"ib_xmit_pkts": filepath.Join(countersDir, "port_xmit_packets"),
 | |
| 		}
 | |
| 		for _, counterFile := range portCounterFiles {
 | |
| 			err := unix.Access(counterFile, unix.R_OK)
 | |
| 			if err != nil {
 | |
| 				return fmt.Errorf("Unable to access %s: %v", counterFile, err)
 | |
| 			}
 | |
| 		}
 | |
| 
 | |
| 		m.info = append(m.info,
 | |
| 			InfinibandCollectorInfo{
 | |
| 				LID:              LID,
 | |
| 				device:           device,
 | |
| 				port:             port,
 | |
| 				portCounterFiles: portCounterFiles,
 | |
| 				tagSet: map[string]string{
 | |
| 					"type":   "node",
 | |
| 					"device": device,
 | |
| 					"port":   port,
 | |
| 					"lid":    LID,
 | |
| 				},
 | |
| 			})
 | |
| 	}
 | |
| 
 | |
| 	if len(m.info) == 0 {
 | |
| 		return fmt.Errorf("Found no IB devices")
 | |
| 	}
 | |
| 
 | |
| 	m.init = true
 | |
| 	return nil
 | |
| }
 | |
| 
 | |
| // Read reads Infiniband counter files below IB_BASEPATH
 | |
| func (m *InfinibandCollector) Read(interval time.Duration, output chan lp.CCMetric) {
 | |
| 
 | |
| 	// Check if already initialized
 | |
| 	if !m.init {
 | |
| 		return
 | |
| 	}
 | |
| 
 | |
| 	now := time.Now()
 | |
| 	for i := range m.info {
 | |
| 
 | |
| 		// device info
 | |
| 		info := &m.info[i]
 | |
| 		for counterName, counterFile := range info.portCounterFiles {
 | |
| 			if data, ok := readOneLine(counterFile); ok {
 | |
| 				if v, err := strconv.ParseInt(data, 10, 64); err == nil {
 | |
| 					if y, err := lp.New(counterName, info.tagSet, m.meta, map[string]interface{}{"value": v}, now); err == nil {
 | |
| 						output <- y
 | |
| 					}
 | |
| 				}
 | |
| 			}
 | |
| 		}
 | |
| 	}
 | |
| }
 | |
| 
 | |
// Close marks the collector as not initialized, so that subsequent calls
// to Read return without sending metrics.
func (m *InfinibandCollector) Close() {
	m.init = false
}
 |