mirror of
				https://github.com/ClusterCockpit/cc-metric-collector.git
				synced 2025-10-31 09:05:05 +01:00 
			
		
		
		
	Merge branch 'develop' of github.com:ClusterCockpit/cc-metric-collector into develop
This commit is contained in:
		| @@ -19,23 +19,31 @@ const LCTL_CMD = `lctl` | ||||
| const LCTL_OPTION = `get_param` | ||||
|  | ||||
| type LustreCollectorConfig struct { | ||||
| 	LCtlCommand        string   `json:"lctl_command"` | ||||
| 	ExcludeMetrics     []string `json:"exclude_metrics"` | ||||
| 	SendAllMetrics     bool     `json:"send_all_metrics"` | ||||
| 	Sudo               bool     `json:"use_sudo"` | ||||
| 	SendAbsoluteValues bool     `json:"send_abs_values"` | ||||
| 	SendDerivedValues  bool     `json:"send_derived_values"` | ||||
| 	LCtlCommand        string   `json:"lctl_command,omitempty"` | ||||
| 	ExcludeMetrics     []string `json:"exclude_metrics,omitempty"` | ||||
| 	Sudo               bool     `json:"use_sudo,omitempty"` | ||||
| 	SendAbsoluteValues bool     `json:"send_abs_values,omitempty"` | ||||
| 	SendDerivedValues  bool     `json:"send_derived_values,omitempty"` | ||||
| 	SendDiffValues     bool     `json:"send_diff_values,omitempty"` | ||||
| } | ||||
|  | ||||
| type LustreMetricDefinition struct { | ||||
| 	name       string | ||||
| 	lineprefix string | ||||
| 	lineoffset int | ||||
| 	unit       string | ||||
| 	calc       string | ||||
| } | ||||
|  | ||||
| type LustreCollector struct { | ||||
| 	metricCollector | ||||
| 	tags          map[string]string | ||||
| 	matches       map[string]map[string]int | ||||
| 	stats         map[string]map[string]int64 | ||||
| 	config        LustreCollectorConfig | ||||
| 	lctl          string | ||||
| 	sudoCmd       string | ||||
| 	lastTimestamp time.Time                   // Store time stamp of last tick to derive bandwidths | ||||
| 	definitions   []LustreMetricDefinition    // Combined list without excluded metrics | ||||
| 	stats         map[string]map[string]int64 // Data for last value per device and metric | ||||
| } | ||||
|  | ||||
| func (m *LustreCollector) getDeviceDataCommand(device string) []string { | ||||
| @@ -78,6 +86,16 @@ func (m *LustreCollector) getDevices() []string { | ||||
| 	return devices | ||||
| } | ||||
|  | ||||
| func getMetricData(lines []string, prefix string, offset int) (int64, error) { | ||||
| 	for _, line := range lines { | ||||
| 		if strings.HasPrefix(line, prefix) { | ||||
| 			lf := strings.Fields(line) | ||||
| 			return strconv.ParseInt(lf[offset], 0, 64) | ||||
| 		} | ||||
| 	} | ||||
| 	return 0, errors.New("no such line in data") | ||||
| } | ||||
|  | ||||
| // //Version reading the stats data of a device from sysfs | ||||
| // func (m *LustreCollector) getDeviceDataSysfs(device string) []string { | ||||
| // 	llitedir := filepath.Join(LUSTRE_SYSFS, "llite") | ||||
| @@ -90,6 +108,183 @@ func (m *LustreCollector) getDevices() []string { | ||||
| // 	return strings.Split(string(buffer), "\n") | ||||
| // } | ||||
|  | ||||
| var LustreAbsMetrics = []LustreMetricDefinition{ | ||||
| 	{ | ||||
| 		name:       "lustre_read_requests", | ||||
| 		lineprefix: "read_bytes", | ||||
| 		lineoffset: 1, | ||||
| 		unit:       "requests", | ||||
| 		calc:       "none", | ||||
| 	}, | ||||
| 	{ | ||||
| 		name:       "lustre_write_requests", | ||||
| 		lineprefix: "write_bytes", | ||||
| 		lineoffset: 1, | ||||
| 		unit:       "requests", | ||||
| 		calc:       "none", | ||||
| 	}, | ||||
| 	{ | ||||
| 		name:       "lustre_read_bytes", | ||||
| 		lineprefix: "read_bytes", | ||||
| 		lineoffset: 6, | ||||
| 		unit:       "bytes", | ||||
| 		calc:       "none", | ||||
| 	}, | ||||
| 	{ | ||||
| 		name:       "lustre_write_bytes", | ||||
| 		lineprefix: "write_bytes", | ||||
| 		lineoffset: 6, | ||||
| 		unit:       "bytes", | ||||
| 		calc:       "none", | ||||
| 	}, | ||||
| 	{ | ||||
| 		name:       "lustre_open", | ||||
| 		lineprefix: "open", | ||||
| 		lineoffset: 1, | ||||
| 		unit:       "", | ||||
| 		calc:       "none", | ||||
| 	}, | ||||
| 	{ | ||||
| 		name:       "lustre_close", | ||||
| 		lineprefix: "close", | ||||
| 		lineoffset: 1, | ||||
| 		unit:       "", | ||||
| 		calc:       "none", | ||||
| 	}, | ||||
| 	{ | ||||
| 		name:       "lustre_setattr", | ||||
| 		lineprefix: "setattr", | ||||
| 		lineoffset: 1, | ||||
| 		unit:       "", | ||||
| 		calc:       "none", | ||||
| 	}, | ||||
| 	{ | ||||
| 		name:       "lustre_getattr", | ||||
| 		lineprefix: "getattr", | ||||
| 		lineoffset: 1, | ||||
| 		unit:       "", | ||||
| 		calc:       "none", | ||||
| 	}, | ||||
| 	{ | ||||
| 		name:       "lustre_statfs", | ||||
| 		lineprefix: "statfs", | ||||
| 		lineoffset: 1, | ||||
| 		unit:       "", | ||||
| 		calc:       "none", | ||||
| 	}, | ||||
| 	{ | ||||
| 		name:       "lustre_inode_permission", | ||||
| 		lineprefix: "inode_permission", | ||||
| 		lineoffset: 1, | ||||
| 		unit:       "", | ||||
| 		calc:       "none", | ||||
| 	}, | ||||
| } | ||||
|  | ||||
| var LustreDiffMetrics = []LustreMetricDefinition{ | ||||
| 	{ | ||||
| 		name:       "lustre_read_requests_diff", | ||||
| 		lineprefix: "read_bytes", | ||||
| 		lineoffset: 1, | ||||
| 		unit:       "requests", | ||||
| 		calc:       "difference", | ||||
| 	}, | ||||
| 	{ | ||||
| 		name:       "lustre_write_requests_diff", | ||||
| 		lineprefix: "write_bytes", | ||||
| 		lineoffset: 1, | ||||
| 		unit:       "requests", | ||||
| 		calc:       "difference", | ||||
| 	}, | ||||
| 	{ | ||||
| 		name:       "lustre_read_bytes_diff", | ||||
| 		lineprefix: "read_bytes", | ||||
| 		lineoffset: 6, | ||||
| 		unit:       "bytes", | ||||
| 		calc:       "difference", | ||||
| 	}, | ||||
| 	{ | ||||
| 		name:       "lustre_write_bytes_diff", | ||||
| 		lineprefix: "write_bytes", | ||||
| 		lineoffset: 6, | ||||
| 		unit:       "bytes", | ||||
| 		calc:       "difference", | ||||
| 	}, | ||||
| 	{ | ||||
| 		name:       "lustre_open_diff", | ||||
| 		lineprefix: "open", | ||||
| 		lineoffset: 1, | ||||
| 		unit:       "", | ||||
| 		calc:       "difference", | ||||
| 	}, | ||||
| 	{ | ||||
| 		name:       "lustre_close_diff", | ||||
| 		lineprefix: "close", | ||||
| 		lineoffset: 1, | ||||
| 		unit:       "", | ||||
| 		calc:       "difference", | ||||
| 	}, | ||||
| 	{ | ||||
| 		name:       "lustre_setattr_diff", | ||||
| 		lineprefix: "setattr", | ||||
| 		lineoffset: 1, | ||||
| 		unit:       "", | ||||
| 		calc:       "difference", | ||||
| 	}, | ||||
| 	{ | ||||
| 		name:       "lustre_getattr_diff", | ||||
| 		lineprefix: "getattr", | ||||
| 		lineoffset: 1, | ||||
| 		unit:       "", | ||||
| 		calc:       "difference", | ||||
| 	}, | ||||
| 	{ | ||||
| 		name:       "lustre_statfs_diff", | ||||
| 		lineprefix: "statfs", | ||||
| 		lineoffset: 1, | ||||
| 		unit:       "", | ||||
| 		calc:       "difference", | ||||
| 	}, | ||||
| 	{ | ||||
| 		name:       "lustre_inode_permission_diff", | ||||
| 		lineprefix: "inode_permission", | ||||
| 		lineoffset: 1, | ||||
| 		unit:       "", | ||||
| 		calc:       "difference", | ||||
| 	}, | ||||
| } | ||||
|  | ||||
| var LustreDeriveMetrics = []LustreMetricDefinition{ | ||||
| 	{ | ||||
| 		name:       "lustre_read_requests_rate", | ||||
| 		lineprefix: "read_bytes", | ||||
| 		lineoffset: 1, | ||||
| 		unit:       "requests/sec", | ||||
| 		calc:       "derivative", | ||||
| 	}, | ||||
| 	{ | ||||
| 		name:       "lustre_write_requests_rate", | ||||
| 		lineprefix: "write_bytes", | ||||
| 		lineoffset: 1, | ||||
| 		unit:       "requests/sec", | ||||
| 		calc:       "derivative", | ||||
| 	}, | ||||
| 	{ | ||||
| 		name:       "lustre_read_bw", | ||||
| 		lineprefix: "read_bytes", | ||||
| 		lineoffset: 6, | ||||
| 		unit:       "bytes/sec", | ||||
| 		calc:       "derivative", | ||||
| 	}, | ||||
| 	{ | ||||
| 		name:       "lustre_write_bw", | ||||
| 		lineprefix: "write_bytes", | ||||
| 		lineoffset: 6, | ||||
| 		unit:       "bytes/sec", | ||||
| 		calc:       "derivative", | ||||
| 	}, | ||||
| } | ||||
|  | ||||
| func (m *LustreCollector) Init(config json.RawMessage) error { | ||||
| 	var err error | ||||
| 	m.name = "LustreCollector" | ||||
| @@ -102,17 +297,9 @@ func (m *LustreCollector) Init(config json.RawMessage) error { | ||||
| 	m.setup() | ||||
| 	m.tags = map[string]string{"type": "node"} | ||||
| 	m.meta = map[string]string{"source": m.name, "group": "Lustre"} | ||||
| 	defmatches := map[string]map[string]int{ | ||||
| 		"read_bytes":       {"lustre_read_bytes": 6, "lustre_read_requests": 1}, | ||||
| 		"write_bytes":      {"lustre_write_bytes": 6, "lustre_write_requests": 1}, | ||||
| 		"open":             {"lustre_open": 1}, | ||||
| 		"close":            {"lustre_close": 1}, | ||||
| 		"setattr":          {"lustre_setattr": 1}, | ||||
| 		"getattr":          {"lustre_getattr": 1}, | ||||
| 		"statfs":           {"lustre_statfs": 1}, | ||||
| 		"inode_permission": {"lustre_inode_permission": 1}} | ||||
|  | ||||
| 	// Lustre file system statistics can only be queried by user root | ||||
| 	// or with password-less sudo | ||||
| 	if !m.config.Sudo { | ||||
| 		user, err := user.Current() | ||||
| 		if err != nil { | ||||
| @@ -123,23 +310,15 @@ func (m *LustreCollector) Init(config json.RawMessage) error { | ||||
| 			cclog.ComponentError(m.name, "Lustre file system statistics can only be queried by user root") | ||||
| 			return err | ||||
| 		} | ||||
| 	} else { | ||||
| 		p, err := exec.LookPath("sudo") | ||||
| 		if err != nil { | ||||
| 			cclog.ComponentError(m.name, "Cannot find 'sudo'") | ||||
| 			return err | ||||
| 		} | ||||
| 		m.sudoCmd = p | ||||
| 	} | ||||
|  | ||||
| 	m.matches = make(map[string]map[string]int) | ||||
| 	for lineprefix, names := range defmatches { | ||||
| 		for metricname, offset := range names { | ||||
| 			_, skip := stringArrayContains(m.config.ExcludeMetrics, metricname) | ||||
| 			if skip { | ||||
| 				continue | ||||
| 			} | ||||
| 			if _, prefixExist := m.matches[lineprefix]; !prefixExist { | ||||
| 				m.matches[lineprefix] = make(map[string]int) | ||||
| 			} | ||||
| 			if _, metricExist := m.matches[lineprefix][metricname]; !metricExist { | ||||
| 				m.matches[lineprefix][metricname] = offset | ||||
| 			} | ||||
| 		} | ||||
| 	} | ||||
| 	p, err := exec.LookPath(m.config.LCtlCommand) | ||||
| 	if err != nil { | ||||
| 		p, err = exec.LookPath(LCTL_CMD) | ||||
| @@ -148,23 +327,47 @@ func (m *LustreCollector) Init(config json.RawMessage) error { | ||||
| 		} | ||||
| 	} | ||||
| 	m.lctl = p | ||||
| 	if m.config.Sudo { | ||||
| 		p, err := exec.LookPath("sudo") | ||||
| 		if err != nil { | ||||
| 			m.sudoCmd = p | ||||
|  | ||||
| 	m.definitions = []LustreMetricDefinition{} | ||||
| 	if m.config.SendAbsoluteValues { | ||||
| 		for _, def := range LustreAbsMetrics { | ||||
| 			if _, skip := stringArrayContains(m.config.ExcludeMetrics, def.name); !skip { | ||||
| 				m.definitions = append(m.definitions, def) | ||||
| 			} | ||||
| 		} | ||||
| 	} | ||||
| 	if m.config.SendDiffValues { | ||||
| 		for _, def := range LustreDiffMetrics { | ||||
| 			if _, skip := stringArrayContains(m.config.ExcludeMetrics, def.name); !skip { | ||||
| 				m.definitions = append(m.definitions, def) | ||||
| 			} | ||||
| 		} | ||||
| 	} | ||||
| 	if m.config.SendDerivedValues { | ||||
| 		for _, def := range LustreDeriveMetrics { | ||||
| 			if _, skip := stringArrayContains(m.config.ExcludeMetrics, def.name); !skip { | ||||
| 				m.definitions = append(m.definitions, def) | ||||
| 			} | ||||
| 		} | ||||
| 	} | ||||
| 	if len(m.definitions) == 0 { | ||||
| 		return errors.New("no metrics to collect") | ||||
| 	} | ||||
|  | ||||
| 	devices := m.getDevices() | ||||
| 	if len(devices) == 0 { | ||||
| 		return errors.New("no metrics to collect") | ||||
| 		return errors.New("no Lustre devices found") | ||||
| 	} | ||||
| 	m.stats = make(map[string]map[string]int64) | ||||
| 	for _, d := range devices { | ||||
| 		m.stats[d] = make(map[string]int64) | ||||
| 		for _, names := range m.matches { | ||||
| 			for metricname := range names { | ||||
| 				m.stats[d][metricname] = 0 | ||||
| 		data := m.getDeviceDataCommand(d) | ||||
| 		for _, def := range m.definitions { | ||||
| 			x, err := getMetricData(data, def.lineprefix, def.lineoffset) | ||||
| 			if err == nil { | ||||
| 				m.stats[d][def.name] = x | ||||
| 			} else { | ||||
| 				m.stats[d][def.name] = 0 | ||||
| 			} | ||||
| 		} | ||||
| 	} | ||||
| @@ -180,63 +383,43 @@ func (m *LustreCollector) Read(interval time.Duration, output chan lp.CCMetric) | ||||
| 	now := time.Now() | ||||
| 	tdiff := now.Sub(m.lastTimestamp) | ||||
| 	for device, devData := range m.stats { | ||||
| 		stats := m.getDeviceDataCommand(device) | ||||
| 		processed := []string{} | ||||
|  | ||||
| 		for _, line := range stats { | ||||
| 			lf := strings.Fields(line) | ||||
| 			if len(lf) > 1 { | ||||
| 				if fields, ok := m.matches[lf[0]]; ok { | ||||
| 					for name, idx := range fields { | ||||
| 						x, err := strconv.ParseInt(lf[idx], 0, 64) | ||||
| 		data := m.getDeviceDataCommand(device) | ||||
| 		for _, def := range m.definitions { | ||||
| 			var use_x int64 | ||||
| 			var err error | ||||
| 			var y lp.CCMetric | ||||
| 			x, err := getMetricData(data, def.lineprefix, def.lineoffset) | ||||
| 			if err == nil { | ||||
| 							value := x - devData[name] | ||||
| 							devData[name] = x | ||||
| 							if value < 0 { | ||||
| 				use_x = x | ||||
| 			} else { | ||||
| 				use_x = devData[def.name] | ||||
| 			} | ||||
| 			var value interface{} | ||||
| 			switch def.calc { | ||||
| 			case "none": | ||||
| 				value = use_x | ||||
| 				y, err = lp.New(def.name, m.tags, m.meta, map[string]interface{}{"value": value}, time.Now()) | ||||
| 			case "difference": | ||||
| 				value = use_x - devData[def.name] | ||||
| 				if value.(int64) < 0 { | ||||
| 					value = 0 | ||||
| 				} | ||||
| 							if m.config.SendAbsoluteValues { | ||||
| 								y, err := lp.New(name, m.tags, m.meta, map[string]interface{}{"value": value}, time.Now()) | ||||
| 				y, err = lp.New(def.name, m.tags, m.meta, map[string]interface{}{"value": value}, time.Now()) | ||||
| 			case "derivative": | ||||
| 				value = float64(use_x-devData[def.name]) / tdiff.Seconds() | ||||
| 				if value.(float64) < 0 { | ||||
| 					value = 0 | ||||
| 				} | ||||
| 				y, err = lp.New(def.name, m.tags, m.meta, map[string]interface{}{"value": value}, time.Now()) | ||||
| 			} | ||||
| 			if err == nil { | ||||
| 				y.AddTag("device", device) | ||||
| 									if strings.Contains(name, "byte") { | ||||
| 										y.AddMeta("unit", "Byte") | ||||
| 									} | ||||
| 									output <- y | ||||
| 									if m.config.SendAllMetrics { | ||||
| 										processed = append(processed, name) | ||||
| 									} | ||||
| 								} | ||||
| 							} | ||||
| 							if m.config.SendDerivedValues && strings.Contains(name, "bytes") { | ||||
| 								y, err := lp.New(name+"_bw", m.tags, m.meta, map[string]interface{}{"value": float64(value) / tdiff.Seconds()}, time.Now()) | ||||
| 								if err == nil { | ||||
| 									y.AddTag("device", device) | ||||
| 									y.AddMeta("unit", "Bytes/sec") | ||||
| 									output <- y | ||||
| 									if m.config.SendAllMetrics { | ||||
| 										processed = append(processed, name) | ||||
| 									} | ||||
| 								} | ||||
| 							} | ||||
| 						} | ||||
| 					} | ||||
| 				} | ||||
| 			} | ||||
| 		} | ||||
| 		if m.config.SendAllMetrics { | ||||
| 			for name := range devData { | ||||
| 				if _, done := stringArrayContains(processed, name); !done { | ||||
| 					y, err := lp.New(name, m.tags, m.meta, map[string]interface{}{"value": 0}, time.Now()) | ||||
| 					if err == nil { | ||||
| 						y.AddTag("device", device) | ||||
| 						if strings.Contains(name, "byte") { | ||||
| 							y.AddMeta("unit", "Byte") | ||||
| 				if len(def.unit) > 0 { | ||||
| 					y.AddMeta("unit", def.unit) | ||||
| 				} | ||||
| 				output <- y | ||||
| 			} | ||||
| 				} | ||||
| 			} | ||||
| 			devData[def.name] = use_x | ||||
| 		} | ||||
| 	} | ||||
| 	m.lastTimestamp = now | ||||
|   | ||||
| @@ -3,32 +3,44 @@ | ||||
|  | ||||
| ```json | ||||
|   "lustrestat": { | ||||
|     "procfiles" : [ | ||||
|       "/proc/fs/lustre/llite/lnec-XXXXXX/stats" | ||||
|     ], | ||||
|     "lctl_command": "/path/to/lctl", | ||||
|     "exclude_metrics": [ | ||||
|       "setattr", | ||||
|       "getattr" | ||||
|     ], | ||||
|     "send_abs_values" : true, | ||||
|     "send_derived_values" : true | ||||
|     "send_derived_values" : true, | ||||
|     "send_diff_values": true, | ||||
|     "use_sudo": false | ||||
|   } | ||||
| ``` | ||||
|  | ||||
| The `lustrestat` collector reads from the procfs stat files for Lustre like `/proc/fs/lustre/llite/lnec-XXXXXX/stats`. | ||||
| The `lustrestat` collector uses the `lctl` application with the `get_param` option to get all `llite` metrics (Lustre client). The `llite` metrics are only available for root users. If password-less sudo is configured, you can enable `sudo` in the configuration. | ||||
|  | ||||
| Metrics: | ||||
| * `lustre_read_bytes` | ||||
| * `lustre_read_requests` | ||||
| * `lustre_write_bytes` | ||||
| * `lustre_write_requests` | ||||
| * `lustre_read_bytes` (unit `bytes`) | ||||
| * `lustre_read_requests` (unit `requests`) | ||||
| * `lustre_write_bytes` (unit `bytes`) | ||||
| * `lustre_write_requests` (unit `requests`) | ||||
| * `lustre_open` | ||||
| * `lustre_close` | ||||
| * `lustre_getattr` | ||||
| * `lustre_setattr` | ||||
| * `lustre_statfs` | ||||
| * `lustre_inode_permission` | ||||
| * `lustre_read_bytes_bw` (if `send_derived_values == true`) | ||||
| * `lustre_write_bytes_bw` (if `send_derived_values == true`) | ||||
| * `lustre_read_bw` (if `send_derived_values == true`, unit `bytes/sec`) | ||||
| * `lustre_write_bw` (if `send_derived_values == true`, unit `bytes/sec`) | ||||
| * `lustre_read_requests_rate` (if `send_derived_values == true`, unit `requests/sec`) | ||||
| * `lustre_write_requests_rate` (if `send_derived_values == true`, unit `requests/sec`) | ||||
| * `lustre_read_bytes_diff` (if `send_diff_values == true`, unit `bytes`) | ||||
| * `lustre_read_requests_diff` (if `send_diff_values == true`, unit `requests`) | ||||
| * `lustre_write_bytes_diff` (if `send_diff_values == true`, unit `bytes`) | ||||
| * `lustre_write_requests_diff` (if `send_diff_values == true`, unit `requests`) | ||||
| * `lustre_open_diff` (if `send_diff_values == true`) | ||||
| * `lustre_close_diff` (if `send_diff_values == true`) | ||||
| * `lustre_getattr_diff` (if `send_diff_values == true`) | ||||
| * `lustre_setattr_diff` (if `send_diff_values == true`) | ||||
| * `lustre_statfs_diff` (if `send_diff_values == true`) | ||||
| * `lustre_inode_permission_diff` (if `send_diff_values == true`) | ||||
|  | ||||
| This collector adds an `device` tag. | ||||
		Reference in New Issue
	
	Block a user