mirror of
https://github.com/ClusterCockpit/cc-metric-collector.git
synced 2024-12-25 15:09:05 +01:00
Merge branch 'develop' of github.com:ClusterCockpit/cc-metric-collector into develop
This commit is contained in:
commit
c313055570
@ -103,9 +103,8 @@ func (m *SampleCollector) Read(interval time.Duration, output chan lp.CCMetric)
|
||||
}
|
||||
|
||||
// Each metric has exactly one field: value !
|
||||
value := map[string]interface{}{"value": int(x)}
|
||||
y, err := lp.New("sample_metric", tags, m.meta, value, time.Now())
|
||||
if err == nil {
|
||||
value := map[string]interface{}{"value": int64(x)}
|
||||
if y, err := lp.New("sample_metric", tags, m.meta, value, time.Now()); err == nil {
|
||||
output <- y
|
||||
}
|
||||
}
|
||||
|
@ -23,14 +23,14 @@ import (
|
||||
type CPUFreqCpuInfoCollectorTopology struct {
|
||||
processor string // logical processor number (continuous, starting at 0)
|
||||
coreID string // socket local core ID
|
||||
coreID_int int
|
||||
coreID_int int64
|
||||
physicalPackageID string // socket / package ID
|
||||
physicalPackageID_int int
|
||||
physicalPackageID_int int64
|
||||
numPhysicalPackages string // number of sockets / packages
|
||||
numPhysicalPackages_int int
|
||||
numPhysicalPackages_int int64
|
||||
isHT bool
|
||||
numNonHT string // number of non hyperthreading processors
|
||||
numNonHT_int int
|
||||
numNonHT_int int64
|
||||
tagSet map[string]string
|
||||
}
|
||||
|
||||
@ -40,26 +40,32 @@ type CPUFreqCpuInfoCollector struct {
|
||||
}
|
||||
|
||||
func (m *CPUFreqCpuInfoCollector) Init(config json.RawMessage) error {
|
||||
// Check if already initialized
|
||||
if m.init {
|
||||
return nil
|
||||
}
|
||||
|
||||
m.name = "CPUFreqCpuInfoCollector"
|
||||
m.meta = map[string]string{
|
||||
"source": m.name,
|
||||
"group": "cpufreq",
|
||||
"group": "CPU",
|
||||
"unit": "MHz",
|
||||
}
|
||||
|
||||
const cpuInfoFile = "/proc/cpuinfo"
|
||||
file, err := os.Open(cpuInfoFile)
|
||||
if err != nil {
|
||||
return fmt.Errorf("Failed to open '%s': %v", cpuInfoFile, err)
|
||||
return fmt.Errorf("Failed to open file '%s': %v", cpuInfoFile, err)
|
||||
}
|
||||
defer file.Close()
|
||||
|
||||
// Collect topology information from file cpuinfo
|
||||
foundFreq := false
|
||||
processor := ""
|
||||
numNonHT_int := 0
|
||||
var numNonHT_int int64 = 0
|
||||
coreID := ""
|
||||
physicalPackageID := ""
|
||||
maxPhysicalPackageID := 0
|
||||
var maxPhysicalPackageID int64 = 0
|
||||
m.topology = make([]CPUFreqCpuInfoCollectorTopology, 0)
|
||||
coreSeenBefore := make(map[string]bool)
|
||||
scanner := bufio.NewScanner(file)
|
||||
@ -87,13 +93,13 @@ func (m *CPUFreqCpuInfoCollector) Init(config json.RawMessage) error {
|
||||
len(coreID) > 0 &&
|
||||
len(physicalPackageID) > 0 {
|
||||
|
||||
coreID_int, err := strconv.Atoi(coreID)
|
||||
coreID_int, err := strconv.ParseInt(coreID, 10, 64)
|
||||
if err != nil {
|
||||
return fmt.Errorf("Unable to convert coreID to int: %v", err)
|
||||
return fmt.Errorf("Unable to convert coreID '%s' to int64: %v", coreID, err)
|
||||
}
|
||||
physicalPackageID_int, err := strconv.Atoi(physicalPackageID)
|
||||
physicalPackageID_int, err := strconv.ParseInt(physicalPackageID, 10, 64)
|
||||
if err != nil {
|
||||
return fmt.Errorf("Unable to convert physicalPackageID to int: %v", err)
|
||||
return fmt.Errorf("Unable to convert physicalPackageID '%s' to int64: %v", physicalPackageID, err)
|
||||
}
|
||||
|
||||
// increase maximun socket / package ID, when required
|
||||
@ -152,15 +158,17 @@ func (m *CPUFreqCpuInfoCollector) Init(config json.RawMessage) error {
|
||||
}
|
||||
|
||||
func (m *CPUFreqCpuInfoCollector) Read(interval time.Duration, output chan lp.CCMetric) {
|
||||
// Check if already initialized
|
||||
if !m.init {
|
||||
return
|
||||
}
|
||||
|
||||
const cpuInfoFile = "/proc/cpuinfo"
|
||||
file, err := os.Open(cpuInfoFile)
|
||||
if err != nil {
|
||||
cclog.ComponentError(
|
||||
m.name,
|
||||
fmt.Sprintf("Read(): Failed to open '%s': %v", cpuInfoFile, err))
|
||||
fmt.Sprintf("Read(): Failed to open file '%s': %v", cpuInfoFile, err))
|
||||
return
|
||||
}
|
||||
defer file.Close()
|
||||
@ -181,11 +189,10 @@ func (m *CPUFreqCpuInfoCollector) Read(interval time.Duration, output chan lp.CC
|
||||
if err != nil {
|
||||
cclog.ComponentError(
|
||||
m.name,
|
||||
fmt.Sprintf("Read(): Failed to convert cpu MHz to float: %v", err))
|
||||
fmt.Sprintf("Read(): Failed to convert cpu MHz '%s' to float64: %v", lineSplit[1], err))
|
||||
return
|
||||
}
|
||||
y, err := lp.New("cpufreq", t.tagSet, m.meta, map[string]interface{}{"value": value}, now)
|
||||
if err == nil {
|
||||
if y, err := lp.New("cpufreq", t.tagSet, m.meta, map[string]interface{}{"value": value}, now); err == nil {
|
||||
output <- y
|
||||
}
|
||||
}
|
||||
|
@ -35,14 +35,14 @@ func readOneLine(filename string) (text string, ok bool) {
|
||||
type CPUFreqCollectorTopology struct {
|
||||
processor string // logical processor number (continuous, starting at 0)
|
||||
coreID string // socket local core ID
|
||||
coreID_int int
|
||||
coreID_int int64
|
||||
physicalPackageID string // socket / package ID
|
||||
physicalPackageID_int int
|
||||
physicalPackageID_int int64
|
||||
numPhysicalPackages string // number of sockets / packages
|
||||
numPhysicalPackages_int int
|
||||
numPhysicalPackages_int int64
|
||||
isHT bool
|
||||
numNonHT string // number of non hyperthreading processors
|
||||
numNonHT_int int
|
||||
numNonHT_int int64
|
||||
scalingCurFreqFile string
|
||||
tagSet map[string]string
|
||||
}
|
||||
@ -64,6 +64,11 @@ type CPUFreqCollector struct {
|
||||
}
|
||||
|
||||
func (m *CPUFreqCollector) Init(config json.RawMessage) error {
|
||||
// Check if already initialized
|
||||
if m.init {
|
||||
return nil
|
||||
}
|
||||
|
||||
m.name = "CPUFreqCollector"
|
||||
m.setup()
|
||||
if len(config) > 0 {
|
||||
@ -74,7 +79,8 @@ func (m *CPUFreqCollector) Init(config json.RawMessage) error {
|
||||
}
|
||||
m.meta = map[string]string{
|
||||
"source": m.name,
|
||||
"group": "CPU Frequency",
|
||||
"group": "CPU",
|
||||
"unit": "MHz",
|
||||
}
|
||||
|
||||
// Loop for all CPU directories
|
||||
@ -82,48 +88,48 @@ func (m *CPUFreqCollector) Init(config json.RawMessage) error {
|
||||
globPattern := filepath.Join(baseDir, "cpu[0-9]*")
|
||||
cpuDirs, err := filepath.Glob(globPattern)
|
||||
if err != nil {
|
||||
return fmt.Errorf("CPUFreqCollector.Init() unable to glob files with pattern %s: %v", globPattern, err)
|
||||
return fmt.Errorf("Unable to glob files with pattern '%s': %v", globPattern, err)
|
||||
}
|
||||
if cpuDirs == nil {
|
||||
return fmt.Errorf("CPUFreqCollector.Init() unable to find any files with pattern %s", globPattern)
|
||||
return fmt.Errorf("Unable to find any files with pattern '%s'", globPattern)
|
||||
}
|
||||
|
||||
// Initialize CPU topology
|
||||
m.topology = make([]CPUFreqCollectorTopology, len(cpuDirs))
|
||||
for _, cpuDir := range cpuDirs {
|
||||
processor := strings.TrimPrefix(cpuDir, "/sys/devices/system/cpu/cpu")
|
||||
processor_int, err := strconv.Atoi(processor)
|
||||
processor_int, err := strconv.ParseInt(processor, 10, 64)
|
||||
if err != nil {
|
||||
return fmt.Errorf("CPUFreqCollector.Init() unable to convert cpuID to int: %v", err)
|
||||
return fmt.Errorf("Unable to convert cpuID '%s' to int64: %v", processor, err)
|
||||
}
|
||||
|
||||
// Read package ID
|
||||
physicalPackageIDFile := filepath.Join(cpuDir, "topology", "physical_package_id")
|
||||
physicalPackageID, ok := readOneLine(physicalPackageIDFile)
|
||||
if !ok {
|
||||
return fmt.Errorf("CPUFreqCollector.Init() unable to read physical package ID from %s", physicalPackageIDFile)
|
||||
return fmt.Errorf("Unable to read physical package ID from file '%s'", physicalPackageIDFile)
|
||||
}
|
||||
physicalPackageID_int, err := strconv.Atoi(physicalPackageID)
|
||||
physicalPackageID_int, err := strconv.ParseInt(physicalPackageID, 10, 64)
|
||||
if err != nil {
|
||||
return fmt.Errorf("CPUFreqCollector.Init() unable to convert packageID to int: %v", err)
|
||||
return fmt.Errorf("Unable to convert packageID '%s' to int64: %v", physicalPackageID, err)
|
||||
}
|
||||
|
||||
// Read core ID
|
||||
coreIDFile := filepath.Join(cpuDir, "topology", "core_id")
|
||||
coreID, ok := readOneLine(coreIDFile)
|
||||
if !ok {
|
||||
return fmt.Errorf("CPUFreqCollector.Init() unable to read core ID from %s", coreIDFile)
|
||||
return fmt.Errorf("Unable to read core ID from file '%s'", coreIDFile)
|
||||
}
|
||||
coreID_int, err := strconv.Atoi(coreID)
|
||||
coreID_int, err := strconv.ParseInt(coreID, 10, 64)
|
||||
if err != nil {
|
||||
return fmt.Errorf("CPUFreqCollector.Init() unable to convert coreID to int: %v", err)
|
||||
return fmt.Errorf("Unable to convert coreID '%s' to int64: %v", coreID, err)
|
||||
}
|
||||
|
||||
// Check access to current frequency file
|
||||
scalingCurFreqFile := filepath.Join(cpuDir, "cpufreq", "scaling_cur_freq")
|
||||
err = unix.Access(scalingCurFreqFile, unix.R_OK)
|
||||
if err != nil {
|
||||
return fmt.Errorf("CPUFreqCollector.Init() unable to access %s: %v", scalingCurFreqFile, err)
|
||||
return fmt.Errorf("Unable to access file '%s': %v", scalingCurFreqFile, err)
|
||||
}
|
||||
|
||||
t := &m.topology[processor_int]
|
||||
@ -146,8 +152,8 @@ func (m *CPUFreqCollector) Init(config json.RawMessage) error {
|
||||
}
|
||||
|
||||
// number of non hyper thread cores and packages / sockets
|
||||
numNonHT_int := 0
|
||||
maxPhysicalPackageID := 0
|
||||
var numNonHT_int int64 = 0
|
||||
var maxPhysicalPackageID int64 = 0
|
||||
for i := range m.topology {
|
||||
t := &m.topology[i]
|
||||
|
||||
@ -184,6 +190,7 @@ func (m *CPUFreqCollector) Init(config json.RawMessage) error {
|
||||
}
|
||||
|
||||
func (m *CPUFreqCollector) Read(interval time.Duration, output chan lp.CCMetric) {
|
||||
// Check if already initialized
|
||||
if !m.init {
|
||||
return
|
||||
}
|
||||
@ -205,16 +212,15 @@ func (m *CPUFreqCollector) Read(interval time.Duration, output chan lp.CCMetric)
|
||||
fmt.Sprintf("Read(): Failed to read one line from file '%s'", t.scalingCurFreqFile))
|
||||
continue
|
||||
}
|
||||
cpuFreq, err := strconv.Atoi(line)
|
||||
cpuFreq, err := strconv.ParseInt(line, 10, 64)
|
||||
if err != nil {
|
||||
cclog.ComponentError(
|
||||
m.name,
|
||||
fmt.Sprintf("Read(): Failed to convert CPU frequency '%s': %v", line, err))
|
||||
fmt.Sprintf("Read(): Failed to convert CPU frequency '%s' to int64: %v", line, err))
|
||||
continue
|
||||
}
|
||||
|
||||
y, err := lp.New("cpufreq", t.tagSet, m.meta, map[string]interface{}{"value": cpuFreq}, now)
|
||||
if err == nil {
|
||||
if y, err := lp.New("cpufreq", t.tagSet, m.meta, map[string]interface{}{"value": cpuFreq}, now); err == nil {
|
||||
output <- y
|
||||
}
|
||||
}
|
||||
|
@ -26,6 +26,11 @@ type GpfsCollector struct {
|
||||
}
|
||||
|
||||
func (m *GpfsCollector) Init(config json.RawMessage) error {
|
||||
// Check if already initialized
|
||||
if m.init {
|
||||
return nil
|
||||
}
|
||||
|
||||
var err error
|
||||
m.name = "GpfsCollector"
|
||||
m.setup()
|
||||
@ -53,16 +58,16 @@ func (m *GpfsCollector) Init(config json.RawMessage) error {
|
||||
// GPFS / IBM Spectrum Scale file system statistics can only be queried by user root
|
||||
user, err := user.Current()
|
||||
if err != nil {
|
||||
return fmt.Errorf("GpfsCollector.Init(): Failed to get current user: %v", err)
|
||||
return fmt.Errorf("Failed to get current user: %v", err)
|
||||
}
|
||||
if user.Uid != "0" {
|
||||
return fmt.Errorf("GpfsCollector.Init(): GPFS file system statistics can only be queried by user root")
|
||||
return fmt.Errorf("GPFS file system statistics can only be queried by user root")
|
||||
}
|
||||
|
||||
// Check if mmpmon is in executable search path
|
||||
_, err = exec.LookPath(m.config.Mmpmon)
|
||||
if err != nil {
|
||||
return fmt.Errorf("GpfsCollector.Init(): Failed to find mmpmon binary '%s': %v", m.config.Mmpmon, err)
|
||||
return fmt.Errorf("Failed to find mmpmon binary '%s': %v", m.config.Mmpmon, err)
|
||||
}
|
||||
|
||||
m.init = true
|
||||
@ -70,6 +75,7 @@ func (m *GpfsCollector) Init(config json.RawMessage) error {
|
||||
}
|
||||
|
||||
func (m *GpfsCollector) Read(interval time.Duration, output chan lp.CCMetric) {
|
||||
// Check if already initialized
|
||||
if !m.init {
|
||||
return
|
||||
}
|
||||
@ -135,7 +141,7 @@ func (m *GpfsCollector) Read(interval time.Duration, output chan lp.CCMetric) {
|
||||
if rc != 0 {
|
||||
cclog.ComponentError(
|
||||
m.name,
|
||||
fmt.Sprintf("Read(): Filesystem %s not ok.", filesystem))
|
||||
fmt.Sprintf("Read(): Filesystem '%s' is not ok.", filesystem))
|
||||
continue
|
||||
}
|
||||
|
||||
@ -143,14 +149,14 @@ func (m *GpfsCollector) Read(interval time.Duration, output chan lp.CCMetric) {
|
||||
if err != nil {
|
||||
cclog.ComponentError(
|
||||
m.name,
|
||||
fmt.Sprintf("Read(): Failed to convert seconds '%s' to int: %v", key_value["_t_"], err))
|
||||
fmt.Sprintf("Read(): Failed to convert seconds '%s' to int64: %v", key_value["_t_"], err))
|
||||
continue
|
||||
}
|
||||
msec, err := strconv.ParseInt(key_value["_tu_"], 10, 64)
|
||||
if err != nil {
|
||||
cclog.ComponentError(
|
||||
m.name,
|
||||
fmt.Sprintf("Read(): Failed to convert micro seconds '%s' to int: %v", key_value["_tu_"], err))
|
||||
fmt.Sprintf("Read(): Failed to convert micro seconds '%s' to int64: %v", key_value["_tu_"], err))
|
||||
continue
|
||||
}
|
||||
timestamp := time.Unix(sec, msec*1000)
|
||||
@ -160,12 +166,10 @@ func (m *GpfsCollector) Read(interval time.Duration, output chan lp.CCMetric) {
|
||||
if err != nil {
|
||||
cclog.ComponentError(
|
||||
m.name,
|
||||
fmt.Sprintf("Read(): Failed to convert bytes read '%s' to int: %v", key_value["_br_"], err))
|
||||
fmt.Sprintf("Read(): Failed to convert bytes read '%s' to int64: %v", key_value["_br_"], err))
|
||||
continue
|
||||
}
|
||||
|
||||
y, err := lp.New("gpfs_bytes_read", m.tags, m.meta, map[string]interface{}{"value": bytesRead}, timestamp)
|
||||
if err == nil {
|
||||
if y, err := lp.New("gpfs_bytes_read", m.tags, m.meta, map[string]interface{}{"value": bytesRead}, timestamp); err == nil {
|
||||
output <- y
|
||||
}
|
||||
|
||||
@ -174,12 +178,10 @@ func (m *GpfsCollector) Read(interval time.Duration, output chan lp.CCMetric) {
|
||||
if err != nil {
|
||||
cclog.ComponentError(
|
||||
m.name,
|
||||
fmt.Sprintf("Read(): Failed to convert bytes written '%s' to int: %v", key_value["_bw_"], err))
|
||||
fmt.Sprintf("Read(): Failed to convert bytes written '%s' to int64: %v", key_value["_bw_"], err))
|
||||
continue
|
||||
}
|
||||
|
||||
y, err = lp.New("gpfs_bytes_written", m.tags, m.meta, map[string]interface{}{"value": bytesWritten}, timestamp)
|
||||
if err == nil {
|
||||
if y, err := lp.New("gpfs_bytes_written", m.tags, m.meta, map[string]interface{}{"value": bytesWritten}, timestamp); err == nil {
|
||||
output <- y
|
||||
}
|
||||
|
||||
@ -188,11 +190,10 @@ func (m *GpfsCollector) Read(interval time.Duration, output chan lp.CCMetric) {
|
||||
if err != nil {
|
||||
cclog.ComponentError(
|
||||
m.name,
|
||||
fmt.Sprintf("Read(): Failed to convert number of opens '%s' to int: %v", key_value["_oc_"], err))
|
||||
fmt.Sprintf("Read(): Failed to convert number of opens '%s' to int64: %v", key_value["_oc_"], err))
|
||||
continue
|
||||
}
|
||||
y, err = lp.New("gpfs_num_opens", m.tags, m.meta, map[string]interface{}{"value": numOpens}, timestamp)
|
||||
if err == nil {
|
||||
if y, err := lp.New("gpfs_num_opens", m.tags, m.meta, map[string]interface{}{"value": numOpens}, timestamp); err == nil {
|
||||
output <- y
|
||||
}
|
||||
|
||||
@ -201,11 +202,10 @@ func (m *GpfsCollector) Read(interval time.Duration, output chan lp.CCMetric) {
|
||||
if err != nil {
|
||||
cclog.ComponentError(
|
||||
m.name,
|
||||
fmt.Sprintf("Read(): Failed to convert number of closes: '%s' to int: %v", key_value["_cc_"], err))
|
||||
fmt.Sprintf("Read(): Failed to convert number of closes: '%s' to int64: %v", key_value["_cc_"], err))
|
||||
continue
|
||||
}
|
||||
y, err = lp.New("gpfs_num_closes", m.tags, m.meta, map[string]interface{}{"value": numCloses}, timestamp)
|
||||
if err == nil {
|
||||
if y, err := lp.New("gpfs_num_closes", m.tags, m.meta, map[string]interface{}{"value": numCloses}, timestamp); err == nil {
|
||||
output <- y
|
||||
}
|
||||
|
||||
@ -214,11 +214,10 @@ func (m *GpfsCollector) Read(interval time.Duration, output chan lp.CCMetric) {
|
||||
if err != nil {
|
||||
cclog.ComponentError(
|
||||
m.name,
|
||||
fmt.Sprintf("Read(): Failed to convert number of reads: '%s' to int: %v", key_value["_rdc_"], err))
|
||||
fmt.Sprintf("Read(): Failed to convert number of reads: '%s' to int64: %v", key_value["_rdc_"], err))
|
||||
continue
|
||||
}
|
||||
y, err = lp.New("gpfs_num_reads", m.tags, m.meta, map[string]interface{}{"value": numReads}, timestamp)
|
||||
if err == nil {
|
||||
if y, err := lp.New("gpfs_num_reads", m.tags, m.meta, map[string]interface{}{"value": numReads}, timestamp); err == nil {
|
||||
output <- y
|
||||
}
|
||||
|
||||
@ -227,11 +226,10 @@ func (m *GpfsCollector) Read(interval time.Duration, output chan lp.CCMetric) {
|
||||
if err != nil {
|
||||
cclog.ComponentError(
|
||||
m.name,
|
||||
fmt.Sprintf("Read(): Failed to convert number of writes: '%s' to int: %v", key_value["_wc_"], err))
|
||||
fmt.Sprintf("Read(): Failed to convert number of writes: '%s' to int64: %v", key_value["_wc_"], err))
|
||||
continue
|
||||
}
|
||||
y, err = lp.New("gpfs_num_writes", m.tags, m.meta, map[string]interface{}{"value": numWrites}, timestamp)
|
||||
if err == nil {
|
||||
if y, err := lp.New("gpfs_num_writes", m.tags, m.meta, map[string]interface{}{"value": numWrites}, timestamp); err == nil {
|
||||
output <- y
|
||||
}
|
||||
|
||||
@ -240,11 +238,10 @@ func (m *GpfsCollector) Read(interval time.Duration, output chan lp.CCMetric) {
|
||||
if err != nil {
|
||||
cclog.ComponentError(
|
||||
m.name,
|
||||
fmt.Sprintf("Read(): Failed to convert number of read directories: '%s' to int: %v", key_value["_dir_"], err))
|
||||
fmt.Sprintf("Read(): Failed to convert number of read directories: '%s' to int64: %v", key_value["_dir_"], err))
|
||||
continue
|
||||
}
|
||||
y, err = lp.New("gpfs_num_readdirs", m.tags, m.meta, map[string]interface{}{"value": numReaddirs}, timestamp)
|
||||
if err == nil {
|
||||
if y, err := lp.New("gpfs_num_readdirs", m.tags, m.meta, map[string]interface{}{"value": numReaddirs}, timestamp); err == nil {
|
||||
output <- y
|
||||
}
|
||||
|
||||
@ -256,8 +253,7 @@ func (m *GpfsCollector) Read(interval time.Duration, output chan lp.CCMetric) {
|
||||
fmt.Sprintf("Read(): Failed to convert number of inode updates: '%s' to int: %v", key_value["_iu_"], err))
|
||||
continue
|
||||
}
|
||||
y, err = lp.New("gpfs_num_inode_updates", m.tags, m.meta, map[string]interface{}{"value": numInodeUpdates}, timestamp)
|
||||
if err == nil {
|
||||
if y, err := lp.New("gpfs_num_inode_updates", m.tags, m.meta, map[string]interface{}{"value": numInodeUpdates}, timestamp); err == nil {
|
||||
output <- y
|
||||
}
|
||||
}
|
||||
|
30
collectors/gpfsMetric.md
Normal file
30
collectors/gpfsMetric.md
Normal file
@ -0,0 +1,30 @@
|
||||
## `gpfs` collector
|
||||
|
||||
```json
|
||||
"ibstat": {
|
||||
"mmpmon_path": "/path/to/mmpmon",
|
||||
"exclude_filesystem": [
|
||||
"fs1"
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
The `gpfs` collector uses the `mmpmon` command to read performance metrics for
|
||||
GPFS / IBM Spectrum Scale filesystems.
|
||||
|
||||
The reported filesystems can be filtered with the `exclude_filesystem` option
|
||||
in the configuration.
|
||||
|
||||
The path to the `mmpmon` command can be configured with the `mmpmon_path` option
|
||||
in the configuration.
|
||||
|
||||
Metrics:
|
||||
* `bytes_read`
|
||||
* `gpfs_bytes_written`
|
||||
* `gpfs_num_opens`
|
||||
* `gpfs_num_closes`
|
||||
* `gpfs_num_reads`
|
||||
* `gpfs_num_readdirs`
|
||||
* `gpfs_num_inode_updates`
|
||||
|
||||
The collector adds a `filesystem` tag to all metrics
|
@ -4,6 +4,7 @@ import (
|
||||
"fmt"
|
||||
"os"
|
||||
|
||||
cclog "github.com/ClusterCockpit/cc-metric-collector/internal/ccLogger"
|
||||
lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
|
||||
"golang.org/x/sys/unix"
|
||||
|
||||
@ -20,7 +21,7 @@ type InfinibandCollectorInfo struct {
|
||||
LID string // IB local Identifier (LID)
|
||||
device string // IB device
|
||||
port string // IB device port
|
||||
portCounterFiles map[string]string // mapping counter name -> file
|
||||
portCounterFiles map[string]string // mapping counter name -> sysfs file
|
||||
tagSet map[string]string // corresponding tag list
|
||||
}
|
||||
|
||||
@ -32,26 +33,14 @@ type InfinibandCollector struct {
|
||||
info []InfinibandCollectorInfo
|
||||
}
|
||||
|
||||
func (m *InfinibandCollector) Help() {
|
||||
fmt.Println("This collector includes all devices that can be found below ", IB_BASEPATH)
|
||||
fmt.Println("and where any of the ports provides a 'lid' file (glob ", IB_BASEPATH, "/<dev>/ports/<port>/lid).")
|
||||
fmt.Println("The devices can be filtered with the 'exclude_devices' option in the configuration.")
|
||||
fmt.Println("For each found LIDs the collector calls the 'perfquery' command")
|
||||
fmt.Println("")
|
||||
fmt.Println("Full configuration object:")
|
||||
fmt.Println("\"ibstat\" : {")
|
||||
fmt.Println(" \"exclude_devices\" : [\"dev1\"]")
|
||||
fmt.Println("}")
|
||||
fmt.Println("")
|
||||
fmt.Println("Metrics:")
|
||||
fmt.Println("- ib_recv")
|
||||
fmt.Println("- ib_xmit")
|
||||
fmt.Println("- ib_recv_pkts")
|
||||
fmt.Println("- ib_xmit_pkts")
|
||||
}
|
||||
|
||||
// Init initializes the Infiniband collector by walking through files below IB_BASEPATH
|
||||
func (m *InfinibandCollector) Init(config json.RawMessage) error {
|
||||
|
||||
// Check if already initialized
|
||||
if !m.init {
|
||||
return nil
|
||||
}
|
||||
|
||||
var err error
|
||||
m.name = "InfinibandCollector"
|
||||
m.setup()
|
||||
@ -153,14 +142,25 @@ func (m *InfinibandCollector) Read(interval time.Duration, output chan lp.CCMetr
|
||||
// device info
|
||||
info := &m.info[i]
|
||||
for counterName, counterFile := range info.portCounterFiles {
|
||||
if data, ok := readOneLine(counterFile); ok {
|
||||
if v, err := strconv.ParseInt(data, 10, 64); err == nil {
|
||||
if y, err := lp.New(counterName, info.tagSet, m.meta, map[string]interface{}{"value": v}, now); err == nil {
|
||||
output <- y
|
||||
}
|
||||
}
|
||||
data, ok := readOneLine(counterFile)
|
||||
if !ok {
|
||||
cclog.ComponentError(
|
||||
m.name,
|
||||
fmt.Sprintf("Read(): Failed to read one line from file '%s'", counterFile))
|
||||
continue
|
||||
}
|
||||
v, err := strconv.ParseInt(data, 10, 64)
|
||||
if err != nil {
|
||||
cclog.ComponentError(
|
||||
m.name,
|
||||
fmt.Sprintf("Read(): Failed to convert Infininiband metrice %s='%s' to int64: %v", counterName, data, err))
|
||||
continue
|
||||
}
|
||||
if y, err := lp.New(counterName, info.tagSet, m.meta, map[string]interface{}{"value": v}, now); err == nil {
|
||||
output <- y
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -3,17 +3,24 @@
|
||||
|
||||
```json
|
||||
"ibstat": {
|
||||
"perfquery_path" : "<path to perfquery command>",
|
||||
"exclude_devices": [
|
||||
"mlx4"
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
The `ibstat` collector reads either data through the `perfquery` command or the sysfs files below `/sys/class/infiniband/<device>`.
|
||||
The `ibstat` collector includes all Infiniband devices that can be
|
||||
found below `/sys/class/infiniband/` and where any of the ports provides a
|
||||
LID file (`/sys/class/infiniband/<dev>/ports/<port>/lid`)
|
||||
|
||||
The devices can be filtered with the `exclude_devices` option in the configuration.
|
||||
|
||||
For each found LID the collector reads data through the sysfs files below `/sys/class/infiniband/<device>`.
|
||||
|
||||
Metrics:
|
||||
* `ib_recv`
|
||||
* `ib_xmit`
|
||||
* `ib_recv_pkts`
|
||||
* `ib_xmit_pkts`
|
||||
|
||||
The collector adds a `device` tag to all metrics
|
||||
|
@ -29,27 +29,6 @@ type InfinibandPerfQueryCollector struct {
|
||||
}
|
||||
}
|
||||
|
||||
func (m *InfinibandPerfQueryCollector) Help() {
|
||||
fmt.Println("This collector includes all devices that can be found below ", IB_BASEPATH)
|
||||
fmt.Println("and where any of the ports provides a 'lid' file (glob ", IB_BASEPATH, "/<dev>/ports/<port>/lid).")
|
||||
fmt.Println("The devices can be filtered with the 'exclude_devices' option in the configuration.")
|
||||
fmt.Println("For each found LIDs the collector calls the 'perfquery' command")
|
||||
fmt.Println("The path to the 'perfquery' command can be configured with the 'perfquery_path' option")
|
||||
fmt.Println("in the configuration")
|
||||
fmt.Println("")
|
||||
fmt.Println("Full configuration object:")
|
||||
fmt.Println("\"ibstat\" : {")
|
||||
fmt.Println(" \"perfquery_path\" : \"path/to/perfquery\" # if omitted, it searches in $PATH")
|
||||
fmt.Println(" \"exclude_devices\" : [\"dev1\"]")
|
||||
fmt.Println("}")
|
||||
fmt.Println("")
|
||||
fmt.Println("Metrics:")
|
||||
fmt.Println("- ib_recv")
|
||||
fmt.Println("- ib_xmit")
|
||||
fmt.Println("- ib_recv_pkts")
|
||||
fmt.Println("- ib_xmit_pkts")
|
||||
}
|
||||
|
||||
func (m *InfinibandPerfQueryCollector) Init(config json.RawMessage) error {
|
||||
var err error
|
||||
m.name = "InfinibandCollectorPerfQuery"
|
||||
|
28
collectors/infinibandPerfQueryMetric.md
Normal file
28
collectors/infinibandPerfQueryMetric.md
Normal file
@ -0,0 +1,28 @@
|
||||
|
||||
## `ibstat_perfquery` collector
|
||||
|
||||
```json
|
||||
"ibstat_perfquery": {
|
||||
"perfquery_path": "/path/to/perfquery",
|
||||
"exclude_devices": [
|
||||
"mlx4"
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
The `ibstat_perfquery` collector includes all Infiniband devices that can be
|
||||
found below `/sys/class/infiniband/` and where any of the ports provides a
|
||||
LID file (`/sys/class/infiniband/<dev>/ports/<port>/lid`)
|
||||
|
||||
The devices can be filtered with the `exclude_devices` option in the configuration.
|
||||
|
||||
For each found LID the collector calls the `perfquery` command. The path to the
|
||||
`perfquery` command can be configured with the `perfquery_path` option in the configuration
|
||||
|
||||
Metrics:
|
||||
* `ib_recv`
|
||||
* `ib_xmit`
|
||||
* `ib_recv_pkts`
|
||||
* `ib_xmit_pkts`
|
||||
|
||||
The collector adds a `device` tag to all metrics
|
@ -22,16 +22,16 @@ import (
|
||||
//
|
||||
const LOADAVGFILE = "/proc/loadavg"
|
||||
|
||||
type LoadavgCollectorConfig struct {
|
||||
ExcludeMetrics []string `json:"exclude_metrics,omitempty"`
|
||||
}
|
||||
|
||||
type LoadavgCollector struct {
|
||||
metricCollector
|
||||
tags map[string]string
|
||||
load_matches []string
|
||||
load_skips []bool
|
||||
proc_matches []string
|
||||
config LoadavgCollectorConfig
|
||||
proc_skips []bool
|
||||
config struct {
|
||||
ExcludeMetrics []string `json:"exclude_metrics,omitempty"`
|
||||
}
|
||||
}
|
||||
|
||||
func (m *LoadavgCollector) Init(config json.RawMessage) error {
|
||||
@ -51,15 +51,23 @@ func (m *LoadavgCollector) Init(config json.RawMessage) error {
|
||||
"load_one",
|
||||
"load_five",
|
||||
"load_fifteen"}
|
||||
m.load_skips = make([]bool, len(m.load_matches))
|
||||
m.proc_matches = []string{
|
||||
"proc_run",
|
||||
"proc_total"}
|
||||
m.proc_skips = make([]bool, len(m.proc_matches))
|
||||
|
||||
for i, name := range m.load_matches {
|
||||
_, m.load_skips[i] = stringArrayContains(m.config.ExcludeMetrics, name)
|
||||
}
|
||||
for i, name := range m.proc_matches {
|
||||
_, m.proc_skips[i] = stringArrayContains(m.config.ExcludeMetrics, name)
|
||||
}
|
||||
m.init = true
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *LoadavgCollector) Read(interval time.Duration, output chan lp.CCMetric) {
|
||||
var skip bool
|
||||
if !m.init {
|
||||
return
|
||||
}
|
||||
@ -84,9 +92,11 @@ func (m *LoadavgCollector) Read(interval time.Duration, output chan lp.CCMetric)
|
||||
fmt.Sprintf("Read(): Failed to convert '%s' to float64: %v", ls[i], err))
|
||||
continue
|
||||
}
|
||||
_, skip = stringArrayContains(m.config.ExcludeMetrics, name)
|
||||
if m.load_skips[i] {
|
||||
continue
|
||||
}
|
||||
y, err := lp.New(name, m.tags, m.meta, map[string]interface{}{"value": x}, now)
|
||||
if err == nil && !skip {
|
||||
if err == nil {
|
||||
output <- y
|
||||
}
|
||||
}
|
||||
@ -94,16 +104,18 @@ func (m *LoadavgCollector) Read(interval time.Duration, output chan lp.CCMetric)
|
||||
// Process metrics
|
||||
lv := strings.Split(ls[3], `/`)
|
||||
for i, name := range m.proc_matches {
|
||||
x, err := strconv.ParseFloat(lv[i], 64)
|
||||
x, err := strconv.ParseInt(lv[i], 10, 64)
|
||||
if err != nil {
|
||||
cclog.ComponentError(
|
||||
m.name,
|
||||
fmt.Sprintf("Read(): Failed to convert '%s' to float64: %v", lv[i], err))
|
||||
continue
|
||||
}
|
||||
_, skip = stringArrayContains(m.config.ExcludeMetrics, name)
|
||||
if m.proc_skips[i] {
|
||||
continue
|
||||
}
|
||||
y, err := lp.New(name, m.tags, m.meta, map[string]interface{}{"value": x}, now)
|
||||
if err == nil && !skip {
|
||||
if err == nil {
|
||||
output <- y
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user