mirror of
https://github.com/ClusterCockpit/cc-metric-collector.git
synced 2024-11-14 05:57:25 +01:00
Merge branch 'develop' of github.com:ClusterCockpit/cc-metric-collector into develop
This commit is contained in:
commit
c313055570
@ -103,9 +103,8 @@ func (m *SampleCollector) Read(interval time.Duration, output chan lp.CCMetric)
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Each metric has exactly one field: value !
|
// Each metric has exactly one field: value !
|
||||||
value := map[string]interface{}{"value": int(x)}
|
value := map[string]interface{}{"value": int64(x)}
|
||||||
y, err := lp.New("sample_metric", tags, m.meta, value, time.Now())
|
if y, err := lp.New("sample_metric", tags, m.meta, value, time.Now()); err == nil {
|
||||||
if err == nil {
|
|
||||||
output <- y
|
output <- y
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -23,14 +23,14 @@ import (
|
|||||||
type CPUFreqCpuInfoCollectorTopology struct {
|
type CPUFreqCpuInfoCollectorTopology struct {
|
||||||
processor string // logical processor number (continuous, starting at 0)
|
processor string // logical processor number (continuous, starting at 0)
|
||||||
coreID string // socket local core ID
|
coreID string // socket local core ID
|
||||||
coreID_int int
|
coreID_int int64
|
||||||
physicalPackageID string // socket / package ID
|
physicalPackageID string // socket / package ID
|
||||||
physicalPackageID_int int
|
physicalPackageID_int int64
|
||||||
numPhysicalPackages string // number of sockets / packages
|
numPhysicalPackages string // number of sockets / packages
|
||||||
numPhysicalPackages_int int
|
numPhysicalPackages_int int64
|
||||||
isHT bool
|
isHT bool
|
||||||
numNonHT string // number of non hyperthreading processors
|
numNonHT string // number of non hyperthreading processors
|
||||||
numNonHT_int int
|
numNonHT_int int64
|
||||||
tagSet map[string]string
|
tagSet map[string]string
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -40,26 +40,32 @@ type CPUFreqCpuInfoCollector struct {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func (m *CPUFreqCpuInfoCollector) Init(config json.RawMessage) error {
|
func (m *CPUFreqCpuInfoCollector) Init(config json.RawMessage) error {
|
||||||
|
// Check if already initialized
|
||||||
|
if m.init {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
m.name = "CPUFreqCpuInfoCollector"
|
m.name = "CPUFreqCpuInfoCollector"
|
||||||
m.meta = map[string]string{
|
m.meta = map[string]string{
|
||||||
"source": m.name,
|
"source": m.name,
|
||||||
"group": "cpufreq",
|
"group": "CPU",
|
||||||
|
"unit": "MHz",
|
||||||
}
|
}
|
||||||
|
|
||||||
const cpuInfoFile = "/proc/cpuinfo"
|
const cpuInfoFile = "/proc/cpuinfo"
|
||||||
file, err := os.Open(cpuInfoFile)
|
file, err := os.Open(cpuInfoFile)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("Failed to open '%s': %v", cpuInfoFile, err)
|
return fmt.Errorf("Failed to open file '%s': %v", cpuInfoFile, err)
|
||||||
}
|
}
|
||||||
defer file.Close()
|
defer file.Close()
|
||||||
|
|
||||||
// Collect topology information from file cpuinfo
|
// Collect topology information from file cpuinfo
|
||||||
foundFreq := false
|
foundFreq := false
|
||||||
processor := ""
|
processor := ""
|
||||||
numNonHT_int := 0
|
var numNonHT_int int64 = 0
|
||||||
coreID := ""
|
coreID := ""
|
||||||
physicalPackageID := ""
|
physicalPackageID := ""
|
||||||
maxPhysicalPackageID := 0
|
var maxPhysicalPackageID int64 = 0
|
||||||
m.topology = make([]CPUFreqCpuInfoCollectorTopology, 0)
|
m.topology = make([]CPUFreqCpuInfoCollectorTopology, 0)
|
||||||
coreSeenBefore := make(map[string]bool)
|
coreSeenBefore := make(map[string]bool)
|
||||||
scanner := bufio.NewScanner(file)
|
scanner := bufio.NewScanner(file)
|
||||||
@ -87,13 +93,13 @@ func (m *CPUFreqCpuInfoCollector) Init(config json.RawMessage) error {
|
|||||||
len(coreID) > 0 &&
|
len(coreID) > 0 &&
|
||||||
len(physicalPackageID) > 0 {
|
len(physicalPackageID) > 0 {
|
||||||
|
|
||||||
coreID_int, err := strconv.Atoi(coreID)
|
coreID_int, err := strconv.ParseInt(coreID, 10, 64)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("Unable to convert coreID to int: %v", err)
|
return fmt.Errorf("Unable to convert coreID '%s' to int64: %v", coreID, err)
|
||||||
}
|
}
|
||||||
physicalPackageID_int, err := strconv.Atoi(physicalPackageID)
|
physicalPackageID_int, err := strconv.ParseInt(physicalPackageID, 10, 64)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("Unable to convert physicalPackageID to int: %v", err)
|
return fmt.Errorf("Unable to convert physicalPackageID '%s' to int64: %v", physicalPackageID, err)
|
||||||
}
|
}
|
||||||
|
|
||||||
// increase maximun socket / package ID, when required
|
// increase maximun socket / package ID, when required
|
||||||
@ -152,15 +158,17 @@ func (m *CPUFreqCpuInfoCollector) Init(config json.RawMessage) error {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func (m *CPUFreqCpuInfoCollector) Read(interval time.Duration, output chan lp.CCMetric) {
|
func (m *CPUFreqCpuInfoCollector) Read(interval time.Duration, output chan lp.CCMetric) {
|
||||||
|
// Check if already initialized
|
||||||
if !m.init {
|
if !m.init {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
const cpuInfoFile = "/proc/cpuinfo"
|
const cpuInfoFile = "/proc/cpuinfo"
|
||||||
file, err := os.Open(cpuInfoFile)
|
file, err := os.Open(cpuInfoFile)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
cclog.ComponentError(
|
cclog.ComponentError(
|
||||||
m.name,
|
m.name,
|
||||||
fmt.Sprintf("Read(): Failed to open '%s': %v", cpuInfoFile, err))
|
fmt.Sprintf("Read(): Failed to open file '%s': %v", cpuInfoFile, err))
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
defer file.Close()
|
defer file.Close()
|
||||||
@ -181,11 +189,10 @@ func (m *CPUFreqCpuInfoCollector) Read(interval time.Duration, output chan lp.CC
|
|||||||
if err != nil {
|
if err != nil {
|
||||||
cclog.ComponentError(
|
cclog.ComponentError(
|
||||||
m.name,
|
m.name,
|
||||||
fmt.Sprintf("Read(): Failed to convert cpu MHz to float: %v", err))
|
fmt.Sprintf("Read(): Failed to convert cpu MHz '%s' to float64: %v", lineSplit[1], err))
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
y, err := lp.New("cpufreq", t.tagSet, m.meta, map[string]interface{}{"value": value}, now)
|
if y, err := lp.New("cpufreq", t.tagSet, m.meta, map[string]interface{}{"value": value}, now); err == nil {
|
||||||
if err == nil {
|
|
||||||
output <- y
|
output <- y
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -35,14 +35,14 @@ func readOneLine(filename string) (text string, ok bool) {
|
|||||||
type CPUFreqCollectorTopology struct {
|
type CPUFreqCollectorTopology struct {
|
||||||
processor string // logical processor number (continuous, starting at 0)
|
processor string // logical processor number (continuous, starting at 0)
|
||||||
coreID string // socket local core ID
|
coreID string // socket local core ID
|
||||||
coreID_int int
|
coreID_int int64
|
||||||
physicalPackageID string // socket / package ID
|
physicalPackageID string // socket / package ID
|
||||||
physicalPackageID_int int
|
physicalPackageID_int int64
|
||||||
numPhysicalPackages string // number of sockets / packages
|
numPhysicalPackages string // number of sockets / packages
|
||||||
numPhysicalPackages_int int
|
numPhysicalPackages_int int64
|
||||||
isHT bool
|
isHT bool
|
||||||
numNonHT string // number of non hyperthreading processors
|
numNonHT string // number of non hyperthreading processors
|
||||||
numNonHT_int int
|
numNonHT_int int64
|
||||||
scalingCurFreqFile string
|
scalingCurFreqFile string
|
||||||
tagSet map[string]string
|
tagSet map[string]string
|
||||||
}
|
}
|
||||||
@ -64,6 +64,11 @@ type CPUFreqCollector struct {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func (m *CPUFreqCollector) Init(config json.RawMessage) error {
|
func (m *CPUFreqCollector) Init(config json.RawMessage) error {
|
||||||
|
// Check if already initialized
|
||||||
|
if m.init {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
m.name = "CPUFreqCollector"
|
m.name = "CPUFreqCollector"
|
||||||
m.setup()
|
m.setup()
|
||||||
if len(config) > 0 {
|
if len(config) > 0 {
|
||||||
@ -74,7 +79,8 @@ func (m *CPUFreqCollector) Init(config json.RawMessage) error {
|
|||||||
}
|
}
|
||||||
m.meta = map[string]string{
|
m.meta = map[string]string{
|
||||||
"source": m.name,
|
"source": m.name,
|
||||||
"group": "CPU Frequency",
|
"group": "CPU",
|
||||||
|
"unit": "MHz",
|
||||||
}
|
}
|
||||||
|
|
||||||
// Loop for all CPU directories
|
// Loop for all CPU directories
|
||||||
@ -82,48 +88,48 @@ func (m *CPUFreqCollector) Init(config json.RawMessage) error {
|
|||||||
globPattern := filepath.Join(baseDir, "cpu[0-9]*")
|
globPattern := filepath.Join(baseDir, "cpu[0-9]*")
|
||||||
cpuDirs, err := filepath.Glob(globPattern)
|
cpuDirs, err := filepath.Glob(globPattern)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("CPUFreqCollector.Init() unable to glob files with pattern %s: %v", globPattern, err)
|
return fmt.Errorf("Unable to glob files with pattern '%s': %v", globPattern, err)
|
||||||
}
|
}
|
||||||
if cpuDirs == nil {
|
if cpuDirs == nil {
|
||||||
return fmt.Errorf("CPUFreqCollector.Init() unable to find any files with pattern %s", globPattern)
|
return fmt.Errorf("Unable to find any files with pattern '%s'", globPattern)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Initialize CPU topology
|
// Initialize CPU topology
|
||||||
m.topology = make([]CPUFreqCollectorTopology, len(cpuDirs))
|
m.topology = make([]CPUFreqCollectorTopology, len(cpuDirs))
|
||||||
for _, cpuDir := range cpuDirs {
|
for _, cpuDir := range cpuDirs {
|
||||||
processor := strings.TrimPrefix(cpuDir, "/sys/devices/system/cpu/cpu")
|
processor := strings.TrimPrefix(cpuDir, "/sys/devices/system/cpu/cpu")
|
||||||
processor_int, err := strconv.Atoi(processor)
|
processor_int, err := strconv.ParseInt(processor, 10, 64)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("CPUFreqCollector.Init() unable to convert cpuID to int: %v", err)
|
return fmt.Errorf("Unable to convert cpuID '%s' to int64: %v", processor, err)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Read package ID
|
// Read package ID
|
||||||
physicalPackageIDFile := filepath.Join(cpuDir, "topology", "physical_package_id")
|
physicalPackageIDFile := filepath.Join(cpuDir, "topology", "physical_package_id")
|
||||||
physicalPackageID, ok := readOneLine(physicalPackageIDFile)
|
physicalPackageID, ok := readOneLine(physicalPackageIDFile)
|
||||||
if !ok {
|
if !ok {
|
||||||
return fmt.Errorf("CPUFreqCollector.Init() unable to read physical package ID from %s", physicalPackageIDFile)
|
return fmt.Errorf("Unable to read physical package ID from file '%s'", physicalPackageIDFile)
|
||||||
}
|
}
|
||||||
physicalPackageID_int, err := strconv.Atoi(physicalPackageID)
|
physicalPackageID_int, err := strconv.ParseInt(physicalPackageID, 10, 64)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("CPUFreqCollector.Init() unable to convert packageID to int: %v", err)
|
return fmt.Errorf("Unable to convert packageID '%s' to int64: %v", physicalPackageID, err)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Read core ID
|
// Read core ID
|
||||||
coreIDFile := filepath.Join(cpuDir, "topology", "core_id")
|
coreIDFile := filepath.Join(cpuDir, "topology", "core_id")
|
||||||
coreID, ok := readOneLine(coreIDFile)
|
coreID, ok := readOneLine(coreIDFile)
|
||||||
if !ok {
|
if !ok {
|
||||||
return fmt.Errorf("CPUFreqCollector.Init() unable to read core ID from %s", coreIDFile)
|
return fmt.Errorf("Unable to read core ID from file '%s'", coreIDFile)
|
||||||
}
|
}
|
||||||
coreID_int, err := strconv.Atoi(coreID)
|
coreID_int, err := strconv.ParseInt(coreID, 10, 64)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("CPUFreqCollector.Init() unable to convert coreID to int: %v", err)
|
return fmt.Errorf("Unable to convert coreID '%s' to int64: %v", coreID, err)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Check access to current frequency file
|
// Check access to current frequency file
|
||||||
scalingCurFreqFile := filepath.Join(cpuDir, "cpufreq", "scaling_cur_freq")
|
scalingCurFreqFile := filepath.Join(cpuDir, "cpufreq", "scaling_cur_freq")
|
||||||
err = unix.Access(scalingCurFreqFile, unix.R_OK)
|
err = unix.Access(scalingCurFreqFile, unix.R_OK)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("CPUFreqCollector.Init() unable to access %s: %v", scalingCurFreqFile, err)
|
return fmt.Errorf("Unable to access file '%s': %v", scalingCurFreqFile, err)
|
||||||
}
|
}
|
||||||
|
|
||||||
t := &m.topology[processor_int]
|
t := &m.topology[processor_int]
|
||||||
@ -146,8 +152,8 @@ func (m *CPUFreqCollector) Init(config json.RawMessage) error {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// number of non hyper thread cores and packages / sockets
|
// number of non hyper thread cores and packages / sockets
|
||||||
numNonHT_int := 0
|
var numNonHT_int int64 = 0
|
||||||
maxPhysicalPackageID := 0
|
var maxPhysicalPackageID int64 = 0
|
||||||
for i := range m.topology {
|
for i := range m.topology {
|
||||||
t := &m.topology[i]
|
t := &m.topology[i]
|
||||||
|
|
||||||
@ -184,6 +190,7 @@ func (m *CPUFreqCollector) Init(config json.RawMessage) error {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func (m *CPUFreqCollector) Read(interval time.Duration, output chan lp.CCMetric) {
|
func (m *CPUFreqCollector) Read(interval time.Duration, output chan lp.CCMetric) {
|
||||||
|
// Check if already initialized
|
||||||
if !m.init {
|
if !m.init {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
@ -205,16 +212,15 @@ func (m *CPUFreqCollector) Read(interval time.Duration, output chan lp.CCMetric)
|
|||||||
fmt.Sprintf("Read(): Failed to read one line from file '%s'", t.scalingCurFreqFile))
|
fmt.Sprintf("Read(): Failed to read one line from file '%s'", t.scalingCurFreqFile))
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
cpuFreq, err := strconv.Atoi(line)
|
cpuFreq, err := strconv.ParseInt(line, 10, 64)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
cclog.ComponentError(
|
cclog.ComponentError(
|
||||||
m.name,
|
m.name,
|
||||||
fmt.Sprintf("Read(): Failed to convert CPU frequency '%s': %v", line, err))
|
fmt.Sprintf("Read(): Failed to convert CPU frequency '%s' to int64: %v", line, err))
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
y, err := lp.New("cpufreq", t.tagSet, m.meta, map[string]interface{}{"value": cpuFreq}, now)
|
if y, err := lp.New("cpufreq", t.tagSet, m.meta, map[string]interface{}{"value": cpuFreq}, now); err == nil {
|
||||||
if err == nil {
|
|
||||||
output <- y
|
output <- y
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -26,6 +26,11 @@ type GpfsCollector struct {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func (m *GpfsCollector) Init(config json.RawMessage) error {
|
func (m *GpfsCollector) Init(config json.RawMessage) error {
|
||||||
|
// Check if already initialized
|
||||||
|
if m.init {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
var err error
|
var err error
|
||||||
m.name = "GpfsCollector"
|
m.name = "GpfsCollector"
|
||||||
m.setup()
|
m.setup()
|
||||||
@ -53,16 +58,16 @@ func (m *GpfsCollector) Init(config json.RawMessage) error {
|
|||||||
// GPFS / IBM Spectrum Scale file system statistics can only be queried by user root
|
// GPFS / IBM Spectrum Scale file system statistics can only be queried by user root
|
||||||
user, err := user.Current()
|
user, err := user.Current()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("GpfsCollector.Init(): Failed to get current user: %v", err)
|
return fmt.Errorf("Failed to get current user: %v", err)
|
||||||
}
|
}
|
||||||
if user.Uid != "0" {
|
if user.Uid != "0" {
|
||||||
return fmt.Errorf("GpfsCollector.Init(): GPFS file system statistics can only be queried by user root")
|
return fmt.Errorf("GPFS file system statistics can only be queried by user root")
|
||||||
}
|
}
|
||||||
|
|
||||||
// Check if mmpmon is in executable search path
|
// Check if mmpmon is in executable search path
|
||||||
_, err = exec.LookPath(m.config.Mmpmon)
|
_, err = exec.LookPath(m.config.Mmpmon)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("GpfsCollector.Init(): Failed to find mmpmon binary '%s': %v", m.config.Mmpmon, err)
|
return fmt.Errorf("Failed to find mmpmon binary '%s': %v", m.config.Mmpmon, err)
|
||||||
}
|
}
|
||||||
|
|
||||||
m.init = true
|
m.init = true
|
||||||
@ -70,6 +75,7 @@ func (m *GpfsCollector) Init(config json.RawMessage) error {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func (m *GpfsCollector) Read(interval time.Duration, output chan lp.CCMetric) {
|
func (m *GpfsCollector) Read(interval time.Duration, output chan lp.CCMetric) {
|
||||||
|
// Check if already initialized
|
||||||
if !m.init {
|
if !m.init {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
@ -135,7 +141,7 @@ func (m *GpfsCollector) Read(interval time.Duration, output chan lp.CCMetric) {
|
|||||||
if rc != 0 {
|
if rc != 0 {
|
||||||
cclog.ComponentError(
|
cclog.ComponentError(
|
||||||
m.name,
|
m.name,
|
||||||
fmt.Sprintf("Read(): Filesystem %s not ok.", filesystem))
|
fmt.Sprintf("Read(): Filesystem '%s' is not ok.", filesystem))
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -143,14 +149,14 @@ func (m *GpfsCollector) Read(interval time.Duration, output chan lp.CCMetric) {
|
|||||||
if err != nil {
|
if err != nil {
|
||||||
cclog.ComponentError(
|
cclog.ComponentError(
|
||||||
m.name,
|
m.name,
|
||||||
fmt.Sprintf("Read(): Failed to convert seconds '%s' to int: %v", key_value["_t_"], err))
|
fmt.Sprintf("Read(): Failed to convert seconds '%s' to int64: %v", key_value["_t_"], err))
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
msec, err := strconv.ParseInt(key_value["_tu_"], 10, 64)
|
msec, err := strconv.ParseInt(key_value["_tu_"], 10, 64)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
cclog.ComponentError(
|
cclog.ComponentError(
|
||||||
m.name,
|
m.name,
|
||||||
fmt.Sprintf("Read(): Failed to convert micro seconds '%s' to int: %v", key_value["_tu_"], err))
|
fmt.Sprintf("Read(): Failed to convert micro seconds '%s' to int64: %v", key_value["_tu_"], err))
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
timestamp := time.Unix(sec, msec*1000)
|
timestamp := time.Unix(sec, msec*1000)
|
||||||
@ -160,12 +166,10 @@ func (m *GpfsCollector) Read(interval time.Duration, output chan lp.CCMetric) {
|
|||||||
if err != nil {
|
if err != nil {
|
||||||
cclog.ComponentError(
|
cclog.ComponentError(
|
||||||
m.name,
|
m.name,
|
||||||
fmt.Sprintf("Read(): Failed to convert bytes read '%s' to int: %v", key_value["_br_"], err))
|
fmt.Sprintf("Read(): Failed to convert bytes read '%s' to int64: %v", key_value["_br_"], err))
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
if y, err := lp.New("gpfs_bytes_read", m.tags, m.meta, map[string]interface{}{"value": bytesRead}, timestamp); err == nil {
|
||||||
y, err := lp.New("gpfs_bytes_read", m.tags, m.meta, map[string]interface{}{"value": bytesRead}, timestamp)
|
|
||||||
if err == nil {
|
|
||||||
output <- y
|
output <- y
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -174,12 +178,10 @@ func (m *GpfsCollector) Read(interval time.Duration, output chan lp.CCMetric) {
|
|||||||
if err != nil {
|
if err != nil {
|
||||||
cclog.ComponentError(
|
cclog.ComponentError(
|
||||||
m.name,
|
m.name,
|
||||||
fmt.Sprintf("Read(): Failed to convert bytes written '%s' to int: %v", key_value["_bw_"], err))
|
fmt.Sprintf("Read(): Failed to convert bytes written '%s' to int64: %v", key_value["_bw_"], err))
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
if y, err := lp.New("gpfs_bytes_written", m.tags, m.meta, map[string]interface{}{"value": bytesWritten}, timestamp); err == nil {
|
||||||
y, err = lp.New("gpfs_bytes_written", m.tags, m.meta, map[string]interface{}{"value": bytesWritten}, timestamp)
|
|
||||||
if err == nil {
|
|
||||||
output <- y
|
output <- y
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -188,11 +190,10 @@ func (m *GpfsCollector) Read(interval time.Duration, output chan lp.CCMetric) {
|
|||||||
if err != nil {
|
if err != nil {
|
||||||
cclog.ComponentError(
|
cclog.ComponentError(
|
||||||
m.name,
|
m.name,
|
||||||
fmt.Sprintf("Read(): Failed to convert number of opens '%s' to int: %v", key_value["_oc_"], err))
|
fmt.Sprintf("Read(): Failed to convert number of opens '%s' to int64: %v", key_value["_oc_"], err))
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
y, err = lp.New("gpfs_num_opens", m.tags, m.meta, map[string]interface{}{"value": numOpens}, timestamp)
|
if y, err := lp.New("gpfs_num_opens", m.tags, m.meta, map[string]interface{}{"value": numOpens}, timestamp); err == nil {
|
||||||
if err == nil {
|
|
||||||
output <- y
|
output <- y
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -201,11 +202,10 @@ func (m *GpfsCollector) Read(interval time.Duration, output chan lp.CCMetric) {
|
|||||||
if err != nil {
|
if err != nil {
|
||||||
cclog.ComponentError(
|
cclog.ComponentError(
|
||||||
m.name,
|
m.name,
|
||||||
fmt.Sprintf("Read(): Failed to convert number of closes: '%s' to int: %v", key_value["_cc_"], err))
|
fmt.Sprintf("Read(): Failed to convert number of closes: '%s' to int64: %v", key_value["_cc_"], err))
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
y, err = lp.New("gpfs_num_closes", m.tags, m.meta, map[string]interface{}{"value": numCloses}, timestamp)
|
if y, err := lp.New("gpfs_num_closes", m.tags, m.meta, map[string]interface{}{"value": numCloses}, timestamp); err == nil {
|
||||||
if err == nil {
|
|
||||||
output <- y
|
output <- y
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -214,11 +214,10 @@ func (m *GpfsCollector) Read(interval time.Duration, output chan lp.CCMetric) {
|
|||||||
if err != nil {
|
if err != nil {
|
||||||
cclog.ComponentError(
|
cclog.ComponentError(
|
||||||
m.name,
|
m.name,
|
||||||
fmt.Sprintf("Read(): Failed to convert number of reads: '%s' to int: %v", key_value["_rdc_"], err))
|
fmt.Sprintf("Read(): Failed to convert number of reads: '%s' to int64: %v", key_value["_rdc_"], err))
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
y, err = lp.New("gpfs_num_reads", m.tags, m.meta, map[string]interface{}{"value": numReads}, timestamp)
|
if y, err := lp.New("gpfs_num_reads", m.tags, m.meta, map[string]interface{}{"value": numReads}, timestamp); err == nil {
|
||||||
if err == nil {
|
|
||||||
output <- y
|
output <- y
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -227,11 +226,10 @@ func (m *GpfsCollector) Read(interval time.Duration, output chan lp.CCMetric) {
|
|||||||
if err != nil {
|
if err != nil {
|
||||||
cclog.ComponentError(
|
cclog.ComponentError(
|
||||||
m.name,
|
m.name,
|
||||||
fmt.Sprintf("Read(): Failed to convert number of writes: '%s' to int: %v", key_value["_wc_"], err))
|
fmt.Sprintf("Read(): Failed to convert number of writes: '%s' to int64: %v", key_value["_wc_"], err))
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
y, err = lp.New("gpfs_num_writes", m.tags, m.meta, map[string]interface{}{"value": numWrites}, timestamp)
|
if y, err := lp.New("gpfs_num_writes", m.tags, m.meta, map[string]interface{}{"value": numWrites}, timestamp); err == nil {
|
||||||
if err == nil {
|
|
||||||
output <- y
|
output <- y
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -240,11 +238,10 @@ func (m *GpfsCollector) Read(interval time.Duration, output chan lp.CCMetric) {
|
|||||||
if err != nil {
|
if err != nil {
|
||||||
cclog.ComponentError(
|
cclog.ComponentError(
|
||||||
m.name,
|
m.name,
|
||||||
fmt.Sprintf("Read(): Failed to convert number of read directories: '%s' to int: %v", key_value["_dir_"], err))
|
fmt.Sprintf("Read(): Failed to convert number of read directories: '%s' to int64: %v", key_value["_dir_"], err))
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
y, err = lp.New("gpfs_num_readdirs", m.tags, m.meta, map[string]interface{}{"value": numReaddirs}, timestamp)
|
if y, err := lp.New("gpfs_num_readdirs", m.tags, m.meta, map[string]interface{}{"value": numReaddirs}, timestamp); err == nil {
|
||||||
if err == nil {
|
|
||||||
output <- y
|
output <- y
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -256,8 +253,7 @@ func (m *GpfsCollector) Read(interval time.Duration, output chan lp.CCMetric) {
|
|||||||
fmt.Sprintf("Read(): Failed to convert number of inode updates: '%s' to int: %v", key_value["_iu_"], err))
|
fmt.Sprintf("Read(): Failed to convert number of inode updates: '%s' to int: %v", key_value["_iu_"], err))
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
y, err = lp.New("gpfs_num_inode_updates", m.tags, m.meta, map[string]interface{}{"value": numInodeUpdates}, timestamp)
|
if y, err := lp.New("gpfs_num_inode_updates", m.tags, m.meta, map[string]interface{}{"value": numInodeUpdates}, timestamp); err == nil {
|
||||||
if err == nil {
|
|
||||||
output <- y
|
output <- y
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
30
collectors/gpfsMetric.md
Normal file
30
collectors/gpfsMetric.md
Normal file
@ -0,0 +1,30 @@
|
|||||||
|
## `gpfs` collector
|
||||||
|
|
||||||
|
```json
|
||||||
|
"ibstat": {
|
||||||
|
"mmpmon_path": "/path/to/mmpmon",
|
||||||
|
"exclude_filesystem": [
|
||||||
|
"fs1"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
The `gpfs` collector uses the `mmpmon` command to read performance metrics for
|
||||||
|
GPFS / IBM Spectrum Scale filesystems.
|
||||||
|
|
||||||
|
The reported filesystems can be filtered with the `exclude_filesystem` option
|
||||||
|
in the configuration.
|
||||||
|
|
||||||
|
The path to the `mmpmon` command can be configured with the `mmpmon_path` option
|
||||||
|
in the configuration.
|
||||||
|
|
||||||
|
Metrics:
|
||||||
|
* `bytes_read`
|
||||||
|
* `gpfs_bytes_written`
|
||||||
|
* `gpfs_num_opens`
|
||||||
|
* `gpfs_num_closes`
|
||||||
|
* `gpfs_num_reads`
|
||||||
|
* `gpfs_num_readdirs`
|
||||||
|
* `gpfs_num_inode_updates`
|
||||||
|
|
||||||
|
The collector adds a `filesystem` tag to all metrics
|
@ -4,6 +4,7 @@ import (
|
|||||||
"fmt"
|
"fmt"
|
||||||
"os"
|
"os"
|
||||||
|
|
||||||
|
cclog "github.com/ClusterCockpit/cc-metric-collector/internal/ccLogger"
|
||||||
lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
|
lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
|
||||||
"golang.org/x/sys/unix"
|
"golang.org/x/sys/unix"
|
||||||
|
|
||||||
@ -20,7 +21,7 @@ type InfinibandCollectorInfo struct {
|
|||||||
LID string // IB local Identifier (LID)
|
LID string // IB local Identifier (LID)
|
||||||
device string // IB device
|
device string // IB device
|
||||||
port string // IB device port
|
port string // IB device port
|
||||||
portCounterFiles map[string]string // mapping counter name -> file
|
portCounterFiles map[string]string // mapping counter name -> sysfs file
|
||||||
tagSet map[string]string // corresponding tag list
|
tagSet map[string]string // corresponding tag list
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -32,26 +33,14 @@ type InfinibandCollector struct {
|
|||||||
info []InfinibandCollectorInfo
|
info []InfinibandCollectorInfo
|
||||||
}
|
}
|
||||||
|
|
||||||
func (m *InfinibandCollector) Help() {
|
|
||||||
fmt.Println("This collector includes all devices that can be found below ", IB_BASEPATH)
|
|
||||||
fmt.Println("and where any of the ports provides a 'lid' file (glob ", IB_BASEPATH, "/<dev>/ports/<port>/lid).")
|
|
||||||
fmt.Println("The devices can be filtered with the 'exclude_devices' option in the configuration.")
|
|
||||||
fmt.Println("For each found LIDs the collector calls the 'perfquery' command")
|
|
||||||
fmt.Println("")
|
|
||||||
fmt.Println("Full configuration object:")
|
|
||||||
fmt.Println("\"ibstat\" : {")
|
|
||||||
fmt.Println(" \"exclude_devices\" : [\"dev1\"]")
|
|
||||||
fmt.Println("}")
|
|
||||||
fmt.Println("")
|
|
||||||
fmt.Println("Metrics:")
|
|
||||||
fmt.Println("- ib_recv")
|
|
||||||
fmt.Println("- ib_xmit")
|
|
||||||
fmt.Println("- ib_recv_pkts")
|
|
||||||
fmt.Println("- ib_xmit_pkts")
|
|
||||||
}
|
|
||||||
|
|
||||||
// Init initializes the Infiniband collector by walking through files below IB_BASEPATH
|
// Init initializes the Infiniband collector by walking through files below IB_BASEPATH
|
||||||
func (m *InfinibandCollector) Init(config json.RawMessage) error {
|
func (m *InfinibandCollector) Init(config json.RawMessage) error {
|
||||||
|
|
||||||
|
// Check if already initialized
|
||||||
|
if !m.init {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
var err error
|
var err error
|
||||||
m.name = "InfinibandCollector"
|
m.name = "InfinibandCollector"
|
||||||
m.setup()
|
m.setup()
|
||||||
@ -153,14 +142,25 @@ func (m *InfinibandCollector) Read(interval time.Duration, output chan lp.CCMetr
|
|||||||
// device info
|
// device info
|
||||||
info := &m.info[i]
|
info := &m.info[i]
|
||||||
for counterName, counterFile := range info.portCounterFiles {
|
for counterName, counterFile := range info.portCounterFiles {
|
||||||
if data, ok := readOneLine(counterFile); ok {
|
data, ok := readOneLine(counterFile)
|
||||||
if v, err := strconv.ParseInt(data, 10, 64); err == nil {
|
if !ok {
|
||||||
if y, err := lp.New(counterName, info.tagSet, m.meta, map[string]interface{}{"value": v}, now); err == nil {
|
cclog.ComponentError(
|
||||||
output <- y
|
m.name,
|
||||||
}
|
fmt.Sprintf("Read(): Failed to read one line from file '%s'", counterFile))
|
||||||
}
|
continue
|
||||||
|
}
|
||||||
|
v, err := strconv.ParseInt(data, 10, 64)
|
||||||
|
if err != nil {
|
||||||
|
cclog.ComponentError(
|
||||||
|
m.name,
|
||||||
|
fmt.Sprintf("Read(): Failed to convert Infininiband metrice %s='%s' to int64: %v", counterName, data, err))
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if y, err := lp.New(counterName, info.tagSet, m.meta, map[string]interface{}{"value": v}, now); err == nil {
|
||||||
|
output <- y
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -3,17 +3,24 @@
|
|||||||
|
|
||||||
```json
|
```json
|
||||||
"ibstat": {
|
"ibstat": {
|
||||||
"perfquery_path" : "<path to perfquery command>",
|
|
||||||
"exclude_devices": [
|
"exclude_devices": [
|
||||||
"mlx4"
|
"mlx4"
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
```
|
```
|
||||||
|
|
||||||
The `ibstat` collector reads either data through the `perfquery` command or the sysfs files below `/sys/class/infiniband/<device>`.
|
The `ibstat` collector includes all Infiniband devices that can be
|
||||||
|
found below `/sys/class/infiniband/` and where any of the ports provides a
|
||||||
|
LID file (`/sys/class/infiniband/<dev>/ports/<port>/lid`)
|
||||||
|
|
||||||
|
The devices can be filtered with the `exclude_devices` option in the configuration.
|
||||||
|
|
||||||
|
For each found LID the collector reads data through the sysfs files below `/sys/class/infiniband/<device>`.
|
||||||
|
|
||||||
Metrics:
|
Metrics:
|
||||||
* `ib_recv`
|
* `ib_recv`
|
||||||
* `ib_xmit`
|
* `ib_xmit`
|
||||||
|
* `ib_recv_pkts`
|
||||||
|
* `ib_xmit_pkts`
|
||||||
|
|
||||||
The collector adds a `device` tag to all metrics
|
The collector adds a `device` tag to all metrics
|
||||||
|
@ -29,27 +29,6 @@ type InfinibandPerfQueryCollector struct {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (m *InfinibandPerfQueryCollector) Help() {
|
|
||||||
fmt.Println("This collector includes all devices that can be found below ", IB_BASEPATH)
|
|
||||||
fmt.Println("and where any of the ports provides a 'lid' file (glob ", IB_BASEPATH, "/<dev>/ports/<port>/lid).")
|
|
||||||
fmt.Println("The devices can be filtered with the 'exclude_devices' option in the configuration.")
|
|
||||||
fmt.Println("For each found LIDs the collector calls the 'perfquery' command")
|
|
||||||
fmt.Println("The path to the 'perfquery' command can be configured with the 'perfquery_path' option")
|
|
||||||
fmt.Println("in the configuration")
|
|
||||||
fmt.Println("")
|
|
||||||
fmt.Println("Full configuration object:")
|
|
||||||
fmt.Println("\"ibstat\" : {")
|
|
||||||
fmt.Println(" \"perfquery_path\" : \"path/to/perfquery\" # if omitted, it searches in $PATH")
|
|
||||||
fmt.Println(" \"exclude_devices\" : [\"dev1\"]")
|
|
||||||
fmt.Println("}")
|
|
||||||
fmt.Println("")
|
|
||||||
fmt.Println("Metrics:")
|
|
||||||
fmt.Println("- ib_recv")
|
|
||||||
fmt.Println("- ib_xmit")
|
|
||||||
fmt.Println("- ib_recv_pkts")
|
|
||||||
fmt.Println("- ib_xmit_pkts")
|
|
||||||
}
|
|
||||||
|
|
||||||
func (m *InfinibandPerfQueryCollector) Init(config json.RawMessage) error {
|
func (m *InfinibandPerfQueryCollector) Init(config json.RawMessage) error {
|
||||||
var err error
|
var err error
|
||||||
m.name = "InfinibandCollectorPerfQuery"
|
m.name = "InfinibandCollectorPerfQuery"
|
||||||
|
28
collectors/infinibandPerfQueryMetric.md
Normal file
28
collectors/infinibandPerfQueryMetric.md
Normal file
@ -0,0 +1,28 @@
|
|||||||
|
|
||||||
|
## `ibstat_perfquery` collector
|
||||||
|
|
||||||
|
```json
|
||||||
|
"ibstat_perfquery": {
|
||||||
|
"perfquery_path": "/path/to/perfquery",
|
||||||
|
"exclude_devices": [
|
||||||
|
"mlx4"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
The `ibstat_perfquery` collector includes all Infiniband devices that can be
|
||||||
|
found below `/sys/class/infiniband/` and where any of the ports provides a
|
||||||
|
LID file (`/sys/class/infiniband/<dev>/ports/<port>/lid`)
|
||||||
|
|
||||||
|
The devices can be filtered with the `exclude_devices` option in the configuration.
|
||||||
|
|
||||||
|
For each found LID the collector calls the `perfquery` command. The path to the
|
||||||
|
`perfquery` command can be configured with the `perfquery_path` option in the configuration
|
||||||
|
|
||||||
|
Metrics:
|
||||||
|
* `ib_recv`
|
||||||
|
* `ib_xmit`
|
||||||
|
* `ib_recv_pkts`
|
||||||
|
* `ib_xmit_pkts`
|
||||||
|
|
||||||
|
The collector adds a `device` tag to all metrics
|
@ -22,16 +22,16 @@ import (
|
|||||||
//
|
//
|
||||||
const LOADAVGFILE = "/proc/loadavg"
|
const LOADAVGFILE = "/proc/loadavg"
|
||||||
|
|
||||||
type LoadavgCollectorConfig struct {
|
|
||||||
ExcludeMetrics []string `json:"exclude_metrics,omitempty"`
|
|
||||||
}
|
|
||||||
|
|
||||||
type LoadavgCollector struct {
|
type LoadavgCollector struct {
|
||||||
metricCollector
|
metricCollector
|
||||||
tags map[string]string
|
tags map[string]string
|
||||||
load_matches []string
|
load_matches []string
|
||||||
|
load_skips []bool
|
||||||
proc_matches []string
|
proc_matches []string
|
||||||
config LoadavgCollectorConfig
|
proc_skips []bool
|
||||||
|
config struct {
|
||||||
|
ExcludeMetrics []string `json:"exclude_metrics,omitempty"`
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (m *LoadavgCollector) Init(config json.RawMessage) error {
|
func (m *LoadavgCollector) Init(config json.RawMessage) error {
|
||||||
@ -51,15 +51,23 @@ func (m *LoadavgCollector) Init(config json.RawMessage) error {
|
|||||||
"load_one",
|
"load_one",
|
||||||
"load_five",
|
"load_five",
|
||||||
"load_fifteen"}
|
"load_fifteen"}
|
||||||
|
m.load_skips = make([]bool, len(m.load_matches))
|
||||||
m.proc_matches = []string{
|
m.proc_matches = []string{
|
||||||
"proc_run",
|
"proc_run",
|
||||||
"proc_total"}
|
"proc_total"}
|
||||||
|
m.proc_skips = make([]bool, len(m.proc_matches))
|
||||||
|
|
||||||
|
for i, name := range m.load_matches {
|
||||||
|
_, m.load_skips[i] = stringArrayContains(m.config.ExcludeMetrics, name)
|
||||||
|
}
|
||||||
|
for i, name := range m.proc_matches {
|
||||||
|
_, m.proc_skips[i] = stringArrayContains(m.config.ExcludeMetrics, name)
|
||||||
|
}
|
||||||
m.init = true
|
m.init = true
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (m *LoadavgCollector) Read(interval time.Duration, output chan lp.CCMetric) {
|
func (m *LoadavgCollector) Read(interval time.Duration, output chan lp.CCMetric) {
|
||||||
var skip bool
|
|
||||||
if !m.init {
|
if !m.init {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
@ -84,9 +92,11 @@ func (m *LoadavgCollector) Read(interval time.Duration, output chan lp.CCMetric)
|
|||||||
fmt.Sprintf("Read(): Failed to convert '%s' to float64: %v", ls[i], err))
|
fmt.Sprintf("Read(): Failed to convert '%s' to float64: %v", ls[i], err))
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
_, skip = stringArrayContains(m.config.ExcludeMetrics, name)
|
if m.load_skips[i] {
|
||||||
|
continue
|
||||||
|
}
|
||||||
y, err := lp.New(name, m.tags, m.meta, map[string]interface{}{"value": x}, now)
|
y, err := lp.New(name, m.tags, m.meta, map[string]interface{}{"value": x}, now)
|
||||||
if err == nil && !skip {
|
if err == nil {
|
||||||
output <- y
|
output <- y
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -94,16 +104,18 @@ func (m *LoadavgCollector) Read(interval time.Duration, output chan lp.CCMetric)
|
|||||||
// Process metrics
|
// Process metrics
|
||||||
lv := strings.Split(ls[3], `/`)
|
lv := strings.Split(ls[3], `/`)
|
||||||
for i, name := range m.proc_matches {
|
for i, name := range m.proc_matches {
|
||||||
x, err := strconv.ParseFloat(lv[i], 64)
|
x, err := strconv.ParseInt(lv[i], 10, 64)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
cclog.ComponentError(
|
cclog.ComponentError(
|
||||||
m.name,
|
m.name,
|
||||||
fmt.Sprintf("Read(): Failed to convert '%s' to float64: %v", lv[i], err))
|
fmt.Sprintf("Read(): Failed to convert '%s' to float64: %v", lv[i], err))
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
_, skip = stringArrayContains(m.config.ExcludeMetrics, name)
|
if m.proc_skips[i] {
|
||||||
|
continue
|
||||||
|
}
|
||||||
y, err := lp.New(name, m.tags, m.meta, map[string]interface{}{"value": x}, now)
|
y, err := lp.New(name, m.tags, m.meta, map[string]interface{}{"value": x}, now)
|
||||||
if err == nil && !skip {
|
if err == nil {
|
||||||
output <- y
|
output <- y
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user