mirror of
https://github.com/ClusterCockpit/cc-metric-collector.git
synced 2025-07-21 12:21:41 +02:00
Compare commits
9 Commits
smartmon_c
...
v0.6.2
Author | SHA1 | Date | |
---|---|---|---|
|
e79601e2e8 | ||
|
317d36c9dd | ||
|
821d104656 | ||
|
be20f956c2 | ||
|
5b6a2b9018 | ||
|
3438972237 | ||
|
88fabc2e83 | ||
|
b3c27e0af5 | ||
|
2adf9484a3 |
8
Makefile
8
Makefile
@@ -112,7 +112,9 @@ DEB: scripts/cc-metric-collector.deb.control $(APP)
|
|||||||
#@mkdir --parents --verbose $$DEBIANDIR
|
#@mkdir --parents --verbose $$DEBIANDIR
|
||||||
@CONTROLFILE="$${BASEDIR}/scripts/cc-metric-collector.deb.control"
|
@CONTROLFILE="$${BASEDIR}/scripts/cc-metric-collector.deb.control"
|
||||||
@COMMITISH="HEAD"
|
@COMMITISH="HEAD"
|
||||||
|
@git describe --tags --abbrev=0 $${COMMITISH}
|
||||||
@VERS=$$(git describe --tags --abbrev=0 $${COMMITISH})
|
@VERS=$$(git describe --tags --abbrev=0 $${COMMITISH})
|
||||||
|
@if [ -z "$$VERS" ]; then VERS=${GITHUB_REF_NAME}; fi
|
||||||
@VERS=$${VERS#v}
|
@VERS=$${VERS#v}
|
||||||
@VERS=$$(echo $$VERS | sed -e s+'-'+'_'+g)
|
@VERS=$$(echo $$VERS | sed -e s+'-'+'_'+g)
|
||||||
@ARCH=$$(uname -m)
|
@ARCH=$$(uname -m)
|
||||||
@@ -121,8 +123,14 @@ DEB: scripts/cc-metric-collector.deb.control $(APP)
|
|||||||
@SIZE_BYTES=$$(du -bcs --exclude=.dpkgbuild "$$WORKSPACE"/ | awk '{print $$1}' | head -1 | sed -e 's/^0\+//')
|
@SIZE_BYTES=$$(du -bcs --exclude=.dpkgbuild "$$WORKSPACE"/ | awk '{print $$1}' | head -1 | sed -e 's/^0\+//')
|
||||||
@SIZE="$$(awk -v size="$$SIZE_BYTES" 'BEGIN {print (size/1024)+1}' | awk '{print int($$0)}')"
|
@SIZE="$$(awk -v size="$$SIZE_BYTES" 'BEGIN {print (size/1024)+1}' | awk '{print int($$0)}')"
|
||||||
#@sed -e s+"{VERSION}"+"$$VERS"+g -e s+"{INSTALLED_SIZE}"+"$$SIZE"+g -e s+"{ARCH}"+"$$ARCH"+g $$CONTROLFILE > $${DEBIANDIR}/control
|
#@sed -e s+"{VERSION}"+"$$VERS"+g -e s+"{INSTALLED_SIZE}"+"$$SIZE"+g -e s+"{ARCH}"+"$$ARCH"+g $$CONTROLFILE > $${DEBIANDIR}/control
|
||||||
|
@echo "Version: $$VERS"
|
||||||
|
@echo "Size: $$SIZE"
|
||||||
|
@echo "Arch: $$ARCH"
|
||||||
@sed -e s+"{VERSION}"+"$$VERS"+g -e s+"{INSTALLED_SIZE}"+"$$SIZE"+g -e s+"{ARCH}"+"$$ARCH"+g $$CONTROLFILE > $${DEBIANBINDIR}/control
|
@sed -e s+"{VERSION}"+"$$VERS"+g -e s+"{INSTALLED_SIZE}"+"$$SIZE"+g -e s+"{ARCH}"+"$$ARCH"+g $$CONTROLFILE > $${DEBIANBINDIR}/control
|
||||||
@make PREFIX=$${WORKSPACE} install
|
@make PREFIX=$${WORKSPACE} install
|
||||||
@DEB_FILE="cc-metric-collector_$${VERS}_$${ARCH}.deb"
|
@DEB_FILE="cc-metric-collector_$${VERS}_$${ARCH}.deb"
|
||||||
@dpkg-deb -b $${WORKSPACE} "$$DEB_FILE"
|
@dpkg-deb -b $${WORKSPACE} "$$DEB_FILE"
|
||||||
|
@if [ "$${GITHUB_ACTIONS}" = "true" ]; then
|
||||||
|
@ echo "::set-output name=DEB::$${DEB_FILE}"
|
||||||
|
@fi
|
||||||
@rm -r "$${WORKSPACE}"
|
@rm -r "$${WORKSPACE}"
|
||||||
|
@@ -37,7 +37,7 @@ var AvailableCollectors = map[string]MetricCollector{
|
|||||||
"beegfs_meta": new(BeegfsMetaCollector),
|
"beegfs_meta": new(BeegfsMetaCollector),
|
||||||
"beegfs_storage": new(BeegfsStorageCollector),
|
"beegfs_storage": new(BeegfsStorageCollector),
|
||||||
"rocm_smi": new(RocmSmiCollector),
|
"rocm_smi": new(RocmSmiCollector),
|
||||||
"smartmon": new(SmartMonCollector),
|
"self": new(SelfCollector),
|
||||||
"schedstat": new(SchedstatCollector),
|
"schedstat": new(SchedstatCollector),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
144
collectors/selfMetric.go
Normal file
144
collectors/selfMetric.go
Normal file
@@ -0,0 +1,144 @@
|
|||||||
|
package collectors
|
||||||
|
|
||||||
|
import (
|
||||||
|
"encoding/json"
|
||||||
|
"runtime"
|
||||||
|
"syscall"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
cclog "github.com/ClusterCockpit/cc-metric-collector/pkg/ccLogger"
|
||||||
|
lp "github.com/ClusterCockpit/cc-metric-collector/pkg/ccMetric"
|
||||||
|
)
|
||||||
|
|
||||||
|
// SelfCollectorConfig is the JSON configuration of the "self" collector.
//
// Each flag switches one group of metrics on. The zero value leaves every
// group disabled, so a collector without configuration emits nothing.
type SelfCollectorConfig struct {
	MemStats   bool `json:"read_mem_stats"`  // emit metrics taken from runtime.MemStats
	GoRoutines bool `json:"read_goroutines"` // emit the current number of goroutines
	CgoCalls   bool `json:"read_cgo_calls"`  // emit the number of cgo calls
	Rusage     bool `json:"read_rusage"`     // emit metrics taken from getrusage(RUSAGE_SELF)
}
|
||||||
|
|
||||||
|
type SelfCollector struct {
|
||||||
|
metricCollector
|
||||||
|
config SelfCollectorConfig // the configuration structure
|
||||||
|
meta map[string]string // default meta information
|
||||||
|
tags map[string]string // default tags
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m *SelfCollector) Init(config json.RawMessage) error {
|
||||||
|
var err error = nil
|
||||||
|
m.name = "SelfCollector"
|
||||||
|
m.setup()
|
||||||
|
m.parallel = true
|
||||||
|
m.meta = map[string]string{"source": m.name, "group": "Self"}
|
||||||
|
m.tags = map[string]string{"type": "node"}
|
||||||
|
if len(config) > 0 {
|
||||||
|
err = json.Unmarshal(config, &m.config)
|
||||||
|
if err != nil {
|
||||||
|
cclog.ComponentError(m.name, "Error reading config:", err.Error())
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
m.init = true
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m *SelfCollector) Read(interval time.Duration, output chan lp.CCMetric) {
|
||||||
|
timestamp := time.Now()
|
||||||
|
|
||||||
|
if m.config.MemStats {
|
||||||
|
var memstats runtime.MemStats
|
||||||
|
runtime.ReadMemStats(&memstats)
|
||||||
|
|
||||||
|
y, err := lp.New("total_alloc", m.tags, m.meta, map[string]interface{}{"value": memstats.TotalAlloc}, timestamp)
|
||||||
|
if err == nil {
|
||||||
|
y.AddMeta("unit", "Bytes")
|
||||||
|
output <- y
|
||||||
|
}
|
||||||
|
y, err = lp.New("heap_alloc", m.tags, m.meta, map[string]interface{}{"value": memstats.HeapAlloc}, timestamp)
|
||||||
|
if err == nil {
|
||||||
|
y.AddMeta("unit", "Bytes")
|
||||||
|
output <- y
|
||||||
|
}
|
||||||
|
y, err = lp.New("heap_sys", m.tags, m.meta, map[string]interface{}{"value": memstats.HeapSys}, timestamp)
|
||||||
|
if err == nil {
|
||||||
|
y.AddMeta("unit", "Bytes")
|
||||||
|
output <- y
|
||||||
|
}
|
||||||
|
y, err = lp.New("heap_idle", m.tags, m.meta, map[string]interface{}{"value": memstats.HeapIdle}, timestamp)
|
||||||
|
if err == nil {
|
||||||
|
y.AddMeta("unit", "Bytes")
|
||||||
|
output <- y
|
||||||
|
}
|
||||||
|
y, err = lp.New("heap_inuse", m.tags, m.meta, map[string]interface{}{"value": memstats.HeapInuse}, timestamp)
|
||||||
|
if err == nil {
|
||||||
|
y.AddMeta("unit", "Bytes")
|
||||||
|
output <- y
|
||||||
|
}
|
||||||
|
y, err = lp.New("heap_released", m.tags, m.meta, map[string]interface{}{"value": memstats.HeapReleased}, timestamp)
|
||||||
|
if err == nil {
|
||||||
|
y.AddMeta("unit", "Bytes")
|
||||||
|
output <- y
|
||||||
|
}
|
||||||
|
y, err = lp.New("heap_objects", m.tags, m.meta, map[string]interface{}{"value": memstats.HeapObjects}, timestamp)
|
||||||
|
if err == nil {
|
||||||
|
output <- y
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if m.config.GoRoutines {
|
||||||
|
y, err := lp.New("num_goroutines", m.tags, m.meta, map[string]interface{}{"value": runtime.NumGoroutine()}, timestamp)
|
||||||
|
if err == nil {
|
||||||
|
output <- y
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if m.config.CgoCalls {
|
||||||
|
y, err := lp.New("num_cgo_calls", m.tags, m.meta, map[string]interface{}{"value": runtime.NumCgoCall()}, timestamp)
|
||||||
|
if err == nil {
|
||||||
|
output <- y
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if m.config.Rusage {
|
||||||
|
var rusage syscall.Rusage
|
||||||
|
err := syscall.Getrusage(syscall.RUSAGE_SELF, &rusage)
|
||||||
|
if err == nil {
|
||||||
|
sec, nsec := rusage.Utime.Unix()
|
||||||
|
t := float64(sec) + (float64(nsec) * 1e-9)
|
||||||
|
y, err := lp.New("rusage_user_time", m.tags, m.meta, map[string]interface{}{"value": t}, timestamp)
|
||||||
|
if err == nil {
|
||||||
|
y.AddMeta("unit", "seconds")
|
||||||
|
output <- y
|
||||||
|
}
|
||||||
|
sec, nsec = rusage.Stime.Unix()
|
||||||
|
t = float64(sec) + (float64(nsec) * 1e-9)
|
||||||
|
y, err = lp.New("rusage_system_time", m.tags, m.meta, map[string]interface{}{"value": t}, timestamp)
|
||||||
|
if err == nil {
|
||||||
|
y.AddMeta("unit", "seconds")
|
||||||
|
output <- y
|
||||||
|
}
|
||||||
|
y, err = lp.New("rusage_vol_ctx_switch", m.tags, m.meta, map[string]interface{}{"value": rusage.Nvcsw}, timestamp)
|
||||||
|
if err == nil {
|
||||||
|
output <- y
|
||||||
|
}
|
||||||
|
y, err = lp.New("rusage_invol_ctx_switch", m.tags, m.meta, map[string]interface{}{"value": rusage.Nivcsw}, timestamp)
|
||||||
|
if err == nil {
|
||||||
|
output <- y
|
||||||
|
}
|
||||||
|
y, err = lp.New("rusage_signals", m.tags, m.meta, map[string]interface{}{"value": rusage.Nsignals}, timestamp)
|
||||||
|
if err == nil {
|
||||||
|
output <- y
|
||||||
|
}
|
||||||
|
y, err = lp.New("rusage_major_pgfaults", m.tags, m.meta, map[string]interface{}{"value": rusage.Majflt}, timestamp)
|
||||||
|
if err == nil {
|
||||||
|
output <- y
|
||||||
|
}
|
||||||
|
y, err = lp.New("rusage_minor_pgfaults", m.tags, m.meta, map[string]interface{}{"value": rusage.Minflt}, timestamp)
|
||||||
|
if err == nil {
|
||||||
|
output <- y
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m *SelfCollector) Close() {
|
||||||
|
m.init = false
|
||||||
|
}
|
34
collectors/selfMetric.md
Normal file
34
collectors/selfMetric.md
Normal file
@@ -0,0 +1,34 @@
|
|||||||
|
## `self` collector
|
||||||
|
|
||||||
|
```json
|
||||||
|
"self": {
|
||||||
|
"read_mem_stats" : true,
|
||||||
|
"read_goroutines" : true,
|
||||||
|
"read_cgo_calls" : true,
|
||||||
|
"read_rusage" : true
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
The `self` collector reads its data from Go's `runtime` and `syscall` packages and thereby monitors the execution of the cc-metric-collector itself.
|
||||||
|
|
||||||
|
Metrics:
|
||||||
|
* If `read_mem_stats == true`:
|
||||||
|
* `total_alloc`: The metric reports cumulative bytes allocated for heap objects.
|
||||||
|
* `heap_alloc`: The metric reports bytes of allocated heap objects.
|
||||||
|
* `heap_sys`: The metric reports bytes of heap memory obtained from the OS.
|
||||||
|
* `heap_idle`: The metric reports bytes in idle (unused) spans.
|
||||||
|
* `heap_inuse`: The metric reports bytes in in-use spans.
|
||||||
|
* `heap_released`: The metric reports bytes of physical memory returned to the OS.
|
||||||
|
* `heap_objects`: The metric reports the number of allocated heap objects.
|
||||||
|
* If `read_goroutines == true`:
|
||||||
|
* `num_goroutines`: The metric reports the number of goroutines that currently exist.
|
||||||
|
* If `read_cgo_calls == true`:
|
||||||
|
* `num_cgo_calls`: The metric reports the number of cgo calls made by the current process.
|
||||||
|
* If `read_rusage == true`:
|
||||||
|
* `rusage_user_time`: The metric reports the amount of time that this process has been scheduled in user mode.
|
||||||
|
* `rusage_system_time`: The metric reports the amount of time that this process has been scheduled in kernel mode.
|
||||||
|
* `rusage_vol_ctx_switch`: The metric reports the number of voluntary context switches.
|
||||||
|
* `rusage_invol_ctx_switch`: The metric reports the number of involuntary context switches.
|
||||||
|
* `rusage_signals`: The metric reports the number of signals received.
|
||||||
|
* `rusage_major_pgfaults`: The metric reports the number of major faults the process has made which have required loading a memory page from disk.
|
||||||
|
* `rusage_minor_pgfaults`: The metric reports the number of minor faults the process has made which have not required loading a memory page from disk.
|
@@ -1,220 +0,0 @@
|
|||||||
package collectors
|
|
||||||
|
|
||||||
import (
|
|
||||||
"encoding/json"
|
|
||||||
"os/exec"
|
|
||||||
"time"
|
|
||||||
|
|
||||||
cclog "github.com/ClusterCockpit/cc-metric-collector/pkg/ccLogger"
|
|
||||||
lp "github.com/ClusterCockpit/cc-metric-collector/pkg/ccMetric"
|
|
||||||
)
|
|
||||||
|
|
||||||
// SmartMonCollectorConfig is the JSON configuration of the "smartmon"
// collector.
type SmartMonCollectorConfig struct {
	// UseSudo runs smartctl through sudo.
	UseSudo bool `json:"use_sudo"`
	// ExcludeDevices lists device names (e.g. "/dev/sda") to leave out.
	ExcludeDevices []string `json:"exclude_devices"`
}
|
|
||||||
|
|
||||||
type SmartMonCollector struct {
|
|
||||||
metricCollector
|
|
||||||
config SmartMonCollectorConfig // the configuration structure
|
|
||||||
meta map[string]string // default meta information
|
|
||||||
tags map[string]string // default tags
|
|
||||||
devices []string // smartmon devices
|
|
||||||
sudoCmd string // Full path to 'sudo' command
|
|
||||||
smartCtlCmd string // Full path to 'smartctl' command
|
|
||||||
}
|
|
||||||
|
|
||||||
func (m *SmartMonCollector) getSmartmonDevices() error {
|
|
||||||
var command *exec.Cmd
|
|
||||||
var scan struct {
|
|
||||||
Devices []struct {
|
|
||||||
Name string `json:"name"`
|
|
||||||
Type string `json:"type"`
|
|
||||||
} `json:"devices"`
|
|
||||||
}
|
|
||||||
m.devices = make([]string, 0)
|
|
||||||
if m.config.UseSudo {
|
|
||||||
command = exec.Command(m.sudoCmd, m.smartCtlCmd, "--scan", "-j")
|
|
||||||
} else {
|
|
||||||
command = exec.Command(m.smartCtlCmd, "--scan", "-j")
|
|
||||||
}
|
|
||||||
command.Wait()
|
|
||||||
stdout, err := command.Output()
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
err = json.Unmarshal(stdout, &scan)
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
for _, d := range scan.Devices {
|
|
||||||
if len(d.Name) > 0 {
|
|
||||||
m.devices = append(m.devices, d.Name)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func (m *SmartMonCollector) Init(config json.RawMessage) error {
|
|
||||||
var err error = nil
|
|
||||||
m.name = "SmartMonCollector"
|
|
||||||
m.setup()
|
|
||||||
m.parallel = true
|
|
||||||
m.meta = map[string]string{"source": m.name, "group": "Disk"}
|
|
||||||
m.tags = map[string]string{"type": "node", "stype": "disk"}
|
|
||||||
// Read in the JSON configuration
|
|
||||||
if len(config) > 0 {
|
|
||||||
err = json.Unmarshal(config, &m.config)
|
|
||||||
if err != nil {
|
|
||||||
cclog.ComponentError(m.name, "Error reading config:", err.Error())
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if m.config.UseSudo {
|
|
||||||
p, err := exec.LookPath("sudo")
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
m.sudoCmd = p
|
|
||||||
}
|
|
||||||
p, err := exec.LookPath("smartctl")
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
m.smartCtlCmd = p
|
|
||||||
err = m.getSmartmonDevices()
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
m.init = true
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
// SmartMonData is the subset of the JSON output of `smartctl -j -a <device>`
// that the collector decodes. Keys not listed here are ignored.
type SmartMonData struct {
	SerialNumber string `json:"serial_number"`

	// UserCapacity is the device capacity, in blocks and in bytes.
	UserCapacity struct {
		Blocks int `json:"blocks"`
		Bytes  int `json:"bytes"`
	} `json:"user_capacity"`

	// HealthLog mirrors the "nvme_smart_health_information_log" object.
	// All fields stay zero when the output does not contain that object.
	HealthLog struct {
		Temperature        int `json:"temperature"`
		PercentageUsed     int `json:"percentage_used"`
		AvailableSpare     int `json:"available_spare"`
		DataUnitsRead      int `json:"data_units_read"`
		DataUnitsWrite     int `json:"data_units_written"`
		HostReads          int `json:"host_reads"`
		HostWrites         int `json:"host_writes"`
		PowerCycles        int `json:"power_cycles"`
		PowerOnHours       int `json:"power_on_hours"`
		UnsafeShutdowns    int `json:"unsafe_shutdowns"`
		MediaErrors        int `json:"media_errors"`
		NumErrorLogEntries int `json:"num_err_log_entries"`
		WarnTempTime       int `json:"warning_temp_time"`
		CriticalTempTime   int `json:"critical_comp_time"`
	} `json:"nvme_smart_health_information_log"`
}
|
|
||||||
|
|
||||||
func (m *SmartMonCollector) Read(interval time.Duration, output chan lp.CCMetric) {
|
|
||||||
timestamp := time.Now()
|
|
||||||
for _, d := range m.devices {
|
|
||||||
var command *exec.Cmd
|
|
||||||
var data SmartMonData
|
|
||||||
if m.config.UseSudo {
|
|
||||||
command = exec.Command(m.sudoCmd, m.smartCtlCmd, "-j", "-a", d)
|
|
||||||
} else {
|
|
||||||
command = exec.Command(m.smartCtlCmd, "-j", "-a", d)
|
|
||||||
}
|
|
||||||
command.Wait()
|
|
||||||
stdout, err := command.Output()
|
|
||||||
if err != nil {
|
|
||||||
cclog.ComponentError(m.name, "cannot read data for device", d)
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
err = json.Unmarshal(stdout, &data)
|
|
||||||
if err != nil {
|
|
||||||
cclog.ComponentError(m.name, "cannot unmarshal data for device", d)
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
y, err := lp.New("smartmon_temp", m.tags, m.meta, map[string]interface{}{"value": data.HealthLog.Temperature}, timestamp)
|
|
||||||
if err == nil {
|
|
||||||
y.AddTag("stype-id", d)
|
|
||||||
y.AddMeta("unit", "degC")
|
|
||||||
output <- y
|
|
||||||
}
|
|
||||||
y, err = lp.New("smartmon_percent_used", m.tags, m.meta, map[string]interface{}{"value": data.HealthLog.PercentageUsed}, timestamp)
|
|
||||||
if err == nil {
|
|
||||||
y.AddTag("stype-id", d)
|
|
||||||
y.AddMeta("unit", "percent")
|
|
||||||
output <- y
|
|
||||||
}
|
|
||||||
y, err = lp.New("smartmon_avail_spare", m.tags, m.meta, map[string]interface{}{"value": data.HealthLog.AvailableSpare}, timestamp)
|
|
||||||
if err == nil {
|
|
||||||
y.AddTag("stype-id", d)
|
|
||||||
y.AddMeta("unit", "percent")
|
|
||||||
output <- y
|
|
||||||
}
|
|
||||||
y, err = lp.New("smartmon_data_units_read", m.tags, m.meta, map[string]interface{}{"value": data.HealthLog.DataUnitsRead}, timestamp)
|
|
||||||
if err == nil {
|
|
||||||
y.AddTag("stype-id", d)
|
|
||||||
output <- y
|
|
||||||
}
|
|
||||||
y, err = lp.New("smartmon_data_units_write", m.tags, m.meta, map[string]interface{}{"value": data.HealthLog.DataUnitsWrite}, timestamp)
|
|
||||||
if err == nil {
|
|
||||||
y.AddTag("stype-id", d)
|
|
||||||
output <- y
|
|
||||||
}
|
|
||||||
y, err = lp.New("smartmon_host_reads", m.tags, m.meta, map[string]interface{}{"value": data.HealthLog.HostReads}, timestamp)
|
|
||||||
if err == nil {
|
|
||||||
y.AddTag("stype-id", d)
|
|
||||||
output <- y
|
|
||||||
}
|
|
||||||
y, err = lp.New("smartmon_host_writes", m.tags, m.meta, map[string]interface{}{"value": data.HealthLog.HostWrites}, timestamp)
|
|
||||||
if err == nil {
|
|
||||||
y.AddTag("stype-id", d)
|
|
||||||
output <- y
|
|
||||||
}
|
|
||||||
y, err = lp.New("smartmon_power_cycles", m.tags, m.meta, map[string]interface{}{"value": data.HealthLog.PowerCycles}, timestamp)
|
|
||||||
if err == nil {
|
|
||||||
y.AddTag("stype-id", d)
|
|
||||||
output <- y
|
|
||||||
}
|
|
||||||
y, err = lp.New("smartmon_power_on", m.tags, m.meta, map[string]interface{}{"value": int64(data.HealthLog.PowerOnHours) * 3600}, timestamp)
|
|
||||||
if err == nil {
|
|
||||||
y.AddTag("stype-id", d)
|
|
||||||
y.AddMeta("unit", "seconds")
|
|
||||||
output <- y
|
|
||||||
}
|
|
||||||
y, err = lp.New("smartmon_unsafe_shutdowns", m.tags, m.meta, map[string]interface{}{"value": data.HealthLog.UnsafeShutdowns}, timestamp)
|
|
||||||
if err == nil {
|
|
||||||
y.AddTag("stype-id", d)
|
|
||||||
output <- y
|
|
||||||
}
|
|
||||||
y, err = lp.New("smartmon_media_errors", m.tags, m.meta, map[string]interface{}{"value": data.HealthLog.MediaErrors}, timestamp)
|
|
||||||
if err == nil {
|
|
||||||
y.AddTag("stype-id", d)
|
|
||||||
output <- y
|
|
||||||
}
|
|
||||||
y, err = lp.New("smartmon_errlog_entries", m.tags, m.meta, map[string]interface{}{"value": data.HealthLog.NumErrorLogEntries}, timestamp)
|
|
||||||
if err == nil {
|
|
||||||
y.AddTag("stype-id", d)
|
|
||||||
output <- y
|
|
||||||
}
|
|
||||||
y, err = lp.New("smartmon_warn_temp_time", m.tags, m.meta, map[string]interface{}{"value": data.HealthLog.WarnTempTime}, timestamp)
|
|
||||||
if err == nil {
|
|
||||||
y.AddTag("stype-id", d)
|
|
||||||
output <- y
|
|
||||||
}
|
|
||||||
y, err = lp.New("smartmon_crit_temp_time", m.tags, m.meta, map[string]interface{}{"value": data.HealthLog.CriticalTempTime}, timestamp)
|
|
||||||
if err == nil {
|
|
||||||
y.AddTag("stype-id", d)
|
|
||||||
output <- y
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
func (m *SmartMonCollector) Close() {
|
|
||||||
m.init = false
|
|
||||||
}
|
|
@@ -1,29 +0,0 @@
|
|||||||
## `smartmon` collector
|
|
||||||
|
|
||||||
```json
  "smartmon": {
    "use_sudo" : true,
    "exclude_devices": [
      "/dev/sda"
    ]
  }
```
|
|
||||||
|
|
||||||
The `smartmon` collector reads its data from the `smartctl` command. It retrieves S.M.A.R.T. data from disks.
|
|
||||||
|
|
||||||
Metrics:
|
|
||||||
* `smartmon_temp`: Temperature of the device (`unit=degC`)
|
|
||||||
* `smartmon_avail_spare`: Amount of spare left (`unit=percent`)
|
|
||||||
* `smartmon_percent_used`: Percentage of the device that has been used (`unit=percent`)
|
|
||||||
* `smartmon_data_units_read`: Read data units
|
|
||||||
* `smartmon_data_units_write`: Written data units
|
|
||||||
* `smartmon_host_reads`: Read operations
|
|
||||||
* `smartmon_host_writes`: Write operations
|
|
||||||
* `smartmon_power_cycles`: Number of power cycles
|
|
||||||
* `smartmon_power_on`: Seconds the device is powered on (`unit=seconds`)
|
|
||||||
* `smartmon_unsafe_shutdowns`: Count of unsafe shutdowns
|
|
||||||
* `smartmon_media_errors`: Media errors of the device
|
|
||||||
* `smartmon_errlog_entries`: Error log entries
|
|
||||||
* `smartmon_warn_temp_time`: Time above the warning temperature threshold
|
|
||||||
* `smartmon_crit_temp_time`: Time above the critical temperature threshold
|
|
||||||
|
|
2
go.mod
2
go.mod
@@ -8,6 +8,7 @@ require (
|
|||||||
github.com/NVIDIA/go-nvml v0.11.6-0
|
github.com/NVIDIA/go-nvml v0.11.6-0
|
||||||
github.com/PaesslerAG/gval v1.2.0
|
github.com/PaesslerAG/gval v1.2.0
|
||||||
github.com/gorilla/mux v1.8.0
|
github.com/gorilla/mux v1.8.0
|
||||||
|
github.com/influxdata/influxdb-client-go/v2 v2.9.0
|
||||||
github.com/influxdata/influxdb-client-go/v2 v2.9.1
|
github.com/influxdata/influxdb-client-go/v2 v2.9.1
|
||||||
github.com/influxdata/line-protocol v0.0.0-20210922203350-b1ad95c89adf
|
github.com/influxdata/line-protocol v0.0.0-20210922203350-b1ad95c89adf
|
||||||
github.com/nats-io/nats.go v1.16.0
|
github.com/nats-io/nats.go v1.16.0
|
||||||
@@ -17,6 +18,7 @@ require (
|
|||||||
golang.org/x/sys v0.0.0-20220712014510-0a85c31ab51e
|
golang.org/x/sys v0.0.0-20220712014510-0a85c31ab51e
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
require (
|
require (
|
||||||
github.com/beorn7/perks v1.0.1 // indirect
|
github.com/beorn7/perks v1.0.1 // indirect
|
||||||
github.com/cespare/xxhash/v2 v2.1.2 // indirect
|
github.com/cespare/xxhash/v2 v2.1.2 // indirect
|
||||||
|
Reference in New Issue
Block a user