From b973e8ac9c6088c3c6861d5b0f56824c95a08769 Mon Sep 17 00:00:00 2001 From: Thomas Roehl Date: Fri, 7 Oct 2022 18:49:31 +0200 Subject: [PATCH] Add LustreJobstatCollector (server-side) and prepare LustreCollector (client-side) for common parser --- collectors/collectorManager.go | 3 +- collectors/lustreJobstatMetric.go | 270 ++++++++++++++++++++ collectors/lustreJobstatMetric.md | 35 +++ collectors/lustreMetric.go | 403 +++++++++++++++--------------- collectors/lustreMetricCommon.go | 190 ++++++++++++++ 5 files changed, 700 insertions(+), 201 deletions(-) create mode 100644 collectors/lustreJobstatMetric.go create mode 100644 collectors/lustreJobstatMetric.md create mode 100644 collectors/lustreMetricCommon.go diff --git a/collectors/collectorManager.go b/collectors/collectorManager.go index 63d0cb4..9854487 100644 --- a/collectors/collectorManager.go +++ b/collectors/collectorManager.go @@ -20,6 +20,7 @@ var AvailableCollectors = map[string]MetricCollector{ "netstat": new(NetstatCollector), "ibstat": new(InfinibandCollector), "lustrestat": new(LustreCollector), + "lustre_jobstat": new(LustreJobstatCollector), "cpustat": new(CpustatCollector), "topprocs": new(TopProcsCollector), "nvidia": new(NvidiaCollector), @@ -37,7 +38,7 @@ var AvailableCollectors = map[string]MetricCollector{ "beegfs_meta": new(BeegfsMetaCollector), "beegfs_storage": new(BeegfsStorageCollector), "rocm_smi": new(RocmSmiCollector), - "schedstat": new(SchedstatCollector), + "schedstat": new(SchedstatCollector), } // Metric collector manager data structure diff --git a/collectors/lustreJobstatMetric.go b/collectors/lustreJobstatMetric.go new file mode 100644 index 0000000..919bf8d --- /dev/null +++ b/collectors/lustreJobstatMetric.go @@ -0,0 +1,270 @@ +package collectors + +import ( + "encoding/json" + "errors" + "os/exec" + "regexp" + "strings" + "time" + + cclog "github.com/ClusterCockpit/cc-metric-collector/internal/ccLogger" + lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric" +) + +type LustreJobstatCollectorConfig struct { + LCtlCommand string `json:"lctl_command,omitempty"` + ExcludeMetrics []string `json:"exclude_metrics,omitempty"` + Sudo bool `json:"use_sudo,omitempty"` + SendAbsoluteValues bool `json:"send_abs_values,omitempty"` + SendDerivedValues bool `json:"send_derived_values,omitempty"` + SendDiffValues bool `json:"send_diff_values,omitempty"` + JobRegex string `json:"jobid_regex,omitempty"` +} + +type LustreJobstatCollector struct { + metricCollector + tags map[string]string + config LustreJobstatCollectorConfig + lctl string + sudoCmd string + lastTimestamp time.Time // Store time stamp of last tick to derive bandwidths + definitions []LustreMetricDefinition // Combined list without excluded metrics + //stats map[string]map[string]int64 // Data for last value per device and metric + lastMdtData *map[string]map[string]LustreMetricData + lastObdfilterData *map[string]map[string]LustreMetricData + jobidRegex *regexp.Regexp +} + +var defaultJobidRegex = `^(?P[\d\w\.]+)$` + +var LustreMetricJobstatsDefinition = []LustreMetricDefinition{ + { + name: "lustre_job_read_samples", + lineprefix: "read", + offsetname: "samples", + unit: "requests", + calc: "none", + }, + { + name: "lustre_job_read_min_bytes", + lineprefix: "read_bytes", + offsetname: "min", + unit: "bytes", + calc: "none", + }, + { + name: "lustre_job_read_max_bytes", + lineprefix: "read_bytes", + offsetname: "max", + unit: "bytes", + calc: "none", + }, +} + +func (m *LustreJobstatCollector) executeLustreCommand(option string) []string { + return executeLustreCommand(m.sudoCmd, m.lctl, LCTL_OPTION, option, m.config.Sudo) +} + +func (m *LustreJobstatCollector) Init(config json.RawMessage) error { + var err error + m.name = "LustreJobstatCollector" + m.parallel = true + m.config.JobRegex = defaultJobidRegex + m.config.SendAbsoluteValues = true + if len(config) > 0 { + err = json.Unmarshal(config, &m.config) + if err != nil { + return err + } + } + m.setup() + m.tags = map[string]string{"type": "jobid"} + m.meta = map[string]string{"source": m.name, "group": "Lustre", "scope": "job"} + + // Lustre file system statistics can only be queried by user root + // or with password-less sudo + // if !m.config.Sudo { + // user, err := user.Current() + // if err != nil { + // cclog.ComponentError(m.name, "Failed to get current user:", err.Error()) + // return err + // } + // if user.Uid != "0" { + // cclog.ComponentError(m.name, "Lustre file system statistics can only be queried by user root") + // return err + // } + // } else { + // p, err := exec.LookPath("sudo") + // if err != nil { + // cclog.ComponentError(m.name, "Cannot find 'sudo'") + // return err + // } + // m.sudoCmd = p + // } + + p, err := exec.LookPath(m.config.LCtlCommand) + if err != nil { + p, err = exec.LookPath(LCTL_CMD) + if err != nil { + return err + } + } + m.lctl = p + + m.definitions = make([]LustreMetricDefinition, 0) + if m.config.SendAbsoluteValues { + for _, def := range LustreMetricJobstatsDefinition { + if _, skip := stringArrayContains(m.config.ExcludeMetrics, def.name); !skip { + m.definitions = append(m.definitions, def) + } + } + } + + if len(m.definitions) == 0 { + return errors.New("no metrics to collect") + } + + x := make(map[string]map[string]LustreMetricData) + m.lastMdtData = &x + x = make(map[string]map[string]LustreMetricData) + m.lastObdfilterData = &x + + if len(m.config.JobRegex) > 0 { + jregex := strings.ReplaceAll(m.config.JobRegex, "%", "\\") + r, err := regexp.Compile(jregex) + if err == nil { + m.jobidRegex = r + } else { + cclog.ComponentError(m.name, "Cannot compile jobid regex") + return err + } + } + + m.lastTimestamp = time.Now() + m.init = true + return nil +} + +func (m *LustreJobstatCollector) Read(interval time.Duration, output chan lp.CCMetric) { + if !m.init { + return + } + + getValue := func(data map[string]map[string]LustreMetricData, device string, jobid string, operation string, field string) int64 { + var value int64 = -1 + if ddata, ok := data[device]; ok { + if jdata, ok := ddata[jobid]; ok { + if opdata, ok := jdata.op_data[operation]; ok { + if v, ok := opdata[field]; ok { + value = v + } + } + } + } + return value + } + + jobIdToTags := func(jobregex *regexp.Regexp, job string) map[string]string { + tags := make(map[string]string) + groups := jobregex.SubexpNames() + for _, match := range jobregex.FindAllStringSubmatch(job, -1) { + for groupIdx, group := range match { + if len(groups[groupIdx]) > 0 { + tags[groups[groupIdx]] = group + } + } + } + return tags + } + + generateMetric := func(definition LustreMetricDefinition, data map[string]map[string]LustreMetricData, last map[string]map[string]LustreMetricData, now time.Time) { + tdiff := now.Sub(m.lastTimestamp) + for dev, ddata := range data { + for jobid, jdata := range ddata { + jobtags := jobIdToTags(m.jobidRegex, jobid) + if _, ok := jobtags["jobid"]; !ok { + continue + } + cur := getValue(data, dev, jobid, definition.lineprefix, definition.offsetname) + old := getValue(last, dev, jobid, definition.lineprefix, definition.offsetname) + var x interface{} = -1 + var valid = false + switch definition.calc { + case "none": + x = cur + valid = true + case "difference": + if len(last) > 0 { + if old >= 0 { + x = cur - old + valid = true + } + } + case "derivative": + if len(last) > 0 { + if old >= 0 { + x = float64(cur-old) / tdiff.Seconds() + valid = true + } + } + } + if valid { + y, err := lp.New(definition.name, m.tags, m.meta, map[string]interface{}{"value": x}, now) + if err == nil { + y.AddTag("stype", "device") + y.AddTag("stype-id", dev) + if j, ok := jobtags["jobid"]; ok { + y.AddTag("type-id", j) + } else { + y.AddTag("type-id", jobid) + } + for k, v := range jobtags { + switch k { + case "jobid": + case "hostname": + y.AddTag("hostname", v) + default: + y.AddMeta(k, v) + } + } + if len(definition.unit) > 0 { + y.AddMeta("unit", definition.unit) + } else { + if unit, ok := jdata.op_units[definition.lineprefix]; ok { + y.AddMeta("unit", unit) + } + } + output <- y + } + } + } + } + } + + now := time.Now() + + mdt_lines := m.executeLustreCommand("mdt.*.job_stats") + if len(mdt_lines) > 0 { + mdt_data := readCommandOutput(mdt_lines) + for _, def := range m.definitions { + generateMetric(def, mdt_data, *m.lastMdtData, now) + } + m.lastMdtData = &mdt_data + } + + obdfilter_lines := m.executeLustreCommand("obdfilter.*.job_stats") + if len(obdfilter_lines) > 0 { + obdfilter_data := readCommandOutput(obdfilter_lines) + for _, def := range m.definitions { + generateMetric(def, obdfilter_data, *m.lastObdfilterData, now) + } + m.lastObdfilterData = &obdfilter_data + } + + m.lastTimestamp = now +} + +func (m *LustreJobstatCollector) Close() { + m.init = false +} diff --git a/collectors/lustreJobstatMetric.md b/collectors/lustreJobstatMetric.md new file mode 100644 index 0000000..3501959 --- /dev/null +++ b/collectors/lustreJobstatMetric.md @@ -0,0 +1,35 @@ + +## `lustre_jobstat` collector + +**Note**: This collector is meant to run on the Lustre servers, **not** the clients + +The Lustre filesystem provides a feature (`job_stats`) to group processes on client side with an identifier string (like a compute job with its jobid) and retrieve the file system operation counts on the server side. Check the section [How to configure `job_stats`]() for more information. + +### Configuration + +```json + "lustre_jobstat_": { + "lctl_command": "/path/to/lctl", + "use_sudo": false, + "exclude_metrics": [ + "setattr", + "getattr" + ], + "send_abs_values" : true, + + "jobid_regex" : "^(?P[%d%w%.]+)$" + } +``` + +The `lustre_jobstat` collector uses the `lctl` application with the `get_param` option to get all `mdt.*.job_stats` and `obdfilter.*.job_stats` metrics. These metrics are only available for root users. If password-less sudo is configured, you can enable `sudo` in the configuration. In the `exclude_metrics` list, some metrics can be excluded to reduce network traffic and storage. With the `send_abs_values` flag, the collector sends absolute values for the configured metrics. The `jobid_regex` can be used to split the Lustre `job_stats` identifier into multiple parts. Since JSON cannot handle strings like `\d`, use `%` instead of `\`. + +Metrics: +- `lustre_job_read_samples` (unit: `requests`) +- `lustre_job_read_min_bytes` (unit: `bytes`) +- `lustre_job_read_max_bytes` (unit: `bytes`) + +The collector adds the tags: `type=jobid,typeid=,stype=device,stype=`. + +The collector adds the mega information: `unit=,scope=job` + +### How to configure `job_stats` diff --git a/collectors/lustreMetric.go b/collectors/lustreMetric.go index eade2ca..6ce431d 100644 --- a/collectors/lustreMetric.go +++ b/collectors/lustreMetric.go @@ -14,10 +14,6 @@ import ( lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric" ) -const LUSTRE_SYSFS = `/sys/fs/lustre` -const LCTL_CMD = `lctl` -const LCTL_OPTION = `get_param` - type LustreCollectorConfig struct { LCtlCommand string `json:"lctl_command,omitempty"` ExcludeMetrics []string `json:"exclude_metrics,omitempty"` @@ -27,14 +23,6 @@ type LustreCollectorConfig struct { SendDiffValues bool `json:"send_diff_values,omitempty"` } -type LustreMetricDefinition struct { - name string - lineprefix string - lineoffset int - unit string - calc string -} - type LustreCollector struct { metricCollector tags map[string]string @@ -46,17 +34,209 @@ type LustreCollector struct { stats map[string]map[string]int64 // Data for last value per device and metric } +var LustreAbsMetrics = []LustreMetricDefinition{ + { + name: "lustre_read_requests", + lineprefix: "read_bytes", + lineoffset: 1, + offsetname: "samples", + unit: "requests", + calc: "none", + }, + { + name: "lustre_write_requests", + lineprefix: "write_bytes", + lineoffset: 1, + offsetname: "samples", + unit: "requests", + calc: "none", + }, + { + name: "lustre_read_bytes", + lineprefix: "read_bytes", + lineoffset: 6, + offsetname: "sum", + unit: "bytes", + calc: "none", + }, + { + name: "lustre_write_bytes", + lineprefix: "write_bytes", + lineoffset: 6, + offsetname: "sum", + unit: "bytes", + calc: "none", + }, + { + name: "lustre_open", + lineprefix: "open", + lineoffset: 1, + offsetname: "samples", + unit: "", + calc: "none", + }, + { + name: "lustre_close", + lineprefix: "close", + lineoffset: 1, + offsetname: "samples", + unit: "", + calc: "none", + }, + { + name: "lustre_setattr", + lineprefix: "setattr", + lineoffset: 1, + offsetname: "samples", + unit: "", + calc: "none", + }, + { + name: "lustre_getattr", + lineprefix: "getattr", + lineoffset: 1, + offsetname: "samples", + unit: "", + calc: "none", + }, + { + name: "lustre_statfs", + lineprefix: "statfs", + lineoffset: 1, + offsetname: "samples", + unit: "", + calc: "none", + }, + { + name: "lustre_inode_permission", + lineprefix: "inode_permission", + lineoffset: 1, + offsetname: "samples", + unit: "", + calc: "none", + }, +} + +var LustreDiffMetrics = []LustreMetricDefinition{ + { + name: "lustre_read_requests_diff", + lineprefix: "read_bytes", + lineoffset: 1, + offsetname: "samples", + unit: "requests", + calc: "difference", + }, + { + name: "lustre_write_requests_diff", + lineprefix: "write_bytes", + lineoffset: 1, + offsetname: "samples", + unit: "requests", + calc: "difference", + }, + { + name: "lustre_read_bytes_diff", + lineprefix: "read_bytes", + lineoffset: 6, + offsetname: "sum", + unit: "bytes", + calc: "difference", + }, + { + name: "lustre_write_bytes_diff", + lineprefix: "write_bytes", + lineoffset: 6, + offsetname: "sum", + unit: "bytes", + calc: "difference", + }, + { + name: "lustre_open_diff", + lineprefix: "open", + lineoffset: 1, + offsetname: "samples", + unit: "", + calc: "difference", + }, + { + name: "lustre_close_diff", + lineprefix: "close", + lineoffset: 1, + offsetname: "samples", + unit: "", + calc: "difference", + }, + { + name: "lustre_setattr_diff", + lineprefix: "setattr", + lineoffset: 1, + offsetname: "samples", + unit: "", + calc: "difference", + }, + { + name: "lustre_getattr_diff", + lineprefix: "getattr", + lineoffset: 1, + offsetname: "samples", + unit: "", + calc: "difference", + }, + { + name: "lustre_statfs_diff", + lineprefix: "statfs", + lineoffset: 1, + offsetname: "samples", + unit: "", + calc: "difference", + }, + { + name: "lustre_inode_permission_diff", + lineprefix: "inode_permission", + lineoffset: 1, + offsetname: "samples", + unit: "", + calc: "difference", + }, +} + +var LustreDeriveMetrics = []LustreMetricDefinition{ + { + name: "lustre_read_requests_rate", + lineprefix: "read_bytes", + lineoffset: 1, + offsetname: "samples", + unit: "requests/sec", + calc: "derivative", + }, + { + name: "lustre_write_requests_rate", + lineprefix: "write_bytes", + lineoffset: 1, + offsetname: "samples", + unit: "requests/sec", + calc: "derivative", + }, + { + name: "lustre_read_bw", + lineprefix: "read_bytes", + lineoffset: 6, + offsetname: "sum", + unit: "bytes/sec", + calc: "derivative", + }, + { + name: "lustre_write_bw", + lineprefix: "write_bytes", + lineoffset: 6, + offsetname: "sum", + unit: "bytes/sec", + calc: "derivative", + }, +} + func (m *LustreCollector) getDeviceDataCommand(device string) []string { - var command *exec.Cmd - statsfile := fmt.Sprintf("llite.%s.stats", device) - if m.config.Sudo { - command = exec.Command(m.sudoCmd, m.lctl, LCTL_OPTION, statsfile) - } else { - command = exec.Command(m.lctl, LCTL_OPTION, statsfile) - } - command.Wait() - stdout, _ := command.Output() - return strings.Split(string(stdout), "\n") + return executeLustreCommand(m.sudoCmd, m.lctl, LCTL_OPTION, fmt.Sprintf("llite.%s.stats", device), m.config.Sudo) } func (m *LustreCollector) getDevices() []string { @@ -108,183 +288,6 @@ func getMetricData(lines []string, prefix string, offset int) (int64, error) { // return strings.Split(string(buffer), "\n") // } -var LustreAbsMetrics = []LustreMetricDefinition{ - { - name: "lustre_read_requests", - lineprefix: "read_bytes", - lineoffset: 1, - unit: "requests", - calc: "none", - }, - { - name: "lustre_write_requests", - lineprefix: "write_bytes", - lineoffset: 1, - unit: "requests", - calc: "none", - }, - { - name: "lustre_read_bytes", - lineprefix: "read_bytes", - lineoffset: 6, - unit: "bytes", - calc: "none", - }, - { - name: "lustre_write_bytes", - lineprefix: "write_bytes", - lineoffset: 6, - unit: "bytes", - calc: "none", - }, - { - name: "lustre_open", - lineprefix: "open", - lineoffset: 1, - unit: "", - calc: "none", - }, - { - name: "lustre_close", - lineprefix: "close", - lineoffset: 1, - unit: "", - calc: "none", - }, - { - name: "lustre_setattr", - lineprefix: "setattr", - lineoffset: 1, - unit: "", - calc: "none", - }, - { - name: "lustre_getattr", - lineprefix: "getattr", - lineoffset: 1, - unit: "", - calc: "none", - }, - { - name: "lustre_statfs", - lineprefix: "statfs", - lineoffset: 1, - unit: "", - calc: "none", - }, - { - name: "lustre_inode_permission", - lineprefix: "inode_permission", - lineoffset: 1, - unit: "", - calc: "none", - }, -} - -var LustreDiffMetrics = []LustreMetricDefinition{ - { - name: "lustre_read_requests_diff", - lineprefix: "read_bytes", - lineoffset: 1, - unit: "requests", - calc: "difference", - }, - { - name: "lustre_write_requests_diff", - lineprefix: "write_bytes", - lineoffset: 1, - unit: "requests", - calc: "difference", - }, - { - name: "lustre_read_bytes_diff", - lineprefix: "read_bytes", - lineoffset: 6, - unit: "bytes", - calc: "difference", - }, - { - name: "lustre_write_bytes_diff", - lineprefix: "write_bytes", - lineoffset: 6, - unit: "bytes", - calc: "difference", - }, - { - name: "lustre_open_diff", - lineprefix: "open", - lineoffset: 1, - unit: "", - calc: "difference", - }, - { - name: "lustre_close_diff", - lineprefix: "close", - lineoffset: 1, - unit: "", - calc: "difference", - }, - { - name: "lustre_setattr_diff", - lineprefix: "setattr", - lineoffset: 1, - unit: "", - calc: "difference", - }, - { - name: "lustre_getattr_diff", - lineprefix: "getattr", - lineoffset: 1, - unit: "", - calc: "difference", - }, - { - name: "lustre_statfs_diff", - lineprefix: "statfs", - lineoffset: 1, - unit: "", - calc: "difference", - }, - { - name: "lustre_inode_permission_diff", - lineprefix: "inode_permission", - lineoffset: 1, - unit: "", - calc: "difference", - }, -} - -var LustreDeriveMetrics = []LustreMetricDefinition{ - { - name: "lustre_read_requests_rate", - lineprefix: "read_bytes", - lineoffset: 1, - unit: "requests/sec", - calc: "derivative", - }, - { - name: "lustre_write_requests_rate", - lineprefix: "write_bytes", - lineoffset: 1, - unit: "requests/sec", - calc: "derivative", - }, - { - name: "lustre_read_bw", - lineprefix: "read_bytes", - lineoffset: 6, - unit: "bytes/sec", - calc: "derivative", - }, - { - name: "lustre_write_bw", - lineprefix: "write_bytes", - lineoffset: 6, - unit: "bytes/sec", - calc: "derivative", - }, -} - func (m *LustreCollector) Init(config json.RawMessage) error { var err error m.name = "LustreCollector" @@ -297,7 +300,7 @@ func (m *LustreCollector) Init(config json.RawMessage) error { } m.setup() m.tags = map[string]string{"type": "node"} - m.meta = map[string]string{"source": m.name, "group": "Lustre"} + m.meta = map[string]string{"source": m.name, "group": "Lustre", "scope": "node"} // Lustre file system statistics can only be queried by user root // or with password-less sudo diff --git a/collectors/lustreMetricCommon.go b/collectors/lustreMetricCommon.go new file mode 100644 index 0000000..90ade78 --- /dev/null +++ b/collectors/lustreMetricCommon.go @@ -0,0 +1,190 @@ +package collectors + +import ( + "fmt" + "os/exec" + "regexp" + "strconv" + "strings" +) + +const LUSTRE_SYSFS = `/sys/fs/lustre` +const LCTL_CMD = `lctl` +const LCTL_OPTION = `get_param` + +type LustreMetricDefinition struct { + name string + lineprefix string + lineoffset int + offsetname string + unit string + calc string +} + +type LustreMetricData struct { + sample_time int64 + start_time int64 + elapsed_time int64 + op_data map[string]map[string]int64 + op_units map[string]string + sample_time_unit string + start_time_unit string + elapsed_time_unit string +} + +var devicePattern = regexp.MustCompile(`^[\w\d\-_]+\.([\w\d\-_]+)\.[\w\d\-_]+=$`) +var jobPattern = regexp.MustCompile(`^-\s*job_id:\s*([\w\d\-_\.:]+)$`) +var snapshotPattern = regexp.MustCompile(`^\s*snapshot_time\s*:\s*([\d\.]+)\s*([\w\d\-_\.]*)$`) +var startPattern = regexp.MustCompile(`^\s*start_time\s*:\s*([\d\.]+)\s*([\w\d\-_\.]*)$`) +var elapsedPattern = regexp.MustCompile(`^\s*elapsed_time\s*:\s*([\d\.]+)\s*([\w\d\-_\.]*)$`) +var linePattern = regexp.MustCompile(`^\s*([\w\d\-_\.]+):\s*\{\s*samples:\s*([\d\.]+),\s*unit:\s*([\w\d\-_\.]+),\s*min:\s*([\d\.]+),\s*max:\s*([\d\.]+),\s*sum:\s*([\d\.]+),\s*sumsq:\s*([\d\.]+)\s*\}`) + +func executeLustreCommand(sudo, lctl, option, search string, use_sudo bool) []string { + var command *exec.Cmd + if use_sudo { + command = exec.Command(sudo, lctl, option, search) + } else { + command = exec.Command(lctl, option, search) + } + command.Wait() + stdout, _ := command.Output() + return strings.Split(string(stdout), "\n") +} + +func splitTree(lines []string, splitRegex *regexp.Regexp) map[string][]string { + entries := make(map[string][]string) + ent_lines := make([]int, 0) + for i, l := range lines { + m := splitRegex.FindStringSubmatch(l) + if len(m) == 2 { + ent_lines = append(ent_lines, i) + } + } + if len(ent_lines) > 0 { + for i, idx := range ent_lines[:len(ent_lines)-1] { + m := splitRegex.FindStringSubmatch(lines[idx]) + entries[m[1]] = lines[idx+1 : ent_lines[i+1]] + } + last := ent_lines[len(ent_lines)-1] + m := splitRegex.FindStringSubmatch(lines[last]) + entries[m[1]] = lines[last:] + } + return entries +} + +func readDevices(lines []string) map[string][]string { + return splitTree(lines, devicePattern) +} + +func readJobs(lines []string) map[string][]string { + return splitTree(lines, jobPattern) +} + +func readJobdata(lines []string) LustreMetricData { + + jobdata := LustreMetricData{ + op_data: make(map[string]map[string]int64), + op_units: make(map[string]string), + sample_time: 0, + sample_time_unit: "nsec", + start_time: 0, + start_time_unit: "nsec", + elapsed_time: 0, + elapsed_time_unit: "nsec", + } + parseTime := func(value, unit string) int64 { + var t int64 = 0 + if len(unit) == 0 { + unit = "secs" + } + values := strings.Split(value, ".") + units := strings.Split(unit, ".") + if len(values) != len(units) { + fmt.Printf("Invalid time specification '%s' and '%s'\n", value, unit) + } + for i, v := range values { + if len(units) > i { + s, err := strconv.ParseInt(v, 10, 64) + if err == nil { + switch units[i] { + case "secs": + t += s * 1e9 + case "msecs": + t += s * 1e6 + case "usecs": + t += s * 1e3 + case "nsecs": + t += s + } + } + } + } + return t + } + parseNumber := func(value string) int64 { + s, err := strconv.ParseInt(value, 10, 64) + if err == nil { + return s + } + return 0 + } + for _, l := range lines { + if jobdata.sample_time == 0 { + m := snapshotPattern.FindStringSubmatch(l) + if len(m) == 3 { + if len(m[2]) > 0 { + jobdata.sample_time = parseTime(m[1], m[2]) + } else { + jobdata.sample_time = parseTime(m[1], "secs") + } + } + } + + if jobdata.start_time == 0 { + m := startPattern.FindStringSubmatch(l) + if len(m) == 3 { + if len(m[2]) > 0 { + jobdata.start_time = parseTime(m[1], m[2]) + } else { + jobdata.start_time = parseTime(m[1], "secs") + } + } + } + if jobdata.elapsed_time == 0 { + m := elapsedPattern.FindStringSubmatch(l) + if len(m) == 3 { + if len(m[2]) > 0 { + jobdata.elapsed_time = parseTime(m[1], m[2]) + } else { + jobdata.elapsed_time = parseTime(m[1], "secs") + } + } + } + m := linePattern.FindStringSubmatch(l) + if len(m) == 8 { + jobdata.op_units[m[1]] = m[3] + jobdata.op_data[m[1]] = map[string]int64{ + "samples": parseNumber(m[2]), + "min": parseNumber(m[4]), + "max": parseNumber(m[5]), + "sum": parseNumber(m[6]), + "sumsq": parseNumber(m[7]), + } + } + } + return jobdata +} + +func readCommandOutput(lines []string) map[string]map[string]LustreMetricData { + var data map[string]map[string]LustreMetricData = make(map[string]map[string]LustreMetricData) + devs := readDevices(lines) + for d, ddata := range devs { + data[d] = make(map[string]LustreMetricData) + jobs := readJobs(ddata) + for j, jdata := range jobs { + x := readJobdata(jdata) + data[d][j] = x + } + } + return data +}