Compare commits

...

12 Commits

Author SHA1 Message Date
Thomas Roehl
a7bf94a52b Update go.sum 2024-01-07 13:36:25 +01:00
Thomas Gruber
2e8182adb8 Merge branch 'develop' into slurm_cgroup_collector 2024-01-07 13:35:52 +01:00
Thomas Roehl
8055d1425c Code reduction and preparation for cgroup/v2 2024-01-07 13:13:22 +01:00
Thomas Roehl
8450bc4342 Add info about dummy script for testing 2024-01-05 17:48:32 +01:00
Thomas Roehl
113ccb3ac5 Add SLURM collector to README 2024-01-05 17:32:20 +01:00
Thomas Roehl
48335dd872 Fix for path joining in events 2024-01-05 17:24:06 +01:00
Thomas Roehl
bace84bad0 Add collector for SLURM jobs (analyzing /sys/fs/cgroup) and a dummy script for testing 2024-01-05 17:01:33 +01:00
Thomas Roehl
9b671ce68f Add comment about precision requirement for cc-metric-store 2023-12-11 16:06:28 +01:00
Thomas Roehl
226e8425cb Allow selection of timestamp precision in HttpSink 2023-12-11 14:57:06 +01:00
Thomas Gruber
a37f6603c8 Update cc-metric-collector.init 2023-12-11 13:47:53 +01:00
Thomas Roehl
78902305e8 Merge branch 'develop' of github.com:ClusterCockpit/cc-metric-collector into develop 2023-12-08 15:11:40 +01:00
Thomas Röhl
e7b77f7721 Add cpu_used (all-cpu_idle) to CpustatCollector 2023-04-05 11:20:09 +02:00
9 changed files with 868 additions and 5 deletions

View File

@@ -40,6 +40,7 @@ In contrast to the configuration files for sinks and receivers, the collectors c
* [`beegfs_meta`](./beegfsmetaMetric.md)
* [`beegfs_storage`](./beegfsstorageMetric.md)
* [`rocm_smi`](./rocmsmiMetric.md)
* [`slurm`](./slurmJobDetector.md)
## Todos

View File

@@ -40,6 +40,7 @@ var AvailableCollectors = map[string]MetricCollector{
"rocm_smi": new(RocmSmiCollector),
"self": new(SelfCollector),
"schedstat": new(SchedstatCollector),
"slurm": new(SlurmJobDetector),
"nfsiostat": new(NfsIOStatCollector),
}

View File

@@ -0,0 +1,620 @@
package collectors
import (
"encoding/json"
"fmt"
"os"
osuser "os/user"
filepath "path/filepath"
"strconv"
"strings"
"sync"
"time"
cclog "github.com/ClusterCockpit/cc-metric-collector/pkg/ccLogger"
lp "github.com/ClusterCockpit/cc-metric-collector/pkg/ccMetric"
)
// These are the fields we read from the JSON configuration
type SlurmJobDetectorConfig struct {
Interval string `json:"interval"`
SendJobEvents bool `json:"send_job_events,omitempty"`
SendStepEvents bool `json:"send_step_events,omitempty"`
SendJobMetrics bool `json:"send_job_metrics,omitempty"`
SendStepMetrics bool `json:"send_step_metrics,omitempty"`
BaseDirectory string `json:"sysfs_base,omitempty"`
CgroupVersion string `json:"cgroup_version"`
}
// This information is sent as JSON when an event occurs
type SlurmJobMetadata struct {
UID string `json:"uid,omitempty"`
JobId string `json:"jobid"`
Timestamp uint64 `json:"timestamp"`
Status string `json:"status"`
Step string `json:"step,omitempty"`
Cpus []int `json:"cpus,omitempty"`
Memories []int `json:"memories,omitempty"`
MemoryLimitHard int64 `json:"memory_limit_hard,omitempty"`
MemoryLimitSoft int64 `json:"memory_limit_soft,omitempty"`
Devices []string `json:"devices,omitempty"`
}
type SlurmJobMetrics struct {
MemoryUsage int64
MaxMemoryUsage int64
LimitMemoryUsage int64
CpuUsageUser int64
CpuUsageSys int64
}
// This contains all variables we need during execution and the variables
// defined by metricCollector (name, init, ...)
type SlurmJobDetector struct {
metricCollector
config SlurmJobDetectorConfig // the configuration structure
meta map[string]string // default meta information
interval time.Duration // the interval parsed from configuration
ticker *time.Ticker // own timer for event checking
output chan lp.CCMetric // stores the output channel passed to Read() so the event goroutine can send
wg sync.WaitGroup // sync group for event checking management
done chan bool // channel for event checking management
directories map[string]SlurmJobMetadata // directory -> data mapping (data stored to re-send data in end job events)
}
const default_base_dir = "/sys/fs/cgroup"
const default_cgroup_version = "v1"
// Not required to be pre-initialized; these are overwritten in Init() based on the configuration
var cpuacct_base = filepath.Join(default_base_dir, "cpuacct", "slurm")
var memory_base = filepath.Join(default_base_dir, "memory", "slurm")
var cpuset_base = filepath.Join(default_base_dir, "cpuset", "slurm")
var devices_base = filepath.Join(default_base_dir, "devices", "slurm")
// Filenames for cgroup/v1
var limit_in_bytes_file = "memory.limit_in_bytes"
var soft_limit_in_bytes_file = "memory.soft_limit_in_bytes"
var cpus_effective_file = "cpuset.effective_cpus"
var mems_effective_file = "cpuset.effective_mems"
var devices_list_file = "devices.list"
var usage_in_bytes_file = "memory.usage_in_bytes"
var max_usage_in_bytes_file = "memory.max_usage_in_bytes"
var cpuacct_usage_file = "cpuacct.usage"
var cpuacct_usage_user_file = "cpuacct.usage_user"
// Filenames for cgroup/v2
// In Init() the filenames are set based on configuration
const soft_limit_in_bytes_file_v2 = "memory.high"
const limit_in_bytes_file_v2 = "memory.max"
const cpus_effective_file_v2 = "cpuset.cpus.effective"
const mems_effective_file_v2 = "cpuset.mems.effective"
const devices_list_file_v2 = "devices.list"
const usage_in_bytes_file_v2 = "memory.usage_in_bytes"
const max_usage_in_bytes_file_v2 = "memory.max_usage_in_bytes"
const cpuacct_usage_file_v2 = "cpuacct.usage"
const cpuacct_usage_user_file_v2 = "cpuacct.usage_user"
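// NOTE: the four usage/accounting filenames above still mirror their cgroup/v1
// counterparts; they appear to be placeholders until full cgroup/v2 support lands.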
func fileToInt64(filename string) (int64, error) {
data, err := os.ReadFile(filename)
if err != nil {
return 0, err
}
// cgroup files end with a newline, so trim before parsing
return strconv.ParseInt(strings.TrimSpace(string(data)), 10, 64)
}
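// ExpandList expands a cpuset-style list string such as "0-3,7,10-12" into
// the individual integers, e.g. [0 1 2 3 7 10 11 12].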
func ExpandList(strlist string) []int {
out := make([]int, 0)
level1 := strings.Split(strlist, ",")
if len(level1) > 0 {
for _, entry := range level1 {
var s, e int
_, err := fmt.Sscanf(entry, "%d-%d", &s, &e)
if err == nil {
if s < e {
for i := s; i <= e; i++ {
out = append(out, i)
}
} else {
for i := s; i >= e; i-- {
out = append(out, i)
}
}
} else {
_, err := fmt.Sscanf(entry, "%d", &s)
if err == nil {
out = append(out, s)
}
}
}
}
return out
}
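// ParseDevices parses the content of a cgroup/v1 devices.list file;
// each line is one device rule like "a *:* rwm" (type major:minor access).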
func ParseDevices(devlist string) []string {
out := make([]string, 0)
for _, line := range strings.Split(strings.TrimSpace(devlist), "\n") {
if len(line) > 0 {
out = append(out, line)
}
}
return out
}
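// GetPathParts returns the uid_*, job_* and step_* components of a cgroup
// path, e.g. ".../slurm/uid_1000/job_42/step_0" -> ["uid_1000", "job_42", "step_0"].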
func GetPathParts(path string) []string {
out := make([]string, 0)
uid := ""
jobid := ""
step := ""
parts := strings.Split(path, "/")
// the folders of interest are at the end of the list, so traverse
// from the back
for i := len(parts) - 1; i >= 0; i-- {
if strings.HasPrefix(parts[i], "uid_") {
uid = parts[i]
} else if strings.HasPrefix(parts[i], "job_") {
jobid = parts[i]
} else if strings.HasPrefix(parts[i], "step_") {
step = parts[i]
}
}
// only cgroup/v1 provides a uid folder; it has to be the first entry
if len(uid) > 0 {
out = append(out, uid)
}
if len(jobid) > 0 {
out = append(out, jobid)
}
// only if it's a step folder
if len(step) > 0 {
out = append(out, step)
}
return out
}
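// GetIdsFromParts strips the prefixes from the parts returned by GetPathParts,
// e.g. ["uid_1000", "job_42"] -> ("1000", "42", "").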
func GetIdsFromParts(parts []string) (string, string, string) {
uid := ""
jobid := ""
step := ""
for _, p := range parts {
if strings.HasPrefix(p, "job_") {
jobid = strings.TrimPrefix(p, "job_")
} else if strings.HasPrefix(p, "uid_") {
uid = strings.TrimPrefix(p, "uid_")
} else if strings.HasPrefix(p, "step_") {
step = strings.TrimPrefix(p, "step_")
}
}
return uid, jobid, step
}
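// CheckEvents globs for job (and optionally step) directories below the SLURM
// cgroup and compares them with the known ones: new directories trigger a
// start event, vanished directories an end event.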
func (m *SlurmJobDetector) CheckEvents(timestamp time.Time) {
var err error
var dirs []string
parts := make([]string, 0, 3)
parts = append(parts, cpuacct_base)
if m.config.CgroupVersion == "v1" {
parts = append(parts, "uid_[0-9]*")
}
parts = append(parts, "job_[0-9]*")
dirs, err = filepath.Glob(filepath.Join(parts...))
if err != nil {
cclog.ComponentError(m.name, "Cannot get directory list for SLURM jobs")
return
}
if m.config.SendStepEvents {
parts = append(parts, "step_*")
sdirs, err := filepath.Glob(filepath.Join(parts...))
if err != nil {
cclog.ComponentError(m.name, "Cannot get directory list for SLURM steps")
return
}
dirs = append(dirs, sdirs...)
}
for _, d := range dirs {
// Folder not in known directories map -> New job
if _, ok := m.directories[d]; !ok {
dirParts := GetPathParts(d)
data, err := m.NewJobEvent(dirParts, timestamp, m.output)
if err == nil {
// Add the directory to the map
cclog.ComponentDebug(m.name, "Adding directory ", d, " to known directories")
m.directories[d] = data
}
}
}
for d, data := range m.directories {
// Known directory but it does not exist anymore -> Vanished/Finished job
if _, ok := stringArrayContains(dirs, d); !ok {
dirParts := GetPathParts(d)
err := m.EndJobEvent(dirParts, data, timestamp, m.output)
if err != nil {
uid, jobid, step := GetIdsFromParts(dirParts)
if len(step) == 0 {
cclog.ComponentError(m.name, "Failed to end job for user ", uid, " jobid ", jobid)
} else {
cclog.ComponentError(m.name, "Failed to end job for user ", uid, " jobid ", jobid, " step ", step)
}
}
// Remove the directory from the map
cclog.ComponentDebug(m.name, "Removing directory ", d, " to known directories")
delete(m.directories, d)
}
}
}
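// NewJobEvent collects the metadata of a new job or step from the cgroup files
// and sends a "start" event. The metadata is returned so that it can be stored
// and re-sent with the matching end event.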
func (m *SlurmJobDetector) NewJobEvent(parts []string, timestamp time.Time, output chan lp.CCMetric) (SlurmJobMetadata, error) {
uid, jobid, step := GetIdsFromParts(parts)
pathstr := filepath.Join(parts...)
if len(jobid) == 0 {
cclog.ComponentError(m.name, "No jobid in path ", pathstr)
return SlurmJobMetadata{}, fmt.Errorf("no jobid in path %s", pathstr)
}
jobtags := map[string]string{
"type": "job",
"type-id": jobid,
}
// Fill job JSON with data from cgroup
md := SlurmJobMetadata{
JobId: jobid,
Timestamp: uint64(timestamp.Unix()),
Status: "start",
}
// cgroup/v2 has no uid in parts
if len(uid) > 0 {
md.UID = uid
}
if len(step) > 0 {
md.Step = step
jobtags["stype"] = "step"
jobtags["stype-id"] = step
}
job_cpus, err := os.ReadFile(filepath.Join(cpuset_base, pathstr, cpus_effective_file))
if err == nil {
md.Cpus = ExpandList(string(job_cpus))
}
job_mems, err := os.ReadFile(filepath.Join(cpuset_base, pathstr, mems_effective_file))
if err == nil {
md.Memories = ExpandList(string(job_mems))
}
job_devs, err := os.ReadFile(filepath.Join(devices_base, pathstr, devices_list_file))
if err == nil {
md.Devices = ParseDevices(string(job_devs))
}
x, err := fileToInt64(filepath.Join(memory_base, pathstr, limit_in_bytes_file))
if err == nil {
md.MemoryLimitHard = x
}
x, err = fileToInt64(filepath.Join(memory_base, pathstr, soft_limit_in_bytes_file))
if err == nil {
md.MemoryLimitSoft = x
}
jobjson, err := json.Marshal(md)
if err == nil {
y, err := lp.New("slurm", jobtags, m.meta, map[string]interface{}{"value": string(jobjson)}, timestamp)
if err == nil {
if len(uid) > 0 {
y.AddMeta("uid", uid)
uname, err := osuser.LookupId(uid)
if err == nil {
y.AddMeta("username", uname.Username)
}
}
y.AddMeta("metric_type", "event")
output <- y
}
}
return md, nil
}
// Not sure if it works with steps since the folders commonly do not vanish when a job step is finished
func (m *SlurmJobDetector) EndJobEvent(parts []string, data SlurmJobMetadata, timestamp time.Time, output chan lp.CCMetric) error {
uid, jobid, step := GetIdsFromParts(parts)
pathstr := filepath.Join(parts...)
if len(jobid) == 0 {
err := fmt.Errorf("no jobid in path %s", pathstr)
cclog.ComponentError(m.name, err.Error())
return err
}
jobtags := map[string]string{
"type": "job",
"type-id": jobid,
}
// Fill job JSON with data from cgroup
md := SlurmJobMetadata{
JobId: jobid,
Timestamp: uint64(timestamp.Unix()),
Cpus: data.Cpus,
Memories: data.Memories,
Devices: data.Devices,
MemoryLimitHard: data.MemoryLimitHard,
MemoryLimitSoft: data.MemoryLimitSoft,
Status: "end",
}
// cgroup/v2 has no uid in parts
if len(uid) > 0 {
md.UID = uid
}
if len(step) > 0 {
md.Step = step
jobtags["stype"] = "step"
jobtags["stype-id"] = step
}
jobjson, err := json.Marshal(md)
if err == nil {
y, err := lp.New("slurm", jobtags, m.meta, map[string]interface{}{"value": string(jobjson)}, timestamp)
if err == nil {
if len(uid) > 0 {
y.AddMeta("uid", uid)
uname, err := osuser.LookupId(uid)
if err == nil {
y.AddMeta("username", uname.Username)
}
}
y.AddMeta("metric_type", "event")
output <- y
} else {
return err
}
} else {
return err
}
return nil
}
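// ReadMetrics reads the current memory and CPU accounting values of a job or
// step directory. CPU usage is derived from the user/total time ratio.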
func (m *SlurmJobDetector) ReadMetrics(parts []string) (SlurmJobMetrics, error) {
jobdata := SlurmJobMetrics{
MemoryUsage: 0,
MaxMemoryUsage: 0,
LimitMemoryUsage: 0,
CpuUsageUser: 0,
CpuUsageSys: 0,
}
part := filepath.Join(parts...)
x, err := fileToInt64(filepath.Join(memory_base, part, usage_in_bytes_file))
if err == nil {
jobdata.MemoryUsage = x
}
x, err = fileToInt64(filepath.Join(memory_base, part, max_usage_in_bytes_file))
if err == nil {
jobdata.MaxMemoryUsage = x
}
tu, err := fileToInt64(filepath.Join(cpuacct_base, part, cpuacct_usage_file))
if err == nil && tu > 0 {
uu, err := fileToInt64(filepath.Join(cpuacct_base, part, cpuacct_usage_user_file))
if err == nil {
jobdata.CpuUsageUser = (uu * 100) / tu
jobdata.CpuUsageSys = 100 - jobdata.CpuUsageUser
}
}
return jobdata, nil
}
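// SendMetrics wraps the collected values into the job_mem_used,
// job_max_mem_used, job_cpu_user and job_cpu_sys metrics and sends them.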
func (m *SlurmJobDetector) SendMetrics(jobtags, jobmeta map[string]string, jobmetrics SlurmJobMetrics, timestamp time.Time, output chan lp.CCMetric) {
y, err := lp.New("job_mem_used", jobtags, m.meta, map[string]interface{}{"value": jobmetrics.MemoryUsage}, timestamp)
if err == nil {
y.AddMeta("unit", "Bytes")
for k, v := range jobmeta {
y.AddMeta(k, v)
}
output <- y
}
y, err = lp.New("job_max_mem_used", jobtags, m.meta, map[string]interface{}{"value": jobmetrics.MaxMemoryUsage}, timestamp)
if err == nil {
y.AddMeta("unit", "Bytes")
for k, v := range jobmeta {
y.AddMeta(k, v)
}
output <- y
}
y, err = lp.New("job_cpu_user", jobtags, m.meta, map[string]interface{}{"value": jobmetrics.CpuUsageUser}, timestamp)
if err == nil {
y.AddMeta("unit", "%")
for k, v := range jobmeta {
y.AddMeta(k, v)
}
output <- y
}
y, err = lp.New("job_cpu_sys", jobtags, m.meta, map[string]interface{}{"value": jobmetrics.CpuUsageSys}, timestamp)
if err == nil {
y.AddMeta("unit", "%")
for k, v := range jobmeta {
y.AddMeta(k, v)
}
output <- y
}
}
// Init initializes the SlurmJobDetector collector
// Called once by the collector manager
// All tags, meta data tags and metrics that do not change over the runtime should be set here
func (m *SlurmJobDetector) Init(config json.RawMessage) error {
var err error
m.name = "SlurmJobDetector"
// This is for later use, also call it early
m.setup()
// Can be run in parallel with others
m.parallel = true
// Define meta information sent with each metric
m.meta = map[string]string{"source": m.name, "group": "SLURM"}
// Set configuration defaults
m.config.SendJobEvents = false
m.config.SendJobMetrics = false
m.config.SendStepEvents = false
m.config.SendStepMetrics = false
m.config.CgroupVersion = default_cgroup_version
m.config.BaseDirectory = default_base_dir
// Read in the JSON configuration
if len(config) > 0 {
err = json.Unmarshal(config, &m.config)
if err != nil {
cclog.ComponentError(m.name, "Error reading config:", err.Error())
return err
}
}
// Parse the read interval duration
m.interval, err = time.ParseDuration(m.config.Interval)
if err != nil {
cclog.ComponentError(m.name, "Error parsing interval:", err.Error())
return err
}
if m.config.CgroupVersion != "v1" && m.config.CgroupVersion != "v2" {
cclog.ComponentError(m.name, "Invalid cgroup version", m.config.CgroupVersion, ":", err.Error())
return err
}
// Storage for output channel
m.output = nil
// Management channel for the timer function.
m.done = make(chan bool)
// Create the own ticker
m.ticker = time.NewTicker(m.interval)
// Create space for storing files
m.directories = make(map[string]SlurmJobMetadata)
if _, err := os.Stat(m.config.BaseDirectory); err != nil {
err := fmt.Errorf("cannot find base folder %s", m.config.BaseDirectory)
cclog.ComponentError(m.name, err.Error())
return err
}
cclog.ComponentDebug(m.name, "Using base directory", m.config.BaseDirectory)
cpuacct_base = filepath.Join(m.config.BaseDirectory, "cpuacct", "slurm")
memory_base = filepath.Join(m.config.BaseDirectory, "memory", "slurm")
cpuset_base = filepath.Join(m.config.BaseDirectory, "cpuset", "slurm")
devices_base = filepath.Join(m.config.BaseDirectory, "devices", "slurm")
if m.config.CgroupVersion == "v2" {
cclog.ComponentDebug(m.name, "Reconfiguring folders and filenames for cgroup/v2")
cpuacct_base = filepath.Join(m.config.BaseDirectory, "system.slice", "slurmstepd.scope")
memory_base = filepath.Join(m.config.BaseDirectory, "system.slice", "slurmstepd.scope")
cpuset_base = filepath.Join(m.config.BaseDirectory, "system.slice", "slurmstepd.scope")
devices_base = filepath.Join(m.config.BaseDirectory, "system.slice", "slurmstepd.scope")
cpus_effective_file = cpus_effective_file_v2
mems_effective_file = mems_effective_file_v2
devices_list_file = devices_list_file_v2
limit_in_bytes_file = limit_in_bytes_file_v2
soft_limit_in_bytes_file = soft_limit_in_bytes_file_v2
usage_in_bytes_file = usage_in_bytes_file_v2
max_usage_in_bytes_file = max_usage_in_bytes_file_v2
cpuacct_usage_file = cpuacct_usage_file_v2
cpuacct_usage_user_file = cpuacct_usage_user_file_v2
}
if _, err := os.Stat(cpuacct_base); err != nil {
err := fmt.Errorf("cannot find SLURM cgroup folder %s", cpuacct_base)
cclog.ComponentError(m.name, err.Error())
return err
}
m.wg.Add(1)
go func() {
for {
select {
case <-m.done:
// Exit the timer loop
cclog.ComponentDebug(m.name, "Closing...")
m.wg.Done()
return
case timestamp := <-m.ticker.C:
if m.output != nil {
cclog.ComponentDebug(m.name, "Checking events")
m.CheckEvents(timestamp)
}
}
}
}()
// Set this flag only if everything is initialized properly, all required files exist, ...
m.init = true
return err
}
// Read collects all metrics belonging to the SlurmJobDetector collector
// and sends them through the output channel to the collector manager
func (m *SlurmJobDetector) Read(interval time.Duration, output chan lp.CCMetric) {
// Create a sample metric
timestamp := time.Now()
// Capture the output channel for event sending in the goroutine, so that at
// startup the event checking waits until the first call of Read()
m.output = output
// This is the reading for metrics for all running jobs. For the event checking, check
// the goroutine in Init()
parts := make([]string, 0)
parts = append(parts, cpuacct_base)
// Only cgroup/v1 has a uid_* folder
if m.config.CgroupVersion == "v1" {
parts = append(parts, "uid_[0-9]*")
}
parts = append(parts, "job_[0-9]*")
// Get folders based on constructed glob path
dirs, err := filepath.Glob(filepath.Join(parts...))
if err != nil {
cclog.ComponentError(m.name, "Cannot get directory list for SLURM jobs")
return
}
if m.config.SendStepEvents {
// Add step lookup if we process step events
parts = append(parts, "step_*")
// Get step folders based on constructed glob path
sdirs, err := filepath.Glob(filepath.Join(parts...))
if err != nil {
cclog.ComponentError(m.name, "Cannot get directory list for SLURM steps")
return
}
// Add step folders to directory list for processing
dirs = append(dirs, sdirs...)
}
for _, d := range dirs {
dirParts := GetPathParts(d) // Gets uid_*, job_* and step_* (if available)
uid, jobid, step := GetIdsFromParts(dirParts) // extracts the IDs from the available parts
// Create tags map for the job
jobtags := map[string]string{
"type": "job",
"type-id": jobid,
}
// Create meta map for the job
jobmeta := make(map[string]string)
// if cgroup/v1, we have a uid
if len(uid) > 0 {
jobmeta["uid"] = uid
uname, err := osuser.LookupId(uid)
if err == nil {
jobmeta["username"] = uname.Username
}
}
// if this is a step directory, add the sub type with value
if len(step) > 0 {
jobtags["stype"] = "step"
jobtags["stype-id"] = step
}
jobmetrics, err := m.ReadMetrics(dirParts)
if err == nil {
// Send all metrics for the job
m.SendMetrics(jobtags, jobmeta, jobmetrics, timestamp, output)
}
}
}
// Close metric collector: close network connection, close files, close libraries, ...
// Called once by the collector manager
func (m *SlurmJobDetector) Close() {
m.done <- true
m.wg.Wait()
// Unset flag
m.init = false
}

View File

@@ -0,0 +1,45 @@
# `slurm` collector
```json
"slurm": {
"interval" : "1s",
"send_job_events" : true,
"send_job_metrics" : true,
"send_step_events": false,
"send_step_metrics" : false,
"cgroup_version" : "v1"
}
```
The `slurm` collector reads data from `/sys/fs/cgroup/` to detect the creation and deletion of SLURM jobs on the node. When it detects such an event, it collects some event-related information and sends the event. The event detection happens every `interval`.
Additionally, for all running jobs, it can collect metrics and send them out. This collection is done in the global collector interval.
Options:
* `interval`: Time interval in which the folders are checked for new or vanished SLURM jobs
* `send_job_events`: Send events when a job starts or ends
* `send_job_metrics`: Send metrics of each running job with the global collector interval
* `send_step_events`: Send events when a job step starts
* `send_step_metrics`: Send metrics of each job step with the global collector interval
* `cgroup_version`: Which cgroup version is in use. Possible values are `v1` and `v2`. `v1` is the default
* `sysfs_base`: (Testing only) Set the base path for lookups, default `/sys/fs/cgroup`.
For `cgroup_version = v2`, the collector searches for jobs at `<sysfs_base>/system.slice/slurmstepd.scope`, by default with `<sysfs_base>=/sys/fs/cgroup`. If the cgroup folders are created below `/sys/fs/cgroup/unified`, adjust the `sysfs_base` option to `/sys/fs/cgroup/unified`.
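With `send_job_events` enabled, each detected job start or end is sent as an event whose value is the job metadata encoded as JSON. A start event might look like this (illustrative values; the fields follow the `SlurmJobMetadata` structure):
```json
{
  "uid": "1000",
  "jobid": "12345",
  "timestamp": 1704470400,
  "status": "start",
  "cpus": [0, 1, 2, 3],
  "memories": [0],
  "memory_limit_hard": 249036800000,
  "memory_limit_soft": 249036800000
}
```
With `send_job_metrics` enabled, the collector additionally sends the `job_mem_used`, `job_max_mem_used`, `job_cpu_user` and `job_cpu_sys` metrics for every running job.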
## Testing
For testing the collector, you can specify a different base directory that is checked for new events. The default is `/sys/fs/cgroup/`; it can be changed with the `sysfs_base` option in the configuration. Moreover, with the `slurmJobDetector_dummy.sh` script you can create and delete "jobs" for testing. Use the same directory with `--basedir`. At the moment, it generates only cgroup/v1 directory structures.
```sh
$ slurmJobDetector_dummy.sh -h
Usage: slurmJobDetector_dummy.sh <opts>
[ -h | --help ]
[ -v | --verbosity ]
[ -u | --uid <UID> (default: XXXX) ]
[ -j | --jobid <JOBID> (default: random) ]
[ -b | --basedir <BASEDIR> (default: ./slurm-test) ]
[ -d | --delete ]
[ -l | --list ]
```
With no options, it creates a job with the executing user's UID and a random JOBID. For deletion, use `-d -j <JOBID>`; deletion requires a JOBID. To get a list of all UIDs and JOBIDs that currently exist, use `-l`.
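A typical test session might look like this (assuming `"sysfs_base": "./slurm-test"` in the collector configuration; the UID value is illustrative):
```sh
$ ./slurmJobDetector_dummy.sh -b ./slurm-test -j 12345    # create fake job 12345
$ ./slurmJobDetector_dummy.sh -b ./slurm-test -l          # list existing fake jobs
UID 1000 JOBID 12345
$ ./slurmJobDetector_dummy.sh -b ./slurm-test -d -j 12345 # delete it again
```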

View File

@@ -0,0 +1,139 @@
#!/bin/bash -l
# Some settings for scripting with less headache
# when a command fails, bash exits instead of continuing
set -o errexit
# make the script fail, when accessing an unset variable
# use "${VARNAME-}" instead of "$VARNAME" when you want to access
# a variable that may or may not have been set
set -o nounset
# ensure that a pipeline command is treated as failed, even if one command in the pipeline fails
set -o pipefail
# enable debug mode, by running your script as TRACE=1 ./script.sh
if [[ "${TRACE-0}" == "1" ]]; then
set -o xtrace
fi
# Default values for variables
# (bash's UID variable is readonly, so TUID holds the overridable target UID)
: ${TUID=$(id -u)}
: ${VERBOSITY=0}
: ${DELETE=0}
: ${LIST=0}
: ${JOBID="random"}
: ${BASE=./slurmJobDetector-sys-fs-cgroup}
# Print usage if needed
usage()
{
echo "
Usage: $(basename $0) <opts>
[ -h | --help ]
[ -v | --verbosity ]
[ -u | --uid <UID> (default: ${TUID}) ]
[ -j | --jobid <JOBID> (default: ${JOBID}) ]
[ -b | --basedir <BASEDIR> (default: ${BASE}) ]
[ -d | --delete ]
[ -l | --list ]
"
exit $1;
}
cd "$(dirname "$0")"
main() {
PARSED_ARGUMENTS=$(getopt -a -n $(basename $0) -o hj:u:vb:dl --long help,verbosity,uid:,jobid:,basedir:,delete,list -- "$@")
VALID_ARGUMENTS=$?
# Parsing failed
if [[ "$VALID_ARGUMENTS" != "0" ]]; then
usage 2
fi
# No argument (comment out if command should work without any arguments)
# if [[ "${PARSED_ARGUMENTS}" == " --" ]]; then
# usage 0
# fi
# Evaluate arguments
eval set -- "$PARSED_ARGUMENTS"
while :
do
case "$1" in
-h | --help) usage 0; shift ;;
-v | --verbosity) VERBOSITY=1; shift ;;
-d | --delete) DELETE=1; shift ;;
-l | --list) LIST=1; shift ;;
-u | --uid) TUID=$2 ; shift 2 ;;
-j | --jobid) JOBID=$2 ; shift 2 ;;
-b | --basedir) BASE=$2 ; shift 2 ;;
--) shift; break ;;
*) echo "Unexpected option: $1 - this should not happen."
usage 2;;
esac
done
if [[ ${LIST} -eq 1 ]]; then
for F in $(ls -d ${BASE}/cpuset/slurm/uid_*/job_*); do
JOBID=$(echo "$F" | rev | cut -d '/' -f 1 | rev | cut -d '_' -f 2)
MYUID=$(echo "$F" | rev | cut -d '/' -f 2 | rev | cut -d '_' -f 2)
echo "UID ${MYUID} JOBID ${JOBID}"
done
exit 0
fi
if [[ ${JOBID} == "random" ]]; then
if [[ ${DELETE} -eq 1 ]]; then
echo "Cannot use random JOBID for deletion"
exit 1
else
JOBID=$RANDOM
fi
fi
FOLDERS="cpuset cpuacct memory devices"
if [[ ${DELETE} -eq 1 ]]; then
for F in ${FOLDERS}; do
rm -r --force "${BASE}/${F}/slurm/uid_${TUID}/job_${JOBID}"
done
else
for F in ${FOLDERS}; do
if [[ $VERBOSITY -eq 1 ]]; then
echo "${BASE}/${F}/slurm/uid_${TUID}/job_${JOBID}"
fi
mkdir -p "${BASE}/${F}/slurm/uid_${TUID}/job_${JOBID}"
done
echo "0-71" > "${BASE}/cpuset/slurm/uid_${TUID}/job_${JOBID}/cpuset.effective_cpus"
echo "0-3" > "${BASE}/cpuset/slurm/uid_${TUID}/job_${JOBID}/cpuset.effective_mems"
echo "249036800000" > "${BASE}/memory/slurm/uid_${TUID}/job_${JOBID}/memory.limit_in_bytes"
echo "249036800000" > "${BASE}/memory/slurm/uid_${TUID}/job_${JOBID}/memory.soft_limit_in_bytes"
echo "13987840" > "${BASE}/memory/slurm/uid_${TUID}/job_${JOBID}/memory.usage_in_bytes"
echo "14966784" > "${BASE}/memory/slurm/uid_${TUID}/job_${JOBID}/memory.max_usage_in_bytes"
echo "60" > "${BASE}/memory/slurm/uid_${TUID}/job_${JOBID}/memory.swappiness"
echo "474140369" > "${BASE}/cpuacct/slurm/uid_${TUID}/job_${JOBID}/cpuacct.usage"
echo "169078878" > "${BASE}/cpuacct/slurm/uid_${TUID}/job_${JOBID}/cpuacct.usage_user"
echo "315684619" > "${BASE}/cpuacct/slurm/uid_${TUID}/job_${JOBID}/cpuacct.usage_sys"
echo "a *:* rwm" > "${BASE}/devices/slurm/uid_${TUID}/job_${JOBID}/devices.list"
#memory.numa_stat
#total=0 N0=0 N1=0 N2=0 N3=0
#file=0 N0=0 N1=0 N2=0 N3=0
#anon=0 N0=0 N1=0 N2=0 N3=0
#unevictable=0 N0=0 N1=0 N2=0 N3=0
#hierarchical_total=958 N0=28 N1=579 N2=180 N3=171
#hierarchical_file=194 N0=0 N1=194 N2=0 N3=0
#hierarchical_anon=764 N0=28 N1=385 N2=180 N3=171
#hierarchical_unevictable=0 N0=0 N1=0 N2=0 N3=0
fi
}
main "$@"

go.sum
View File

@@ -8,6 +8,7 @@ github.com/ClusterCockpit/cc-units v0.4.0 h1:zP5DOu99GmErW0tCDf0gcLrlWt42RQ9dpoO
github.com/ClusterCockpit/cc-units v0.4.0/go.mod h1:3S3PAhAayS3pbgcT4q9Vn9VJw22Op51X0YimtG77zBw=
github.com/ClusterCockpit/go-rocm-smi v0.3.0 h1:1qZnSpG7/NyLtc7AjqnUL9Jb8xtqG1nMVgp69rJfaR8=
github.com/ClusterCockpit/go-rocm-smi v0.3.0/go.mod h1:+I3UMeX3OlizXDf1WpGD43W4KGZZGVSGmny6rTeOnWA=
github.com/Joker/hpp v1.0.0 h1:65+iuJYdRXv/XyN62C1uEmmOx3432rNG/rKlX6V7Kkc=
github.com/Joker/hpp v1.0.0/go.mod h1:8x5n+M1Hp5hC0g8okX3sR3vFQwynaX/UgSOM9MeBKzY=
github.com/Joker/jade v1.1.3 h1:Qbeh12Vq6BxURXT1qZBRHsDxeURB8ztcL6f3EXSGeHk=
github.com/Joker/jade v1.1.3/go.mod h1:T+2WLyt7VH6Lp0TRxQrUYEs64nRc83wkMQrfeIQKduM=
@@ -16,10 +17,12 @@ github.com/NVIDIA/go-nvml v0.12.0-1 h1:6mdjtlFo+17dWL7VFPfuRMtf0061TF4DKls9pkSw6
github.com/NVIDIA/go-nvml v0.12.0-1/go.mod h1:hy7HYeQy335x6nEss0Ne3PYqleRa6Ct+VKD9RQ4nyFs=
github.com/PaesslerAG/gval v1.2.2 h1:Y7iBzhgE09IGTt5QgGQ2IdaYYYOU134YGHBThD+wm9E=
github.com/PaesslerAG/gval v1.2.2/go.mod h1:XRFLwvmkTEdYziLdaCeCa5ImcGVrfQbeNUbVR+C6xac=
github.com/PaesslerAG/jsonpath v0.1.0 h1:gADYeifvlqK3R3i2cR5B4DGgxLXIPb3TRTH1mGi0jPI=
github.com/PaesslerAG/jsonpath v0.1.0/go.mod h1:4BzmtoM/PI8fPO4aQGIusjGxGir2BzcV0grWtFzq1Y8=
github.com/RaveNoX/go-jsoncommentstrip v1.0.0/go.mod h1:78ihd09MekBnJnxpICcwzCMzGrKSKYe4AqU6PDYYpjk=
github.com/Shopify/goreferrer v0.0.0-20220729165902-8cddb4f5de06 h1:KkH3I3sJuOLP3TjA/dfr4NAY8bghDwnXiU7cTKxQqo0=
github.com/Shopify/goreferrer v0.0.0-20220729165902-8cddb4f5de06/go.mod h1:7erjKLwalezA0k99cWs5L11HWOAPNjdUZ6RxH1BXbbM=
github.com/ajg/form v1.5.1 h1:t9c7v8JUKu/XxOGBU0yjNpaMloxGEJhUkqFRq0ibGeU=
github.com/andybalholm/brotli v1.0.5 h1:8uQZIdzKmjc/iuPu7O2ioW48L81FgatrcpfFmiq/cCs=
github.com/andybalholm/brotli v1.0.5/go.mod h1:fO7iG3H7G2nSZ7m0zPUDn85XEX2GTukHGRSepvi9Eig=
github.com/apapsch/go-jsonmerge/v2 v2.0.0 h1:axGnT1gRIfimI7gJifB699GoE/oq+F2MU7Dml6nw9rQ=
@@ -33,7 +36,6 @@ github.com/bytedance/sonic v1.5.0/go.mod h1:ED5hyg4y6t3/9Ku1R6dU/4KyJ48DZ4jPhfY1
github.com/bytedance/sonic v1.10.0-rc/go.mod h1:ElCzW+ufi8qKqNW0FY314xriJhyJhuoJ3gFZdAHF7NM=
github.com/bytedance/sonic v1.10.2 h1:GQebETVBxYB7JGWJtLBi07OVzWwt+8dWA00gEVW2ZFE=
github.com/bytedance/sonic v1.10.2/go.mod h1:iZcSUejdk5aukTND/Eu/ivjQuEL0Cu9/rf50Hi0u/g4=
github.com/cespare/xxhash v1.1.0 h1:a6HrQnmkObjyL+Gs60czilIUGqrzKutQD6XZog3p+ko=
github.com/cespare/xxhash/v2 v2.2.0 h1:DC2CZ1Ep5Y4k3ZQ899DldepgrayRUGE6BBZ/cd9Cj44=
github.com/cespare/xxhash/v2 v2.2.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
github.com/chenzhuoyu/base64x v0.0.0-20211019084208-fb5309c8db06/go.mod h1:DH46F32mSOjUmXrMHnKwZdA8wcEefY7UVqBKYGjpdQY=
@@ -44,15 +46,18 @@ github.com/chenzhuoyu/iasm v0.9.0 h1:9fhXjVzq5hUy2gkhhgHl95zG2cEAhw9OSGs8toWWAwo
github.com/chenzhuoyu/iasm v0.9.0/go.mod h1:Xjy2NpN3h7aUqeqM+woSuuvxmIe6+DDsiNLIrkAmYog=
github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/deepmap/oapi-codegen v1.15.0 h1:SQqViaeb4k2vMul8gx12oDOIadEtoRqTdLkxjzqtQ90=
github.com/deepmap/oapi-codegen v1.15.0/go.mod h1:a6KoHV7lMRwsPoEg2C6NDHiXYV3EQfiFocOlJ8dgJQE=
github.com/fatih/color v1.15.0 h1:kOqh6YHBtK8aywxGerMG2Eq3H6Qgoqeo13Bk2Mv/nBs=
github.com/fatih/structs v1.1.0 h1:Q7juDM0QtcnhCpeyLGQKyg4TOIghuNXrkL32pHAUMxo=
github.com/fatih/structs v1.1.0/go.mod h1:9NiDSp5zOcgEDl+j00MP/WkGVPOlPRLejGD8Ga6PJ7M=
github.com/flosch/pongo2/v4 v4.0.2 h1:gv+5Pe3vaSVmiJvh/BZa82b7/00YUGm0PIyVVLop0Hw=
github.com/flosch/pongo2/v4 v4.0.2/go.mod h1:B5ObFANs/36VwxxlgKpdchIJHMvHB562PW+BWPhwZD8=
github.com/frankban/quicktest v1.11.0/go.mod h1:K+q6oSqb0W0Ininfk863uOk1lMy69l/P6txr3mVT54s=
github.com/frankban/quicktest v1.11.2/go.mod h1:K+q6oSqb0W0Ininfk863uOk1lMy69l/P6txr3mVT54s=
github.com/frankban/quicktest v1.13.0 h1:yNZif1OkDfNoDfb9zZa9aXIpejNR4F23Wely0c+Qdqk=
github.com/frankban/quicktest v1.13.0/go.mod h1:qLE0fzW0VuyUAJgPU19zByoIr0HtCHN/r/VLSOOIySU=
github.com/fsnotify/fsnotify v1.6.0 h1:n+5WquG0fcWoWp6xPWfHdbskMCQaFnG6PfBrh1Ky4HY=
github.com/fsnotify/fsnotify v1.6.0/go.mod h1:sl3t1tCWJFWoRz9R8WJCbQihKKwmorjAbSClcnxKAGw=
@@ -62,12 +67,14 @@ github.com/gin-contrib/sse v0.1.0 h1:Y/yl/+YNO8GZSjAhjMsSuLt29uWRFHdHYUb5lYOV9qE
github.com/gin-contrib/sse v0.1.0/go.mod h1:RHrZQHXnP2xjPF+u1gW/2HnVO7nvIa9PG3Gm+fLHvGI=
github.com/gin-gonic/gin v1.9.1 h1:4idEAncQnU5cB7BeOkPtxjfCSye0AAm1R0RVIqJ+Jmg=
github.com/gin-gonic/gin v1.9.1/go.mod h1:hPrL7YrpYKXt5YId3A/Tnip5kqbEAP+KLuI3SUcPTeU=
github.com/go-playground/assert/v2 v2.2.0 h1:JvknZsQTYeFEAhQwI4qEt9cyV5ONwRHC+lYKSsYSR8s=
github.com/go-playground/locales v0.14.1 h1:EWaQ/wswjilfKLTECiXz7Rh+3BjFhfDFKv/oXslEjJA=
github.com/go-playground/locales v0.14.1/go.mod h1:hxrqLVvrK65+Rwrd5Fc6F2O76J/NuW9t0sjnWqG1slY=
github.com/go-playground/universal-translator v0.18.1 h1:Bcnm0ZwsGyWbCzImXv+pAJnYK9S473LQFuzCbDbfSFY=
github.com/go-playground/universal-translator v0.18.1/go.mod h1:xekY+UJKNuX9WP91TpwSH2VMlDf28Uj24BCp08ZFTUY=
github.com/go-playground/validator/v10 v10.15.5 h1:LEBecTWb/1j5TNY1YYG2RcOUN3R7NLylN+x8TTueE24=
github.com/go-playground/validator/v10 v10.15.5/go.mod h1:9iXMNT7sEkjXb0I+enO7QXmzG6QCsPWY4zveKFVRSyU=
github.com/gobwas/glob v0.2.3 h1:A4xDbljILXROh+kObIiy5kIaPYD8e96x1tgBhUI5J+Y=
github.com/goccy/go-json v0.10.2 h1:CrxCmQqYDkv1z7lO7Wbh2HN93uovUHgrECaO5ZrCXAU=
github.com/goccy/go-json v0.10.2/go.mod h1:6MelG93GURQebXPDq3khkgXZkazVtN9CRI+MGFi0w8I=
github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
@@ -80,6 +87,8 @@ github.com/gomarkdown/markdown v0.0.0-20230922112808-5421fefb8386 h1:EcQR3gusLHN
github.com/gomarkdown/markdown v0.0.0-20230922112808-5421fefb8386/go.mod h1:JDGcbDT52eL4fju3sZ4TeHGsQwhG9nbDV21aMyhwPoA=
github.com/google/go-cmp v0.5.2/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
github.com/google/go-cmp v0.5.9 h1:O2Tfq5qg4qc4AmwVlvv0oLiVAGB7enBSJ2x2DqQFi38=
github.com/google/go-querystring v1.1.0 h1:AnCroh3fv4ZBgVIf1Iwtovgjaw/GiKJo8M8yD/fhyJ8=
github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg=
github.com/google/uuid v1.3.1 h1:KjJaJ9iWZ3jOFZIf1Lqf4laDRCasjl0BCmnEGxkdLb4=
github.com/google/uuid v1.3.1/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
@@ -87,16 +96,20 @@ github.com/gorilla/css v1.0.0 h1:BQqNyPTi50JCFMTw/b67hByjMVXZRwGha6wxVGkeihY=
github.com/gorilla/css v1.0.0/go.mod h1:Dn721qIggHpt4+EFCcTLTU/vk5ySda2ReITrtgBl60c=
github.com/gorilla/mux v1.8.0 h1:i40aqfkR1h2SlN9hojwV5ZA91wcXFOvkdNIeFDP5koI=
github.com/gorilla/mux v1.8.0/go.mod h1:DVbg23sWSpFRCP0SfiEN6jmj59UnW/n46BH5rLB71So=
github.com/gorilla/websocket v1.5.0 h1:PPwGk2jz7EePpoHN/+ClbZu8SPxiqlu12wZP/3sWmnc=
github.com/imkira/go-interpol v1.1.0 h1:KIiKr0VSG2CUW1hl1jpiyuzuJeKUUpC8iM1AIE7N1Vk=
github.com/influxdata/influxdb-client-go/v2 v2.12.3 h1:28nRlNMRIV4QbtIUvxhWqaxn0IpXeMSkY/uJa/O/vC4=
github.com/influxdata/influxdb-client-go/v2 v2.12.3/go.mod h1:IrrLUbCjjfkmRuaCiGQg4m2GbkaeJDcuWoxiWdQEbA0=
github.com/influxdata/line-protocol v0.0.0-20210922203350-b1ad95c89adf h1:7JTmneyiNEwVBOHSjoMxiWAqB992atOeepeFYegn5RU=
github.com/influxdata/line-protocol v0.0.0-20210922203350-b1ad95c89adf/go.mod h1:xaLFMmpvUxqXtVkUJfg9QmT88cDaCJ3ZKgdZ78oO8Qo=
github.com/influxdata/line-protocol-corpus v0.0.0-20210519164801-ca6fa5da0184/go.mod h1:03nmhxzZ7Xk2pdG+lmMd7mHDfeVOYFyhOgwO61qWU98=
github.com/influxdata/line-protocol-corpus v0.0.0-20210922080147-aa28ccfb8937 h1:MHJNQ+p99hFATQm6ORoLmpUCF7ovjwEFshs/NHzAbig=
github.com/influxdata/line-protocol-corpus v0.0.0-20210922080147-aa28ccfb8937/go.mod h1:BKR9c0uHSmRgM/se9JhFHtTT7JTO67X23MtKMHtZcpo=
github.com/influxdata/line-protocol/v2 v2.0.0-20210312151457-c52fdecb625a/go.mod h1:6+9Xt5Sq1rWx+glMgxhcg2c0DUaehK+5TDcPZ76GypY=
github.com/influxdata/line-protocol/v2 v2.1.0/go.mod h1:QKw43hdUBg3GTk2iC3iyCxksNj7PX9aUSeYOYE/ceHY=
github.com/influxdata/line-protocol/v2 v2.2.1 h1:EAPkqJ9Km4uAxtMRgUubJyqAr6zgWM0dznKMLRauQRE=
github.com/influxdata/line-protocol/v2 v2.2.1/go.mod h1:DmB3Cnh+3oxmG6LOBIxce4oaL4CPj3OmMPgvauXh+tM=
github.com/iris-contrib/httpexpect/v2 v2.15.2 h1:T9THsdP1woyAqKHwjkEsbCnMefsAFvk8iJJKokcJ3Go=
github.com/iris-contrib/schema v0.0.6 h1:CPSBLyx2e91H2yJzPuhGuifVRnZBBJ3pCOMbOvPZaTw=
github.com/iris-contrib/schema v0.0.6/go.mod h1:iYszG0IOsuIsfzjymw1kMzTL8YQcCWlm65f3wX8J5iA=
github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY=
@@ -123,8 +136,10 @@ github.com/klauspost/cpuid/v2 v2.2.5 h1:0E5MSMDEoAulmXNFquVs//DdoomxaoTY1kUhbc/q
github.com/klauspost/cpuid/v2 v2.2.5/go.mod h1:Lcz8mBdAVJIBVzewtcLocK12l3Y+JytZYpaMropDUws=
github.com/knz/go-libedit v1.10.1/go.mod h1:MZTVkCWyz0oBc7JOWP3wNAzd002ZbM/5hgShxwh4x8M=
github.com/kr/pretty v0.2.1/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI=
github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE=
github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
github.com/labstack/echo/v4 v4.11.1 h1:dEpLU2FLg4UVmvCGPuk/APjlH6GDpbEPti61srUUUs4=
github.com/labstack/echo/v4 v4.11.1/go.mod h1:YuYRTSM3CHs2ybfrL8Px48bO6BAnYIN4l8wSTMP6BDQ=
@@ -147,11 +162,15 @@ github.com/matttproud/golang_protobuf_extensions v1.0.4 h1:mmDVorXM7PCGKw94cs5zk
github.com/matttproud/golang_protobuf_extensions v1.0.4/go.mod h1:BSXmuO+STAnVfrANrmjBb36TMTDstsz7MSK+HVaYKv4=
github.com/microcosm-cc/bluemonday v1.0.25 h1:4NEwSfiJ+Wva0VxN5B8OwMicaJvD8r9tlJWm9rtloEg=
github.com/microcosm-cc/bluemonday v1.0.25/go.mod h1:ZIOjCQp1OrzBBPIJmfX4qDYFuhU02nx4bn030ixfHLE=
github.com/minio/highwayhash v1.0.2 h1:Aak5U0nElisjDCfPSG79Tgzkn2gl66NxOMspRrKnA/g=
github.com/mitchellh/go-wordwrap v1.0.1 h1:TLuKupo69TCn6TQSyGxwI1EblZZEsQ0vMlAFQflz0v0=
github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg=
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9Gz0M=
github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk=
github.com/nats-io/jwt/v2 v2.5.0 h1:WQQ40AAlqqfx+f6ku+i0pOVm+ASirD4fUh+oQsiE9Ak=
github.com/nats-io/nats-server/v2 v2.8.4 h1:0jQzze1T9mECg8YZEl8+WYUXb9JKluJfCBriPUtluB4=
github.com/nats-io/nats-server/v2 v2.8.4/go.mod h1:8zZa+Al3WsESfmgSs98Fi06dRWLH5Bnq90m5bKD/eT4=
github.com/nats-io/nats.go v1.30.2 h1:aloM0TGpPorZKQhbAkdCzYDj+ZmsJDyeo3Gkbr72NuY=
github.com/nats-io/nats.go v1.30.2/go.mod h1:dcfhUgmQNN4GJEfIb2f9R7Fow+gzBF4emzDHrVBd5qM=
@@ -164,6 +183,7 @@ github.com/pelletier/go-toml/v2 v2.1.0 h1:FnwAJ4oYMvbT/34k9zzHuZNrhlz48GB3/s6at6
github.com/pelletier/go-toml/v2 v2.1.0/go.mod h1:tJU2Z3ZkXwnxa4DPO899bsyIoywizdUvyaeZurnPPDc=
github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4=
github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/prometheus/client_golang v1.17.0 h1:rl2sfwZMtSthVU752MqfjQozy7blglC+1SOtjMAMh+Q=
github.com/prometheus/client_golang v1.17.0/go.mod h1:VeL+gMmOAxkS2IqfCq0ZmHSL+LjWfWDUmp1mBz9JgUY=
@@ -173,10 +193,13 @@ github.com/prometheus/common v0.44.0 h1:+5BrQJwiBB9xsMygAB3TNvpQKOwlkc25LbISbrdO
github.com/prometheus/common v0.44.0/go.mod h1:ofAIvZbQ1e/nugmZGz4/qCb9Ap1VoSTIO7x0VV9VvuY=
github.com/prometheus/procfs v0.12.0 h1:jluTpSng7V9hY0O2R9DzzJHYb2xULk9VTR1V1R/k6Bo=
github.com/prometheus/procfs v0.12.0/go.mod h1:pcuDEFsWDnvcgNzo4EEweacyhjeA9Zk3cnaOZAZEfOo=
github.com/rogpeppe/go-internal v1.10.0 h1:TMyTOH3F/DB16zRVcYyreMH6GnZZrwQVAoYjRBZyWFQ=
github.com/russross/blackfriday/v2 v2.1.0 h1:JIOH55/0cWyOuilr9/qlrm0BSXldqnqwMsf35Ld67mk=
github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
github.com/sanity-io/litter v1.5.5 h1:iE+sBxPBzoK6uaEP5Lt3fHNgpKcHXc/A2HGETy0uJQo=
github.com/schollz/closestmatch v2.1.0+incompatible h1:Uel2GXEpJqOWBrlyI+oY9LTiyyjYS17cCYRqP13/SHk=
github.com/schollz/closestmatch v2.1.0+incompatible/go.mod h1:RtP1ddjLong6gTkbtmuhtR2uUrrJOpYzYRvbcPAid+g=
github.com/sergi/go-diff v1.0.0 h1:Kpca3qRNrduNnOQeazBd0ysaKrUJiIuISHxogkT9RPQ=
github.com/shopspring/decimal v1.3.1 h1:2Usl1nmF/WZucqkFZhnfFYxxxu8LG21F6nPQBE5gKV8=
github.com/shopspring/decimal v1.3.1/go.mod h1:DKyhrW/HYNuLGql+MJL6WCR6knT2jwCFRcu2hWCYk4o=
github.com/sirupsen/logrus v1.8.1/go.mod h1:yWOB1SBYBC5VeMP7gHvWumXLIWorT60ONWic61uBYv0=
@@ -195,11 +218,13 @@ github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/
github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU=
github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4=
github.com/stretchr/testify v1.8.2/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4=
github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk=
github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo=
github.com/tdewolff/minify/v2 v2.12.9 h1:dvn5MtmuQ/DFMwqf5j8QhEVpPX6fi3WGImhv8RUB4zA=
github.com/tdewolff/minify/v2 v2.12.9/go.mod h1:qOqdlDfL+7v0/fyymB+OP497nIxJYSvX4MQWA8OoiXU=
github.com/tdewolff/parse/v2 v2.6.8 h1:mhNZXYCx//xG7Yq2e/kVLNZw4YfYmeHbhx+Zc0OvFMA=
github.com/tdewolff/parse/v2 v2.6.8/go.mod h1:XHDhaU6IBgsryfdnpzUXBlT6leW/l25yrFBTEb4eIyM=
github.com/tdewolff/test v1.0.9 h1:SswqJCmeN4B+9gEAi/5uqT0qpi1y2/2O47V/1hhGZT0=
github.com/tdewolff/test v1.0.9/go.mod h1:6DAvZliBAAnD7rhVgwaM7DE5/d9NMOAJ09SqYqeK4QE=
github.com/tklauser/go-sysconf v0.3.12 h1:0QaGUFOdQaIVdPgfITYzaTegZvdCjmYO52cSFAEVmqU=
github.com/tklauser/go-sysconf v0.3.12/go.mod h1:Ho14jnntGE1fpdOqQEEaiKRpvIavV0hSfmBq8nJbHYI=
@@ -218,8 +243,14 @@ github.com/vmihailenco/msgpack/v5 v5.4.0 h1:hRM0digJwyR6vll33NNAwCFguy5JuBD6jxDm
github.com/vmihailenco/msgpack/v5 v5.4.0/go.mod h1:GaZTsDaehaPpQVyxrf5mtQlH+pc21PIudVV/E3rRQok=
github.com/vmihailenco/tagparser/v2 v2.0.0 h1:y09buUbR+b5aycVFQs/g70pqKVZNBmxwAhO7/IwNM9g=
github.com/vmihailenco/tagparser/v2 v2.0.0/go.mod h1:Wri+At7QHww0WTrCBeu4J6bNtoV6mEfg5OIWRZA9qds=
github.com/xeipuuv/gojsonpointer v0.0.0-20180127040702-4e3ac2762d5f h1:J9EGpcZtP0E/raorCMxlFGSTBrsSlaDGf3jU/qvAE2c=
github.com/xeipuuv/gojsonreference v0.0.0-20180127040603-bd5ef7bd5415 h1:EzJWgHovont7NscjpAxXsDA8S8BMYve8Y5+7cuRE7R0=
github.com/xeipuuv/gojsonschema v1.2.0 h1:LhYJRs+L4fBtjZUfuSZIKGeVu0QRy8e5Xi7D17UxZ74=
github.com/yalp/jsonpath v0.0.0-20180802001716-5cc68e5049a0 h1:6fRhSjgLCkTD3JnJxvaJ4Sj+TYblw757bqYgZaOq5ZY=
github.com/yosssi/ace v0.0.5 h1:tUkIP/BLdKqrlrPwcmH0shwEEhTRHoGnc1wFIWmaBUA=
github.com/yosssi/ace v0.0.5/go.mod h1:ALfIzm2vT7t5ZE7uoIZqF3TQ7SAOyupFZnkrF5id+K0=
github.com/yudai/gojsondiff v1.0.0 h1:27cbfqXLVEJ1o8I6v3y9lg8Ydm53EKqHXAOMxEGlCOA=
github.com/yudai/golcs v0.0.0-20170316035057-ecda9a501e82 h1:BHyfKlQyqbsFN5p3IfnEUduWvb9is428/nNb5L3U01M=
github.com/yuin/goldmark v1.4.1/go.mod h1:mwnBkeHKe2W/ZEtQ+71ViKU8L12m81fl3OWwC1Zlc8k=
golang.design/x/thread v0.0.0-20210122121316-335e9adffdf1 h1:P7S/GeHBAFEZIYp0ePPs2kHXoazz8q2KsyxHyQVGCJg=
golang.design/x/thread v0.0.0-20210122121316-335e9adffdf1/go.mod h1:9CWpnTUmlQkfdpdutA1nNf4iE5lAVt3QZOu0Z6hahBE=
@@ -283,13 +314,16 @@ google.golang.org/protobuf v1.31.0/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqw
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/check.v1 v1.0.0-20200227125254-8fa46927fb4f/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/check.v1 v1.0.0-20200902074654-038fdea0a05b/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk=
gopkg.in/ini.v1 v1.67.0 h1:Dgnx+6+nfE+IfzjUEISNeydPJh9AXNNsWbGP9KzCsOA=
gopkg.in/ini.v1 v1.67.0/go.mod h1:pNLf8WUiyNEtQjuu5G5vTm06TEv9tsIgeAvK8hOrP4k=
gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY=
gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ=
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
gopkg.in/yaml.v3 v3.0.0-20200615113413-eeeca48fe776/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
gopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
moul.io/http2curl/v2 v2.3.0 h1:9r3JfDzWPcbIklMOs2TnIFzDYvfAZvjeavG6EzP7jYs=
nullprogram.com/x/optparse v1.0.0/go.mod h1:KdyPE+Igbe0jQUrVfMqDMeJQIJZEuyV7pjYmp6pbG50=
rsc.io/pdf v0.1.1/go.mod h1:n8OzWcQ6Sp37PL01nO98y4iUCRdTGarVfzxY20ICaU4=

View File

@@ -25,7 +25,7 @@ CC_USER=clustercockpit
CC_GROUP=clustercockpit
CONF_DIR=/etc/cc-metric-collector
PID_FILE=/var/run/$NAME.pid
-DAEMON=/usr/sbin/$NAME
+DAEMON=/usr/bin/$NAME
CONF_FILE=${CONF_DIR}/cc-metric-collector.json
umask 0027

View File

@@ -45,6 +45,9 @@ type HttpSinkConfig struct {
// Maximum number of retries to connect to the http server (default: 3)
MaxRetries int `json:"max_retries,omitempty"`
// Timestamp precision
Precision string `json:"precision,omitempty"`
}
type key_value_pair struct {
@@ -141,7 +144,7 @@ func (s *HttpSink) Write(m lp.CCMetric) error {
// Check that encoding worked
if err != nil {
return fmt.Errorf("Encoding failed: %v", err)
return fmt.Errorf("encoding failed: %v", err)
}
if s.config.flushDelay == 0 {
@@ -268,6 +271,7 @@ func NewHttpSink(name string, config json.RawMessage) (Sink, error) {
s.config.Timeout = "5s"
s.config.FlushDelay = "5s"
s.config.MaxRetries = 3
s.config.Precision = "ns"
cclog.ComponentDebug(s.name, "Init()")
// Read config
@@ -315,6 +319,19 @@ func NewHttpSink(name string, config json.RawMessage) (Sink, error) {
cclog.ComponentDebug(s.name, "Init(): flushDelay", t)
}
}
precision := influx.Nanosecond
if len(s.config.Precision) > 0 {
switch s.config.Precision {
case "s":
precision = influx.Second
case "ms":
precision = influx.Millisecond
case "us":
precision = influx.Microsecond
case "ns":
precision = influx.Nanosecond
}
}
// Create http client
s.client = &http.Client{
@@ -326,7 +343,7 @@ func NewHttpSink(name string, config json.RawMessage) (Sink, error) {
}
// Configure influx line protocol encoder
-s.encoder.SetPrecision(influx.Nanosecond)
+s.encoder.SetPrecision(precision)
s.extended_tag_list = make([]key_value_pair, 0)
return s, nil

View File

@@ -18,7 +18,8 @@ The `http` sink uses POST requests to a HTTP server to submit the metrics in the
"timeout": "5s",
"idle_connection_timeout" : "5s",
"flush_delay": "2s",
"batch_size": 1000
"batch_size": 1000,
"precision": "s"
}
}
```
@@ -34,3 +35,8 @@ The `http` sink uses POST requests to a HTTP server to submit the metrics in the
- `idle_connection_timeout`: Timeout for idle connections (default '120s'). Should be larger than the measurement interval to keep the connection open
- `flush_delay`: Batch all writes arriving during this duration (default '1s', batching can be disabled by setting it to 0)
- `batch_size`: Maximal batch size. If `batch_size` is reached before the end of `flush_delay`, the metrics are sent without further delay
- `precision`: Precision of the timestamp. Valid values are 's', 'ms', 'us' and 'ns'. (default is 'ns')
### Using HttpSink for communication with cc-metric-store
The cc-metric-store only accepts metrics with a timestamp precision in seconds, so it is required to set `"precision": "s"`.