cc-metric-collector/collectors/slurmJobDetector.go
package collectors
import (
"encoding/json"
"fmt"
"os"
osuser "os/user"
filepath "path/filepath"
"strconv"
"strings"
"sync"
"time"
cclog "github.com/ClusterCockpit/cc-metric-collector/pkg/ccLogger"
lp "github.com/ClusterCockpit/cc-metric-collector/pkg/ccMetric"
)
// These are the fields we read from the JSON configuration
type SlurmJobDetectorConfig struct {
Interval string `json:"interval"`
SendJobEvents bool `json:"send_job_events,omitempty"`
SendStepEvents bool `json:"send_step_events,omitempty"`
SendJobMetrics bool `json:"send_job_metrics,omitempty"`
SendStepMetrics bool `json:"send_step_metrics,omitempty"`
BaseDirectory string `json:"sysfs_base,omitempty"`
CgroupVersion string `json:"cgroup_version"`
}
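// For illustration only, a configuration for this collector might look like the
// following JSON object (field names follow the struct tags above; the values
// and the surrounding collector key are assumptions and depend on the setup):
//
//	{
//	  "interval": "20s",
//	  "send_job_events": true,
//	  "send_step_events": false,
//	  "send_job_metrics": true,
//	  "send_step_metrics": false,
//	  "cgroup_version": "v1",
//	  "sysfs_base": "/sys/fs/cgroup"
//	}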
// This information is sent as JSON when an event occurs
type SlurmJobMetadata struct {
UID string `json:"uid,omitempty"`
JobId string `json:"jobid"`
Timestamp uint64 `json:"timestamp"`
Status string `json:"status"`
Step string `json:"step,omitempty"`
Cpus []int `json:"cpus,omitempty"`
Memories []int `json:"memories,omitempty"`
MemoryLimitHard int64 `json:"memory_limit_hard,omitempty"`
MemoryLimitSoft int64 `json:"memory_limit_soft,omitempty"`
Devices []string `json:"devices,omitempty"`
}
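// For illustration (all values invented), a job-start event serialized from this
// struct could look roughly like:
//
//	{"uid":"1000","jobid":"123456","timestamp":1704629602,"status":"start",
//	 "cpus":[0,1,2,3],"memories":[0],"memory_limit_hard":4294967296,
//	 "memory_limit_soft":4294967296}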
type SlurmJobMetrics struct {
MemoryUsage int64
MaxMemoryUsage int64
LimitMemoryUsage int64
CpuUsageUser int64
CpuUsageSys int64
}
// This contains all variables we need during execution and the variables
// defined by metricCollector (name, init, ...)
type SlurmJobDetector struct {
metricCollector
config SlurmJobDetectorConfig // the configuration structure
meta map[string]string // default meta information
interval time.Duration // the interval parsed from configuration
ticker *time.Ticker // own timer for event checking
output chan lp.CCMetric // variable to save output channel at Read() for event sending
wg sync.WaitGroup // sync group for event checking management
done chan bool // channel for event checking management
directories map[string]SlurmJobMetadata // directory -> data mapping (data stored to re-send data in end job events)
}
const default_base_dir = "/sys/fs/cgroup"
const default_cgroup_version = "v1"
// Not required to be pre-initialized; these paths are overwritten in Init() based on the configuration
var cpuacct_base = filepath.Join(default_base_dir, "cpuacct", "slurm")
var memory_base = filepath.Join(default_base_dir, "memory", "slurm")
var cpuset_base = filepath.Join(default_base_dir, "cpuset", "slurm")
var devices_base = filepath.Join(default_base_dir, "devices", "slurm")
// Filenames for cgroup/v1
var limit_in_bytes_file = "memory.limit_in_bytes"
var soft_limit_in_bytes_file = "memory.soft_limit_in_bytes"
var cpus_effective_file = "cpuset.effective_cpus"
var mems_effective_file = "cpuset.effective_mems"
var devices_list_file = "devices.list"
var usage_in_bytes_file = "memory.usage_in_bytes"
var max_usage_in_bytes_file = "memory.max_usage_in_bytes"
var cpuacct_usage_file = "cpuacct.usage"
var cpuacct_usage_user_file = "cpuacct.usage_user"
// Filenames for cgroup/v2
// In Init() the filenames are set based on configuration
const soft_limit_in_bytes_file_v2 = "memory.high"
const limit_in_bytes_file_v2 = "memory.max"
const cpus_effective_file_v2 = "cpuset.cpus.effective"
const mems_effective_file_v2 = "cpuset.mems.effective"
const devices_list_file_v2 = "devices.list"
const usage_in_bytes_file_v2 = "memory.usage_in_bytes"
const max_usage_in_bytes_file_v2 = "memory.max_usage_in_bytes"
const cpuacct_usage_file_v2 = "cpuacct.usage"
const cpuacct_usage_user_file_v2 = "cpuacct.usage_user"
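// With the defaults above, the collector reads cgroup/v1 files such as the
// following (illustrative path, placeholders in angle brackets):
//
//	/sys/fs/cgroup/memory/slurm/uid_<uid>/job_<jobid>/memory.limit_in_bytes
//
// For cgroup/v2, Init() switches all base paths to
// <sysfs_base>/system.slice/slurmstepd.scope and selects the *_v2 filenames.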
// fileToInt64 reads a file and parses its (whitespace-trimmed) content as a
// base-10 int64, as used for single-value cgroup files.
func fileToInt64(filename string) (int64, error) {
data, err := os.ReadFile(filename)
if err != nil {
return 0, err
}
x, err := strconv.ParseInt(strings.TrimSpace(string(data)), 10, 64)
if err != nil {
return 0, err
}
return x, nil
}
// ExpandList expands a cpuset-style list such as "0-3,6" into a slice of ints.
func ExpandList(strlist string) []int {
out := make([]int, 0)
for _, entry := range strings.Split(strlist, ",") {
var s, e int
_, err := fmt.Sscanf(entry, "%d-%d", &s, &e)
if err == nil {
if s <= e {
for i := s; i <= e; i++ {
out = append(out, i)
}
} else {
for i := s; i >= e; i-- {
out = append(out, i)
}
}
} else {
_, err := fmt.Sscanf(entry, "%d", &s)
if err == nil {
out = append(out, s)
}
}
}
return out
}
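// Example (illustrative): ExpandList("0-3,6") returns []int{0, 1, 2, 3, 6},
// matching the list format found in cpuset.effective_cpus/effective_mems.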
// ParseDevices parses a cgroup devices.list file; each non-empty line (e.g. "a *:* rwm") becomes one entry.
func ParseDevices(devlist string) []string {
out := make([]string, 0)
for _, line := range strings.Split(devlist, "\n") {
if line = strings.TrimSpace(line); len(line) > 0 {
out = append(out, line)
}
}
return out
}
func GetPathParts(path string) []string {
out := make([]string, 0)
uid := ""
jobid := ""
step := ""
parts := strings.Split(path, "/")
// the folders of interest are at the end of the list, so traverse
// from the back
for i := len(parts) - 1; i >= 0; i-- {
if strings.HasPrefix(parts[i], "uid_") {
uid = parts[i]
} else if strings.HasPrefix(parts[i], "job_") {
jobid = parts[i]
} else if strings.HasPrefix(parts[i], "step_") {
step = parts[i]
}
}
// only cgroup/v1 provides a uid folder, and it has to be the first entry
if len(uid) > 0 {
out = append(out, uid)
}
if len(jobid) > 0 {
out = append(out, jobid)
}
// only if it's a step folder
if len(step) > 0 {
out = append(out, step)
}
return out
}
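// Example (illustrative): for the cgroup/v1 path
// "/sys/fs/cgroup/cpuacct/slurm/uid_1000/job_123/step_0" GetPathParts returns
// []string{"uid_1000", "job_123", "step_0"}.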
func GetIdsFromParts(parts []string) (string, string, string) {
uid := ""
jobid := ""
step := ""
for _, p := range parts {
if strings.HasPrefix(p, "job_") {
jobid = strings.TrimPrefix(p, "job_")
} else if strings.HasPrefix(p, "uid_") {
uid = strings.TrimPrefix(p, "uid_")
} else if strings.HasPrefix(p, "step_") {
step = strings.TrimPrefix(p, "step_")
}
}
return uid, jobid, step
}
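// Example (illustrative): GetIdsFromParts([]string{"uid_1000", "job_123"})
// returns ("1000", "123", "").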
// CheckEvents compares the current SLURM cgroup folders with the known ones
// to detect newly started and finished jobs (and steps, if configured).
func (m *SlurmJobDetector) CheckEvents(timestamp time.Time) {
parts := make([]string, 0, 3)
parts = append(parts, cpuacct_base)
// Only cgroup/v1 has a uid_* level below the slurm folder
if m.config.CgroupVersion == "v1" {
parts = append(parts, "uid_[0-9]*")
}
parts = append(parts, "job_[0-9]*")
dirs, err := filepath.Glob(filepath.Join(parts...))
if err != nil {
cclog.ComponentError(m.name, "Cannot get directory list for SLURM jobs")
return
}
if m.config.SendStepEvents {
parts = append(parts, "step_*")
sdirs, err := filepath.Glob(filepath.Join(parts...))
if err != nil {
cclog.ComponentError(m.name, "Cannot get directory list for SLURM steps")
return
}
dirs = append(dirs, sdirs...)
}
for _, d := range dirs {
// Folder not in known directories map -> New job
if _, ok := m.directories[d]; !ok {
dirParts := GetPathParts(d)
data, err := m.NewJobEvent(dirParts, timestamp, m.output)
if err == nil {
// Add the directory to the map
cclog.ComponentDebug(m.name, "Adding directory ", d, " to known directories")
m.directories[d] = data
}
}
}
for d, data := range m.directories {
// Known directory but it does not exist anymore -> Vanished/Finished job
if _, ok := stringArrayContains(dirs, d); !ok {
dirParts := GetPathParts(d)
err := m.EndJobEvent(dirParts, data, timestamp, m.output)
if err != nil {
uid, jobid, step := GetIdsFromParts(dirParts)
if len(step) == 0 {
cclog.ComponentError(m.name, "Failed to end job for user ", uid, " jobid ", jobid)
} else {
cclog.ComponentError(m.name, "Failed to end job for user ", uid, " jobid ", jobid, " step ", step)
}
}
// Remove the directory from the map
cclog.ComponentDebug(m.name, "Removing directory ", d, " to known directories")
delete(m.directories, d)
}
}
}
func (m *SlurmJobDetector) NewJobEvent(parts []string, timestamp time.Time, output chan lp.CCMetric) (SlurmJobMetadata, error) {
uid, jobid, step := GetIdsFromParts(parts)
pathstr := filepath.Join(parts...)
if len(jobid) == 0 {
cclog.ComponentError(m.name, "No jobid in path ", pathstr)
return SlurmJobMetadata{}, fmt.Errorf("no jobid in path %s", pathstr)
}
jobtags := map[string]string{
"type": "job",
"type-id": jobid,
}
// Fill job JSON with data from cgroup
md := SlurmJobMetadata{
JobId: jobid,
Timestamp: uint64(timestamp.Unix()),
Status: "start",
}
// cgroup/v2 has no uid in parts
if len(uid) > 0 {
md.UID = uid
}
if len(step) > 0 {
md.Step = step
jobtags["stype"] = "step"
jobtags["stype-id"] = step
}
job_cpus, err := os.ReadFile(filepath.Join(cpuset_base, pathstr, cpus_effective_file))
if err == nil {
md.Cpus = ExpandList(string(job_cpus))
}
job_mems, err := os.ReadFile(filepath.Join(cpuset_base, pathstr, mems_effective_file))
if err == nil {
md.Memories = ExpandList(string(job_mems))
}
job_devs, err := os.ReadFile(filepath.Join(devices_base, pathstr, devices_list_file))
if err == nil {
md.Devices = ParseDevices(string(job_devs))
}
x, err := fileToInt64(filepath.Join(memory_base, pathstr, limit_in_bytes_file))
if err == nil {
md.MemoryLimitHard = x
}
x, err = fileToInt64(filepath.Join(memory_base, pathstr, soft_limit_in_bytes_file))
if err == nil {
md.MemoryLimitSoft = x
}
jobjson, err := json.Marshal(md)
if err == nil {
y, err := lp.New("slurm", jobtags, m.meta, map[string]interface{}{"value": string(jobjson)}, timestamp)
if err == nil {
if len(uid) > 0 {
y.AddMeta("uid", uid)
uname, err := osuser.LookupId(uid)
if err == nil {
y.AddMeta("username", uname.Username)
}
}
y.AddMeta("metric_type", "event")
output <- y
}
}
return md, nil
}
// Not sure whether this works for steps, since the step folders commonly do not vanish when a job step finishes
func (m *SlurmJobDetector) EndJobEvent(parts []string, data SlurmJobMetadata, timestamp time.Time, output chan lp.CCMetric) error {
uid, jobid, step := GetIdsFromParts(parts)
pathstr := filepath.Join(parts...)
if len(jobid) == 0 {
err := fmt.Errorf("no jobid in path %s", pathstr)
cclog.ComponentError(m.name, err.Error())
return err
}
jobtags := map[string]string{
"type": "job",
"type-id": jobid,
}
// Fill job JSON with data from cgroup
md := SlurmJobMetadata{
JobId: jobid,
Timestamp: uint64(timestamp.Unix()),
Cpus: data.Cpus,
Memories: data.Memories,
Devices: data.Devices,
MemoryLimitHard: data.MemoryLimitHard,
MemoryLimitSoft: data.MemoryLimitSoft,
Status: "end",
}
// cgroup/v2 has no uid in parts
if len(uid) > 0 {
md.UID = uid
}
if len(step) > 0 {
md.Step = step
jobtags["stype"] = "step"
jobtags["stype-id"] = step
}
jobjson, err := json.Marshal(md)
if err == nil {
y, err := lp.New("slurm", jobtags, m.meta, map[string]interface{}{"value": string(jobjson)}, timestamp)
if err == nil {
if len(uid) > 0 {
y.AddMeta("uid", uid)
uname, err := osuser.LookupId(uid)
if err == nil {
y.AddMeta("username", uname.Username)
}
}
y.AddMeta("metric_type", "event")
output <- y
} else {
return err
}
} else {
return err
}
return nil
}
func (m *SlurmJobDetector) ReadMetrics(parts []string) (SlurmJobMetrics, error) {
jobdata := SlurmJobMetrics{
MemoryUsage: 0,
MaxMemoryUsage: 0,
LimitMemoryUsage: 0,
CpuUsageUser: 0,
CpuUsageSys: 0,
}
part := filepath.Join(parts...)
x, err := fileToInt64(filepath.Join(memory_base, part, usage_in_bytes_file))
if err == nil {
jobdata.MemoryUsage = x
}
x, err = fileToInt64(filepath.Join(memory_base, part, max_usage_in_bytes_file))
if err == nil {
jobdata.MaxMemoryUsage = x
}
tu, err := fileToInt64(filepath.Join(cpuacct_base, part, cpuacct_usage_file))
if err == nil && tu > 0 {
uu, err := fileToInt64(filepath.Join(cpuacct_base, part, cpuacct_usage_user_file))
if err == nil {
// user share of the total CPU time in percent
jobdata.CpuUsageUser = (uu * 100) / tu
jobdata.CpuUsageSys = 100 - jobdata.CpuUsageUser
}
}
return jobdata, nil
}
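// Example (illustrative): with cpuacct.usage_user = 900000000 and
// cpuacct.usage = 1000000000 (both in nanoseconds), CpuUsageUser is 90 and
// CpuUsageSys is 10.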
func (m *SlurmJobDetector) SendMetrics(jobtags, jobmeta map[string]string, jobmetrics SlurmJobMetrics, timestamp time.Time, output chan lp.CCMetric) {
y, err := lp.New("job_mem_used", jobtags, m.meta, map[string]interface{}{"value": jobmetrics.MemoryUsage}, timestamp)
if err == nil {
y.AddMeta("unit", "Bytes")
for k, v := range jobmeta {
y.AddMeta(k, v)
}
output <- y
}
y, err = lp.New("job_max_mem_used", jobtags, m.meta, map[string]interface{}{"value": jobmetrics.MaxMemoryUsage}, timestamp)
if err == nil {
y.AddMeta("unit", "Bytes")
for k, v := range jobmeta {
y.AddMeta(k, v)
}
output <- y
}
y, err = lp.New("job_cpu_user", jobtags, m.meta, map[string]interface{}{"value": jobmetrics.CpuUsageUser}, timestamp)
if err == nil {
y.AddMeta("unit", "%")
for k, v := range jobmeta {
y.AddMeta(k, v)
}
output <- y
}
y, err = lp.New("job_cpu_sys", jobtags, m.meta, map[string]interface{}{"value": jobmetrics.CpuUsageSys}, timestamp)
if err == nil {
y.AddMeta("unit", "%")
for k, v := range jobmeta {
y.AddMeta(k, v)
}
output <- y
}
}
// Init initializes the SlurmJobDetector collector
// Called once by the collector manager
// All tags, meta data tags and metrics that do not change over the runtime should be set here
func (m *SlurmJobDetector) Init(config json.RawMessage) error {
var err error = nil
m.name = "SlurmJobDetector"
// Basic collector setup; call it early
m.setup()
// Can be run in parallel with others
m.parallel = true
// Define meta information sent with each metric
m.meta = map[string]string{"source": m.name, "group": "SLURM"}
// Set configuration defaults
m.config.SendJobEvents = false
m.config.SendJobMetrics = false
m.config.SendStepEvents = false
m.config.SendStepMetrics = false
m.config.CgroupVersion = default_cgroup_version
m.config.BaseDirectory = default_base_dir
// Read in the JSON configuration
if len(config) > 0 {
err = json.Unmarshal(config, &m.config)
if err != nil {
cclog.ComponentError(m.name, "Error reading config:", err.Error())
return err
}
}
// Parse the read interval duration
m.interval, err = time.ParseDuration(m.config.Interval)
if err != nil {
cclog.ComponentError(m.name, "Error parsing interval:", err.Error())
return err
}
if m.config.CgroupVersion != "v1" && m.config.CgroupVersion != "v2" {
err = fmt.Errorf("invalid cgroup version %s, valid values are 'v1' and 'v2'", m.config.CgroupVersion)
cclog.ComponentError(m.name, err.Error())
return err
}
// Storage for output channel
m.output = nil
// Management channel for the timer function.
m.done = make(chan bool)
// Create the collector's own ticker
m.ticker = time.NewTicker(m.interval)
// Create space for storing files
m.directories = make(map[string]SlurmJobMetadata)
if _, err := os.Stat(m.config.BaseDirectory); err != nil {
err := fmt.Errorf("cannot find base folder %s", m.config.BaseDirectory)
cclog.ComponentError(m.name, err.Error())
return err
}
cclog.ComponentDebug(m.name, "Using base directory", m.config.BaseDirectory)
cpuacct_base = filepath.Join(m.config.BaseDirectory, "cpuacct", "slurm")
memory_base = filepath.Join(m.config.BaseDirectory, "memory", "slurm")
cpuset_base = filepath.Join(m.config.BaseDirectory, "cpuset", "slurm")
devices_base = filepath.Join(m.config.BaseDirectory, "devices", "slurm")
if m.config.CgroupVersion == "v2" {
cclog.ComponentDebug(m.name, "Reconfiguring folders and filenames for cgroup/v2")
cpuacct_base = filepath.Join(m.config.BaseDirectory, "system.slice", "slurmstepd.scope")
memory_base = filepath.Join(m.config.BaseDirectory, "system.slice", "slurmstepd.scope")
cpuset_base = filepath.Join(m.config.BaseDirectory, "system.slice", "slurmstepd.scope")
devices_base = filepath.Join(m.config.BaseDirectory, "system.slice", "slurmstepd.scope")
cpus_effective_file = cpus_effective_file_v2
mems_effective_file = mems_effective_file_v2
devices_list_file = devices_list_file_v2
limit_in_bytes_file = limit_in_bytes_file_v2
soft_limit_in_bytes_file = soft_limit_in_bytes_file_v2
usage_in_bytes_file = usage_in_bytes_file_v2
max_usage_in_bytes_file = max_usage_in_bytes_file_v2
cpuacct_usage_file = cpuacct_usage_file_v2
cpuacct_usage_user_file = cpuacct_usage_user_file_v2
}
if _, err := os.Stat(cpuacct_base); err != nil {
err := fmt.Errorf("cannot find SLURM cgroup folder %s", cpuacct_base)
cclog.ComponentError(m.name, err.Error())
return err
}
m.wg.Add(1)
go func() {
for {
select {
case <-m.done:
// Exit the timer loop
cclog.ComponentDebug(m.name, "Closing...")
m.wg.Done()
return
case timestamp := <-m.ticker.C:
if m.output != nil {
cclog.ComponentDebug(m.name, "Checking events")
m.CheckEvents(timestamp)
}
}
}
}()
// Set this flag only if everything is initialized properly, all required files exist, ...
m.init = true
return err
}
// Read collects all metrics belonging to the SlurmJobDetector collector
// and sends them through the output channel to the collector manager
func (m *SlurmJobDetector) Read(interval time.Duration, output chan lp.CCMetric) {
// Get a common timestamp for all metrics of this read cycle
timestamp := time.Now()
// Capture output channel for event sending in goroutine, so at startup, the event checking
// waits until the first call of Read()
m.output = output
// This is the reading for metrics for all running jobs. For the event checking, check
// the goroutine in Init()
parts := make([]string, 0)
parts = append(parts, cpuacct_base)
// Only cgroup/v1 has a uid_* folder
if m.config.CgroupVersion == "v1" {
parts = append(parts, "uid_[0-9]*")
}
parts = append(parts, "job_[0-9]*")
// Get folders based on constructed glob path
dirs, err := filepath.Glob(filepath.Join(parts...))
if err != nil {
cclog.ComponentError(m.name, "Cannot get directory list for SLURM jobs")
return
}
if m.config.SendStepEvents {
// Add step lookup if we process step events
parts = append(parts, "step_*")
// Get step folders based on constructed glob path
sdirs, err := filepath.Glob(filepath.Join(parts...))
if err != nil {
cclog.ComponentError(m.name, "Cannot get directory list for SLURM steps")
return
}
// Add step folders to directory list for processing
dirs = append(dirs, sdirs...)
}
for _, d := range dirs {
dirParts := GetPathParts(d) // Gets uid_*, job_* and step_* (if available)
uid, jobid, step := GetIdsFromParts(dirParts) // extracts the IDs from the available parts
// Create tags map for the job
jobtags := map[string]string{
"type": "job",
"type-id": jobid,
}
// Create meta map for the job
jobmeta := make(map[string]string)
// if cgroup/v1, we have a uid
if len(uid) > 0 {
jobmeta["uid"] = uid
uname, err := osuser.LookupId(uid)
if err == nil {
jobmeta["username"] = uname.Username
}
}
// if this is a step directory, add the sub type with value
if len(step) > 0 {
jobtags["stype"] = "step"
jobtags["stype-id"] = step
}
// Read memory and CPU usage for this job/step directory
jobmetrics, err := m.ReadMetrics(dirParts)
if err == nil {
// Send all metrics for the job
m.SendMetrics(jobtags, jobmeta, jobmetrics, timestamp, output)
}
}
}
// Close metric collector: close network connection, close files, close libraries, ...
// Called once by the collector manager
func (m *SlurmJobDetector) Close() {
m.done <- true
m.wg.Wait()
// Unset flag
m.init = false
}