From cdfe7224576a7db0b798327b5f145a8169ae2eb2 Mon Sep 17 00:00:00 2001 From: Jan Eitzinger Date: Tue, 27 May 2025 13:02:13 +0200 Subject: [PATCH] Include metric thresholds in rule environment Not yet tested. Each metric is now exposed to rule expressions as an object with min, max, avg and limits (peak, normal, caution, alert) instead of a bare average, so expressions must reference a field such as cpu_load.avg. This patch only converts one expression per job class file; the highload rule, the lowload variable expression and the hint templates still reference bare cpu_load and need a follow-up. --- internal/tagger/classifyJob.go | 13 ++++++++++++- internal/tagger/jobclasses/highload.json | 2 +- internal/tagger/jobclasses/lowload.json | 2 +- 3 files changed, 14 insertions(+), 3 deletions(-) diff --git a/internal/tagger/classifyJob.go b/internal/tagger/classifyJob.go index 0af6738..6fd3fae 100644 --- a/internal/tagger/classifyJob.go +++ b/internal/tagger/classifyJob.go @@ -226,6 +226,7 @@ func (t *JobClassTagger) Register() error { func (t *JobClassTagger) Match(job *schema.Job) { r := repository.GetJobRepository() jobstats, err := archive.GetStatistics(job) + metricsList := archive.GetMetricConfigSubCluster(job.Cluster, job.SubCluster) log.Infof("Enter match rule with %d rules for job %d", len(t.rules), job.JobID) if err != nil { log.Errorf("job classification failed for job %d: %#v", job.JobID, err) @@ -255,7 +256,17 @@ func (t *JobClassTagger) Match(job *schema.Job) { log.Errorf("job classification failed for job %d: missing metric '%s'", job.JobID, m) return } - env[m] = stats.Avg + env[m] = map[string]any{ + "min": stats.Min, + "max": stats.Max, + "avg": stats.Avg, + "limits": map[string]float64{ + "peak": metricsList[m].Peak, + "normal": metricsList[m].Normal, + "caution": metricsList[m].Caution, + "alert": metricsList[m].Alert, + }, + } } // check rule requirements apply diff --git a/internal/tagger/jobclasses/highload.json b/internal/tagger/jobclasses/highload.json index 2715ee8..444ca4d 100644 --- a/internal/tagger/jobclasses/highload.json +++ b/internal/tagger/jobclasses/highload.json @@ -19,7 +19,7 @@ }, { "name": "load_perc", - "expr": "cpu_load / load_threshold" + "expr": "cpu_load.avg / load_threshold" } ], "rule": "cpu_load > load_threshold", diff --git a/internal/tagger/jobclasses/lowload.json b/internal/tagger/jobclasses/lowload.json 
index 4c21a6b..1d7e041 100644 --- a/internal/tagger/jobclasses/lowload.json +++ b/internal/tagger/jobclasses/lowload.json @@ -21,6 +21,6 @@ "expr": "1.0 - (cpu_load / load_threshold)" } ], - "rule": "cpu_load < load_threshold", + "rule": "cpu_load.avg < load_threshold", "hint": "This job was detected as lowload because the average cpu load {{.cpu_load}} falls below the threshold {{.load_threshold}}." }