Include metric thresholds in rule environment

Not yet tested
This commit is contained in:
Jan Eitzinger 2025-05-27 13:02:13 +02:00
parent 0aecea6de2
commit cdfe722457
3 changed files with 14 additions and 3 deletions

View File

@@ -226,6 +226,7 @@ func (t *JobClassTagger) Register() error {
func (t *JobClassTagger) Match(job *schema.Job) { func (t *JobClassTagger) Match(job *schema.Job) {
r := repository.GetJobRepository() r := repository.GetJobRepository()
jobstats, err := archive.GetStatistics(job) jobstats, err := archive.GetStatistics(job)
metricsList := archive.GetMetricConfigSubCluster(job.Cluster, job.SubCluster)
log.Infof("Enter match rule with %d rules for job %d", len(t.rules), job.JobID) log.Infof("Enter match rule with %d rules for job %d", len(t.rules), job.JobID)
if err != nil { if err != nil {
log.Errorf("job classification failed for job %d: %#v", job.JobID, err) log.Errorf("job classification failed for job %d: %#v", job.JobID, err)
@@ -255,7 +256,17 @@ func (t *JobClassTagger) Match(job *schema.Job) {
log.Errorf("job classification failed for job %d: missing metric '%s'", job.JobID, m) log.Errorf("job classification failed for job %d: missing metric '%s'", job.JobID, m)
return return
} }
env[m] = stats.Avg env[m] = map[string]any{
"min": stats.Min,
"max": stats.Max,
"avg": stats.Avg,
"limits": map[string]float64{
"peak": metricsList[m].Peak,
"normal": metricsList[m].Normal,
"caution": metricsList[m].Caution,
"alert": metricsList[m].Alert,
},
}
} }
// check rule requirements apply // check rule requirements apply

View File

@@ -19,7 +19,7 @@
}, },
{ {
"name": "load_perc", "name": "load_perc",
"expr": "cpu_load / load_threshold" "expr": "cpu_load.avg / load_threshold"
} }
], ],
"rule": "cpu_load > load_threshold", "rule": "cpu_load > load_threshold",

View File

@@ -21,6 +21,6 @@
"expr": "1.0 - (cpu_load / load_threshold)" "expr": "1.0 - (cpu_load / load_threshold)"
} }
], ],
"rule": "cpu_load < load_threshold", "rule": "cpu_load.avg < load_threshold",
"hint": "This job was detected as lowload because the average cpu load {{.cpu_load}} falls below the threshold {{.load_threshold}}." "hint": "This job was detected as lowload because the average cpu load {{.cpu_load}} falls below the threshold {{.load_threshold}}."
} }