Merge pull request #501 from ClusterCockpit/dev

Update jobclass rules
This commit is contained in:
Jan Eitzinger
2026-02-24 07:03:23 +01:00
committed by GitHub
2 changed files with 4 additions and 11 deletions

View File

@@ -11,15 +11,11 @@
"job.duration > job_min_duration_seconds" "job.duration > job_min_duration_seconds"
], ],
"variables": [ "variables": [
{
"name": "memory_threshold",
"expr": "mem_used.limits.peak * highmemoryusage_threshold_factor"
},
{ {
"name": "memory_usage_pct", "name": "memory_usage_pct",
"expr": "mem_used.max / mem_used.limits.peak * 100.0" "expr": "mem_used.max / mem_used.limits.peak * 100.0"
} }
], ],
"rule": "mem_used.max > memory_threshold", "rule": "mem_used.max > mem_used.limits.alert",
"hint": "This job used high memory: peak memory usage {{.mem_used.max}} GB ({{.memory_usage_pct}}% of {{.mem_used.limits.peak}} GB node capacity), exceeding the {{.highmemoryusage_threshold_factor}} utilization threshold. Risk of out-of-memory conditions." "hint": "This job used high memory: peak memory usage {{.mem_used.max}} GB ({{.memory_usage_pct}}% of {{.mem_used.limits.peak}} GB node capacity), exceeding the {{.highmemoryusage_threshold_factor}} utilization threshold. Risk of out-of-memory conditions."
} }

View File

@@ -1,10 +1,7 @@
{ {
"name": "Low CPU load", "name": "Low CPU load",
"tag": "lowload", "tag": "lowload",
"parameters": [ "parameters": ["lowcpuload_threshold_factor", "job_min_duration_seconds"],
"lowcpuload_threshold_factor",
"job_min_duration_seconds"
],
"metrics": ["cpu_load"], "metrics": ["cpu_load"],
"requirements": [ "requirements": [
"job.shared == \"none\"", "job.shared == \"none\"",
@@ -13,9 +10,9 @@
"variables": [ "variables": [
{ {
"name": "load_threshold", "name": "load_threshold",
"expr": "job.numCores * lowcpuload_threshold_factor" "expr": "cpu_load.limits.peak * lowcpuload_threshold_factor"
} }
], ],
"rule": "cpu_load.avg < load_threshold", "rule": "cpu_load.avg < load_threshold",
"hint": "This job was detected as low CPU load: average cpu load {{.cpu_load.avg}} is below the threshold {{.load_threshold}} ({{.lowcpuload_threshold_factor}} \u00d7 {{.job.numCores}} allocated cores)." "hint": "This job was detected as low CPU load: average cpu load {{.cpu_load.avg}} is below the threshold {{.load_threshold}} ({{.lowcpuload_threshold_factor}} \u00d7 {{.cpu_load.limits.peak}} peak load)."
} }