mirror of
https://github.com/ClusterCockpit/cc-backend
synced 2026-02-24 11:27:30 +01:00
@@ -11,15 +11,11 @@
|
|||||||
"job.duration > job_min_duration_seconds"
|
"job.duration > job_min_duration_seconds"
|
||||||
],
|
],
|
||||||
"variables": [
|
"variables": [
|
||||||
{
|
|
||||||
"name": "memory_threshold",
|
|
||||||
"expr": "mem_used.limits.peak * highmemoryusage_threshold_factor"
|
|
||||||
},
|
|
||||||
{
|
{
|
||||||
"name": "memory_usage_pct",
|
"name": "memory_usage_pct",
|
||||||
"expr": "mem_used.max / mem_used.limits.peak * 100.0"
|
"expr": "mem_used.max / mem_used.limits.peak * 100.0"
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"rule": "mem_used.max > memory_threshold",
|
"rule": "mem_used.max > mem_used.limits.alert",
|
||||||
"hint": "This job used high memory: peak memory usage {{.mem_used.max}} GB ({{.memory_usage_pct}}% of {{.mem_used.limits.peak}} GB node capacity), exceeding the {{.highmemoryusage_threshold_factor}} utilization threshold. Risk of out-of-memory conditions."
|
"hint": "This job used high memory: peak memory usage {{.mem_used.max}} GB ({{.memory_usage_pct}}% of {{.mem_used.limits.peak}} GB node capacity), exceeding the {{.highmemoryusage_threshold_factor}} utilization threshold. Risk of out-of-memory conditions."
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,10 +1,7 @@
|
|||||||
{
|
{
|
||||||
"name": "Low CPU load",
|
"name": "Low CPU load",
|
||||||
"tag": "lowload",
|
"tag": "lowload",
|
||||||
"parameters": [
|
"parameters": ["lowcpuload_threshold_factor", "job_min_duration_seconds"],
|
||||||
"lowcpuload_threshold_factor",
|
|
||||||
"job_min_duration_seconds"
|
|
||||||
],
|
|
||||||
"metrics": ["cpu_load"],
|
"metrics": ["cpu_load"],
|
||||||
"requirements": [
|
"requirements": [
|
||||||
"job.shared == \"none\"",
|
"job.shared == \"none\"",
|
||||||
@@ -13,9 +10,9 @@
|
|||||||
"variables": [
|
"variables": [
|
||||||
{
|
{
|
||||||
"name": "load_threshold",
|
"name": "load_threshold",
|
||||||
"expr": "job.numCores * lowcpuload_threshold_factor"
|
"expr": "cpu_load.limits.peak * lowcpuload_threshold_factor"
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"rule": "cpu_load.avg < load_threshold",
|
"rule": "cpu_load.avg < load_threshold",
|
||||||
"hint": "This job was detected as low CPU load: average cpu load {{.cpu_load.avg}} is below the threshold {{.load_threshold}} ({{.lowcpuload_threshold_factor}} \u00d7 {{.job.numCores}} allocated cores)."
|
"hint": "This job was detected as low CPU load: average cpu load {{.cpu_load.avg}} is below the threshold {{.load_threshold}} ({{.lowcpuload_threshold_factor}} \u00d7 {{.cpu_load.limits.peak}} peak CPU load)."
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user