Remove embedded tagger rules

This commit is contained in:
2026-01-13 07:20:26 +01:00
parent 4cec933349
commit 42809e3f75
25 changed files with 166 additions and 78 deletions

View File

@@ -0,0 +1,26 @@
{
"name": "Excessive CPU load",
"tag": "excessiveload",
"parameters": [
"excessivecpuload_threshold_factor",
"job_min_duration_seconds",
"sampling_interval_seconds"
],
"metrics": ["cpu_load"],
"requirements": [
"job.shared == \"none\"",
"job.duration > job_min_duration_seconds"
],
"variables": [
{
"name": "load_threshold",
"expr": "cpu_load.limits.peak * excessivecpuload_threshold_factor"
},
{
"name": "load_perc",
"expr": "1.0 - (cpu_load.avg / cpu_load.limits.peak)"
}
],
"rule": "cpu_load.avg > load_threshold",
"hint": "This job was detected as excessiveload because the average cpu load {{.cpu_load.avg}} falls above the threshold {{.load_threshold}}."
}

View File

@@ -0,0 +1,22 @@
{
"name": "Low ressource utilization",
"tag": "lowutilization",
"parameters": ["job_min_duration_seconds"],
"metrics": ["flops_any", "mem_bw"],
"requirements": [
"job.shared == \"none\"",
"job.duration > job_min_duration_seconds"
],
"variables": [
{
"name": "mem_bw_perc",
"expr": "1.0 - (mem_bw.avg / mem_bw.limits.peak)"
},
{
"name": "flops_any_perc",
"expr": "1.0 - (flops_any.avg / flops_any.limits.peak)"
}
],
"rule": "flops_any.avg < flops_any.limits.alert && mem_bw.avg < mem_bw.limits.alert",
"hint": "This job was detected as low utilization because the average flop rate {{.flops_any.avg}} falls below the threshold {{.flops_any.limits.alert}}."
}

View File

@@ -0,0 +1,26 @@
{
"name": "Low CPU load",
"tag": "lowload",
"parameters": [
"lowcpuload_threshold_factor",
"job_min_duration_seconds",
"sampling_interval_seconds"
],
"metrics": ["cpu_load"],
"requirements": [
"job.shared == \"none\"",
"job.duration > job_min_duration_seconds"
],
"variables": [
{
"name": "load_threshold",
"expr": "job.numCores * lowcpuload_threshold_factor"
},
{
"name": "load_perc",
"expr": "1.0 - (cpu_load.avg / cpu_load.limits.peak)"
}
],
"rule": "cpu_load.avg < cpu_load.limits.caution",
"hint": "This job was detected as lowload because the average cpu load {{.cpu_load}} falls below the threshold {{.cpu_load.limits.caution}}."
}

View File

@@ -0,0 +1,14 @@
{
"lowcpuload_threshold_factor": 0.9,
"excessivecpuload_threshold_factor": 1.1,
"highmemoryusage_threshold_factor": 0.9,
"node_load_imbalance_threshold_factor": 0.1,
"core_load_imbalance_threshold_factor": 0.1,
"high_memory_load_threshold_factor": 0.9,
"lowgpuload_threshold_factor": 0.7,
"memory_leak_slope_threshold": 0.1,
"job_min_duration_seconds": 600.0,
"sampling_interval_seconds": 30.0,
"cpu_load_pre_cutoff_samples": 11.0,
"cpu_load_core_pre_cutoff_samples": 6.0
}