mirror of
https://github.com/ClusterCockpit/cc-backend
synced 2026-01-15 17:21:46 +01:00
Remove embedded tagger rules
This commit is contained in:
26
configs/tagger/jobclasses/highload.json
Normal file
26
configs/tagger/jobclasses/highload.json
Normal file
@@ -0,0 +1,26 @@
|
||||
{
|
||||
"name": "Excessive CPU load",
|
||||
"tag": "excessiveload",
|
||||
"parameters": [
|
||||
"excessivecpuload_threshold_factor",
|
||||
"job_min_duration_seconds",
|
||||
"sampling_interval_seconds"
|
||||
],
|
||||
"metrics": ["cpu_load"],
|
||||
"requirements": [
|
||||
"job.shared == \"none\"",
|
||||
"job.duration > job_min_duration_seconds"
|
||||
],
|
||||
"variables": [
|
||||
{
|
||||
"name": "load_threshold",
|
||||
"expr": "cpu_load.limits.peak * excessivecpuload_threshold_factor"
|
||||
},
|
||||
{
|
||||
"name": "load_perc",
|
||||
"expr": "1.0 - (cpu_load.avg / cpu_load.limits.peak)"
|
||||
}
|
||||
],
|
||||
"rule": "cpu_load.avg > load_threshold",
|
||||
"hint": "This job was detected as excessiveload because the average cpu load {{.cpu_load.avg}} falls above the threshold {{.load_threshold}}."
|
||||
}
|
||||
22
configs/tagger/jobclasses/lowUtilization.json
Normal file
22
configs/tagger/jobclasses/lowUtilization.json
Normal file
@@ -0,0 +1,22 @@
|
||||
{
|
||||
"name": "Low ressource utilization",
|
||||
"tag": "lowutilization",
|
||||
"parameters": ["job_min_duration_seconds"],
|
||||
"metrics": ["flops_any", "mem_bw"],
|
||||
"requirements": [
|
||||
"job.shared == \"none\"",
|
||||
"job.duration > job_min_duration_seconds"
|
||||
],
|
||||
"variables": [
|
||||
{
|
||||
"name": "mem_bw_perc",
|
||||
"expr": "1.0 - (mem_bw.avg / mem_bw.limits.peak)"
|
||||
},
|
||||
{
|
||||
"name": "flops_any_perc",
|
||||
"expr": "1.0 - (flops_any.avg / flops_any.limits.peak)"
|
||||
}
|
||||
],
|
||||
"rule": "flops_any.avg < flops_any.limits.alert && mem_bw.avg < mem_bw.limits.alert",
|
||||
"hint": "This job was detected as low utilization because the average flop rate {{.flops_any.avg}} falls below the threshold {{.flops_any.limits.alert}}."
|
||||
}
|
||||
26
configs/tagger/jobclasses/lowload.json
Normal file
26
configs/tagger/jobclasses/lowload.json
Normal file
@@ -0,0 +1,26 @@
|
||||
{
|
||||
"name": "Low CPU load",
|
||||
"tag": "lowload",
|
||||
"parameters": [
|
||||
"lowcpuload_threshold_factor",
|
||||
"job_min_duration_seconds",
|
||||
"sampling_interval_seconds"
|
||||
],
|
||||
"metrics": ["cpu_load"],
|
||||
"requirements": [
|
||||
"job.shared == \"none\"",
|
||||
"job.duration > job_min_duration_seconds"
|
||||
],
|
||||
"variables": [
|
||||
{
|
||||
"name": "load_threshold",
|
||||
"expr": "job.numCores * lowcpuload_threshold_factor"
|
||||
},
|
||||
{
|
||||
"name": "load_perc",
|
||||
"expr": "1.0 - (cpu_load.avg / cpu_load.limits.peak)"
|
||||
}
|
||||
],
|
||||
"rule": "cpu_load.avg < cpu_load.limits.caution",
|
||||
"hint": "This job was detected as lowload because the average cpu load {{.cpu_load}} falls below the threshold {{.cpu_load.limits.caution}}."
|
||||
}
|
||||
14
configs/tagger/jobclasses/parameters.json
Normal file
14
configs/tagger/jobclasses/parameters.json
Normal file
@@ -0,0 +1,14 @@
|
||||
{
|
||||
"lowcpuload_threshold_factor": 0.9,
|
||||
"excessivecpuload_threshold_factor": 1.1,
|
||||
"highmemoryusage_threshold_factor": 0.9,
|
||||
"node_load_imbalance_threshold_factor": 0.1,
|
||||
"core_load_imbalance_threshold_factor": 0.1,
|
||||
"high_memory_load_threshold_factor": 0.9,
|
||||
"lowgpuload_threshold_factor": 0.7,
|
||||
"memory_leak_slope_threshold": 0.1,
|
||||
"job_min_duration_seconds": 600.0,
|
||||
"sampling_interval_seconds": 30.0,
|
||||
"cpu_load_pre_cutoff_samples": 11.0,
|
||||
"cpu_load_core_pre_cutoff_samples": 6.0
|
||||
}
|
||||
Reference in New Issue
Block a user