Remove embedded tagger rules

This commit is contained in:
2026-01-13 07:20:26 +01:00
parent 4cec933349
commit 42809e3f75
25 changed files with 166 additions and 78 deletions

View File

@@ -1 +0,0 @@
alf

View File

@@ -1,7 +0,0 @@
calc_rate
qmdffgen
dynamic
evbopt
explore
black_box
poly_qmdff

View File

@@ -1,3 +0,0 @@
chroma
qdp
qmp

View File

@@ -1 +0,0 @@
cp2k

View File

@@ -1 +0,0 @@
cpmd

View File

@@ -1 +0,0 @@
flame

View File

@@ -1,3 +0,0 @@
gromacs
gmx
mdrun

View File

@@ -1 +0,0 @@
julia

View File

@@ -1 +0,0 @@
lmp

View File

@@ -1 +0,0 @@
matlab

View File

@@ -1 +0,0 @@
openfoam

View File

@@ -1 +0,0 @@
orca

View File

@@ -1,4 +0,0 @@
python
pip
anaconda
conda

View File

@@ -1,2 +0,0 @@
starccm+
-podkey

View File

@@ -1,10 +0,0 @@
dscf
grad
ridft
rdgrad
ricc2
statpt
aoforce
escf
egrad
odft

View File

@@ -1,2 +0,0 @@
vasp
VASP

View File

@@ -2,15 +2,16 @@
// All rights reserved. This file is part of cc-backend.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package tagger
import (
"bytes"
"embed"
"encoding/json"
"fmt"
"maps"
"os"
"path/filepath"
"strings"
"text/template"
@@ -23,8 +24,16 @@ import (
"github.com/expr-lang/expr/vm"
)
//go:embed jobclasses/*
var jobClassFiles embed.FS
const (
// defaultJobClassConfigPath is the default path for job classification configuration
defaultJobClassConfigPath = "./var/tagger/jobclasses"
// tagTypeJobClass is the tag type identifier for job classification tags
tagTypeJobClass = "jobClass"
// jobClassConfigDirMatch is the directory name used for matching filesystem events
jobClassConfigDirMatch = "jobclasses"
// parametersFileName is the name of the parameters configuration file
parametersFileName = "parameters.json"
)
// Variable defines a named expression that can be computed and reused in rules.
// Variables are evaluated before the main rule and their results are added to the environment.
@@ -45,21 +54,21 @@ type ruleVariable struct {
// and the final rule expression that determines if the job matches the classification.
type RuleFormat struct {
// Name is a human-readable description of the rule
Name string `json:"name"`
Name string `json:"name"`
// Tag is the classification tag to apply if the rule matches
Tag string `json:"tag"`
Tag string `json:"tag"`
// Parameters are shared values referenced in the rule (e.g., thresholds)
Parameters []string `json:"parameters"`
Parameters []string `json:"parameters"`
// Metrics are the job metrics required for this rule (e.g., "cpu_load", "mem_used")
Metrics []string `json:"metrics"`
Metrics []string `json:"metrics"`
// Requirements are boolean expressions that must be true for the rule to apply
Requirements []string `json:"requirements"`
Requirements []string `json:"requirements"`
// Variables are computed values used in the rule expression
Variables []Variable `json:"variables"`
Variables []Variable `json:"variables"`
// Rule is the boolean expression that determines if the job matches
Rule string `json:"rule"`
Rule string `json:"rule"`
// Hint is a template string that generates a message when the rule matches
Hint string `json:"hint"`
Hint string `json:"hint"`
}
type ruleInfo struct {
@@ -75,29 +84,29 @@ type ruleInfo struct {
// This interface allows for easier testing and decoupling from the concrete repository implementation.
type JobRepository interface {
// HasTag checks if a job already has a specific tag
HasTag(jobId int64, tagType string, tagName string) bool
HasTag(jobID int64, tagType string, tagName string) bool
// AddTagOrCreateDirect adds a tag to a job or creates it if it doesn't exist
AddTagOrCreateDirect(jobId int64, tagType string, tagName string) (tagId int64, err error)
AddTagOrCreateDirect(jobID int64, tagType string, tagName string) (tagID int64, err error)
// UpdateMetadata updates job metadata with a key-value pair
UpdateMetadata(job *schema.Job, key, val string) (err error)
}
// JobClassTagger classifies jobs based on configurable rules that evaluate job metrics and properties.
// Rules are loaded from embedded JSON files and can be dynamically reloaded from a watched directory.
// Rules are loaded from an external configuration directory and can be dynamically reloaded when files change.
// When a job matches a rule, it is tagged with the corresponding classification and an optional hint message.
type JobClassTagger struct {
// rules maps classification tags to their compiled rule information
rules map[string]ruleInfo
rules map[string]ruleInfo
// parameters are shared values (e.g., thresholds) used across multiple rules
parameters map[string]any
parameters map[string]any
// tagType is the type of tag ("jobClass")
tagType string
tagType string
// cfgPath is the path to watch for configuration changes
cfgPath string
cfgPath string
// repo provides access to job database operations
repo JobRepository
repo JobRepository
// getStatistics retrieves job statistics for analysis
getStatistics func(job *schema.Job) (map[string]schema.JobStatistics, error)
getStatistics func(job *schema.Job) (map[string]schema.JobStatistics, error)
// getMetricConfig retrieves metric configuration (limits) for a cluster
getMetricConfig func(cluster, subCluster string) map[string]*schema.Metric
}
@@ -169,7 +178,7 @@ func (t *JobClassTagger) prepareRule(b []byte, fns string) {
// EventMatch checks if a filesystem event should trigger configuration reload.
// It returns true if the event path contains "jobclasses".
func (t *JobClassTagger) EventMatch(s string) bool {
return strings.Contains(s, "jobclasses")
return strings.Contains(s, jobClassConfigDirMatch)
}
// EventCallback is triggered when the configuration directory changes.
@@ -181,9 +190,10 @@ func (t *JobClassTagger) EventCallback() {
cclog.Fatal(err)
}
if util.CheckFileExists(t.cfgPath + "/parameters.json") {
parametersFile := filepath.Join(t.cfgPath, parametersFileName)
if util.CheckFileExists(parametersFile) {
cclog.Info("Merge parameters")
b, err := os.ReadFile(t.cfgPath + "/parameters.json")
b, err := os.ReadFile(parametersFile)
if err != nil {
cclog.Warnf("prepareRule() > open file error: %v", err)
}
@@ -198,13 +208,13 @@ func (t *JobClassTagger) EventCallback() {
for _, fn := range files {
fns := fn.Name()
if fns != "parameters.json" {
if fns != parametersFileName {
cclog.Debugf("Process: %s", fns)
filename := fmt.Sprintf("%s/%s", t.cfgPath, fns)
filename := filepath.Join(t.cfgPath, fns)
b, err := os.ReadFile(filename)
if err != nil {
cclog.Warnf("prepareRule() > open file error: %v", err)
return
continue
}
t.prepareRule(b, fns)
}
@@ -213,7 +223,8 @@ func (t *JobClassTagger) EventCallback() {
func (t *JobClassTagger) initParameters() error {
cclog.Info("Initialize parameters")
b, err := jobClassFiles.ReadFile("jobclasses/parameters.json")
parametersFile := filepath.Join(t.cfgPath, parametersFileName)
b, err := os.ReadFile(parametersFile)
if err != nil {
cclog.Warnf("prepareRule() > open file error: %v", err)
return err
@@ -227,13 +238,20 @@ func (t *JobClassTagger) initParameters() error {
return nil
}
// Register initializes the JobClassTagger by loading parameters and classification rules.
// It loads embedded configuration files and sets up a file watch on ./var/tagger/jobclasses
// if it exists, allowing for dynamic configuration updates without restarting the application.
// Returns an error if the embedded configuration files cannot be read or parsed.
// Register initializes the JobClassTagger by loading parameters and classification rules from an external folder.
// It sets up a file watch on the configuration path (./var/tagger/jobclasses by default), allowing for
// dynamic configuration updates without restarting the application.
// Returns an error if the configuration path does not exist or cannot be read.
func (t *JobClassTagger) Register() error {
t.cfgPath = "./var/tagger/jobclasses"
t.tagType = "jobClass"
if t.cfgPath == "" {
t.cfgPath = defaultJobClassConfigPath
}
t.tagType = tagTypeJobClass
t.rules = make(map[string]ruleInfo)
if !util.CheckFileExists(t.cfgPath) {
return fmt.Errorf("configuration path does not exist: %s", t.cfgPath)
}
err := t.initParameters()
if err != nil {
@@ -241,31 +259,28 @@ func (t *JobClassTagger) Register() error {
return err
}
files, err := jobClassFiles.ReadDir("jobclasses")
files, err := os.ReadDir(t.cfgPath)
if err != nil {
return fmt.Errorf("error reading app folder: %#v", err)
return fmt.Errorf("error reading jobclasses folder: %#v", err)
}
t.rules = make(map[string]ruleInfo)
for _, fn := range files {
fns := fn.Name()
if fns != "parameters.json" {
filename := fmt.Sprintf("jobclasses/%s", fns)
if fns != parametersFileName {
cclog.Infof("Process: %s", fns)
filename := filepath.Join(t.cfgPath, fns)
b, err := jobClassFiles.ReadFile(filename)
b, err := os.ReadFile(filename)
if err != nil {
cclog.Warnf("prepareRule() > open file error: %v", err)
return err
continue
}
t.prepareRule(b, fns)
}
}
if util.CheckFileExists(t.cfgPath) {
t.EventCallback()
cclog.Infof("Setup file watch for %s", t.cfgPath)
util.AddListener(t.cfgPath, t)
}
cclog.Infof("Setup file watch for %s", t.cfgPath)
util.AddListener(t.cfgPath, t)
t.repo = repository.GetJobRepository()
t.getStatistics = archive.GetStatistics

View File

@@ -13,13 +13,13 @@ type MockJobRepository struct {
mock.Mock
}
func (m *MockJobRepository) HasTag(jobId int64, tagType string, tagName string) bool {
args := m.Called(jobId, tagType, tagName)
func (m *MockJobRepository) HasTag(jobID int64, tagType string, tagName string) bool {
args := m.Called(jobID, tagType, tagName)
return args.Bool(0)
}
func (m *MockJobRepository) AddTagOrCreateDirect(jobId int64, tagType string, tagName string) (tagId int64, err error) {
args := m.Called(jobId, tagType, tagName)
func (m *MockJobRepository) AddTagOrCreateDirect(jobID int64, tagType string, tagName string) (tagID int64, err error) {
args := m.Called(jobID, tagType, tagName)
return args.Get(0).(int64), args.Error(1)
}

View File

@@ -7,9 +7,7 @@ package tagger
import (
"bufio"
"embed"
"fmt"
"io/fs"
"os"
"path/filepath"
"regexp"
@@ -21,8 +19,14 @@ import (
"github.com/ClusterCockpit/cc-lib/v2/util"
)
//go:embed apps/*
var appFiles embed.FS
const (
// defaultConfigPath is the default path for application tagging configuration
defaultConfigPath = "./var/tagger/apps"
// tagTypeApp is the tag type identifier for application tags
tagTypeApp = "app"
// configDirMatch is the directory name used for matching filesystem events
configDirMatch = "apps"
)
type appInfo struct {
tag string
@@ -30,19 +34,19 @@ type appInfo struct {
}
// AppTagger detects applications by matching patterns in job scripts.
// It loads application patterns from embedded files and can dynamically reload
// configuration from a watched directory. When a job script matches a pattern,
// It loads application patterns from an external configuration directory and can dynamically reload
// configuration when files change. When a job script matches a pattern,
// the corresponding application tag is automatically applied.
type AppTagger struct {
// apps maps application tags to their matching patterns
apps map[string]appInfo
apps map[string]appInfo
// tagType is the type of tag ("app")
tagType string
// cfgPath is the path to watch for configuration changes
cfgPath string
}
func (t *AppTagger) scanApp(f fs.File, fns string) {
func (t *AppTagger) scanApp(f *os.File, fns string) {
scanner := bufio.NewScanner(f)
ai := appInfo{tag: strings.TrimSuffix(fns, filepath.Ext(fns)), strings: make([]string, 0)}
@@ -56,7 +60,7 @@ func (t *AppTagger) scanApp(f fs.File, fns string) {
// EventMatch checks if a filesystem event should trigger configuration reload.
// It returns true if the event path contains "apps".
func (t *AppTagger) EventMatch(s string) bool {
return strings.Contains(s, "apps")
return strings.Contains(s, configDirMatch)
}
// EventCallback is triggered when the configuration directory changes.
@@ -71,43 +75,50 @@ func (t *AppTagger) EventCallback() {
for _, fn := range files {
fns := fn.Name()
cclog.Debugf("Process: %s", fns)
f, err := os.Open(fmt.Sprintf("%s/%s", t.cfgPath, fns))
f, err := os.Open(filepath.Join(t.cfgPath, fns))
if err != nil {
cclog.Errorf("error opening app file %s: %#v", fns, err)
continue
}
t.scanApp(f, fns)
f.Close()
}
}
// Register initializes the AppTagger by loading application patterns from embedded files.
// It also sets up a file watch on ./var/tagger/apps if it exists, allowing for
// Register initializes the AppTagger by loading application patterns from an external folder.
// It sets up a file watch on the configuration path (./var/tagger/apps by default), allowing for
// dynamic configuration updates without restarting the application.
// Returns an error if the embedded application files cannot be read.
// Returns an error if the configuration path does not exist or cannot be read.
func (t *AppTagger) Register() error {
t.cfgPath = "./var/tagger/apps"
t.tagType = "app"
if t.cfgPath == "" {
t.cfgPath = defaultConfigPath
}
t.tagType = tagTypeApp
t.apps = make(map[string]appInfo, 0)
files, err := appFiles.ReadDir("apps")
if !util.CheckFileExists(t.cfgPath) {
return fmt.Errorf("configuration path does not exist: %s", t.cfgPath)
}
files, err := os.ReadDir(t.cfgPath)
if err != nil {
return fmt.Errorf("error reading app folder: %#v", err)
}
t.apps = make(map[string]appInfo, 0)
for _, fn := range files {
fns := fn.Name()
cclog.Debugf("Process: %s", fns)
f, err := appFiles.Open(fmt.Sprintf("apps/%s", fns))
f, err := os.Open(filepath.Join(t.cfgPath, fns))
if err != nil {
return fmt.Errorf("error opening app file %s: %#v", fns, err)
cclog.Errorf("error opening app file %s: %#v", fns, err)
continue
}
defer f.Close()
t.scanApp(f, fns)
f.Close()
}
if util.CheckFileExists(t.cfgPath) {
t.EventCallback()
cclog.Infof("Setup file watch for %s", t.cfgPath)
util.AddListener(t.cfgPath, t)
}
cclog.Infof("Setup file watch for %s", t.cfgPath)
util.AddListener(t.cfgPath, t)
return nil
}

View File

@@ -5,6 +5,8 @@
package tagger
import (
"os"
"path/filepath"
"testing"
"github.com/ClusterCockpit/cc-backend/internal/repository"
@@ -29,28 +31,88 @@ func noErr(tb testing.TB, err error) {
}
}
func TestRegister(t *testing.T) {
var tagger AppTagger
func setupAppTaggerTestDir(t *testing.T) string {
t.Helper()
err := tagger.Register()
testDir := t.TempDir()
appsDir := filepath.Join(testDir, "apps")
err := os.MkdirAll(appsDir, 0o755)
noErr(t, err)
srcDir := "../../configs/tagger/apps"
files, err := os.ReadDir(srcDir)
noErr(t, err)
for _, file := range files {
if file.IsDir() {
continue
}
srcPath := filepath.Join(srcDir, file.Name())
dstPath := filepath.Join(appsDir, file.Name())
data, err := os.ReadFile(srcPath)
noErr(t, err)
err = os.WriteFile(dstPath, data, 0o644)
noErr(t, err)
}
return appsDir
}
func TestRegister(t *testing.T) {
appsDir := setupAppTaggerTestDir(t)
var tagger AppTagger
tagger.cfgPath = appsDir
tagger.tagType = tagTypeApp
tagger.apps = make(map[string]appInfo, 0)
files, err := os.ReadDir(appsDir)
noErr(t, err)
for _, fn := range files {
if fn.IsDir() {
continue
}
fns := fn.Name()
f, err := os.Open(filepath.Join(appsDir, fns))
noErr(t, err)
tagger.scanApp(f, fns)
f.Close()
}
if len(tagger.apps) != 16 {
t.Errorf("wrong summary for diagnostic \ngot: %d \nwant: 16", len(tagger.apps))
}
}
func TestMatch(t *testing.T) {
appsDir := setupAppTaggerTestDir(t)
r := setup(t)
job, err := r.FindByIDDirect(317)
noErr(t, err)
var tagger AppTagger
tagger.cfgPath = appsDir
tagger.tagType = tagTypeApp
tagger.apps = make(map[string]appInfo, 0)
err = tagger.Register()
files, err := os.ReadDir(appsDir)
noErr(t, err)
for _, fn := range files {
if fn.IsDir() {
continue
}
fns := fn.Name()
f, err := os.Open(filepath.Join(appsDir, fns))
noErr(t, err)
tagger.scanApp(f, fns)
f.Close()
}
tagger.Match(job)
if !r.HasTag(317, "app", "vasp") {

View File

@@ -1,26 +0,0 @@
{
"name": "Excessive CPU load",
"tag": "excessiveload",
"parameters": [
"excessivecpuload_threshold_factor",
"job_min_duration_seconds",
"sampling_interval_seconds"
],
"metrics": ["cpu_load"],
"requirements": [
"job.shared == \"none\"",
"job.duration > job_min_duration_seconds"
],
"variables": [
{
"name": "load_threshold",
"expr": "cpu_load.limits.peak * excessivecpuload_threshold_factor"
},
{
"name": "load_perc",
"expr": "1.0 - (cpu_load.avg / cpu_load.limits.peak)"
}
],
"rule": "cpu_load.avg > load_threshold",
"hint": "This job was detected as excessiveload because the average cpu load {{.cpu_load.avg}} falls above the threshold {{.load_threshold}}."
}

View File

@@ -1,22 +0,0 @@
{
"name": "Low ressource utilization",
"tag": "lowutilization",
"parameters": ["job_min_duration_seconds"],
"metrics": ["flops_any", "mem_bw"],
"requirements": [
"job.shared == \"none\"",
"job.duration > job_min_duration_seconds"
],
"variables": [
{
"name": "mem_bw_perc",
"expr": "1.0 - (mem_bw.avg / mem_bw.limits.peak)"
},
{
"name": "flops_any_perc",
"expr": "1.0 - (flops_any.avg / flops_any.limits.peak)"
}
],
"rule": "flops_any.avg < flops_any.limits.alert && mem_bw.avg < mem_bw.limits.alert",
"hint": "This job was detected as low utilization because the average flop rate {{.flops_any.avg}} falls below the threshold {{.flops_any.limits.alert}}."
}

View File

@@ -1,26 +0,0 @@
{
"name": "Low CPU load",
"tag": "lowload",
"parameters": [
"lowcpuload_threshold_factor",
"job_min_duration_seconds",
"sampling_interval_seconds"
],
"metrics": ["cpu_load"],
"requirements": [
"job.shared == \"none\"",
"job.duration > job_min_duration_seconds"
],
"variables": [
{
"name": "load_threshold",
"expr": "job.numCores * lowcpuload_threshold_factor"
},
{
"name": "load_perc",
"expr": "1.0 - (cpu_load.avg / cpu_load.limits.peak)"
}
],
"rule": "cpu_load.avg < cpu_load.limits.caution",
"hint": "This job was detected as lowload because the average cpu load {{.cpu_load}} falls below the threshold {{.cpu_load.limits.caution}}."
}

View File

@@ -1,14 +0,0 @@
{
"lowcpuload_threshold_factor": 0.9,
"excessivecpuload_threshold_factor": 1.1,
"highmemoryusage_threshold_factor": 0.9,
"node_load_imbalance_threshold_factor": 0.1,
"core_load_imbalance_threshold_factor": 0.1,
"high_memory_load_threshold_factor": 0.9,
"lowgpuload_threshold_factor": 0.7,
"memory_leak_slope_threshold": 0.1,
"job_min_duration_seconds": 600.0,
"sampling_interval_seconds": 30.0,
"cpu_load_pre_cutoff_samples": 11.0,
"cpu_load_core_pre_cutoff_samples": 6.0
}