From 42809e3f75256d282e3f7a5b8bdfd9980c222882 Mon Sep 17 00:00:00 2001 From: Jan Eitzinger Date: Tue, 13 Jan 2026 07:20:26 +0100 Subject: [PATCH] Remove embedded tagger rules --- configs/tagger/README.md | 0 {internal => configs}/tagger/apps/alf.txt | 0 {internal => configs}/tagger/apps/caracal.txt | 0 {internal => configs}/tagger/apps/chroma.txt | 0 {internal => configs}/tagger/apps/cp2k.txt | 0 {internal => configs}/tagger/apps/cpmd.txt | 0 {internal => configs}/tagger/apps/flame.txt | 0 {internal => configs}/tagger/apps/gromacs.txt | 0 {internal => configs}/tagger/apps/julia.txt | 0 {internal => configs}/tagger/apps/lammps.txt | 0 {internal => configs}/tagger/apps/matlab.txt | 0 .../tagger/apps/openfoam.txt | 0 {internal => configs}/tagger/apps/orca.txt | 0 {internal => configs}/tagger/apps/python.txt | 0 {internal => configs}/tagger/apps/starccm.txt | 0 .../tagger/apps/turbomole.txt | 0 {internal => configs}/tagger/apps/vasp.txt | 0 .../tagger/jobclasses/highload.json | 0 .../tagger/jobclasses/lowUtilization.json | 0 .../tagger/jobclasses/lowload.json | 0 .../tagger/jobclasses/parameters.json | 0 internal/tagger/classifyJob.go | 105 ++++++++++-------- internal/tagger/classifyJob_test.go | 8 +- internal/tagger/detectApp.go | 61 +++++----- internal/tagger/detectApp_test.go | 70 +++++++++++- 25 files changed, 166 insertions(+), 78 deletions(-) create mode 100644 configs/tagger/README.md rename {internal => configs}/tagger/apps/alf.txt (100%) rename {internal => configs}/tagger/apps/caracal.txt (100%) rename {internal => configs}/tagger/apps/chroma.txt (100%) rename {internal => configs}/tagger/apps/cp2k.txt (100%) rename {internal => configs}/tagger/apps/cpmd.txt (100%) rename {internal => configs}/tagger/apps/flame.txt (100%) rename {internal => configs}/tagger/apps/gromacs.txt (100%) rename {internal => configs}/tagger/apps/julia.txt (100%) rename {internal => configs}/tagger/apps/lammps.txt (100%) rename {internal => configs}/tagger/apps/matlab.txt (100%) rename {internal => configs}/tagger/apps/openfoam.txt (100%) rename {internal => configs}/tagger/apps/orca.txt (100%) rename {internal => configs}/tagger/apps/python.txt (100%) rename {internal => configs}/tagger/apps/starccm.txt (100%) rename {internal => configs}/tagger/apps/turbomole.txt (100%) rename {internal => configs}/tagger/apps/vasp.txt (100%) rename {internal => configs}/tagger/jobclasses/highload.json (100%) rename {internal => configs}/tagger/jobclasses/lowUtilization.json (100%) rename {internal => configs}/tagger/jobclasses/lowload.json (100%) rename {internal => configs}/tagger/jobclasses/parameters.json (100%) diff --git a/configs/tagger/README.md b/configs/tagger/README.md new file mode 100644 index 00000000..e69de29b diff --git a/internal/tagger/apps/alf.txt b/configs/tagger/apps/alf.txt similarity index 100% rename from internal/tagger/apps/alf.txt rename to configs/tagger/apps/alf.txt diff --git a/internal/tagger/apps/caracal.txt b/configs/tagger/apps/caracal.txt similarity index 100% rename from internal/tagger/apps/caracal.txt rename to configs/tagger/apps/caracal.txt diff --git a/internal/tagger/apps/chroma.txt b/configs/tagger/apps/chroma.txt similarity index 100% rename from internal/tagger/apps/chroma.txt rename to configs/tagger/apps/chroma.txt diff --git a/internal/tagger/apps/cp2k.txt b/configs/tagger/apps/cp2k.txt similarity index 100% rename from internal/tagger/apps/cp2k.txt rename to configs/tagger/apps/cp2k.txt diff --git a/internal/tagger/apps/cpmd.txt b/configs/tagger/apps/cpmd.txt similarity index 100% rename from internal/tagger/apps/cpmd.txt rename to configs/tagger/apps/cpmd.txt diff --git a/internal/tagger/apps/flame.txt b/configs/tagger/apps/flame.txt similarity index 100% rename from internal/tagger/apps/flame.txt rename to configs/tagger/apps/flame.txt diff --git a/internal/tagger/apps/gromacs.txt b/configs/tagger/apps/gromacs.txt similarity index 100% rename from internal/tagger/apps/gromacs.txt rename to configs/tagger/apps/gromacs.txt diff --git a/internal/tagger/apps/julia.txt b/configs/tagger/apps/julia.txt similarity index 100% rename from internal/tagger/apps/julia.txt rename to configs/tagger/apps/julia.txt diff --git a/internal/tagger/apps/lammps.txt b/configs/tagger/apps/lammps.txt similarity index 100% rename from internal/tagger/apps/lammps.txt rename to configs/tagger/apps/lammps.txt diff --git a/internal/tagger/apps/matlab.txt b/configs/tagger/apps/matlab.txt similarity index 100% rename from internal/tagger/apps/matlab.txt rename to configs/tagger/apps/matlab.txt diff --git a/internal/tagger/apps/openfoam.txt b/configs/tagger/apps/openfoam.txt similarity index 100% rename from internal/tagger/apps/openfoam.txt rename to configs/tagger/apps/openfoam.txt diff --git a/internal/tagger/apps/orca.txt b/configs/tagger/apps/orca.txt similarity index 100% rename from internal/tagger/apps/orca.txt rename to configs/tagger/apps/orca.txt diff --git a/internal/tagger/apps/python.txt b/configs/tagger/apps/python.txt similarity index 100% rename from internal/tagger/apps/python.txt rename to configs/tagger/apps/python.txt diff --git a/internal/tagger/apps/starccm.txt b/configs/tagger/apps/starccm.txt similarity index 100% rename from internal/tagger/apps/starccm.txt rename to configs/tagger/apps/starccm.txt diff --git a/internal/tagger/apps/turbomole.txt b/configs/tagger/apps/turbomole.txt similarity index 100% rename from internal/tagger/apps/turbomole.txt rename to configs/tagger/apps/turbomole.txt diff --git a/internal/tagger/apps/vasp.txt b/configs/tagger/apps/vasp.txt similarity index 100% rename from internal/tagger/apps/vasp.txt rename to configs/tagger/apps/vasp.txt diff --git a/internal/tagger/jobclasses/highload.json b/configs/tagger/jobclasses/highload.json similarity index 100% rename from internal/tagger/jobclasses/highload.json rename to configs/tagger/jobclasses/highload.json diff --git a/internal/tagger/jobclasses/lowUtilization.json b/configs/tagger/jobclasses/lowUtilization.json similarity index 100% rename from internal/tagger/jobclasses/lowUtilization.json rename to configs/tagger/jobclasses/lowUtilization.json diff --git a/internal/tagger/jobclasses/lowload.json b/configs/tagger/jobclasses/lowload.json similarity index 100% rename from internal/tagger/jobclasses/lowload.json rename to configs/tagger/jobclasses/lowload.json diff --git a/internal/tagger/jobclasses/parameters.json b/configs/tagger/jobclasses/parameters.json similarity index 100% rename from internal/tagger/jobclasses/parameters.json rename to configs/tagger/jobclasses/parameters.json diff --git a/internal/tagger/classifyJob.go b/internal/tagger/classifyJob.go index 70399218..b5f30949 100644 --- a/internal/tagger/classifyJob.go +++ b/internal/tagger/classifyJob.go @@ -2,15 +2,16 @@ // All rights reserved. This file is part of cc-backend. // Use of this source code is governed by a MIT-style // license that can be found in the LICENSE file. + package tagger import ( "bytes" - "embed" "encoding/json" "fmt" "maps" "os" + "path/filepath" "strings" "text/template" @@ -23,8 +24,16 @@ import ( "github.com/expr-lang/expr/vm" ) -//go:embed jobclasses/* -var jobClassFiles embed.FS +const ( + // defaultJobClassConfigPath is the default path for job classification configuration + defaultJobClassConfigPath = "./var/tagger/jobclasses" + // tagTypeJobClass is the tag type identifier for job classification tags + tagTypeJobClass = "jobClass" + // jobClassConfigDirMatch is the directory name used for matching filesystem events + jobClassConfigDirMatch = "jobclasses" + // parametersFileName is the name of the parameters configuration file + parametersFileName = "parameters.json" +) // Variable defines a named expression that can be computed and reused in rules. // Variables are evaluated before the main rule and their results are added to the environment. @@ -45,21 +54,21 @@ type ruleVariable struct { // and the final rule expression that determines if the job matches the classification. type RuleFormat struct { // Name is a human-readable description of the rule - Name string `json:"name"` + Name string `json:"name"` // Tag is the classification tag to apply if the rule matches - Tag string `json:"tag"` + Tag string `json:"tag"` // Parameters are shared values referenced in the rule (e.g., thresholds) - Parameters []string `json:"parameters"` + Parameters []string `json:"parameters"` // Metrics are the job metrics required for this rule (e.g., "cpu_load", "mem_used") - Metrics []string `json:"metrics"` + Metrics []string `json:"metrics"` // Requirements are boolean expressions that must be true for the rule to apply - Requirements []string `json:"requirements"` + Requirements []string `json:"requirements"` // Variables are computed values used in the rule expression - Variables []Variable `json:"variables"` + Variables []Variable `json:"variables"` // Rule is the boolean expression that determines if the job matches - Rule string `json:"rule"` + Rule string `json:"rule"` // Hint is a template string that generates a message when the rule matches - Hint string `json:"hint"` + Hint string `json:"hint"` } type ruleInfo struct { @@ -75,29 +84,29 @@ type ruleInfo struct { // This interface allows for easier testing and decoupling from the concrete repository implementation. type JobRepository interface { // HasTag checks if a job already has a specific tag - HasTag(jobId int64, tagType string, tagName string) bool + HasTag(jobID int64, tagType string, tagName string) bool // AddTagOrCreateDirect adds a tag to a job or creates it if it doesn't exist - AddTagOrCreateDirect(jobId int64, tagType string, tagName string) (tagId int64, err error) + AddTagOrCreateDirect(jobID int64, tagType string, tagName string) (tagID int64, err error) // UpdateMetadata updates job metadata with a key-value pair UpdateMetadata(job *schema.Job, key, val string) (err error) } // JobClassTagger classifies jobs based on configurable rules that evaluate job metrics and properties. -// Rules are loaded from embedded JSON files and can be dynamically reloaded from a watched directory. +// Rules are loaded from an external configuration directory and can be dynamically reloaded when files change. // When a job matches a rule, it is tagged with the corresponding classification and an optional hint message. type JobClassTagger struct { // rules maps classification tags to their compiled rule information - rules map[string]ruleInfo + rules map[string]ruleInfo // parameters are shared values (e.g., thresholds) used across multiple rules - parameters map[string]any + parameters map[string]any // tagType is the type of tag ("jobClass") - tagType string + tagType string // cfgPath is the path to watch for configuration changes - cfgPath string + cfgPath string // repo provides access to job database operations - repo JobRepository + repo JobRepository // getStatistics retrieves job statistics for analysis - getStatistics func(job *schema.Job) (map[string]schema.JobStatistics, error) + getStatistics func(job *schema.Job) (map[string]schema.JobStatistics, error) // getMetricConfig retrieves metric configuration (limits) for a cluster getMetricConfig func(cluster, subCluster string) map[string]*schema.Metric } @@ -169,7 +178,7 @@ func (t *JobClassTagger) prepareRule(b []byte, fns string) { // EventMatch checks if a filesystem event should trigger configuration reload. // It returns true if the event path contains "jobclasses". func (t *JobClassTagger) EventMatch(s string) bool { - return strings.Contains(s, "jobclasses") + return strings.Contains(s, jobClassConfigDirMatch) } // EventCallback is triggered when the configuration directory changes. @@ -181,9 +190,10 @@ func (t *JobClassTagger) EventCallback() { cclog.Fatal(err) } - if util.CheckFileExists(t.cfgPath + "/parameters.json") { + parametersFile := filepath.Join(t.cfgPath, parametersFileName) + if util.CheckFileExists(parametersFile) { cclog.Info("Merge parameters") - b, err := os.ReadFile(t.cfgPath + "/parameters.json") + b, err := os.ReadFile(parametersFile) if err != nil { cclog.Warnf("prepareRule() > open file error: %v", err) } @@ -198,13 +208,13 @@ func (t *JobClassTagger) EventCallback() { for _, fn := range files { fns := fn.Name() - if fns != "parameters.json" { + if fns != parametersFileName { cclog.Debugf("Process: %s", fns) - filename := fmt.Sprintf("%s/%s", t.cfgPath, fns) + filename := filepath.Join(t.cfgPath, fns) b, err := os.ReadFile(filename) if err != nil { cclog.Warnf("prepareRule() > open file error: %v", err) - return + continue } t.prepareRule(b, fns) } @@ -213,7 +223,8 @@ func (t *JobClassTagger) EventCallback() { func (t *JobClassTagger) initParameters() error { cclog.Info("Initialize parameters") - b, err := jobClassFiles.ReadFile("jobclasses/parameters.json") + parametersFile := filepath.Join(t.cfgPath, parametersFileName) + b, err := os.ReadFile(parametersFile) if err != nil { cclog.Warnf("prepareRule() > open file error: %v", err) return err @@ -227,13 +238,20 @@ func (t *JobClassTagger) initParameters() error { return nil } -// Register initializes the JobClassTagger by loading parameters and classification rules. -// It loads embedded configuration files and sets up a file watch on ./var/tagger/jobclasses -// if it exists, allowing for dynamic configuration updates without restarting the application. -// Returns an error if the embedded configuration files cannot be read or parsed. +// Register initializes the JobClassTagger by loading parameters and classification rules from external folder. +// It sets up a file watch on ./var/tagger/jobclasses if it exists, allowing for +// dynamic configuration updates without restarting the application. +// Returns an error if the configuration path does not exist or cannot be read. func (t *JobClassTagger) Register() error { - t.cfgPath = "./var/tagger/jobclasses" - t.tagType = "jobClass" + if t.cfgPath == "" { + t.cfgPath = defaultJobClassConfigPath + } + t.tagType = tagTypeJobClass + t.rules = make(map[string]ruleInfo) + + if !util.CheckFileExists(t.cfgPath) { + return fmt.Errorf("configuration path does not exist: %s", t.cfgPath) + } err := t.initParameters() if err != nil { @@ -241,31 +259,28 @@ func (t *JobClassTagger) Register() error { return err } - files, err := jobClassFiles.ReadDir("jobclasses") + files, err := os.ReadDir(t.cfgPath) if err != nil { - return fmt.Errorf("error reading app folder: %#v", err) + return fmt.Errorf("error reading jobclasses folder: %#v", err) } - t.rules = make(map[string]ruleInfo) + for _, fn := range files { fns := fn.Name() - if fns != "parameters.json" { - filename := fmt.Sprintf("jobclasses/%s", fns) + if fns != parametersFileName { cclog.Infof("Process: %s", fns) + filename := filepath.Join(t.cfgPath, fns) - b, err := jobClassFiles.ReadFile(filename) + b, err := os.ReadFile(filename) if err != nil { cclog.Warnf("prepareRule() > open file error: %v", err) - return err + continue } t.prepareRule(b, fns) } } - if util.CheckFileExists(t.cfgPath) { - t.EventCallback() - cclog.Infof("Setup file watch for %s", t.cfgPath) - util.AddListener(t.cfgPath, t) - } + cclog.Infof("Setup file watch for %s", t.cfgPath) + util.AddListener(t.cfgPath, t) t.repo = repository.GetJobRepository() t.getStatistics = archive.GetStatistics diff --git a/internal/tagger/classifyJob_test.go b/internal/tagger/classifyJob_test.go index bed7a8f0..f82cf807 100644 --- a/internal/tagger/classifyJob_test.go +++ b/internal/tagger/classifyJob_test.go @@ -13,13 +13,13 @@ type MockJobRepository struct { mock.Mock } -func (m *MockJobRepository) HasTag(jobId int64, tagType string, tagName string) bool { - args := m.Called(jobId, tagType, tagName) +func (m *MockJobRepository) HasTag(jobID int64, tagType string, tagName string) bool { + args := m.Called(jobID, tagType, tagName) return args.Bool(0) } -func (m *MockJobRepository) AddTagOrCreateDirect(jobId int64, tagType string, tagName string) (tagId int64, err error) { - args := m.Called(jobId, tagType, tagName) +func (m *MockJobRepository) AddTagOrCreateDirect(jobID int64, tagType string, tagName string) (tagID int64, err error) { + args := m.Called(jobID, tagType, tagName) return args.Get(0).(int64), args.Error(1) } diff --git a/internal/tagger/detectApp.go b/internal/tagger/detectApp.go index 0b8e3e7e..2a89ea21 100644 --- a/internal/tagger/detectApp.go +++ b/internal/tagger/detectApp.go @@ -7,9 +7,7 @@ package tagger import ( "bufio" - "embed" "fmt" - "io/fs" "os" "path/filepath" "regexp" @@ -21,8 +19,14 @@ import ( "github.com/ClusterCockpit/cc-lib/v2/util" ) -//go:embed apps/* -var appFiles embed.FS +const ( + // defaultConfigPath is the default path for application tagging configuration + defaultConfigPath = "./var/tagger/apps" + // tagTypeApp is the tag type identifier for application tags + tagTypeApp = "app" + // configDirMatch is the directory name used for matching filesystem events + configDirMatch = "apps" +) type appInfo struct { tag string @@ -30,19 +34,19 @@ type appInfo struct { } // AppTagger detects applications by matching patterns in job scripts. -// It loads application patterns from embedded files and can dynamically reload -// configuration from a watched directory. When a job script matches a pattern, +// It loads application patterns from an external configuration directory and can dynamically reload +// configuration when files change. When a job script matches a pattern, // the corresponding application tag is automatically applied. type AppTagger struct { // apps maps application tags to their matching patterns - apps map[string]appInfo + apps map[string]appInfo // tagType is the type of tag ("app") tagType string // cfgPath is the path to watch for configuration changes cfgPath string } -func (t *AppTagger) scanApp(f fs.File, fns string) { +func (t *AppTagger) scanApp(f *os.File, fns string) { scanner := bufio.NewScanner(f) ai := appInfo{tag: strings.TrimSuffix(fns, filepath.Ext(fns)), strings: make([]string, 0)} @@ -56,7 +60,7 @@ func (t *AppTagger) scanApp(f fs.File, fns string) { // EventMatch checks if a filesystem event should trigger configuration reload. // It returns true if the event path contains "apps". func (t *AppTagger) EventMatch(s string) bool { - return strings.Contains(s, "apps") + return strings.Contains(s, configDirMatch) } // EventCallback is triggered when the configuration directory changes. @@ -71,43 +75,50 @@ func (t *AppTagger) EventCallback() { for _, fn := range files { fns := fn.Name() cclog.Debugf("Process: %s", fns) - f, err := os.Open(fmt.Sprintf("%s/%s", t.cfgPath, fns)) + f, err := os.Open(filepath.Join(t.cfgPath, fns)) if err != nil { cclog.Errorf("error opening app file %s: %#v", fns, err) + continue } t.scanApp(f, fns) + f.Close() } } -// Register initializes the AppTagger by loading application patterns from embedded files. -// It also sets up a file watch on ./var/tagger/apps if it exists, allowing for +// Register initializes the AppTagger by loading application patterns from external folder. +// It sets up a file watch on ./var/tagger/apps if it exists, allowing for // dynamic configuration updates without restarting the application. -// Returns an error if the embedded application files cannot be read. +// Returns an error if the configuration path does not exist or cannot be read. func (t *AppTagger) Register() error { - t.cfgPath = "./var/tagger/apps" - t.tagType = "app" + if t.cfgPath == "" { + t.cfgPath = defaultConfigPath + } + t.tagType = tagTypeApp + t.apps = make(map[string]appInfo, 0) - files, err := appFiles.ReadDir("apps") + if !util.CheckFileExists(t.cfgPath) { + return fmt.Errorf("configuration path does not exist: %s", t.cfgPath) + } + + files, err := os.ReadDir(t.cfgPath) if err != nil { return fmt.Errorf("error reading app folder: %#v", err) } - t.apps = make(map[string]appInfo, 0) + for _, fn := range files { fns := fn.Name() cclog.Debugf("Process: %s", fns) - f, err := appFiles.Open(fmt.Sprintf("apps/%s", fns)) + f, err := os.Open(filepath.Join(t.cfgPath, fns)) if err != nil { - return fmt.Errorf("error opening app file %s: %#v", fns, err) + cclog.Errorf("error opening app file %s: %#v", fns, err) + continue } - defer f.Close() t.scanApp(f, fns) + f.Close() } - if util.CheckFileExists(t.cfgPath) { - t.EventCallback() - cclog.Infof("Setup file watch for %s", t.cfgPath) - util.AddListener(t.cfgPath, t) - } + cclog.Infof("Setup file watch for %s", t.cfgPath) + util.AddListener(t.cfgPath, t) return nil } diff --git a/internal/tagger/detectApp_test.go b/internal/tagger/detectApp_test.go index 1c44f670..fe5e7a21 100644 --- a/internal/tagger/detectApp_test.go +++ b/internal/tagger/detectApp_test.go @@ -5,6 +5,8 @@ package tagger import ( + "os" + "path/filepath" "testing" "github.com/ClusterCockpit/cc-backend/internal/repository" @@ -29,28 +31,88 @@ func noErr(tb testing.TB, err error) { } } -func TestRegister(t *testing.T) { - var tagger AppTagger +func setupAppTaggerTestDir(t *testing.T) string { + t.Helper() - err := tagger.Register() + testDir := t.TempDir() + appsDir := filepath.Join(testDir, "apps") + err := os.MkdirAll(appsDir, 0o755) noErr(t, err) + srcDir := "../../configs/tagger/apps" + files, err := os.ReadDir(srcDir) + noErr(t, err) + + for _, file := range files { + if file.IsDir() { + continue + } + srcPath := filepath.Join(srcDir, file.Name()) + dstPath := filepath.Join(appsDir, file.Name()) + + data, err := os.ReadFile(srcPath) + noErr(t, err) + + err = os.WriteFile(dstPath, data, 0o644) + noErr(t, err) + } + + return appsDir +} + +func TestRegister(t *testing.T) { + appsDir := setupAppTaggerTestDir(t) + + var tagger AppTagger + tagger.cfgPath = appsDir + tagger.tagType = tagTypeApp + tagger.apps = make(map[string]appInfo, 0) + + files, err := os.ReadDir(appsDir) + noErr(t, err) + + for _, fn := range files { + if fn.IsDir() { + continue + } + fns := fn.Name() + f, err := os.Open(filepath.Join(appsDir, fns)) + noErr(t, err) + tagger.scanApp(f, fns) + f.Close() + } + if len(tagger.apps) != 16 { t.Errorf("wrong summary for diagnostic \ngot: %d \nwant: 16", len(tagger.apps)) } } func TestMatch(t *testing.T) { + appsDir := setupAppTaggerTestDir(t) r := setup(t) job, err := r.FindByIDDirect(317) noErr(t, err) var tagger AppTagger + tagger.cfgPath = appsDir + tagger.tagType = tagTypeApp + tagger.apps = make(map[string]appInfo, 0) - err = tagger.Register() + files, err := os.ReadDir(appsDir) noErr(t, err) + for _, fn := range files { + if fn.IsDir() { + continue + } + fns := fn.Name() + f, err := os.Open(filepath.Join(appsDir, fns)) + noErr(t, err) + tagger.scanApp(f, fns) + f.Close() + } + tagger.Match(job) if !r.HasTag(317, "app", "vasp") {