From dca25cc6011b36b354c336f3347087f564dfe1b4 Mon Sep 17 00:00:00 2001 From: Jan Eitzinger Date: Fri, 27 Jun 2025 12:15:42 +0200 Subject: [PATCH 1/5] Safeguard changes to archive --- pkg/archive/archive.go | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/pkg/archive/archive.go b/pkg/archive/archive.go index c221e91..318d6b4 100644 --- a/pkg/archive/archive.go +++ b/pkg/archive/archive.go @@ -7,6 +7,7 @@ package archive import ( "encoding/json" "fmt" + "maps" "sync" "github.com/ClusterCockpit/cc-backend/pkg/log" @@ -60,6 +61,7 @@ var ( cache *lrucache.Cache = lrucache.New(128 * 1024 * 1024) ar ArchiveBackend useArchive bool + mutex sync.Mutex ) func Init(rawConfig json.RawMessage, disableArchive bool) error { @@ -184,6 +186,9 @@ func GetStatistics(job *schema.Job) (map[string]schema.JobStatistics, error) { // If the job is archived, find its `meta.json` file and override the Metadata // in that JSON file. If the job is not archived, nothing is done. func UpdateMetadata(job *schema.Job, metadata map[string]string) error { + mutex.Lock() + defer mutex.Unlock() + if job.State == schema.JobStateRunning || !useArchive { return nil } @@ -194,9 +199,7 @@ func UpdateMetadata(job *schema.Job, metadata map[string]string) error { return err } - for k, v := range metadata { - jobMeta.MetaData[k] = v - } + maps.Copy(jobMeta.MetaData, metadata) return ar.StoreJobMeta(jobMeta) } @@ -204,6 +207,9 @@ func UpdateMetadata(job *schema.Job, metadata map[string]string) error { // If the job is archived, find its `meta.json` file and override the tags list // in that JSON file. If the job is not archived, nothing is done. 
func UpdateTags(job *schema.Job, tags []*schema.Tag) error { + mutex.Lock() + defer mutex.Unlock() + if job.State == schema.JobStateRunning || !useArchive { return nil } From 83787842313bbef0194b1399620c18433eefaba4 Mon Sep 17 00:00:00 2001 From: Jan Eitzinger Date: Fri, 27 Jun 2025 12:16:06 +0200 Subject: [PATCH 2/5] Enclose terms by spaces in app detection --- internal/tagger/detectApp.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/internal/tagger/detectApp.go b/internal/tagger/detectApp.go index 00f8286..6b73f56 100644 --- a/internal/tagger/detectApp.go +++ b/internal/tagger/detectApp.go @@ -111,7 +111,7 @@ func (t *AppTagger) Match(job *schema.Job) { for _, a := range t.apps { tag := a.tag for _, s := range a.strings { - if strings.Contains(strings.ToLower(jobscript), s) { + if strings.Contains(strings.ToLower(jobscript), fmt.Sprintf(" %s ", s)) { if !r.HasTag(id, t.tagType, tag) { r.AddTagOrCreateDirect(id, t.tagType, tag) break out From 983aa592d84c38eaa4e47da5f7a89106ca7a0fdd Mon Sep 17 00:00:00 2001 From: Jan Eitzinger Date: Fri, 27 Jun 2025 12:16:17 +0200 Subject: [PATCH 3/5] refine highload rule --- internal/tagger/jobclasses/highload.json | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/internal/tagger/jobclasses/highload.json b/internal/tagger/jobclasses/highload.json index 01476c1..0d16b45 100644 --- a/internal/tagger/jobclasses/highload.json +++ b/internal/tagger/jobclasses/highload.json @@ -14,13 +14,13 @@ "variables": [ { "name": "load_threshold", - "expr": "(job.numCores / job.numNodes) * excessivecpuload_threshold_factor" + "expr": "cpu_load.limits.peak * excessivecpuload_threshold_factor" }, { "name": "load_perc", "expr": "1.0 - (cpu_load.avg / cpu_load.limits.peak)" } ], - "rule": "cpu_load.avg > cpu_load.limits.peak", - "hint": "This job was detected as excessiveload because the average cpu load {{.cpu_load.avg}} falls above the threshold {{.cpu_load.limits.peak}}." 
+ "rule": "cpu_load.avg > load_threshold", + "hint": "This job was detected as excessiveload because the average cpu load {{.cpu_load.avg}} falls above the threshold {{.load_threshold}}." } From 7531ba4b5cd3dce2e7d875690e195586a8e0122b Mon Sep 17 00:00:00 2001 From: Jan Eitzinger Date: Fri, 27 Jun 2025 14:11:10 +0200 Subject: [PATCH 4/5] Refine app detection Switch to regexp --- internal/repository/tags.go | 4 ++-- internal/tagger/apps/qe.txt | 3 --- internal/tagger/apps/starccm.txt | 2 ++ internal/tagger/detectApp.go | 4 +++- 4 files changed, 7 insertions(+), 6 deletions(-) delete mode 100644 internal/tagger/apps/qe.txt create mode 100644 internal/tagger/apps/starccm.txt diff --git a/internal/repository/tags.go b/internal/repository/tags.go index 3a81901..18ce62f 100644 --- a/internal/repository/tags.go +++ b/internal/repository/tags.go @@ -152,13 +152,13 @@ func (r *JobRepository) removeTagFromArchiveJobs(jobIds []int64) { for _, j := range jobIds { tags, err := r.getArchiveTags(&j) if err != nil { - log.Warn("Error while getting tags for job") + log.Warnf("Error while getting tags for job %d", j) continue } job, err := r.FindByIdDirect(j) if err != nil { - log.Warn("Error while getting job") + log.Warnf("Error while getting job %d", j) continue } diff --git a/internal/tagger/apps/qe.txt b/internal/tagger/apps/qe.txt deleted file mode 100644 index a1f7106..0000000 --- a/internal/tagger/apps/qe.txt +++ /dev/null @@ -1,3 +0,0 @@ -pw -neb -ph diff --git a/internal/tagger/apps/starccm.txt b/internal/tagger/apps/starccm.txt new file mode 100644 index 0000000..97cd388 --- /dev/null +++ b/internal/tagger/apps/starccm.txt @@ -0,0 +1,2 @@ +starccm+ +-podkey diff --git a/internal/tagger/detectApp.go b/internal/tagger/detectApp.go index 6b73f56..d82db1a 100644 --- a/internal/tagger/detectApp.go +++ b/internal/tagger/detectApp.go @@ -11,6 +11,7 @@ import ( "io/fs" "os" "path/filepath" + "regexp" "strings" "github.com/ClusterCockpit/cc-backend/internal/repository" @@ -111,7 
+112,8 @@ func (t *AppTagger) Match(job *schema.Job) { for _, a := range t.apps { tag := a.tag for _, s := range a.strings { - if strings.Contains(strings.ToLower(jobscript), fmt.Sprintf(" %s ", s)) { + matched, _ := regexp.MatchString(s, strings.ToLower(jobscript)) + if matched { if !r.HasTag(id, t.tagType, tag) { r.AddTagOrCreateDirect(id, t.tagType, tag) break out From 43edccb28429eba62d8274380c1010499e023309 Mon Sep 17 00:00:00 2001 From: Jan Eitzinger Date: Fri, 27 Jun 2025 14:11:37 +0200 Subject: [PATCH 5/5] Add enable jobtagger options. Reformat. --- pkg/schema/schemas/config.schema.json | 94 ++++++--------------------- 1 file changed, 21 insertions(+), 73 deletions(-) diff --git a/pkg/schema/schemas/config.schema.json b/pkg/schema/schemas/config.schema.json index c844174..2d22d6f 100644 --- a/pkg/schema/schemas/config.schema.json +++ b/pkg/schema/schemas/config.schema.json @@ -38,10 +38,7 @@ "db-driver": { "description": "sqlite3 or mysql (mysql will work for mariadb as well).", "type": "string", - "enum": [ - "sqlite3", - "mysql" - ] + "enum": ["sqlite3", "mysql"] }, "db": { "description": "For sqlite3 a filename, for mysql a DSN in this format: https://github.com/go-sql-driver/mysql#dsn-data-source-name (Without query parameters!).", @@ -54,10 +51,7 @@ "kind": { "description": "Backend type for job-archive", "type": "string", - "enum": [ - "file", - "s3" - ] + "enum": ["file", "s3"] }, "path": { "description": "Path to job archive for file backend", @@ -74,11 +68,7 @@ "policy": { "description": "Retention policy", "type": "string", - "enum": [ - "none", - "delete", - "move" - ] + "enum": ["none", "delete", "move"] }, "includeDB": { "description": "Also remove jobs from database", @@ -93,19 +83,19 @@ "type": "string" } }, - "required": [ - "policy" - ] + "required": ["policy"] } }, - "required": [ - "kind" - ] + "required": ["kind"] }, "disable-archive": { "description": "Keep all metric data in the metric data repositories, do not write to the 
job-archive.", "type": "boolean" }, + "enable-job-taggers": { + "description": "Turn on automatic application and jobclass taggers", + "type": "boolean" + }, "validate": { "description": "Validate all input json documents against json schema.", "type": "boolean" @@ -168,10 +158,7 @@ } } }, - "required": [ - "trigger", - "resolutions" - ] + "required": ["trigger", "resolutions"] }, "jwts": { "description": "For JWT token authentication.", @@ -198,9 +185,7 @@ "type": "boolean" } }, - "required": [ - "max-age" - ] + "required": ["max-age"] }, "oidc": { "provider": { @@ -215,9 +200,7 @@ "description": "", "type": "boolean" }, - "required": [ - "provider" - ] + "required": ["provider"] }, "ldap": { "description": "For LDAP Authentication and user synchronisation.", @@ -260,13 +243,7 @@ "type": "boolean" } }, - "required": [ - "url", - "user_base", - "search_dn", - "user_bind", - "user_filter" - ] + "required": ["url", "user_base", "search_dn", "user_bind", "user_filter"] }, "clusters": { "description": "Configuration for the clusters to be displayed.", @@ -284,12 +261,7 @@ "properties": { "kind": { "type": "string", - "enum": [ - "influxdb", - "prometheus", - "cc-metric-store", - "test" - ] + "enum": ["influxdb", "prometheus", "cc-metric-store", "test"] }, "url": { "type": "string" @@ -298,10 +270,7 @@ "type": "string" } }, - "required": [ - "kind", - "url" - ] + "required": ["kind", "url"] }, "filterRanges": { "description": "This option controls the slider ranges for the UI controls of numNodes, duration, and startTime.", @@ -318,10 +287,7 @@ "type": "integer" } }, - "required": [ - "from", - "to" - ] + "required": ["from", "to"] }, "duration": { "description": "UI slider range for duration", @@ -334,10 +300,7 @@ "type": "integer" } }, - "required": [ - "from", - "to" - ] + "required": ["from", "to"] }, "startTime": { "description": "UI slider range for start time", @@ -351,24 +314,13 @@ "type": "null" } }, - "required": [ - "from", - "to" - ] + "required": ["from", 
"to"] } }, - "required": [ - "numNodes", - "duration", - "startTime" - ] + "required": ["numNodes", "duration", "startTime"] } }, - "required": [ - "name", - "metricDataRepository", - "filterRanges" - ], + "required": ["name", "metricDataRepository", "filterRanges"], "minItems": 1 } }, @@ -490,9 +442,5 @@ ] } }, - "required": [ - "jwts", - "clusters", - "apiAllowedIPs" - ] + "required": ["jwts", "clusters", "apiAllowedIPs"] }