diff --git a/internal/repository/tags.go b/internal/repository/tags.go index 3a81901..18ce62f 100644 --- a/internal/repository/tags.go +++ b/internal/repository/tags.go @@ -152,13 +152,13 @@ func (r *JobRepository) removeTagFromArchiveJobs(jobIds []int64) { for _, j := range jobIds { tags, err := r.getArchiveTags(&j) if err != nil { - log.Warn("Error while getting tags for job") + log.Warnf("Error while getting tags for job %d", j) continue } job, err := r.FindByIdDirect(j) if err != nil { - log.Warn("Error while getting job") + log.Warnf("Error while getting job %d", j) continue } diff --git a/internal/tagger/apps/qe.txt b/internal/tagger/apps/qe.txt deleted file mode 100644 index a1f7106..0000000 --- a/internal/tagger/apps/qe.txt +++ /dev/null @@ -1,3 +0,0 @@ -pw -neb -ph diff --git a/internal/tagger/apps/starccm.txt b/internal/tagger/apps/starccm.txt new file mode 100644 index 0000000..97cd388 --- /dev/null +++ b/internal/tagger/apps/starccm.txt @@ -0,0 +1,2 @@ +starccm+ +-podkey diff --git a/internal/tagger/detectApp.go b/internal/tagger/detectApp.go index 00f8286..d82db1a 100644 --- a/internal/tagger/detectApp.go +++ b/internal/tagger/detectApp.go @@ -11,6 +11,7 @@ import ( "io/fs" "os" "path/filepath" + "regexp" "strings" "github.com/ClusterCockpit/cc-backend/internal/repository" @@ -111,7 +112,8 @@ func (t *AppTagger) Match(job *schema.Job) { for _, a := range t.apps { tag := a.tag for _, s := range a.strings { - if strings.Contains(strings.ToLower(jobscript), s) { + matched, _ := regexp.MatchString(s, strings.ToLower(jobscript)) + if matched { if !r.HasTag(id, t.tagType, tag) { r.AddTagOrCreateDirect(id, t.tagType, tag) break out diff --git a/internal/tagger/jobclasses/highload.json b/internal/tagger/jobclasses/highload.json index 01476c1..0d16b45 100644 --- a/internal/tagger/jobclasses/highload.json +++ b/internal/tagger/jobclasses/highload.json @@ -14,13 +14,13 @@ "variables": [ { "name": "load_threshold", - "expr": "(job.numCores / job.numNodes) * excessivecpuload_threshold_factor" + "expr": "cpu_load.limits.peak * excessivecpuload_threshold_factor" }, { "name": "load_perc", "expr": "1.0 - (cpu_load.avg / cpu_load.limits.peak)" } ], - "rule": "cpu_load.avg > cpu_load.limits.peak", - "hint": "This job was detected as excessiveload because the average cpu load {{.cpu_load.avg}} falls above the threshold {{.cpu_load.limits.peak}}." + "rule": "cpu_load.avg > load_threshold", + "hint": "This job was detected as excessiveload because the average cpu load {{.cpu_load.avg}} falls above the threshold {{.load_threshold}}." } diff --git a/pkg/archive/archive.go b/pkg/archive/archive.go index c221e91..318d6b4 100644 --- a/pkg/archive/archive.go +++ b/pkg/archive/archive.go @@ -7,6 +7,7 @@ package archive import ( "encoding/json" "fmt" + "maps" "sync" "github.com/ClusterCockpit/cc-backend/pkg/log" @@ -60,6 +61,7 @@ var ( cache *lrucache.Cache = lrucache.New(128 * 1024 * 1024) ar ArchiveBackend useArchive bool + mutex sync.Mutex ) func Init(rawConfig json.RawMessage, disableArchive bool) error { @@ -184,6 +186,9 @@ func GetStatistics(job *schema.Job) (map[string]schema.JobStatistics, error) { // If the job is archived, find its `meta.json` file and override the Metadata // in that JSON file. If the job is not archived, nothing is done. func UpdateMetadata(job *schema.Job, metadata map[string]string) error { + mutex.Lock() + defer mutex.Unlock() + if job.State == schema.JobStateRunning || !useArchive { return nil } @@ -194,9 +199,7 @@ func UpdateMetadata(job *schema.Job, metadata map[string]string) error { return err } - for k, v := range metadata { - jobMeta.MetaData[k] = v - } + maps.Copy(jobMeta.MetaData, metadata) return ar.StoreJobMeta(jobMeta) } @@ -204,6 +207,9 @@ func UpdateMetadata(job *schema.Job, metadata map[string]string) error { // If the job is archived, find its `meta.json` file and override the tags list // in that JSON file. If the job is not archived, nothing is done. func UpdateTags(job *schema.Job, tags []*schema.Tag) error { + mutex.Lock() + defer mutex.Unlock() + if job.State == schema.JobStateRunning || !useArchive { return nil } diff --git a/pkg/schema/schemas/config.schema.json b/pkg/schema/schemas/config.schema.json index c844174..2d22d6f 100644 --- a/pkg/schema/schemas/config.schema.json +++ b/pkg/schema/schemas/config.schema.json @@ -38,10 +38,7 @@ "db-driver": { "description": "sqlite3 or mysql (mysql will work for mariadb as well).", "type": "string", - "enum": [ - "sqlite3", - "mysql" - ] + "enum": ["sqlite3", "mysql"] }, "db": { "description": "For sqlite3 a filename, for mysql a DSN in this format: https://github.com/go-sql-driver/mysql#dsn-data-source-name (Without query parameters!).", @@ -54,10 +51,7 @@ "kind": { "description": "Backend type for job-archive", "type": "string", - "enum": [ - "file", - "s3" - ] + "enum": ["file", "s3"] }, "path": { "description": "Path to job archive for file backend", @@ -74,11 +68,7 @@ "policy": { "description": "Retention policy", "type": "string", - "enum": [ - "none", - "delete", - "move" - ] + "enum": ["none", "delete", "move"] }, "includeDB": { "description": "Also remove jobs from database", @@ -93,19 +83,19 @@ "type": "string" } }, - "required": [ - "policy" - ] + "required": ["policy"] } }, - "required": [ - "kind" - ] + "required": ["kind"] }, "disable-archive": { "description": "Keep all metric data in the metric data repositories, do not write to the job-archive.", "type": "boolean" }, + "enable-job-taggers": { + "description": "Turn on automatic application and jobclass taggers", + "type": "boolean" + }, "validate": { "description": "Validate all input json documents against json schema.", "type": "boolean" @@ -168,10 +158,7 @@ } } }, - "required": [ - "trigger", - "resolutions" - ] + "required": ["trigger", "resolutions"] }, "jwts": { "description": "For JWT token authentication.", @@ -198,9 +185,7 @@ "type": "boolean" } }, - "required": [ - "max-age" - ] + "required": ["max-age"] }, "oidc": { "provider": { @@ -215,9 +200,7 @@ "description": "", "type": "boolean" }, - "required": [ - "provider" - ] + "required": ["provider"] }, "ldap": { "description": "For LDAP Authentication and user synchronisation.", @@ -260,13 +243,7 @@ "type": "boolean" } }, - "required": [ - "url", - "user_base", - "search_dn", - "user_bind", - "user_filter" - ] + "required": ["url", "user_base", "search_dn", "user_bind", "user_filter"] }, "clusters": { "description": "Configuration for the clusters to be displayed.", @@ -284,12 +261,7 @@ "properties": { "kind": { "type": "string", - "enum": [ - "influxdb", - "prometheus", - "cc-metric-store", - "test" - ] + "enum": ["influxdb", "prometheus", "cc-metric-store", "test"] }, "url": { "type": "string" @@ -298,10 +270,7 @@ "type": "string" } }, - "required": [ - "kind", - "url" - ] + "required": ["kind", "url"] }, "filterRanges": { "description": "This option controls the slider ranges for the UI controls of numNodes, duration, and startTime.", @@ -318,10 +287,7 @@ "type": "integer" } }, - "required": [ - "from", - "to" - ] + "required": ["from", "to"] }, "duration": { "description": "UI slider range for duration", @@ -334,10 +300,7 @@ "type": "integer" } }, - "required": [ - "from", - "to" - ] + "required": ["from", "to"] }, "startTime": { "description": "UI slider range for start time", @@ -351,24 +314,13 @@ "type": "null" } }, - "required": [ - "from", - "to" - ] + "required": ["from", "to"] } }, - "required": [ - "numNodes", - "duration", - "startTime" - ] + "required": ["numNodes", "duration", "startTime"] } }, - "required": [ - "name", - "metricDataRepository", - "filterRanges" - ], + "required": ["name", "metricDataRepository", "filterRanges"], "minItems": 1 } }, @@ -490,9 +442,5 @@ ] } }, - "required": [ - "jwts", - "clusters", - "apiAllowedIPs" - ] + "required": ["jwts", "clusters", "apiAllowedIPs"] }