From 436afa4a61116035e069801650551e8bd84c57f9 Mon Sep 17 00:00:00 2001 From: Christoph Kluge Date: Thu, 18 Dec 2025 15:55:30 +0100 Subject: [PATCH 01/59] fix tag count by including type in grouping --- internal/repository/tags.go | 9 +++++---- internal/routerConfig/routes.go | 4 ++-- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/internal/repository/tags.go b/internal/repository/tags.go index 5ca13382..8a076e8a 100644 --- a/internal/repository/tags.go +++ b/internal/repository/tags.go @@ -224,10 +224,10 @@ func (r *JobRepository) CountTags(user *schema.User) (tags []schema.Tag, counts } // Query and Count Jobs with attached Tags - q := sq.Select("t.tag_name, t.id, count(jt.tag_id)"). + q := sq.Select("t.tag_type, t.tag_name, t.id, count(jt.tag_id)"). From("tag t"). LeftJoin("jobtag jt ON t.id = jt.tag_id"). - GroupBy("t.tag_name") + GroupBy("t.tag_type, t.tag_name") // Build scope list for filtering var scopeBuilder strings.Builder @@ -260,14 +260,15 @@ func (r *JobRepository) CountTags(user *schema.User) (tags []schema.Tag, counts counts = make(map[string]int) for rows.Next() { + var tagType string var tagName string var tagId int var count int - if err = rows.Scan(&tagName, &tagId, &count); err != nil { + if err = rows.Scan(&tagType, &tagName, &tagId, &count); err != nil { return nil, nil, err } // Use tagId as second Map-Key component to differentiate tags with identical names - counts[fmt.Sprint(tagName, tagId)] = count + counts[fmt.Sprint(tagType, tagName, tagId)] = count } err = rows.Err() diff --git a/internal/routerConfig/routes.go b/internal/routerConfig/routes.go index c2126cd0..4466034d 100644 --- a/internal/routerConfig/routes.go +++ b/internal/routerConfig/routes.go @@ -205,13 +205,13 @@ func setupTaglistRoute(i InfoType, r *http.Request) InfoType { "id": tag.ID, "name": tag.Name, "scope": tag.Scope, - "count": counts[fmt.Sprint(tag.Name, tag.ID)], + "count": counts[fmt.Sprint(tag.Type, tag.Name, tag.ID)], } tagMap[tag.Type] = append(tagMap[tag.Type], tagItem) } } else if userAuthlevel < 4 && userAuthlevel >= 2 { // User+ : Show global and admin scope only if at least 1 tag used, private scope regardless of count for _, tag := range tags { - tagCount := counts[fmt.Sprint(tag.Name, tag.ID)] + tagCount := counts[fmt.Sprint(tag.Type, tag.Name, tag.ID)] if ((tag.Scope == "global" || tag.Scope == "admin") && tagCount >= 1) || (tag.Scope != "global" && tag.Scope != "admin") { tagItem := map[string]interface{}{ "id": tag.ID, From c58b01a602543d893c0ffdecb34461ce4964a725 Mon Sep 17 00:00:00 2001 From: Christoph Kluge Date: Fri, 19 Dec 2025 14:42:02 +0100 Subject: [PATCH 02/59] fix wrong render condition order in nodeList --- web/frontend/src/systems/NodeList.svelte | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/web/frontend/src/systems/NodeList.svelte b/web/frontend/src/systems/NodeList.svelte index c01ef237..e904076e 100644 --- a/web/frontend/src/systems/NodeList.svelte +++ b/web/frontend/src/systems/NodeList.svelte @@ -246,16 +246,7 @@ {$nodesQuery.error.message} - {:else} - {#each nodes as nodeData (nodeData.host)} - - {:else} - - No nodes found - - {/each} - {/if} - {#if $nodesQuery.fetching || !$nodesQuery.data} + {:else if $nodesQuery.fetching || !$nodesQuery.data}
@@ -272,6 +263,14 @@
+ {:else} + {#each nodes as nodeData (nodeData.host)} + + {:else} + + No nodes found + + {/each} {/if} From 7a0975b94d8af215139c2202c55f2dbfa0a2153a Mon Sep 17 00:00:00 2001 From: Christoph Kluge Date: Fri, 19 Dec 2025 15:10:15 +0100 Subject: [PATCH 03/59] final fix render race condition if metrics change in nodeList --- web/frontend/src/Systems.root.svelte | 2 +- web/frontend/src/systems/NodeList.svelte | 24 ++++++++++--------- .../src/systems/nodelist/NodeListRow.svelte | 18 ++++++++++++++ 3 files changed, 32 insertions(+), 12 deletions(-) diff --git a/web/frontend/src/Systems.root.svelte b/web/frontend/src/Systems.root.svelte index b27cefa9..d81a64cd 100644 --- a/web/frontend/src/Systems.root.svelte +++ b/web/frontend/src/Systems.root.svelte @@ -269,7 +269,7 @@ {:else} - + {/if} {/if} diff --git a/web/frontend/src/systems/NodeList.svelte b/web/frontend/src/systems/NodeList.svelte index e904076e..fa758c18 100644 --- a/web/frontend/src/systems/NodeList.svelte +++ b/web/frontend/src/systems/NodeList.svelte @@ -5,7 +5,7 @@ - `cluster String`: The nodes' cluster - `subCluster String`: The nodes' subCluster [Default: ""] - `ccconfig Object?`: The ClusterCockpit Config Context [Default: null] - - `selectedMetrics [String]`: The array of selected metrics [Default []] + - `pendingSelectedMetrics [String]`: The array of selected metrics [Default []] - `selectedResolution Number?`: The selected data resolution [Default: 0] - `hostnameFilter String?`: The active hostnamefilter [Default: ""] - `hoststateFilter String?`: The active hoststatefilter [Default: ""] @@ -27,7 +27,7 @@ cluster, subCluster = "", ccconfig = null, - selectedMetrics = [], + pendingSelectedMetrics = [], selectedResolution = 0, hostnameFilter = "", hoststateFilter = "", @@ -94,6 +94,7 @@ /* State Init */ let nodes = $state([]); + let selectedMetrics = $state(pendingSelectedMetrics); let page = $state(1); let itemsPerPage = $state(usePaging ? 
(ccconfig?.nodeList_nodesPerPage || 10) : 10); let headerPaddingTop = $state(0); @@ -110,7 +111,7 @@ stateFilter: hoststateFilter, nodeFilter: hostnameFilter, scopes: ["core", "socket", "accelerator"], - metrics: selectedMetrics, + metrics: pendingSelectedMetrics, from: from.toISOString(), to: to.toISOString(), paging: paging, @@ -140,15 +141,17 @@ $effect(() => { if ($nodesQuery?.data) { untrack(() => { - handleNodes($nodesQuery?.data?.nodeMetricsList); + nodes = handleNodes($nodesQuery?.data?.nodeMetricsList); + matchedNodes = $nodesQuery?.data?.totalNodes || 0; }); + selectedMetrics = [...pendingSelectedMetrics]; // Trigger Rerender in NodeListRow Only After Data is Fetched }; }); $effect(() => { // Triggers (Except Paging) from, to - selectedMetrics, selectedResolution + pendingSelectedMetrics, selectedResolution hostnameFilter, hoststateFilter // Continous Scroll: Paging if parameters change: Existing entries will not match new selections // Nodes Array Reset in HandleNodes func @@ -162,17 +165,16 @@ if (data) { if (usePaging) { // console.log('New Paging', $state.snapshot(paging)) - nodes = [...data.items].sort((a, b) => a.host.localeCompare(b.host)); + return [...data.items].sort((a, b) => a.host.localeCompare(b.host)); } else { if ($state.snapshot(page) == 1) { // console.log('Page 1 Reset', [...data.items]) - nodes = [...data.items].sort((a, b) => a.host.localeCompare(b.host)); + return [...data.items].sort((a, b) => a.host.localeCompare(b.host)); } else { // console.log('Add Nodes', $state.snapshot(nodes), [...data.items]) - nodes = nodes.concat([...data.items]) + return nodes.concat([...data.items]) } } - matchedNodes = data.totalNodes; }; }; @@ -228,7 +230,7 @@ {/if} - {#each selectedMetrics as metric (metric)} + {#each pendingSelectedMetrics as metric (metric)} {:else if $nodesQuery.fetching || !$nodesQuery.data} - +
{#if !usePaging}

diff --git a/web/frontend/src/systems/nodelist/NodeListRow.svelte b/web/frontend/src/systems/nodelist/NodeListRow.svelte index bc93a323..e7e095ea 100644 --- a/web/frontend/src/systems/nodelist/NodeListRow.svelte +++ b/web/frontend/src/systems/nodelist/NodeListRow.svelte @@ -128,6 +128,24 @@ } return pendingExtendedLegendData; } + + /* Inspect */ + // $inspect(selectedMetrics).with((type, selectedMetrics) => { + // console.log(type, 'selectedMetrics', selectedMetrics) + // }); + + // $inspect(nodeData).with((type, nodeData) => { + // console.log(type, 'nodeData', nodeData) + // }); + + // $inspect(refinedData).with((type, refinedData) => { + // console.log(type, 'refinedData', refinedData) + // }); + + // $inspect(dataHealth).with((type, dataHealth) => { + // console.log(type, 'dataHealth', dataHealth) + // }); + From 91b90d033e8703acffe5740872310a2f094fdcd2 Mon Sep 17 00:00:00 2001 From: Christoph Kluge Date: Fri, 19 Dec 2025 15:27:35 +0100 Subject: [PATCH 04/59] fix metric select drag and drop --- .../src/generic/select/MetricSelection.svelte | 20 ++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/web/frontend/src/generic/select/MetricSelection.svelte b/web/frontend/src/generic/select/MetricSelection.svelte index 8bcaefcb..eeab56d7 100644 --- a/web/frontend/src/generic/select/MetricSelection.svelte +++ b/web/frontend/src/generic/select/MetricSelection.svelte @@ -107,13 +107,18 @@ } } + function columnsDragOver(event) { + event.preventDefault(); + event.dataTransfer.dropEffect = 'move'; + } + function columnsDragStart(event, i) { event.dataTransfer.effectAllowed = "move"; event.dataTransfer.dropEffect = "move"; event.dataTransfer.setData("text/plain", i); } - function columnsDrag(event, target) { + function columnsDrop(event, target) { event.dataTransfer.dropEffect = "move"; const start = Number.parseInt(event.dataTransfer.getData("text/plain")); @@ -182,19 +187,18 @@ {/if} {#each listedMetrics as metric, index (metric)}

  • { - event.preventDefault() - return false + columnsDragOver(event) }} ondragstart={(event) => { columnsDragStart(event, index) }} ondrop={(event) => { event.preventDefault() - columnsDrag(event, index) + columnsDrop(event, index) }} ondragenter={() => (columnHovering = index)} > @@ -237,4 +241,10 @@ color: #fff; cursor: grabbing; } + + li.prevent-select { + -webkit-user-select: none; /* Safari */ + -ms-user-select: none; /* IE 10 and IE 11 */ + user-select: none; /* Standard syntax */ +} From af7d208c21c9dfbd539d2baa9c6bd6d4ea95d85c Mon Sep 17 00:00:00 2001 From: Christoph Kluge Date: Fri, 19 Dec 2025 16:16:57 +0100 Subject: [PATCH 05/59] remove unused class --- web/frontend/src/generic/select/MetricSelection.svelte | 6 ------ 1 file changed, 6 deletions(-) diff --git a/web/frontend/src/generic/select/MetricSelection.svelte b/web/frontend/src/generic/select/MetricSelection.svelte index eeab56d7..67bbbd01 100644 --- a/web/frontend/src/generic/select/MetricSelection.svelte +++ b/web/frontend/src/generic/select/MetricSelection.svelte @@ -241,10 +241,4 @@ color: #fff; cursor: grabbing; } - - li.prevent-select { - -webkit-user-select: none; /* Safari */ - -ms-user-select: none; /* IE 10 and IE 11 */ - user-select: none; /* Standard syntax */ -} From 7acc89e42d6b23f0250f387f93dc72223cce1655 Mon Sep 17 00:00:00 2001 From: Christoph Kluge Date: Fri, 19 Dec 2025 17:52:21 +0100 Subject: [PATCH 06/59] move public dash close button --- web/frontend/src/DashPublic.root.svelte | 27 +++++++++++++++++-------- 1 file changed, 19 insertions(+), 8 deletions(-) diff --git a/web/frontend/src/DashPublic.root.svelte b/web/frontend/src/DashPublic.root.svelte index 36e6703e..25e2683c 100644 --- a/web/frontend/src/DashPublic.root.svelte +++ b/web/frontend/src/DashPublic.root.svelte @@ -338,7 +338,7 @@ - + - - - {#if $statusQuery.fetching || $statesTimed.fetching} @@ -368,6 +363,13 @@ {:else if $statusQuery.error || $statesTimed.error} + + + + + {#if $statusQuery.error} @@ -385,8 +387,17 @@ - -

    Cluster {presetCluster.charAt(0).toUpperCase() + presetCluster.slice(1)}


    Cluster {presetCluster.charAt(0).toUpperCase() + presetCluster.slice(1)}


    CPU(s)

    {[...clusterInfo?.processorTypes].join(', ')}

    From fdee4f89386aa8c712effe19467cf755d97807e8 Mon Sep 17 00:00:00 2001 From: Jan Eitzinger Date: Sat, 20 Dec 2025 09:21:58 +0100 Subject: [PATCH 07/59] Integrate NATS API. Only start either REST start/stop API or NATS start/stop API --- cmd/cc-backend/server.go | 26 +++++++++++++++++--------- internal/api/rest.go | 7 +++++-- 2 files changed, 22 insertions(+), 11 deletions(-) diff --git a/cmd/cc-backend/server.go b/cmd/cc-backend/server.go index 2c5ce8bc..4ed79622 100644 --- a/cmd/cc-backend/server.go +++ b/cmd/cc-backend/server.go @@ -49,9 +49,10 @@ const ( // Server encapsulates the HTTP server state and dependencies type Server struct { - router *mux.Router - server *http.Server - apiHandle *api.RestAPI + router *mux.Router + server *http.Server + restAPIHandle *api.RestAPI + natsAPIHandle *api.NatsAPI } func onFailureResponse(rw http.ResponseWriter, r *http.Request, err error) { @@ -104,7 +105,7 @@ func (s *Server) init() error { authHandle := auth.GetAuthInstance() - s.apiHandle = api.New() + s.restAPIHandle = api.New() info := map[string]any{} info["hasOpenIDConnect"] = false @@ -240,13 +241,20 @@ func (s *Server) init() error { // Mount all /monitoring/... and /api/... routes. routerConfig.SetupRoutes(secured, buildInfo) - s.apiHandle.MountAPIRoutes(securedapi) - s.apiHandle.MountUserAPIRoutes(userapi) - s.apiHandle.MountConfigAPIRoutes(configapi) - s.apiHandle.MountFrontendAPIRoutes(frontendapi) + s.restAPIHandle.MountAPIRoutes(securedapi) + s.restAPIHandle.MountUserAPIRoutes(userapi) + s.restAPIHandle.MountConfigAPIRoutes(configapi) + s.restAPIHandle.MountFrontendAPIRoutes(frontendapi) + + if config.Keys.APISubjects != nil { + s.natsAPIHandle = api.NewNatsAPI() + if err := s.natsAPIHandle.StartSubscriptions(); err != nil { + return fmt.Errorf("starting NATS subscriptions: %w", err) + } + } if memorystore.InternalCCMSFlag { - s.apiHandle.MountMetricStoreAPIRoutes(metricstoreapi) + s.restAPIHandle.MountMetricStoreAPIRoutes(metricstoreapi) } if config.Keys.EmbedStaticFiles { diff --git a/internal/api/rest.go b/internal/api/rest.go index 8232b64e..ebcf31ed 100644 --- a/internal/api/rest.go +++ b/internal/api/rest.go @@ -79,8 +79,11 @@ func (api *RestAPI) MountAPIRoutes(r *mux.Router) { // Slurm node state r.HandleFunc("/nodestate/", api.updateNodeStates).Methods(http.MethodPost, http.MethodPut) // Job Handler - r.HandleFunc("/jobs/start_job/", api.startJob).Methods(http.MethodPost, http.MethodPut) - r.HandleFunc("/jobs/stop_job/", api.stopJobByRequest).Methods(http.MethodPost, http.MethodPut) + if config.Keys.APISubjects == nil { + cclog.Info("Enabling REST start/stop job API") + r.HandleFunc("/jobs/start_job/", api.startJob).Methods(http.MethodPost, http.MethodPut) + r.HandleFunc("/jobs/stop_job/", api.stopJobByRequest).Methods(http.MethodPost, http.MethodPut) + } r.HandleFunc("/jobs/", api.getJobs).Methods(http.MethodGet) r.HandleFunc("/jobs/{id}", api.getJobByID).Methods(http.MethodPost) r.HandleFunc("/jobs/{id}", api.getCompleteJobByID).Methods(http.MethodGet) From e56532e5c8b8b52f9e1089552cc949fb6844db43 Mon Sep 17 00:00:00 2001 From: Jan Eitzinger Date: Sat, 20 Dec 2025 09:35:54 +0100 Subject: [PATCH 08/59] Add example json API payloads --- configs/startJobPayload.json | 22 ++++++++++++++++++++++ configs/stopJobPayload.json | 7 +++++++ 2 files changed, 29 insertions(+) create mode 100644 configs/startJobPayload.json create mode 100644 configs/stopJobPayload.json diff --git a/configs/startJobPayload.json b/configs/startJobPayload.json new file mode 100644 index 
00000000..9517876f --- /dev/null +++ b/configs/startJobPayload.json @@ -0,0 +1,22 @@ +{ + "cluster": "fritz", + "jobId": 123000, + "jobState": "running", + "numAcc": 0, + "numHwthreads": 72, + "numNodes": 1, + "partition": "main", + "requestedMemory": 128000, + "resources": [{ "hostname": "f0726" }], + "startTime": 1649723812, + "subCluster": "main", + "submitTime": 1649723812, + "user": "k106eb10", + "project": "k106eb", + "walltime": 86400, + "metaData": { + "slurmInfo": "JobId=398759\nJobName=myJob\nUserId=dummyUser\nGroupId=dummyGroup\nAccount=dummyAccount\nQOS=normal Requeue=False Restarts=0 BatchFlag=True\nTimeLimit=1439'\nSubmitTime=2023-02-09T14:10:18\nPartition=singlenode\nNodeList=xx\nNumNodes=xx NumCPUs=72 NumTasks=72 CPUs/Task=1\nNTasksPerNode:Socket:Core=0:None:None\nTRES_req=cpu=72,mem=250000M,node=1,billing=72\nTRES_alloc=cpu=72,node=1,billing=72\nCommand=myCmd\nWorkDir=myDir\nStdErr=\nStdOut=\n", + "jobScript": "#!/bin/bash -l\n#SBATCH --job-name=dummy_job\n#SBATCH --time=23:59:00\n#SBATCH --partition=singlenode\n#SBATCH --ntasks=72\n#SBATCH --hint=multithread\n#SBATCH --chdir=/home/atuin/k106eb/dummy/\n#SBATCH --export=NONE\nunset SLURM_EXPORT_ENV\n\n#This is a dummy job script\n./mybinary\n", + "jobName": "ams_pipeline" + } +} diff --git a/configs/stopJobPayload.json b/configs/stopJobPayload.json new file mode 100644 index 00000000..baf76f95 --- /dev/null +++ b/configs/stopJobPayload.json @@ -0,0 +1,7 @@ +{ + "cluster": "fritz", + "jobId": 123000, + "jobState": "completed", + "startTime": 1649723812, + "stopTime": 1649763839 +} From 3cfcd301281c14af88f60e10790c4d52e44c213b Mon Sep 17 00:00:00 2001 From: Jan Eitzinger Date: Sat, 20 Dec 2025 10:17:54 +0100 Subject: [PATCH 09/59] Add CLAUDE.md documentation for Claude Code MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Provides architecture overview, build commands, and development workflows to help future Claude Code instances work productively in this codebase. Includes guidance on GraphQL/REST API patterns, database migrations, and the repository/metric data architecture. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Sonnet 4.5 --- CLAUDE.md | 198 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 198 insertions(+) create mode 100644 CLAUDE.md diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 00000000..2bb08c98 --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1,198 @@ +# CLAUDE.md + +This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository. + +## Project Overview + +ClusterCockpit is a job-specific performance monitoring framework for HPC clusters. This is a Golang backend that provides REST and GraphQL APIs, serves a Svelte-based frontend, and manages job archives and metric data from various time-series databases. + +## Build and Development Commands + +### Building + +```bash +# Build everything (frontend + backend) +make + +# Build only the frontend +make frontend + +# Build only the backend (requires frontend to be built first) +go build -ldflags='-s -X main.date=$(date +"%Y-%m-%d:T%H:%M:%S") -X main.version=1.4.4 -X main.commit=$(git rev-parse --short HEAD)' ./cmd/cc-backend +``` + +### Testing + +```bash +# Run all tests +make test + +# Run tests with verbose output +go test -v ./... 
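+
+# Run a single test function by name (-run takes a regex; the test name
+# below is illustrative, not taken from this repository)
+go test -v -run TestJobRepository ./internal/repository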
+
+# Run tests for a specific package
+go test ./internal/repository
+```
+
+### Code Generation
+
+```bash
+# Regenerate GraphQL schema and resolvers (after modifying api/*.graphqls)
+make graphql
+
+# Regenerate Swagger/OpenAPI docs (after modifying API comments)
+make swagger
+```
+
+### Frontend Development
+
+```bash
+cd web/frontend
+
+# Install dependencies
+npm install
+
+# Build for production
+npm run build
+
+# Development mode with watch
+npm run dev
+```
+
+### Running
+
+```bash
+# Initialize database and create admin user
+./cc-backend -init-db -add-user demo:admin:demo
+
+# Start server in development mode (enables GraphQL Playground and Swagger UI)
+./cc-backend -server -dev -loglevel info
+
+# Start demo with sample data
+./startDemo.sh
+```
+
+## Architecture
+
+### Backend Structure
+
+The backend follows a layered architecture with clear separation of concerns:
+
+- **cmd/cc-backend**: Entry point, orchestrates initialization of all subsystems
+- **internal/repository**: Data access layer using repository pattern
+  - Abstracts database operations (SQLite/MySQL)
+  - Implements LRU caching for performance
+  - Provides repositories for Job, User, Node, and Tag entities
+  - Transaction support for batch operations
+- **internal/api**: REST API endpoints (Swagger/OpenAPI documented)
+- **internal/graph**: GraphQL API (uses gqlgen)
+  - Schema in `api/*.graphqls`
+  - Generated code in `internal/graph/generated/`
+  - Resolvers in `internal/graph/schema.resolvers.go`
+- **internal/auth**: Authentication layer
+  - Supports local accounts, LDAP, OIDC, and JWT tokens
+  - Implements rate limiting for login attempts
+- **internal/metricdata**: Metric data repository abstraction
+  - Pluggable backends: cc-metric-store, Prometheus, InfluxDB
+  - Each cluster can have a different metric data backend
+- **internal/archiver**: Job archiving to file-based archive
+- **pkg/archive**: Job archive backend implementations
+  - File system backend (default)
+  - S3 backend
+  - SQLite backend (experimental)
+- **pkg/nats**: NATS integration for metric ingestion
+
+### Frontend Structure
+
+- **web/frontend**: Svelte 5 application
+  - Uses Rollup for building
+  - Components organized by feature (analysis, job, user, etc.)
+  - GraphQL client using @urql/svelte
+  - Bootstrap 5 + SvelteStrap for UI
+  - uPlot for time-series visualization
+- **web/templates**: Server-side Go templates
+
+### Key Concepts
+
+**Job Archive**: Completed jobs are stored in a file-based archive following the [ClusterCockpit job-archive specification](https://github.com/ClusterCockpit/cc-specifications/tree/master/job-archive). Each job has a `meta.json` file with metadata and metric data files.
+
+**Metric Data Repositories**: Time-series metric data is stored separately from job metadata. The system supports multiple backends (cc-metric-store is recommended). Configuration is per-cluster in `config.json`.
+
+**Authentication Flow**:
+1. Multiple authenticators can be configured (local, LDAP, OIDC, JWT)
+2. Each authenticator's `CanLogin` method is called to determine if it should handle the request
+3. The first authenticator that returns true performs the actual `Login`
+4. JWT tokens are used for API authentication
+
+**Database Migrations**: SQL migrations in `internal/repository/migrations/` are applied automatically on startup. Version tracking in `version` table.
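+
+A minimal sketch of invoking the migration layer at startup, mirroring the
+pattern in `cmd/cc-backend` (`MigrateDB` and the `Version` constant exist in
+`internal/repository`; the import paths shown are assumptions based on the
+repository layout, and a later commit in this patch series drops the driver
+argument from `MigrateDB`):
+
+```go
+// import (
+//     "github.com/ClusterCockpit/cc-backend/internal/repository"
+//     cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
+// )
+
+// Apply any pending SQL migrations before opening the main connection.
+if err := repository.MigrateDB("sqlite3", "./var/job.db"); err != nil {
+	cclog.Abortf("Could not migrate database to version %d.\nError: %s\n", repository.Version, err.Error())
+}
+```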
+
+**Scopes**: Metrics can be collected at different scopes:
+- Node scope (always available)
+- Core scope (for jobs with ≤8 nodes)
+- Accelerator scope (for GPU/accelerator metrics)
+
+## Configuration
+
+- **config.json**: Main configuration (clusters, metric repositories, archive settings)
+- **.env**: Environment variables (secrets like JWT keys)
+  - Copy from `configs/env-template.txt`
+  - NEVER commit this file
+- **cluster.json**: Cluster topology and metric definitions (loaded from archive or config)
+
+## Database
+
+- Default: SQLite 3 (`./var/job.db`)
+- Optional: MySQL/MariaDB
+- Connection managed by `internal/repository`
+- Schema version in `internal/repository/migration.go`
+
+## Code Generation
+
+**GraphQL** (gqlgen):
+- Schema: `api/*.graphqls`
+- Config: `gqlgen.yml`
+- Generated code: `internal/graph/generated/`
+- Custom resolvers: `internal/graph/schema.resolvers.go`
+- Run `make graphql` after schema changes
+
+**Swagger/OpenAPI**:
+- Annotations in `internal/api/*.go`
+- Generated docs: `api/docs.go`, `api/swagger.yaml`
+- Run `make swagger` after API changes
+
+## Testing Conventions
+
+- Test files use `_test.go` suffix
+- Test data in `testdata/` subdirectories
+- Repository tests use in-memory SQLite
+- API tests use httptest
+
+## Common Workflows
+
+### Adding a new GraphQL field
+1. Edit schema in `api/*.graphqls`
+2. Run `make graphql`
+3. Implement resolver in `internal/graph/schema.resolvers.go`
+
+### Adding a new REST endpoint
+1. Add handler in `internal/api/*.go`
+2. Add route in `internal/api/rest.go`
+3. Add Swagger annotations
+4. Run `make swagger`
+
+### Adding a new metric data backend
+1. Implement `MetricDataRepository` interface in `internal/metricdata/`
+2. Register in `metricdata.Init()` switch statement
+3. Update config.json schema documentation
+
+### Modifying database schema
+1. Create new migration in `internal/repository/migrations/`
+2. Increment `repository.Version`
+3. Test with fresh database and existing database
+
+## Dependencies
+
+- Go 1.24.0+ (check go.mod for exact version)
+- Node.js (for frontend builds)
+- SQLite 3 or MySQL/MariaDB
+- Optional: NATS server for metric ingestion
From b35172e2f7bc56fad47a12ef36398ddba376d6db Mon Sep 17 00:00:00 2001
From: Jan Eitzinger
Date: Sat, 20 Dec 2025 11:13:02 +0100
Subject: [PATCH 10/59] Add context information for CLAUDE coding agent

---
 CLAUDE.md | 33 +++++++++++++++++++++--------
 1 file changed, 25 insertions(+), 8 deletions(-)

diff --git a/CLAUDE.md b/CLAUDE.md
index 2bb08c98..379b4dbb 100644
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -1,10 +1,14 @@
 # CLAUDE.md
 
-This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.
+This file provides guidance to Claude Code (claude.ai/code) when working with
+code in this repository.
 
 ## Project Overview
 
-ClusterCockpit is a job-specific performance monitoring framework for HPC clusters. This is a Golang backend that provides REST and GraphQL APIs, serves a Svelte-based frontend, and manages job archives and metric data from various time-series databases.
+ClusterCockpit is a job-specific performance monitoring framework for HPC
+clusters. This is a Golang backend that provides REST and GraphQL APIs, serves a
+Svelte-based frontend, and manages job archives and metric data from various
+time-series databases.
## Build and Development Commands @@ -80,7 +84,7 @@ The backend follows a layered architecture with clear separation of concerns: - **cmd/cc-backend**: Entry point, orchestrates initialization of all subsystems - **internal/repository**: Data access layer using repository pattern - - Abstracts database operations (SQLite/MySQL) + - Abstracts database operations (SQLite3 only) - Implements LRU caching for performance - Provides repositories for Job, User, Node, and Tag entities - Transaction support for batch operations @@ -114,19 +118,27 @@ The backend follows a layered architecture with clear separation of concerns: ### Key Concepts -**Job Archive**: Completed jobs are stored in a file-based archive following the [ClusterCockpit job-archive specification](https://github.com/ClusterCockpit/cc-specifications/tree/master/job-archive). Each job has a `meta.json` file with metadata and metric data files. +**Job Archive**: Completed jobs are stored in a file-based archive following the +[ClusterCockpit job-archive +specification](https://github.com/ClusterCockpit/cc-specifications/tree/master/job-archive). +Each job has a `meta.json` file with metadata and metric data files. -**Metric Data Repositories**: Time-series metric data is stored separately from job metadata. The system supports multiple backends (cc-metric-store is recommended). Configuration is per-cluster in `config.json`. +**Metric Data Repositories**: Time-series metric data is stored separately from +job metadata. The system supports multiple backends (cc-metric-store is +recommended). Configuration is per-cluster in `config.json`. **Authentication Flow**: + 1. Multiple authenticators can be configured (local, LDAP, OIDC, JWT) 2. Each authenticator's `CanLogin` method is called to determine if it should handle the request 3. The first authenticator that returns true performs the actual `Login` 4. JWT tokens are used for API authentication -**Database Migrations**: SQL migrations in `internal/repository/migrations/` are applied automatically on startup. Version tracking in `version` table. +**Database Migrations**: SQL migrations in `internal/repository/migrations/` are +applied automatically on startup. Version tracking in `version` table. **Scopes**: Metrics can be collected at different scopes: + - Node scope (always available) - Core scope (for jobs with ≤8 nodes) - Accelerator scope (for GPU/accelerator metrics) @@ -142,13 +154,13 @@ The backend follows a layered architecture with clear separation of concerns: ## Database - Default: SQLite 3 (`./var/job.db`) -- Optional: MySQL/MariaDB - Connection managed by `internal/repository` - Schema version in `internal/repository/migration.go` ## Code Generation **GraphQL** (gqlgen): + - Schema: `api/*.graphqls` - Config: `gqlgen.yml` - Generated code: `internal/graph/generated/` @@ -156,6 +168,7 @@ The backend follows a layered architecture with clear separation of concerns: - Run `make graphql` after schema changes **Swagger/OpenAPI**: + - Annotations in `internal/api/*.go` - Generated docs: `api/docs.go`, `api/swagger.yaml` - Run `make swagger` after API changes @@ -170,22 +183,26 @@ The backend follows a layered architecture with clear separation of concerns: ## Common Workflows ### Adding a new GraphQL field + 1. Edit schema in `api/*.graphqls` 2. Run `make graphql` 3. Implement resolver in `internal/graph/schema.resolvers.go` ### Adding a new REST endpoint + 1. Add handler in `internal/api/*.go` 2. Add route in `internal/api/rest.go` 3. Add Swagger annotations 4. 
Run `make swagger` ### Adding a new metric data backend + 1. Implement `MetricDataRepository` interface in `internal/metricdata/` 2. Register in `metricdata.Init()` switch statement 3. Update config.json schema documentation ### Modifying database schema + 1. Create new migration in `internal/repository/migrations/` 2. Increment `repository.Version` 3. Test with fresh database and existing database @@ -194,5 +211,5 @@ The backend follows a layered architecture with clear separation of concerns: - Go 1.24.0+ (check go.mod for exact version) - Node.js (for frontend builds) -- SQLite 3 or MySQL/MariaDB +- SQLite 3 (only supported database) - Optional: NATS server for metric ingestion From 1cd4a57bd3206e1f3115c1cbc58fcad5cbfb87a5 Mon Sep 17 00:00:00 2001 From: Jan Eitzinger Date: Sat, 20 Dec 2025 11:13:41 +0100 Subject: [PATCH 11/59] Remove support for mysql/mariadb --- README.md | 11 +- cmd/cc-backend/init.go | 4 +- cmd/cc-backend/main.go | 19 ++- configs/config-mariadb.json | 64 --------- go.mod | 2 - go.sum | 49 +------ init/clustercockpit.service | 2 +- internal/api/api_test.go | 2 +- internal/config/config.go | 4 +- internal/config/schema.go | 2 +- internal/importer/importer_test.go | 2 +- internal/repository/dbConnection.go | 56 ++++---- internal/repository/job.go | 52 ++------ internal/repository/migration.go | 89 ++++--------- .../migrations/mysql/01_init-schema.down.sql | 5 - .../migrations/mysql/01_init-schema.up.sql | 66 ---------- .../migrations/mysql/02_add-index.down.sql | 8 -- .../migrations/mysql/02_add-index.up.sql | 8 -- .../mysql/03_add-userprojects.down.sql | 1 - .../mysql/03_add-userprojects.up.sql | 1 - .../mysql/04_alter-table-job.down.sql | 5 - .../mysql/04_alter-table-job.up.sql | 5 - .../migrations/mysql/05_extend-tags.down.sql | 2 - .../migrations/mysql/05_extend-tags.up.sql | 2 - .../mysql/06_change-config.down.sql | 1 - .../migrations/mysql/06_change-config.up.sql | 1 - .../migrations/mysql/07_fix-tag-id.down.sql | 3 - .../migrations/mysql/07_fix-tag-id.up.sql | 3 - .../mysql/08_add-footprint.down.sql | 83 ------------ .../migrations/mysql/08_add-footprint.up.sql | 123 ------------------ internal/repository/node_test.go | 2 +- internal/repository/repository_test.go | 2 +- internal/repository/stats.go | 49 +++---- internal/repository/userConfig_test.go | 2 +- internal/tagger/detectApp_test.go | 2 +- 35 files changed, 104 insertions(+), 628 deletions(-) delete mode 100644 configs/config-mariadb.json delete mode 100644 internal/repository/migrations/mysql/01_init-schema.down.sql delete mode 100644 internal/repository/migrations/mysql/01_init-schema.up.sql delete mode 100644 internal/repository/migrations/mysql/02_add-index.down.sql delete mode 100644 internal/repository/migrations/mysql/02_add-index.up.sql delete mode 100644 internal/repository/migrations/mysql/03_add-userprojects.down.sql delete mode 100644 internal/repository/migrations/mysql/03_add-userprojects.up.sql delete mode 100644 internal/repository/migrations/mysql/04_alter-table-job.down.sql delete mode 100644 internal/repository/migrations/mysql/04_alter-table-job.up.sql delete mode 100644 internal/repository/migrations/mysql/05_extend-tags.down.sql delete mode 100644 internal/repository/migrations/mysql/05_extend-tags.up.sql delete mode 100644 internal/repository/migrations/mysql/06_change-config.down.sql delete mode 100644 internal/repository/migrations/mysql/06_change-config.up.sql delete mode 100644 internal/repository/migrations/mysql/07_fix-tag-id.down.sql delete mode 100644 
internal/repository/migrations/mysql/07_fix-tag-id.up.sql delete mode 100644 internal/repository/migrations/mysql/08_add-footprint.down.sql delete mode 100644 internal/repository/migrations/mysql/08_add-footprint.up.sql diff --git a/README.md b/README.md index 0799bd92..a0352d17 100644 --- a/README.md +++ b/README.md @@ -29,12 +29,11 @@ is also served by the backend using [Svelte](https://svelte.dev/) components. Layout and styling are based on [Bootstrap 5](https://getbootstrap.com/) using [Bootstrap Icons](https://icons.getbootstrap.com/). -The backend uses [SQLite 3](https://sqlite.org/) as a relational SQL database by -default. Optionally it can use a MySQL/MariaDB database server. While there are -metric data backends for the InfluxDB and Prometheus time series databases, the -only tested and supported setup is to use cc-metric-store as the metric data -backend. Documentation on how to integrate ClusterCockpit with other time series -databases will be added in the future. +The backend uses [SQLite 3](https://sqlite.org/) as the relational SQL database. +While there are metric data backends for the InfluxDB and Prometheus time series +databases, the only tested and supported setup is to use cc-metric-store as the +metric data backend. Documentation on how to integrate ClusterCockpit with other +time series databases will be added in the future. Completed batch jobs are stored in a file-based job archive according to [this specification](https://github.com/ClusterCockpit/cc-specifications/tree/master/job-archive). diff --git a/cmd/cc-backend/init.go b/cmd/cc-backend/init.go index ee60b12c..151eee9e 100644 --- a/cmd/cc-backend/init.go +++ b/cmd/cc-backend/init.go @@ -105,9 +105,9 @@ func initEnv() { cclog.Abortf("Could not create default ./var folder with permissions '0o777'. Application initialization failed, exited.\nError: %s\n", err.Error()) } - err := repository.MigrateDB("sqlite3", "./var/job.db") + err := repository.MigrateDB("./var/job.db") if err != nil { - cclog.Abortf("Could not initialize default sqlite3 database as './var/job.db'. Application initialization failed, exited.\nError: %s\n", err.Error()) + cclog.Abortf("Could not initialize default SQLite database as './var/job.db'. Application initialization failed, exited.\nError: %s\n", err.Error()) } if err := os.Mkdir("var/job-archive", 0o777); err != nil { cclog.Abortf("Could not create default ./var/job-archive folder with permissions '0o777'. 
Application initialization failed, exited.\nError: %s\n", err.Error()) diff --git a/cmd/cc-backend/main.go b/cmd/cc-backend/main.go index 6239d36c..9464ccf4 100644 --- a/cmd/cc-backend/main.go +++ b/cmd/cc-backend/main.go @@ -40,7 +40,6 @@ import ( "github.com/google/gops/agent" "github.com/joho/godotenv" - _ "github.com/go-sql-driver/mysql" _ "github.com/mattn/go-sqlite3" ) @@ -120,30 +119,30 @@ func initDatabase() error { func handleDatabaseCommands() error { if flagMigrateDB { - err := repository.MigrateDB(config.Keys.DBDriver, config.Keys.DB) + err := repository.MigrateDB(config.Keys.DB) if err != nil { return fmt.Errorf("migrating database to version %d: %w", repository.Version, err) } - cclog.Exitf("MigrateDB Success: Migrated '%s' database at location '%s' to version %d.\n", - config.Keys.DBDriver, config.Keys.DB, repository.Version) + cclog.Exitf("MigrateDB Success: Migrated SQLite database at '%s' to version %d.\n", + config.Keys.DB, repository.Version) } if flagRevertDB { - err := repository.RevertDB(config.Keys.DBDriver, config.Keys.DB) + err := repository.RevertDB(config.Keys.DB) if err != nil { return fmt.Errorf("reverting database to version %d: %w", repository.Version-1, err) } - cclog.Exitf("RevertDB Success: Reverted '%s' database at location '%s' to version %d.\n", - config.Keys.DBDriver, config.Keys.DB, repository.Version-1) + cclog.Exitf("RevertDB Success: Reverted SQLite database at '%s' to version %d.\n", + config.Keys.DB, repository.Version-1) } if flagForceDB { - err := repository.ForceDB(config.Keys.DBDriver, config.Keys.DB) + err := repository.ForceDB(config.Keys.DB) if err != nil { return fmt.Errorf("forcing database to version %d: %w", repository.Version, err) } - cclog.Exitf("ForceDB Success: Forced '%s' database at location '%s' to version %d.\n", - config.Keys.DBDriver, config.Keys.DB, repository.Version) + cclog.Exitf("ForceDB Success: Forced SQLite database at '%s' to version %d.\n", + config.Keys.DB, repository.Version) } return nil diff --git a/configs/config-mariadb.json b/configs/config-mariadb.json deleted file mode 100644 index 38bb8a93..00000000 --- a/configs/config-mariadb.json +++ /dev/null @@ -1,64 +0,0 @@ -{ - "addr": "127.0.0.1:8080", - "short-running-jobs-duration": 300, - "archive": { - "kind": "file", - "path": "./var/job-archive" - }, - "jwts": { - "max-age": "2000h" - }, - "db-driver": "mysql", - "db": "clustercockpit:demo@tcp(127.0.0.1:3306)/clustercockpit", - "enable-resampling": { - "trigger": 30, - "resolutions": [600, 300, 120, 60] - }, - "emission-constant": 317, - "clusters": [ - { - "name": "fritz", - "metricDataRepository": { - "kind": "cc-metric-store", - "url": "http://localhost:8082", - "token": "" - }, - "filterRanges": { - "numNodes": { - "from": 1, - "to": 64 - }, - "duration": { - "from": 0, - "to": 86400 - }, - "startTime": { - "from": "2022-01-01T00:00:00Z", - "to": null - } - } - }, - { - "name": "alex", - "metricDataRepository": { - "kind": "cc-metric-store", - "url": "http://localhost:8082", - "token": "" - }, - "filterRanges": { - "numNodes": { - "from": 1, - "to": 64 - }, - "duration": { - "from": 0, - "to": 86400 - }, - "startTime": { - "from": "2022-01-01T00:00:00Z", - "to": null - } - } - } - ] -} diff --git a/go.mod b/go.mod index df8e1fb9..eb061de7 100644 --- a/go.mod +++ b/go.mod @@ -21,7 +21,6 @@ require ( github.com/expr-lang/expr v1.17.6 github.com/go-co-op/gocron/v2 v2.18.2 github.com/go-ldap/ldap/v3 v3.4.12 - github.com/go-sql-driver/mysql v1.9.3 github.com/golang-jwt/jwt/v5 v5.3.0 
github.com/golang-migrate/migrate/v4 v4.19.1 github.com/google/gops v0.3.28 @@ -48,7 +47,6 @@ require ( ) require ( - filippo.io/edwards25519 v1.1.0 // indirect github.com/Azure/go-ntlmssp v0.0.0-20221128193559-754e69321358 // indirect github.com/KyleBanks/depth v1.2.1 // indirect github.com/agnivade/levenshtein v1.2.1 // indirect diff --git a/go.sum b/go.sum index 711c5551..fd4980da 100644 --- a/go.sum +++ b/go.sum @@ -2,8 +2,6 @@ filippo.io/edwards25519 v1.1.0 h1:FNf4tywRC1HmFuKW5xopWpigGjJKiJSV0Cqo0cJWDaA= filippo.io/edwards25519 v1.1.0/go.mod h1:BxyFTGdWcka3PhytdK4V28tE5sGfRvvvRV7EaN4VDT4= github.com/99designs/gqlgen v0.17.84 h1:iVMdiStgUVx/BFkMb0J5GAXlqfqtQ7bqMCYK6v52kQ0= github.com/99designs/gqlgen v0.17.84/go.mod h1:qjoUqzTeiejdo+bwUg8unqSpeYG42XrcrQboGIezmFA= -github.com/Azure/go-ansiterm v0.0.0-20230124172434-306776ec8161 h1:L/gRVlceqvL25UVaW/CKtUDjefjrs0SPonmDGUVOYP0= -github.com/Azure/go-ansiterm v0.0.0-20230124172434-306776ec8161/go.mod h1:xomTg63KZ2rFqZQzSB4Vz2SUXa1BpHTVz9L5PTmPC4E= github.com/Azure/go-ntlmssp v0.0.0-20221128193559-754e69321358 h1:mFRzDkZVAjdal+s7s0MwaRv9igoPqLRdzOLzw/8Xvq8= github.com/Azure/go-ntlmssp v0.0.0-20221128193559-754e69321358/go.mod h1:chxPXzSsl7ZWRAuOIE23GDNzjWuZquvFlgA8xmpunjU= github.com/ClusterCockpit/cc-lib v1.0.2 h1:ZWn3oZkXgxrr3zSigBdlOOfayZ4Om4xL20DhmritPPg= @@ -12,8 +10,6 @@ github.com/KyleBanks/depth v1.2.1 h1:5h8fQADFrWtarTdtDudMmGsC7GPbOAu6RVB3ffsVFHc github.com/KyleBanks/depth v1.2.1/go.mod h1:jzSb9d0L43HxTQfT+oSA1EEp2q+ne2uh6XgeJcm8brE= github.com/Masterminds/squirrel v1.5.4 h1:uUcX/aBc8O7Fg9kaISIUsHXdKuqehiXAMQTYX8afzqM= github.com/Masterminds/squirrel v1.5.4/go.mod h1:NNaOrjSoIDfDA40n7sr2tPNZRfjzjA400rg+riTZj10= -github.com/Microsoft/go-winio v0.6.2 h1:F2VQgta7ecxGYO8k3ZZz3RS8fVIXVxONVUPlNERoyfY= -github.com/Microsoft/go-winio v0.6.2/go.mod h1:yd8OoFMLzJbo9gZq8j5qaps8bJ9aShtEA8Ipt1oGCvU= github.com/NVIDIA/go-nvml v0.13.0-1 h1:OLX8Jq3dONuPOQPC7rndB6+iDmDakw0XTYgzMxObkEw= github.com/NVIDIA/go-nvml v0.13.0-1/go.mod h1:+KNA7c7gIBH7SKSJ1ntlwkfN80zdx8ovl4hrK3LmPt4= github.com/PuerkitoBio/goquery v1.11.0 h1:jZ7pwMQXIITcUXNH83LLk+txlaEy6NVOfTuP43xxfqw= @@ -70,10 +66,6 @@ github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw= github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs= github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= -github.com/containerd/errdefs v1.0.0 h1:tg5yIfIlQIrxYtu9ajqY42W3lpS19XqdxRQeEwYG8PI= -github.com/containerd/errdefs v1.0.0/go.mod h1:+YBYIdtsnF4Iw6nWZhJcqGSg/dwvV7tyJ/kCkyJ2k+M= -github.com/containerd/errdefs/pkg v0.3.0 h1:9IKJ06FvyNlexW690DXuQNx2KA2cUJXx151Xdx3ZPPE= -github.com/containerd/errdefs/pkg v0.3.0/go.mod h1:NJw6s9HwNuRhnjJhM7pylWwMyAkmCQvQ4GpJHEqRLVk= github.com/coreos/go-oidc/v3 v3.16.0 h1:qRQUCFstKpXwmEjDQTIbyY/5jF00+asXzSkmkoa/mow= github.com/coreos/go-oidc/v3 v3.16.0/go.mod h1:wqPbKFrVnE90vty060SB40FCJ8fTHTxSwyXJqZH+sI8= github.com/cpuguy83/go-md2man/v2 v2.0.7 h1:zbFlGlXEAKlwXpmvle3d8Oe3YnkKIK4xSRTd3sHPnBo= @@ -85,16 +77,6 @@ github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1 github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/dgryski/trifles v0.0.0-20230903005119-f50d829f2e54 h1:SG7nF6SRlWhcT7cNTs5R6Hk4V2lcmLz2NsG2VnInyNo= github.com/dgryski/trifles v0.0.0-20230903005119-f50d829f2e54/go.mod 
h1:if7Fbed8SFyPtHLHbg49SI7NAdJiC5WIA09pe59rfAA= -github.com/dhui/dktest v0.4.6 h1:+DPKyScKSEp3VLtbMDHcUq6V5Lm5zfZZVb0Sk7Ahom4= -github.com/dhui/dktest v0.4.6/go.mod h1:JHTSYDtKkvFNFHJKqCzVzqXecyv+tKt8EzceOmQOgbU= -github.com/distribution/reference v0.6.0 h1:0IXCQ5g4/QMHHkarYzh5l+u8T3t73zM5QvfrDyIgxBk= -github.com/distribution/reference v0.6.0/go.mod h1:BbU0aIcezP1/5jX/8MP0YiH4SdvB5Y4f/wlDRiLyi3E= -github.com/docker/docker v28.3.3+incompatible h1:Dypm25kh4rmk49v1eiVbsAtpAsYURjYkaKubwuBdxEI= -github.com/docker/docker v28.3.3+incompatible/go.mod h1:eEKB0N0r5NX/I1kEveEz05bcu8tLC/8azJZsviup8Sk= -github.com/docker/go-connections v0.5.0 h1:USnMq7hx7gwdVZq1L49hLXaFtUdTADjXGp+uj1Br63c= -github.com/docker/go-connections v0.5.0/go.mod h1:ov60Kzw0kKElRwhNs9UlUHAE/F9Fe6GLaXnqyDdmEXc= -github.com/docker/go-units v0.5.0 h1:69rxXcBk27SvSaaxTtLh/8llcHD8vYHT7WSdRZ/jvr4= -github.com/docker/go-units v0.5.0/go.mod h1:fgPhTUdO+D/Jk86RDLlptpiXQzgHJF7gydDDbaIK4Dk= github.com/expr-lang/expr v1.17.6 h1:1h6i8ONk9cexhDmowO/A64VPxHScu7qfSl2k8OlINec= github.com/expr-lang/expr v1.17.6/go.mod h1:8/vRC7+7HBzESEqt5kKpYXxrxkr31SaO8r40VO/1IT4= github.com/felixge/httpsnoop v1.0.4 h1:NFTV2Zj1bL4mc9sqWACXbQFVBBg2W3GPvqp8/ESS2Wg= @@ -113,10 +95,6 @@ github.com/go-jose/go-jose/v4 v4.1.3 h1:CVLmWDhDVRa6Mi/IgCgaopNosCaHz7zrMeF9MlZR github.com/go-jose/go-jose/v4 v4.1.3/go.mod h1:x4oUasVrzR7071A4TnHLGSPpNOm2a21K9Kf04k1rs08= github.com/go-ldap/ldap/v3 v3.4.12 h1:1b81mv7MagXZ7+1r7cLTWmyuTqVqdwbtJSjC0DAp9s4= github.com/go-ldap/ldap/v3 v3.4.12/go.mod h1:+SPAGcTtOfmGsCb3h1RFiq4xpp4N636G75OEace8lNo= -github.com/go-logr/logr v1.4.3 h1:CjnDlHq8ikf6E492q6eKboGOC0T8CDaOvkHCIg8idEI= -github.com/go-logr/logr v1.4.3/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= -github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag= -github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE= github.com/go-openapi/jsonpointer v0.22.3 h1:dKMwfV4fmt6Ah90zloTbUKWMD+0he+12XYAsPotrkn8= github.com/go-openapi/jsonpointer v0.22.3/go.mod h1:0lBbqeRsQ5lIanv3LHZBrmRGHLHcQoOXQnf88fHlGWo= github.com/go-openapi/jsonreference v0.21.3 h1:96Dn+MRPa0nYAR8DR1E03SblB5FJvh7W6krPI0Z7qMc= @@ -145,15 +123,12 @@ github.com/go-openapi/testify/enable/yaml/v2 v2.0.2/go.mod h1:kme83333GCtJQHXQ8U github.com/go-openapi/testify/v2 v2.0.2 h1:X999g3jeLcoY8qctY/c/Z8iBHTbwLz7R2WXd6Ub6wls= github.com/go-openapi/testify/v2 v2.0.2/go.mod h1:HCPmvFFnheKK2BuwSA0TbbdxJ3I16pjwMkYkP4Ywn54= github.com/go-sql-driver/mysql v1.4.1/go.mod h1:zAC/RDZ24gD3HViQzih4MyKcchzm+sOG5ZlKdlhCg5w= +github.com/go-sql-driver/mysql v1.8.1 h1:LedoTUt/eveggdHS9qUFC1EFSa8bU2+1pZjSRpvNJ1Y= github.com/go-sql-driver/mysql v1.8.1/go.mod h1:wEBSXgmK//2ZFJyE+qWnIsVGmvmEKlqwuVSjsCm7DZg= -github.com/go-sql-driver/mysql v1.9.3 h1:U/N249h2WzJ3Ukj8SowVFjdtZKfu9vlLZxjPXV1aweo= -github.com/go-sql-driver/mysql v1.9.3/go.mod h1:qn46aNg1333BRMNU69Lq93t8du/dwxI64Gl8i5p1WMU= github.com/go-viper/mapstructure/v2 v2.4.0 h1:EBsztssimR/CONLSZZ04E8qAkxNYq4Qp9LvH92wZUgs= github.com/go-viper/mapstructure/v2 v2.4.0/go.mod h1:oJDH3BJKyqBA2TXFhDsKDGDTlndYOZ6rGS0BRZIxGhM= github.com/goccy/go-yaml v1.19.0 h1:EmkZ9RIsX+Uq4DYFowegAuJo8+xdX3T/2dwNPXbxEYE= github.com/goccy/go-yaml v1.19.0/go.mod h1:XBurs7gK8ATbW4ZPGKgcbrY1Br56PdM69F7LkFRi1kA= -github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q= -github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q= github.com/golang-jwt/jwt/v5 v5.3.0 h1:pv4AsKCKKZuqlgs5sUmn4x8UlGa0kEVt/puTpKx9vvo= 
github.com/golang-jwt/jwt/v5 v5.3.0/go.mod h1:fxCRLWMO43lRc8nhHWY6LGqRcf+1gQWArsqaEUEa5bE= github.com/golang-migrate/migrate/v4 v4.19.1 h1:OCyb44lFuQfYXYLx1SCxPZQGU7mcaZ7gH9yH4jSFbBA= @@ -241,17 +216,11 @@ github.com/mattn/go-sqlite3 v1.10.0/go.mod h1:FPy6KqzDD04eiIsT53CuJW3U88zkxoIYsO github.com/mattn/go-sqlite3 v1.14.22/go.mod h1:Uh1q+B4BYcTPb+yiD3kU8Ct7aC0hY9fxUwlHK0RXw+Y= github.com/mattn/go-sqlite3 v1.14.32 h1:JD12Ag3oLy1zQA+BNn74xRgaBbdhbNIDYvQUEuuErjs= github.com/mattn/go-sqlite3 v1.14.32/go.mod h1:Uh1q+B4BYcTPb+yiD3kU8Ct7aC0hY9fxUwlHK0RXw+Y= -github.com/moby/docker-image-spec v1.3.1 h1:jMKff3w6PgbfSa69GfNg+zN/XLhfXJGnEx3Nl2EsFP0= -github.com/moby/docker-image-spec v1.3.1/go.mod h1:eKmb5VW8vQEh/BAr2yvVNvuiJuY6UIocYsFu/DxxRpo= -github.com/moby/term v0.5.0 h1:xt8Q1nalod/v7BqbG21f8mQPqH+xAaC9C3N3wfWbVP0= -github.com/moby/term v0.5.0/go.mod h1:8FzsFHVUBGZdbDsJw/ot+X+d5HLUbvklYLJ9uGfcI3Y= github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg= github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9Gz0M= github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk= -github.com/morikuni/aec v1.0.0 h1:nP9CBfwrvYnBRgY6qfDQkygYDmYwOilePFkwzv4dU8A= -github.com/morikuni/aec v1.0.0/go.mod h1:BbKIizmSmc5MMPqRYbxO4ZU0S0+P200+tUnFx7PXmsc= github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA= github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ= github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f h1:KUppIJq7/+SVif2QVs3tOP0zanoHgBEVAwHxUSIzRqU= @@ -265,13 +234,7 @@ github.com/nats-io/nuid v1.0.1/go.mod h1:19wcPz3Ph3q0Jbyiqsd0kePYG7A95tJPxeL+1OS github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e/go.mod h1:zD1mROLANZcx1PVRCS0qkT7pwLkGfwJo4zjcN/Tysno= github.com/oapi-codegen/runtime v1.1.1 h1:EXLHh0DXIJnWhdRPN2w4MXAzFyE4CskzhNLUmtpMYro= github.com/oapi-codegen/runtime v1.1.1/go.mod h1:SK9X900oXmPWilYR5/WKPzt3Kqxn/uS/+lbpREv+eCg= -github.com/opencontainers/go-digest v1.0.0 h1:apOUWs51W5PlhuyGyz9FCeeBIOUDA/6nW8Oi/yOhh5U= -github.com/opencontainers/go-digest v1.0.0/go.mod h1:0JzlMkj0TRzQZfJkVvzbP0HBR3IKzErnv2BNG4W4MAM= -github.com/opencontainers/image-spec v1.1.0 h1:8SG7/vwALn54lVB/0yZ/MMwhFrPYtpEHQb2IpWsCzug= -github.com/opencontainers/image-spec v1.1.0/go.mod h1:W4s4sFTMaBeK1BQLXbG4AdM2szdn85PY75RI83NrTrM= github.com/opentracing/opentracing-go v1.1.0/go.mod h1:UkNAQd3GIcIGf0SeVgPpRdFStlNbqXla1AfSYxPUl2o= -github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= -github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 h1:Jamvg5psRIccs7FGNTlIRMkT8wgtp5eCXdBlqhYGL6U= github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= @@ -323,16 +286,6 @@ github.com/vektah/gqlparser/v2 v2.5.31/go.mod h1:c1I28gSOVNzlfc4WuDlqU7voQnsqI6O github.com/xrash/smetrics v0.0.0-20250705151800-55b8f293f342 h1:FnBeRrxr7OU4VvAzt5X7s6266i6cSVkkFPS0TuXWbIg= 
github.com/xrash/smetrics v0.0.0-20250705151800-55b8f293f342/go.mod h1:Ohn+xnUBiLI6FVj/9LpzZWtj1/D6lUovWYBkxHVV3aM= github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY= -go.opentelemetry.io/auto/sdk v1.1.0 h1:cH53jehLUN6UFLY71z+NDOiNJqDdPRaXzTel0sJySYA= -go.opentelemetry.io/auto/sdk v1.1.0/go.mod h1:3wSPjt5PWp2RhlCcmmOial7AvC4DQqZb7a7wCow3W8A= -go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.61.0 h1:F7Jx+6hwnZ41NSFTO5q4LYDtJRXBf2PD0rNBkeB/lus= -go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.61.0/go.mod h1:UHB22Z8QsdRDrnAtX4PntOl36ajSxcdUMt1sF7Y6E7Q= -go.opentelemetry.io/otel v1.37.0 h1:9zhNfelUvx0KBfu/gb+ZgeAfAgtWrfHJZcAqFC228wQ= -go.opentelemetry.io/otel v1.37.0/go.mod h1:ehE/umFRLnuLa/vSccNq9oS1ErUlkkK71gMcN34UG8I= -go.opentelemetry.io/otel/metric v1.37.0 h1:mvwbQS5m0tbmqML4NqK+e3aDiO02vsf/WgbsdpcPoZE= -go.opentelemetry.io/otel/metric v1.37.0/go.mod h1:04wGrZurHYKOc+RKeye86GwKiTb9FKm1WHtO+4EVr2E= -go.opentelemetry.io/otel/trace v1.37.0 h1:HLdcFNbRQBE2imdSEgm/kwqmQj1Or1l/7bW6mxVK7z4= -go.opentelemetry.io/otel/trace v1.37.0/go.mod h1:TlgrlQ+PtQO5XFerSPUYG0JSgGyryXewPGyayAWSBS0= go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto= go.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE= go.yaml.in/yaml/v2 v2.4.3 h1:6gvOSjQoTB3vt1l+CU+tSyi/HOjfOjRLJ4YwYZGwRO0= diff --git a/init/clustercockpit.service b/init/clustercockpit.service index 0a9448de..b4ed8bfa 100644 --- a/init/clustercockpit.service +++ b/init/clustercockpit.service @@ -3,7 +3,7 @@ Description=ClusterCockpit Web Server Documentation=https://github.com/ClusterCockpit/cc-backend Wants=network-online.target After=network-online.target -After=mariadb.service mysql.service +# Database is file-based SQLite - no service dependency required [Service] WorkingDirectory=/opt/monitoring/cc-backend diff --git a/internal/api/api_test.go b/internal/api/api_test.go index 70b0f0aa..d311767c 100644 --- a/internal/api/api_test.go +++ b/internal/api/api_test.go @@ -141,7 +141,7 @@ func setup(t *testing.T) *api.RestAPI { } dbfilepath := filepath.Join(tmpdir, "test.db") - err := repository.MigrateDB("sqlite3", dbfilepath) + err := repository.MigrateDB(dbfilepath) if err != nil { t.Fatal(err) } diff --git a/internal/config/config.go b/internal/config/config.go index 25ca27eb..b7b8ed06 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -37,10 +37,10 @@ type ProgramConfig struct { EmbedStaticFiles bool `json:"embed-static-files"` StaticFiles string `json:"static-files"` - // 'sqlite3' or 'mysql' (mysql will work for mariadb as well) + // Database driver - only 'sqlite3' is supported DBDriver string `json:"db-driver"` - // For sqlite3 a filename, for mysql a DSN in this format: https://github.com/go-sql-driver/mysql#dsn-data-source-name (Without query parameters!). 
+ // Path to SQLite database file DB string `json:"db"` // Keep all metric data in the metric data repositories, diff --git a/internal/config/schema.go b/internal/config/schema.go index ed1f42d8..b171f96a 100644 --- a/internal/config/schema.go +++ b/internal/config/schema.go @@ -41,7 +41,7 @@ var configSchema = ` "type": "string" }, "db": { - "description": "For sqlite3 a filename, for mysql a DSN in this format: https://github.com/go-sql-driver/mysql#dsn-data-source-name (Without query parameters!).", + "description": "Path to SQLite database file (e.g., './var/job.db')", "type": "string" }, "disable-archive": { diff --git a/internal/importer/importer_test.go b/internal/importer/importer_test.go index 2aa007da..470f7603 100644 --- a/internal/importer/importer_test.go +++ b/internal/importer/importer_test.go @@ -107,7 +107,7 @@ func setup(t *testing.T) *repository.JobRepository { } dbfilepath := filepath.Join(tmpdir, "test.db") - err := repository.MigrateDB("sqlite3", dbfilepath) + err := repository.MigrateDB(dbfilepath) if err != nil { t.Fatal(err) } diff --git a/internal/repository/dbConnection.go b/internal/repository/dbConnection.go index 1c14c956..0f7536b7 100644 --- a/internal/repository/dbConnection.go +++ b/internal/repository/dbConnection.go @@ -55,6 +55,10 @@ func Connect(driver string, db string) { var err error var dbHandle *sqlx.DB + if driver != "sqlite3" { + cclog.Abortf("Unsupported database driver '%s'. Only 'sqlite3' is supported.\n", driver) + } + dbConnOnce.Do(func() { opts := DatabaseOptions{ URL: db, @@ -64,39 +68,31 @@ func Connect(driver string, db string) { ConnectionMaxIdleTime: repoConfig.ConnectionMaxIdleTime, } - switch driver { - case "sqlite3": - // TODO: Have separate DB handles for Writes and Reads - // Optimize SQLite connection: https://kerkour.com/sqlite-for-servers - connectionURLParams := make(url.Values) - connectionURLParams.Add("_txlock", "immediate") - connectionURLParams.Add("_journal_mode", "WAL") - connectionURLParams.Add("_busy_timeout", "5000") - connectionURLParams.Add("_synchronous", "NORMAL") - connectionURLParams.Add("_cache_size", "1000000000") - connectionURLParams.Add("_foreign_keys", "true") - opts.URL = fmt.Sprintf("file:%s?%s", opts.URL, connectionURLParams.Encode()) + // TODO: Have separate DB handles for Writes and Reads + // Optimize SQLite connection: https://kerkour.com/sqlite-for-servers + connectionURLParams := make(url.Values) + connectionURLParams.Add("_txlock", "immediate") + connectionURLParams.Add("_journal_mode", "WAL") + connectionURLParams.Add("_busy_timeout", "5000") + connectionURLParams.Add("_synchronous", "NORMAL") + connectionURLParams.Add("_cache_size", "1000000000") + connectionURLParams.Add("_foreign_keys", "true") + opts.URL = fmt.Sprintf("file:%s?%s", opts.URL, connectionURLParams.Encode()) - if cclog.Loglevel() == "debug" { - sql.Register("sqlite3WithHooks", sqlhooks.Wrap(&sqlite3.SQLiteDriver{}, &Hooks{})) - dbHandle, err = sqlx.Open("sqlite3WithHooks", opts.URL) - } else { - dbHandle, err = sqlx.Open("sqlite3", opts.URL) - } - - err = setupSqlite(dbHandle.DB) - if err != nil { - cclog.Abortf("Failed sqlite db setup.\nError: %s\n", err.Error()) - } - case "mysql": - opts.URL += "?multiStatements=true" - dbHandle, err = sqlx.Open("mysql", opts.URL) - default: - cclog.Abortf("DB Connection: Unsupported database driver '%s'.\n", driver) + if cclog.Loglevel() == "debug" { + sql.Register("sqlite3WithHooks", sqlhooks.Wrap(&sqlite3.SQLiteDriver{}, &Hooks{})) + dbHandle, err = sqlx.Open("sqlite3WithHooks", 
opts.URL) + } else { + dbHandle, err = sqlx.Open("sqlite3", opts.URL) } if err != nil { - cclog.Abortf("DB Connection: Could not connect to '%s' database with sqlx.Open().\nError: %s\n", driver, err.Error()) + cclog.Abortf("DB Connection: Could not connect to SQLite database with sqlx.Open().\nError: %s\n", err.Error()) + } + + err = setupSqlite(dbHandle.DB) + if err != nil { + cclog.Abortf("Failed sqlite db setup.\nError: %s\n", err.Error()) } dbHandle.SetMaxOpenConns(opts.MaxOpenConnections) @@ -105,7 +101,7 @@ func Connect(driver string, db string) { dbHandle.SetConnMaxIdleTime(opts.ConnectionMaxIdleTime) dbConnInstance = &DBConnection{DB: dbHandle, Driver: driver} - err = checkDBVersion(driver, dbHandle.DB) + err = checkDBVersion(dbHandle.DB) if err != nil { cclog.Abortf("DB Connection: Failed DB version check.\nError: %s\n", err.Error()) } diff --git a/internal/repository/job.go b/internal/repository/job.go index f23a14cf..47959379 100644 --- a/internal/repository/job.go +++ b/internal/repository/job.go @@ -14,8 +14,6 @@ // Initialize the database connection before using any repository: // // repository.Connect("sqlite3", "./var/job.db") -// // or for MySQL: -// repository.Connect("mysql", "user:password@tcp(localhost:3306)/dbname") // // # Configuration // @@ -158,52 +156,22 @@ func scanJob(row interface{ Scan(...any) error }) (*schema.Job, error) { } func (r *JobRepository) Optimize() error { - var err error - - switch r.driver { - case "sqlite3": - if _, err = r.DB.Exec(`VACUUM`); err != nil { - return err - } - case "mysql": - cclog.Info("Optimize currently not supported for mysql driver") + if _, err := r.DB.Exec(`VACUUM`); err != nil { + return err } - return nil } func (r *JobRepository) Flush() error { - var err error - - switch r.driver { - case "sqlite3": - if _, err = r.DB.Exec(`DELETE FROM jobtag`); err != nil { - return err - } - if _, err = r.DB.Exec(`DELETE FROM tag`); err != nil { - return err - } - if _, err = r.DB.Exec(`DELETE FROM job`); err != nil { - return err - } - case "mysql": - if _, err = r.DB.Exec(`SET FOREIGN_KEY_CHECKS = 0`); err != nil { - return err - } - if _, err = r.DB.Exec(`TRUNCATE TABLE jobtag`); err != nil { - return err - } - if _, err = r.DB.Exec(`TRUNCATE TABLE tag`); err != nil { - return err - } - if _, err = r.DB.Exec(`TRUNCATE TABLE job`); err != nil { - return err - } - if _, err = r.DB.Exec(`SET FOREIGN_KEY_CHECKS = 1`); err != nil { - return err - } + if _, err := r.DB.Exec(`DELETE FROM jobtag`); err != nil { + return err + } + if _, err := r.DB.Exec(`DELETE FROM tag`); err != nil { + return err + } + if _, err := r.DB.Exec(`DELETE FROM job`); err != nil { + return err } - return nil } diff --git a/internal/repository/migration.go b/internal/repository/migration.go index dec93a94..43e913cc 100644 --- a/internal/repository/migration.go +++ b/internal/repository/migration.go @@ -12,7 +12,6 @@ import ( cclog "github.com/ClusterCockpit/cc-lib/ccLogger" "github.com/golang-migrate/migrate/v4" - "github.com/golang-migrate/migrate/v4/database/mysql" "github.com/golang-migrate/migrate/v4/database/sqlite3" "github.com/golang-migrate/migrate/v4/source/iofs" ) @@ -22,40 +21,19 @@ const Version uint = 10 //go:embed migrations/* var migrationFiles embed.FS -func checkDBVersion(backend string, db *sql.DB) error { - var m *migrate.Migrate +func checkDBVersion(db *sql.DB) error { + driver, err := sqlite3.WithInstance(db, &sqlite3.Config{}) + if err != nil { + return err + } + d, err := iofs.New(migrationFiles, "migrations/sqlite3") + if err != nil { + 
diff --git a/internal/repository/job.go b/internal/repository/job.go
index f23a14cf..47959379 100644
--- a/internal/repository/job.go
+++ b/internal/repository/job.go
@@ -14,8 +14,6 @@
 // Initialize the database connection before using any repository:
 //
 //	repository.Connect("sqlite3", "./var/job.db")
-//	// or for MySQL:
-//	repository.Connect("mysql", "user:password@tcp(localhost:3306)/dbname")
 //
 // # Configuration
 //
@@ -158,52 +156,22 @@ func scanJob(row interface{ Scan(...any) error }) (*schema.Job, error) {
 }
 
 func (r *JobRepository) Optimize() error {
-	var err error
-
-	switch r.driver {
-	case "sqlite3":
-		if _, err = r.DB.Exec(`VACUUM`); err != nil {
-			return err
-		}
-	case "mysql":
-		cclog.Info("Optimize currently not supported for mysql driver")
+	if _, err := r.DB.Exec(`VACUUM`); err != nil {
+		return err
 	}
-
 	return nil
 }
 
 func (r *JobRepository) Flush() error {
-	var err error
-
-	switch r.driver {
-	case "sqlite3":
-		if _, err = r.DB.Exec(`DELETE FROM jobtag`); err != nil {
-			return err
-		}
-		if _, err = r.DB.Exec(`DELETE FROM tag`); err != nil {
-			return err
-		}
-		if _, err = r.DB.Exec(`DELETE FROM job`); err != nil {
-			return err
-		}
-	case "mysql":
-		if _, err = r.DB.Exec(`SET FOREIGN_KEY_CHECKS = 0`); err != nil {
-			return err
-		}
-		if _, err = r.DB.Exec(`TRUNCATE TABLE jobtag`); err != nil {
-			return err
-		}
-		if _, err = r.DB.Exec(`TRUNCATE TABLE tag`); err != nil {
-			return err
-		}
-		if _, err = r.DB.Exec(`TRUNCATE TABLE job`); err != nil {
-			return err
-		}
-		if _, err = r.DB.Exec(`SET FOREIGN_KEY_CHECKS = 1`); err != nil {
-			return err
-		}
+	if _, err := r.DB.Exec(`DELETE FROM jobtag`); err != nil {
+		return err
+	}
+	if _, err := r.DB.Exec(`DELETE FROM tag`); err != nil {
+		return err
+	}
+	if _, err := r.DB.Exec(`DELETE FROM job`); err != nil {
+		return err
 	}
-
 	return nil
 }
diff --git a/internal/repository/migration.go b/internal/repository/migration.go
index dec93a94..43e913cc 100644
--- a/internal/repository/migration.go
+++ b/internal/repository/migration.go
@@ -12,7 +12,6 @@ import (
 	cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
 	"github.com/golang-migrate/migrate/v4"
-	"github.com/golang-migrate/migrate/v4/database/mysql"
 	"github.com/golang-migrate/migrate/v4/database/sqlite3"
 	"github.com/golang-migrate/migrate/v4/source/iofs"
 )
@@ -22,40 +21,19 @@ const Version uint = 10
 //go:embed migrations/*
 var migrationFiles embed.FS
 
-func checkDBVersion(backend string, db *sql.DB) error {
-	var m *migrate.Migrate
+func checkDBVersion(db *sql.DB) error {
+	driver, err := sqlite3.WithInstance(db, &sqlite3.Config{})
+	if err != nil {
+		return err
+	}
+	d, err := iofs.New(migrationFiles, "migrations/sqlite3")
+	if err != nil {
+		return err
+	}
 
-	switch backend {
-	case "sqlite3":
-		driver, err := sqlite3.WithInstance(db, &sqlite3.Config{})
-		if err != nil {
-			return err
-		}
-		d, err := iofs.New(migrationFiles, "migrations/sqlite3")
-		if err != nil {
-			return err
-		}
-
-		m, err = migrate.NewWithInstance("iofs", d, "sqlite3", driver)
-		if err != nil {
-			return err
-		}
-	case "mysql":
-		driver, err := mysql.WithInstance(db, &mysql.Config{})
-		if err != nil {
-			return err
-		}
-		d, err := iofs.New(migrationFiles, "migrations/mysql")
-		if err != nil {
-			return err
-		}
-
-		m, err = migrate.NewWithInstance("iofs", d, "mysql", driver)
-		if err != nil {
-			return err
-		}
-	default:
-		cclog.Abortf("Migration: Unsupported database backend '%s'.\n", backend)
+	m, err := migrate.NewWithInstance("iofs", d, "sqlite3", driver)
+	if err != nil {
+		return err
 	}
 
 	v, dirty, err := m.Version()
@@ -80,37 +58,22 @@ func checkDBVersion(backend string, db *sql.DB) error {
 	return nil
 }
 
-func getMigrateInstance(backend string, db string) (m *migrate.Migrate, err error) {
-	switch backend {
-	case "sqlite3":
-		d, err := iofs.New(migrationFiles, "migrations/sqlite3")
-		if err != nil {
-			cclog.Fatal(err)
-		}
+func getMigrateInstance(db string) (m *migrate.Migrate, err error) {
+	d, err := iofs.New(migrationFiles, "migrations/sqlite3")
+	if err != nil {
+		return nil, err
+	}
 
-		m, err = migrate.NewWithSourceInstance("iofs", d, fmt.Sprintf("sqlite3://%s?_foreign_keys=on", db))
-		if err != nil {
-			return m, err
-		}
-	case "mysql":
-		d, err := iofs.New(migrationFiles, "migrations/mysql")
-		if err != nil {
-			return m, err
-		}
-
-		m, err = migrate.NewWithSourceInstance("iofs", d, fmt.Sprintf("mysql://%s?multiStatements=true", db))
-		if err != nil {
-			return m, err
-		}
-	default:
-		cclog.Abortf("Migration: Unsupported database backend '%s'.\n", backend)
+	m, err = migrate.NewWithSourceInstance("iofs", d, fmt.Sprintf("sqlite3://%s?_foreign_keys=on", db))
+	if err != nil {
+		return nil, err
 	}
 
 	return m, nil
 }
 
-func MigrateDB(backend string, db string) error {
-	m, err := getMigrateInstance(backend, db)
+func MigrateDB(db string) error {
+	m, err := getMigrateInstance(db)
 	if err != nil {
 		return err
 	}
@@ -144,8 +107,8 @@ func MigrateDB(backend string, db string) error {
 	return nil
 }
 
-func RevertDB(backend string, db string) error {
-	m, err := getMigrateInstance(backend, db)
+func RevertDB(db string) error {
+	m, err := getMigrateInstance(db)
 	if err != nil {
 		return err
 	}
@@ -162,8 +125,8 @@ func RevertDB(backend string, db string) error {
 	return nil
 }
 
-func ForceDB(backend string, db string) error {
-	m, err := getMigrateInstance(backend, db)
+func ForceDB(db string) error {
+	m, err := getMigrateInstance(db)
 	if err != nil {
 		return err
 	}
diff --git a/internal/repository/migrations/mysql/01_init-schema.down.sql b/internal/repository/migrations/mysql/01_init-schema.down.sql
deleted file mode 100644
index 68da6469..00000000
--- a/internal/repository/migrations/mysql/01_init-schema.down.sql
+++ /dev/null
@@ -1,5 +0,0 @@
-DROP TABLE IF EXISTS job;
-DROP TABLE IF EXISTS tags;
-DROP TABLE IF EXISTS jobtag;
-DROP TABLE IF EXISTS configuration;
-DROP TABLE IF EXISTS user;
diff --git a/internal/repository/migrations/mysql/01_init-schema.up.sql b/internal/repository/migrations/mysql/01_init-schema.up.sql
deleted file mode 100644
index 3a6930cd..00000000
--- a/internal/repository/migrations/mysql/01_init-schema.up.sql
+++ /dev/null
@@ -1,66 +0,0 @@
-CREATE TABLE IF NOT EXISTS job (
-    id INTEGER AUTO_INCREMENT PRIMARY KEY ,
-    job_id BIGINT NOT NULL,
-    cluster VARCHAR(255) 
NOT NULL, - subcluster VARCHAR(255) NOT NULL, - start_time BIGINT NOT NULL, -- Unix timestamp - - user VARCHAR(255) NOT NULL, - project VARCHAR(255) NOT NULL, - `partition` VARCHAR(255) NOT NULL, - array_job_id BIGINT NOT NULL, - duration INT NOT NULL DEFAULT 0, - walltime INT NOT NULL DEFAULT 0, - job_state VARCHAR(255) NOT NULL - CHECK(job_state IN ('running', 'completed', 'failed', 'cancelled', - 'stopped', 'timeout', 'preempted', 'out_of_memory')), - meta_data TEXT, -- JSON - resources TEXT NOT NULL, -- JSON - - num_nodes INT NOT NULL, - num_hwthreads INT NOT NULL, - num_acc INT NOT NULL, - smt TINYINT NOT NULL DEFAULT 1 CHECK(smt IN (0, 1 )), - exclusive TINYINT NOT NULL DEFAULT 1 CHECK(exclusive IN (0, 1, 2)), - monitoring_status TINYINT NOT NULL DEFAULT 1 CHECK(monitoring_status IN (0, 1, 2, 3)), - - mem_used_max REAL NOT NULL DEFAULT 0.0, - flops_any_avg REAL NOT NULL DEFAULT 0.0, - mem_bw_avg REAL NOT NULL DEFAULT 0.0, - load_avg REAL NOT NULL DEFAULT 0.0, - net_bw_avg REAL NOT NULL DEFAULT 0.0, - net_data_vol_total REAL NOT NULL DEFAULT 0.0, - file_bw_avg REAL NOT NULL DEFAULT 0.0, - file_data_vol_total REAL NOT NULL DEFAULT 0.0, - UNIQUE (job_id, cluster, start_time) - ); - -CREATE TABLE IF NOT EXISTS tag ( - id INTEGER PRIMARY KEY, - tag_type VARCHAR(255) NOT NULL, - tag_name VARCHAR(255) NOT NULL, - UNIQUE (tag_type, tag_name)); - -CREATE TABLE IF NOT EXISTS jobtag ( - job_id INTEGER, - tag_id INTEGER, - PRIMARY KEY (job_id, tag_id), - FOREIGN KEY (job_id) REFERENCES job (id) ON DELETE CASCADE, - FOREIGN KEY (tag_id) REFERENCES tag (id) ON DELETE CASCADE); - -CREATE TABLE IF NOT EXISTS user ( - username varchar(255) PRIMARY KEY NOT NULL, - password varchar(255) DEFAULT NULL, - ldap tinyint NOT NULL DEFAULT 0, /* col called "ldap" for historic reasons, fills the "AuthSource" */ - name varchar(255) DEFAULT NULL, - roles varchar(255) NOT NULL DEFAULT "[]", - email varchar(255) DEFAULT NULL); - -CREATE TABLE IF NOT EXISTS configuration ( - username varchar(255), - confkey varchar(255), - value varchar(255), - PRIMARY KEY (username, confkey), - FOREIGN KEY (username) REFERENCES user (username) ON DELETE CASCADE ON UPDATE NO ACTION); - - diff --git a/internal/repository/migrations/mysql/02_add-index.down.sql b/internal/repository/migrations/mysql/02_add-index.down.sql deleted file mode 100644 index 1392c45c..00000000 --- a/internal/repository/migrations/mysql/02_add-index.down.sql +++ /dev/null @@ -1,8 +0,0 @@ -DROP INDEX IF EXISTS job_stats; -DROP INDEX IF EXISTS job_by_user; -DROP INDEX IF EXISTS job_by_starttime; -DROP INDEX IF EXISTS job_by_job_id; -DROP INDEX IF EXISTS job_list; -DROP INDEX IF EXISTS job_list_user; -DROP INDEX IF EXISTS job_list_users; -DROP INDEX IF EXISTS job_list_users_start; diff --git a/internal/repository/migrations/mysql/02_add-index.up.sql b/internal/repository/migrations/mysql/02_add-index.up.sql deleted file mode 100644 index 2524bd93..00000000 --- a/internal/repository/migrations/mysql/02_add-index.up.sql +++ /dev/null @@ -1,8 +0,0 @@ -CREATE INDEX IF NOT EXISTS job_stats ON job (cluster,subcluster,user); -CREATE INDEX IF NOT EXISTS job_by_user ON job (user); -CREATE INDEX IF NOT EXISTS job_by_starttime ON job (start_time); -CREATE INDEX IF NOT EXISTS job_by_job_id ON job (job_id); -CREATE INDEX IF NOT EXISTS job_list ON job (cluster, job_state); -CREATE INDEX IF NOT EXISTS job_list_user ON job (user, cluster, job_state); -CREATE INDEX IF NOT EXISTS job_list_users ON job (user, job_state); -CREATE INDEX IF NOT EXISTS job_list_users_start ON job 
(start_time, user, job_state); diff --git a/internal/repository/migrations/mysql/03_add-userprojects.down.sql b/internal/repository/migrations/mysql/03_add-userprojects.down.sql deleted file mode 100644 index bbf1e649..00000000 --- a/internal/repository/migrations/mysql/03_add-userprojects.down.sql +++ /dev/null @@ -1 +0,0 @@ -ALTER TABLE user DROP COLUMN projects; diff --git a/internal/repository/migrations/mysql/03_add-userprojects.up.sql b/internal/repository/migrations/mysql/03_add-userprojects.up.sql deleted file mode 100644 index d0f19c21..00000000 --- a/internal/repository/migrations/mysql/03_add-userprojects.up.sql +++ /dev/null @@ -1 +0,0 @@ -ALTER TABLE user ADD COLUMN projects varchar(255) NOT NULL DEFAULT "[]"; diff --git a/internal/repository/migrations/mysql/04_alter-table-job.down.sql b/internal/repository/migrations/mysql/04_alter-table-job.down.sql deleted file mode 100644 index ebc74549..00000000 --- a/internal/repository/migrations/mysql/04_alter-table-job.down.sql +++ /dev/null @@ -1,5 +0,0 @@ -ALTER TABLE job - MODIFY `partition` VARCHAR(255) NOT NULL, - MODIFY array_job_id BIGINT NOT NULL, - MODIFY num_hwthreads INT NOT NULL, - MODIFY num_acc INT NOT NULL; diff --git a/internal/repository/migrations/mysql/04_alter-table-job.up.sql b/internal/repository/migrations/mysql/04_alter-table-job.up.sql deleted file mode 100644 index 9fe76208..00000000 --- a/internal/repository/migrations/mysql/04_alter-table-job.up.sql +++ /dev/null @@ -1,5 +0,0 @@ -ALTER TABLE job - MODIFY `partition` VARCHAR(255), - MODIFY array_job_id BIGINT, - MODIFY num_hwthreads INT, - MODIFY num_acc INT; diff --git a/internal/repository/migrations/mysql/05_extend-tags.down.sql b/internal/repository/migrations/mysql/05_extend-tags.down.sql deleted file mode 100644 index 925c9f8f..00000000 --- a/internal/repository/migrations/mysql/05_extend-tags.down.sql +++ /dev/null @@ -1,2 +0,0 @@ -ALTER TABLE tag DROP COLUMN insert_time; -ALTER TABLE jobtag DROP COLUMN insert_time; diff --git a/internal/repository/migrations/mysql/05_extend-tags.up.sql b/internal/repository/migrations/mysql/05_extend-tags.up.sql deleted file mode 100644 index 4577564a..00000000 --- a/internal/repository/migrations/mysql/05_extend-tags.up.sql +++ /dev/null @@ -1,2 +0,0 @@ -ALTER TABLE tag ADD COLUMN insert_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP; -ALTER TABLE jobtag ADD COLUMN insert_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP; diff --git a/internal/repository/migrations/mysql/06_change-config.down.sql b/internal/repository/migrations/mysql/06_change-config.down.sql deleted file mode 100644 index 0651790c..00000000 --- a/internal/repository/migrations/mysql/06_change-config.down.sql +++ /dev/null @@ -1 +0,0 @@ -ALTER TABLE configuration MODIFY value VARCHAR(255); diff --git a/internal/repository/migrations/mysql/06_change-config.up.sql b/internal/repository/migrations/mysql/06_change-config.up.sql deleted file mode 100644 index e35ff195..00000000 --- a/internal/repository/migrations/mysql/06_change-config.up.sql +++ /dev/null @@ -1 +0,0 @@ -ALTER TABLE configuration MODIFY value TEXT; diff --git a/internal/repository/migrations/mysql/07_fix-tag-id.down.sql b/internal/repository/migrations/mysql/07_fix-tag-id.down.sql deleted file mode 100644 index 9f9959ac..00000000 --- a/internal/repository/migrations/mysql/07_fix-tag-id.down.sql +++ /dev/null @@ -1,3 +0,0 @@ -SET FOREIGN_KEY_CHECKS = 0; -ALTER TABLE tag MODIFY id INTEGER; -SET FOREIGN_KEY_CHECKS = 1; diff --git a/internal/repository/migrations/mysql/07_fix-tag-id.up.sql 
b/internal/repository/migrations/mysql/07_fix-tag-id.up.sql deleted file mode 100644 index 1abc4b35..00000000 --- a/internal/repository/migrations/mysql/07_fix-tag-id.up.sql +++ /dev/null @@ -1,3 +0,0 @@ -SET FOREIGN_KEY_CHECKS = 0; -ALTER TABLE tag MODIFY id INTEGER AUTO_INCREMENT; -SET FOREIGN_KEY_CHECKS = 1; diff --git a/internal/repository/migrations/mysql/08_add-footprint.down.sql b/internal/repository/migrations/mysql/08_add-footprint.down.sql deleted file mode 100644 index 57f2145c..00000000 --- a/internal/repository/migrations/mysql/08_add-footprint.down.sql +++ /dev/null @@ -1,83 +0,0 @@ -ALTER TABLE job DROP energy; -ALTER TABLE job DROP energy_footprint; -ALTER TABLE job ADD COLUMN flops_any_avg; -ALTER TABLE job ADD COLUMN mem_bw_avg; -ALTER TABLE job ADD COLUMN mem_used_max; -ALTER TABLE job ADD COLUMN load_avg; -ALTER TABLE job ADD COLUMN net_bw_avg; -ALTER TABLE job ADD COLUMN net_data_vol_total; -ALTER TABLE job ADD COLUMN file_bw_avg; -ALTER TABLE job ADD COLUMN file_data_vol_total; - -UPDATE job SET flops_any_avg = json_extract(footprint, '$.flops_any_avg'); -UPDATE job SET mem_bw_avg = json_extract(footprint, '$.mem_bw_avg'); -UPDATE job SET mem_used_max = json_extract(footprint, '$.mem_used_max'); -UPDATE job SET load_avg = json_extract(footprint, '$.cpu_load_avg'); -UPDATE job SET net_bw_avg = json_extract(footprint, '$.net_bw_avg'); -UPDATE job SET net_data_vol_total = json_extract(footprint, '$.net_data_vol_total'); -UPDATE job SET file_bw_avg = json_extract(footprint, '$.file_bw_avg'); -UPDATE job SET file_data_vol_total = json_extract(footprint, '$.file_data_vol_total'); - -ALTER TABLE job DROP footprint; --- Do not use reserved keywords anymore -RENAME TABLE hpc_user TO `user`; -ALTER TABLE job RENAME COLUMN hpc_user TO `user`; -ALTER TABLE job RENAME COLUMN cluster_partition TO `partition`; - -DROP INDEX IF EXISTS jobs_cluster; -DROP INDEX IF EXISTS jobs_cluster_user; -DROP INDEX IF EXISTS jobs_cluster_project; -DROP INDEX IF EXISTS jobs_cluster_subcluster; -DROP INDEX IF EXISTS jobs_cluster_starttime; -DROP INDEX IF EXISTS jobs_cluster_duration; -DROP INDEX IF EXISTS jobs_cluster_numnodes; - -DROP INDEX IF EXISTS jobs_cluster_partition; -DROP INDEX IF EXISTS jobs_cluster_partition_starttime; -DROP INDEX IF EXISTS jobs_cluster_partition_duration; -DROP INDEX IF EXISTS jobs_cluster_partition_numnodes; - -DROP INDEX IF EXISTS jobs_cluster_partition_jobstate; -DROP INDEX IF EXISTS jobs_cluster_partition_jobstate_user; -DROP INDEX IF EXISTS jobs_cluster_partition_jobstate_project; -DROP INDEX IF EXISTS jobs_cluster_partition_jobstate_starttime; -DROP INDEX IF EXISTS jobs_cluster_partition_jobstate_duration; -DROP INDEX IF EXISTS jobs_cluster_partition_jobstate_numnodes; - -DROP INDEX IF EXISTS jobs_cluster_jobstate; -DROP INDEX IF EXISTS jobs_cluster_jobstate_user; -DROP INDEX IF EXISTS jobs_cluster_jobstate_project; - -DROP INDEX IF EXISTS jobs_cluster_jobstate_starttime; -DROP INDEX IF EXISTS jobs_cluster_jobstate_duration; -DROP INDEX IF EXISTS jobs_cluster_jobstate_numnodes; - -DROP INDEX IF EXISTS jobs_user; -DROP INDEX IF EXISTS jobs_user_starttime; -DROP INDEX IF EXISTS jobs_user_duration; -DROP INDEX IF EXISTS jobs_user_numnodes; - -DROP INDEX IF EXISTS jobs_project; -DROP INDEX IF EXISTS jobs_project_user; -DROP INDEX IF EXISTS jobs_project_starttime; -DROP INDEX IF EXISTS jobs_project_duration; -DROP INDEX IF EXISTS jobs_project_numnodes; - -DROP INDEX IF EXISTS jobs_jobstate; -DROP INDEX IF EXISTS jobs_jobstate_user; -DROP INDEX IF EXISTS 
jobs_jobstate_project; -DROP INDEX IF EXISTS jobs_jobstate_starttime; -DROP INDEX IF EXISTS jobs_jobstate_duration; -DROP INDEX IF EXISTS jobs_jobstate_numnodes; - -DROP INDEX IF EXISTS jobs_arrayjobid_starttime; -DROP INDEX IF EXISTS jobs_cluster_arrayjobid_starttime; - -DROP INDEX IF EXISTS jobs_starttime; -DROP INDEX IF EXISTS jobs_duration; -DROP INDEX IF EXISTS jobs_numnodes; - -DROP INDEX IF EXISTS jobs_duration_starttime; -DROP INDEX IF EXISTS jobs_numnodes_starttime; -DROP INDEX IF EXISTS jobs_numacc_starttime; -DROP INDEX IF EXISTS jobs_energy_starttime; diff --git a/internal/repository/migrations/mysql/08_add-footprint.up.sql b/internal/repository/migrations/mysql/08_add-footprint.up.sql deleted file mode 100644 index 207ccf9e..00000000 --- a/internal/repository/migrations/mysql/08_add-footprint.up.sql +++ /dev/null @@ -1,123 +0,0 @@ -DROP INDEX IF EXISTS job_stats ON job; -DROP INDEX IF EXISTS job_by_user ON job; -DROP INDEX IF EXISTS job_by_starttime ON job; -DROP INDEX IF EXISTS job_by_job_id ON job; -DROP INDEX IF EXISTS job_list ON job; -DROP INDEX IF EXISTS job_list_user ON job; -DROP INDEX IF EXISTS job_list_users ON job; -DROP INDEX IF EXISTS job_list_users_start ON job; - -ALTER TABLE job ADD COLUMN energy REAL NOT NULL DEFAULT 0.0; -ALTER TABLE job ADD COLUMN energy_footprint JSON; - -ALTER TABLE job ADD COLUMN footprint JSON; -ALTER TABLE tag ADD COLUMN tag_scope TEXT NOT NULL DEFAULT 'global'; - --- Do not use reserved keywords anymore -RENAME TABLE `user` TO hpc_user; -ALTER TABLE job RENAME COLUMN `user` TO hpc_user; -ALTER TABLE job RENAME COLUMN `partition` TO cluster_partition; - -ALTER TABLE job MODIFY COLUMN cluster VARCHAR(50); -ALTER TABLE job MODIFY COLUMN hpc_user VARCHAR(50); -ALTER TABLE job MODIFY COLUMN subcluster VARCHAR(50); -ALTER TABLE job MODIFY COLUMN project VARCHAR(50); -ALTER TABLE job MODIFY COLUMN cluster_partition VARCHAR(50); -ALTER TABLE job MODIFY COLUMN job_state VARCHAR(25); - -UPDATE job SET footprint = '{"flops_any_avg": 0.0}'; -UPDATE job SET footprint = json_replace(footprint, '$.flops_any_avg', job.flops_any_avg); -UPDATE job SET footprint = json_insert(footprint, '$.mem_bw_avg', job.mem_bw_avg); -UPDATE job SET footprint = json_insert(footprint, '$.mem_used_max', job.mem_used_max); -UPDATE job SET footprint = json_insert(footprint, '$.cpu_load_avg', job.load_avg); -UPDATE job SET footprint = json_insert(footprint, '$.net_bw_avg', job.net_bw_avg) WHERE job.net_bw_avg != 0; -UPDATE job SET footprint = json_insert(footprint, '$.net_data_vol_total', job.net_data_vol_total) WHERE job.net_data_vol_total != 0; -UPDATE job SET footprint = json_insert(footprint, '$.file_bw_avg', job.file_bw_avg) WHERE job.file_bw_avg != 0; -UPDATE job SET footprint = json_insert(footprint, '$.file_data_vol_total', job.file_data_vol_total) WHERE job.file_data_vol_total != 0; - -ALTER TABLE job DROP flops_any_avg; -ALTER TABLE job DROP mem_bw_avg; -ALTER TABLE job DROP mem_used_max; -ALTER TABLE job DROP load_avg; -ALTER TABLE job DROP net_bw_avg; -ALTER TABLE job DROP net_data_vol_total; -ALTER TABLE job DROP file_bw_avg; -ALTER TABLE job DROP file_data_vol_total; - --- Indices for: Single filters, combined filters, sorting, sorting with filters --- Cluster Filter -CREATE INDEX IF NOT EXISTS jobs_cluster ON job (cluster); -CREATE INDEX IF NOT EXISTS jobs_cluster_user ON job (cluster, hpc_user); -CREATE INDEX IF NOT EXISTS jobs_cluster_project ON job (cluster, project); -CREATE INDEX IF NOT EXISTS jobs_cluster_subcluster ON job (cluster, subcluster); --- 
Cluster Filter Sorting -CREATE INDEX IF NOT EXISTS jobs_cluster_starttime ON job (cluster, start_time); -CREATE INDEX IF NOT EXISTS jobs_cluster_duration ON job (cluster, duration); -CREATE INDEX IF NOT EXISTS jobs_cluster_numnodes ON job (cluster, num_nodes); - --- Cluster+Partition Filter -CREATE INDEX IF NOT EXISTS jobs_cluster_partition ON job (cluster, cluster_partition); --- Cluster+Partition Filter Sorting -CREATE INDEX IF NOT EXISTS jobs_cluster_partition_starttime ON job (cluster, cluster_partition, start_time); -CREATE INDEX IF NOT EXISTS jobs_cluster_partition_duration ON job (cluster, cluster_partition, duration); -CREATE INDEX IF NOT EXISTS jobs_cluster_partition_numnodes ON job (cluster, cluster_partition, num_nodes); - --- Cluster+Partition+Jobstate Filter -CREATE INDEX IF NOT EXISTS jobs_cluster_partition_jobstate ON job (cluster, cluster_partition, job_state); -CREATE INDEX IF NOT EXISTS jobs_cluster_partition_jobstate_user ON job (cluster, cluster_partition, job_state, hpc_user); -CREATE INDEX IF NOT EXISTS jobs_cluster_partition_jobstate_project ON job (cluster, cluster_partition, job_state, project); --- Cluster+Partition+Jobstate Filter Sorting -CREATE INDEX IF NOT EXISTS jobs_cluster_partition_jobstate_starttime ON job (cluster, cluster_partition, job_state, start_time); -CREATE INDEX IF NOT EXISTS jobs_cluster_partition_jobstate_duration ON job (cluster, cluster_partition, job_state, duration); -CREATE INDEX IF NOT EXISTS jobs_cluster_partition_jobstate_numnodes ON job (cluster, cluster_partition, job_state, num_nodes); - --- Cluster+JobState Filter -CREATE INDEX IF NOT EXISTS jobs_cluster_jobstate ON job (cluster, job_state); -CREATE INDEX IF NOT EXISTS jobs_cluster_jobstate_user ON job (cluster, job_state, hpc_user); -CREATE INDEX IF NOT EXISTS jobs_cluster_jobstate_project ON job (cluster, job_state, project); --- Cluster+JobState Filter Sorting -CREATE INDEX IF NOT EXISTS jobs_cluster_jobstate_starttime ON job (cluster, job_state, start_time); -CREATE INDEX IF NOT EXISTS jobs_cluster_jobstate_duration ON job (cluster, job_state, duration); -CREATE INDEX IF NOT EXISTS jobs_cluster_jobstate_numnodes ON job (cluster, job_state, num_nodes); - --- User Filter -CREATE INDEX IF NOT EXISTS jobs_user ON job (hpc_user); --- User Filter Sorting -CREATE INDEX IF NOT EXISTS jobs_user_starttime ON job (hpc_user, start_time); -CREATE INDEX IF NOT EXISTS jobs_user_duration ON job (hpc_user, duration); -CREATE INDEX IF NOT EXISTS jobs_user_numnodes ON job (hpc_user, num_nodes); - --- Project Filter -CREATE INDEX IF NOT EXISTS jobs_project ON job (project); -CREATE INDEX IF NOT EXISTS jobs_project_user ON job (project, hpc_user); --- Project Filter Sorting -CREATE INDEX IF NOT EXISTS jobs_project_starttime ON job (project, start_time); -CREATE INDEX IF NOT EXISTS jobs_project_duration ON job (project, duration); -CREATE INDEX IF NOT EXISTS jobs_project_numnodes ON job (project, num_nodes); - --- JobState Filter -CREATE INDEX IF NOT EXISTS jobs_jobstate ON job (job_state); -CREATE INDEX IF NOT EXISTS jobs_jobstate_user ON job (job_state, hpc_user); -CREATE INDEX IF NOT EXISTS jobs_jobstate_project ON job (job_state, project); -CREATE INDEX IF NOT EXISTS jobs_jobstate_cluster ON job (job_state, cluster); --- JobState Filter Sorting -CREATE INDEX IF NOT EXISTS jobs_jobstate_starttime ON job (job_state, start_time); -CREATE INDEX IF NOT EXISTS jobs_jobstate_duration ON job (job_state, duration); -CREATE INDEX IF NOT EXISTS jobs_jobstate_numnodes ON job (job_state, num_nodes); - --- 
ArrayJob Filter
-CREATE INDEX IF NOT EXISTS jobs_arrayjobid_starttime ON job (array_job_id, start_time);
-CREATE INDEX IF NOT EXISTS jobs_cluster_arrayjobid_starttime ON job (cluster, array_job_id, start_time);
-
--- Sorting without active filters
-CREATE INDEX IF NOT EXISTS jobs_starttime ON job (start_time);
-CREATE INDEX IF NOT EXISTS jobs_duration ON job (duration);
-CREATE INDEX IF NOT EXISTS jobs_numnodes ON job (num_nodes);
-
--- Single filters with default starttime sorting
-CREATE INDEX IF NOT EXISTS jobs_duration_starttime ON job (duration, start_time);
-CREATE INDEX IF NOT EXISTS jobs_numnodes_starttime ON job (num_nodes, start_time);
-CREATE INDEX IF NOT EXISTS jobs_numacc_starttime ON job (num_acc, start_time);
-CREATE INDEX IF NOT EXISTS jobs_energy_starttime ON job (energy, start_time);
-
--- Optimize DB index usage
diff --git a/internal/repository/node_test.go b/internal/repository/node_test.go
index b42e09b8..466f51ee 100644
--- a/internal/repository/node_test.go
+++ b/internal/repository/node_test.go
@@ -130,7 +130,7 @@ func nodeTestSetup(t *testing.T) {
 	}
 
 	dbfilepath := filepath.Join(tmpdir, "test.db")
-	err := MigrateDB("sqlite3", dbfilepath)
+	err := MigrateDB(dbfilepath)
 	if err != nil {
 		t.Fatal(err)
 	}
diff --git a/internal/repository/repository_test.go b/internal/repository/repository_test.go
index 1346e4da..e3dec7fc 100644
--- a/internal/repository/repository_test.go
+++ b/internal/repository/repository_test.go
@@ -149,7 +149,7 @@ func setup(tb testing.TB) *JobRepository {
 	tb.Helper()
 	cclog.Init("warn", true)
 	dbfile := "testdata/job.db"
-	err := MigrateDB("sqlite3", dbfile)
+	err := MigrateDB(dbfile)
 	noErr(tb, err)
 	Connect("sqlite3", dbfile)
 	return GetJobRepository()
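All of the test fixtures updated in this patch follow the same sequence once MigrateDB loses its backend argument: run the migrations against a plain file path, connect with the (only remaining) sqlite3 driver, then fetch the repository singleton. A condensed sketch of that pattern; the helper name and temp-dir layout are illustrative, the call sequence mirrors the updated tests:

package mypkg_test

import (
	"path/filepath"
	"testing"

	"github.com/ClusterCockpit/cc-backend/internal/repository"
)

// setupRepo prepares a throwaway SQLite database the same way the
// updated test fixtures do.
func setupRepo(tb testing.TB) *repository.JobRepository {
	tb.Helper()
	dbfile := filepath.Join(tb.TempDir(), "test.db")

	// MigrateDB now takes only the database path; SQLite is implied.
	if err := repository.MigrateDB(dbfile); err != nil {
		tb.Fatal(err)
	}

	// Connect keeps its driver argument, but aborts on anything
	// other than "sqlite3".
	repository.Connect("sqlite3", dbfile)
	return repository.GetJobRepository()
}
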
job.job_state = "running" THEN %d - job.start_time ELSE job.duration END)) / 3600) as int) as totalWalltime`, time.Now().Unix()), + fmt.Sprintf(`CAST(SUM(job.num_nodes) as int) as totalNodes`), + fmt.Sprintf(`CAST(ROUND(SUM((CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END) * job.num_nodes) / 3600) as int) as totalNodeHours`, time.Now().Unix()), + fmt.Sprintf(`CAST(SUM(job.num_hwthreads) as int) as totalCores`), + fmt.Sprintf(`CAST(ROUND(SUM((CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END) * job.num_hwthreads) / 3600) as int) as totalCoreHours`, time.Now().Unix()), + fmt.Sprintf(`CAST(SUM(job.num_acc) as int) as totalAccs`), + fmt.Sprintf(`CAST(ROUND(SUM((CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END) * job.num_acc) / 3600) as int) as totalAccHours`, time.Now().Unix()), ).From("job").LeftJoin("hpc_user ON hpc_user.username = job.hpc_user").GroupBy(col) } else { // Scan columns: totalJobs, totalUsers, totalWalltime, totalNodes, totalNodeHours, totalCores, totalCoreHours, totalAccs, totalAccHours query = sq.Select( "COUNT(job.id) as totalJobs", "COUNT(DISTINCT job.hpc_user) AS totalUsers", - fmt.Sprintf(`CAST(ROUND(SUM((CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END)) / 3600) as %s)`, time.Now().Unix(), castType), - fmt.Sprintf(`CAST(SUM(job.num_nodes) as %s)`, castType), - fmt.Sprintf(`CAST(ROUND(SUM((CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END) * job.num_nodes) / 3600) as %s)`, time.Now().Unix(), castType), - fmt.Sprintf(`CAST(SUM(job.num_hwthreads) as %s)`, castType), - fmt.Sprintf(`CAST(ROUND(SUM((CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END) * job.num_hwthreads) / 3600) as %s)`, time.Now().Unix(), castType), - fmt.Sprintf(`CAST(SUM(job.num_acc) as %s)`, castType), - fmt.Sprintf(`CAST(ROUND(SUM((CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END) * job.num_acc) / 3600) as %s)`, time.Now().Unix(), castType), + fmt.Sprintf(`CAST(ROUND(SUM((CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END)) / 3600) as int)`, time.Now().Unix()), + fmt.Sprintf(`CAST(SUM(job.num_nodes) as int)`), + fmt.Sprintf(`CAST(ROUND(SUM((CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END) * job.num_nodes) / 3600) as int)`, time.Now().Unix()), + fmt.Sprintf(`CAST(SUM(job.num_hwthreads) as int)`), + fmt.Sprintf(`CAST(ROUND(SUM((CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END) * job.num_hwthreads) / 3600) as int)`, time.Now().Unix()), + fmt.Sprintf(`CAST(SUM(job.num_acc) as int)`), + fmt.Sprintf(`CAST(ROUND(SUM((CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END) * job.num_acc) / 3600) as int)`, time.Now().Unix()), ).From("job") } @@ -114,21 +111,6 @@ func (r *JobRepository) buildStatsQuery( return query } -func (r *JobRepository) getCastType() string { - var castType string - - switch r.driver { - case "sqlite3": - castType = "int" - case "mysql": - castType = "unsigned" - default: - castType = "" - } - - return castType -} - func (r *JobRepository) JobsStatsGrouped( ctx context.Context, filter []*model.JobFilter, @@ -477,10 +459,9 @@ func (r *JobRepository) AddHistograms( targetBinSize = 3600 } - castType := r.getCastType() var err error // Return X-Values always as seconds, will be formatted into minutes and hours in frontend - value := 
fmt.Sprintf(`CAST(ROUND(((CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END) / %d) + 1) as %s) as value`, time.Now().Unix(), targetBinSize, castType) + value := fmt.Sprintf(`CAST(ROUND(((CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END) / %d) + 1) as int) as value`, time.Now().Unix(), targetBinSize) stat.HistDuration, err = r.jobsDurationStatisticsHistogram(ctx, value, filter, targetBinSize, &targetBinCount) if err != nil { cclog.Warn("Error while loading job statistics histogram: job duration") diff --git a/internal/repository/userConfig_test.go b/internal/repository/userConfig_test.go index 0d6dc374..b6f68430 100644 --- a/internal/repository/userConfig_test.go +++ b/internal/repository/userConfig_test.go @@ -42,7 +42,7 @@ func setupUserTest(t *testing.T) *UserCfgRepo { cclog.Init("info", true) dbfilepath := "testdata/job.db" - err := MigrateDB("sqlite3", dbfilepath) + err := MigrateDB(dbfilepath) if err != nil { t.Fatal(err) } diff --git a/internal/tagger/detectApp_test.go b/internal/tagger/detectApp_test.go index f9fc91d0..7145d04f 100644 --- a/internal/tagger/detectApp_test.go +++ b/internal/tagger/detectApp_test.go @@ -15,7 +15,7 @@ func setup(tb testing.TB) *repository.JobRepository { tb.Helper() cclog.Init("warn", true) dbfile := "../repository/testdata/job.db" - err := repository.MigrateDB("sqlite3", dbfile) + err := repository.MigrateDB(dbfile) noErr(tb, err) repository.Connect("sqlite3", dbfile) return repository.GetJobRepository() From 89875db4a9a8d7dc53dacb480a6c144ae847772a Mon Sep 17 00:00:00 2001 From: Christoph Kluge Date: Mon, 22 Dec 2025 10:39:40 +0100 Subject: [PATCH 12/59] dashboard layout fixes --- web/frontend/src/DashPublic.root.svelte | 8 ++++---- web/frontend/src/generic/plots/Stacked.svelte | 2 +- web/frontend/src/status/DashInternal.svelte | 10 ++++++---- 3 files changed, 11 insertions(+), 9 deletions(-) diff --git a/web/frontend/src/DashPublic.root.svelte b/web/frontend/src/DashPublic.root.svelte index 25e2683c..c69b28f6 100644 --- a/web/frontend/src/DashPublic.root.svelte +++ b/web/frontend/src/DashPublic.root.svelte @@ -338,7 +338,7 @@ - + - + @@ -540,7 +540,7 @@ Date: Mon, 22 Dec 2025 17:26:56 +0100 Subject: [PATCH 13/59] Rework info panel in public dashboard - change to bootstrap grid from table - add infos, use badges - remove non required query --- internal/metricdata/cc-metric-store.go | 30 ++-- web/frontend/src/DashPublic.root.svelte | 219 ++++++++++++++---------- web/frontend/src/generic/units.js | 2 +- 3 files changed, 146 insertions(+), 105 deletions(-) diff --git a/internal/metricdata/cc-metric-store.go b/internal/metricdata/cc-metric-store.go index 6d446d17..be2e956e 100644 --- a/internal/metricdata/cc-metric-store.go +++ b/internal/metricdata/cc-metric-store.go @@ -770,21 +770,25 @@ func (ccms *CCMetricStore) LoadNodeData( } mc := archive.GetMetricConfig(cluster, metric) - hostdata[metric] = append(hostdata[metric], &schema.JobMetric{ - Unit: mc.Unit, - Timestep: mc.Timestep, - Series: []schema.Series{ - { - Hostname: query.Hostname, - Data: qdata.Data, - Statistics: schema.MetricStatistics{ - Avg: float64(qdata.Avg), - Min: float64(qdata.Min), - Max: float64(qdata.Max), + if mc != nil { + hostdata[metric] = append(hostdata[metric], &schema.JobMetric{ + Unit: mc.Unit, + Timestep: mc.Timestep, + Series: []schema.Series{ + { + Hostname: query.Hostname, + Data: qdata.Data, + Statistics: schema.MetricStatistics{ + Avg: float64(qdata.Avg), + Min: float64(qdata.Min), + Max: 
float64(qdata.Max), + }, }, }, - }, - }) + }) + } else { + cclog.Warnf("Metric '%s' not configured for cluster '%s': Skipped in LoadNodeData() Return!", metric, cluster) + } } if len(errors) != 0 { diff --git a/web/frontend/src/DashPublic.root.svelte b/web/frontend/src/DashPublic.root.svelte index c69b28f6..fbbf486d 100644 --- a/web/frontend/src/DashPublic.root.svelte +++ b/web/frontend/src/DashPublic.root.svelte @@ -30,7 +30,8 @@ Table, Progress, Icon, - Button + Button, + Badge } from "@sveltestrap/sveltestrap"; import Roofline from "./generic/plots/Roofline.svelte"; import Pie, { colors } from "./generic/plots/Pie.svelte"; @@ -85,7 +86,8 @@ query: gql` query ( $cluster: String! - $metrics: [String!] + $nmetrics: [String!] + $cmetrics: [String!] $from: Time! $to: Time! $clusterFrom: Time! @@ -97,7 +99,7 @@ # Node 5 Minute Averages for Roofline nodeMetrics( cluster: $cluster - metrics: $metrics + metrics: $nmetrics from: $from to: $to ) { @@ -106,6 +108,10 @@ metrics { name metric { + unit { + base + prefix + } series { statistics { avg @@ -114,21 +120,6 @@ } } } - # Running Job Metric Average for Rooflines - jobsMetricStats(filter: $jobFilter, metrics: $metrics) { - id - jobId - duration - numNodes - numAccelerators - subCluster - stats { - name - data { - avg - } - } - } # Get Jobs for Per-Node Counts jobs(filter: $jobFilter, order: $sorting, page: $paging) { items { @@ -175,7 +166,7 @@ # ClusterMetrics for doubleMetricPlot clusterMetrics( cluster: $cluster - metrics: $metrics + metrics: $cmetrics from: $clusterFrom to: $to ) { @@ -194,7 +185,8 @@ `, variables: { cluster: presetCluster, - metrics: ["flops_any", "mem_bw"], // Metrics For Cluster Plot and Roofline + nmetrics: ["flops_any", "mem_bw", "cpu_power", "acc_power"], // Metrics For Roofline and Stats + cmetrics: ["flops_any", "mem_bw"], // Metrics For Cluster Plot from: from.toISOString(), clusterFrom: clusterFrom.toISOString(), to: to.toISOString(), @@ -258,6 +250,11 @@ } } + // Get Idle Infos after Sums + if (!rawInfos['idleNodes']) rawInfos['idleNodes'] = rawInfos['totalNodes'] - rawInfos['allocatedNodes']; + if (!rawInfos['idleCores']) rawInfos['idleCores'] = rawInfos['totalCores'] - rawInfos['allocatedCores']; + if (!rawInfos['idleAccs']) rawInfos['idleAccs'] = rawInfos['totalAccs'] - rawInfos['allocatedAccs']; + // Keymetrics (Data on Cluster-Scope) let rawFlops = $statusQuery?.data?.nodeMetrics?.reduce((sum, node) => sum + (node.metrics.find((m) => m.name == 'flops_any')?.metric?.series[0]?.statistics?.avg || 0), @@ -271,6 +268,26 @@ ) || 0; rawInfos['memBwRate'] = Math.floor((rawMemBw * 100) / 100) + let rawCpuPwr = $statusQuery?.data?.nodeMetrics?.reduce((sum, node) => + sum + (node.metrics.find((m) => m.name == 'cpu_power')?.metric?.series[0]?.statistics?.avg || 0), + 0, // Initial Value + ) || 0; + rawInfos['cpuPwr'] = Math.floor((rawCpuPwr * 100) / 100) + if (!rawInfos['cpuPwrUnit']) { + let rawCpuUnit = $statusQuery?.data?.nodeMetrics[0]?.metrics.find((m) => m.name == 'cpu_power')?.metric?.unit || null + rawInfos['cpuPwrUnit'] = rawCpuUnit ? 
rawCpuUnit.prefix + rawCpuUnit.base : '' + } + + let rawGpuPwr = $statusQuery?.data?.nodeMetrics?.reduce((sum, node) => + sum + (node.metrics.find((m) => m.name == 'acc_power')?.metric?.series[0]?.statistics?.avg || 0), + 0, // Initial Value + ) || 0; + rawInfos['gpuPwr'] = Math.floor((rawGpuPwr * 100) / 100) + if (!rawInfos['gpuPwrUnit']) { + let rawGpuUnit = $statusQuery?.data?.nodeMetrics[0]?.metrics.find((m) => m.name == 'acc_power')?.metric?.unit || null + rawInfos['gpuPwrUnit'] = rawGpuUnit ? rawGpuUnit.prefix + rawGpuUnit.base : '' + } + return rawInfos } else { return {}; @@ -408,79 +425,99 @@ - - - - - -
    - - - - - - - - -
    - - - - - - - - - - + + + + {clusterInfo?.runningJobs} + +
    + Running Jobs +
    + + + + {clusterInfo?.activeUsers} + +
    + Active Users +
    + + + + {clusterInfo?.allocatedNodes} + +
    + Active Nodes +
    + + + + + + {clusterInfo?.flopRate} {clusterInfo?.flopRateUnit} + +
    + Total Flop Rate +
    + + + + {clusterInfo?.memBwRate} {clusterInfo?.memBwRateUnit} + +
    + Total Memory Bandwidth +
    + {#if clusterInfo?.totalAccs !== 0} - - - - - + + + {clusterInfo?.gpuPwr} {clusterInfo?.gpuPwrUnit} + +
    + Total GPU Power +
    + + {:else} + + + {clusterInfo?.cpuPwr} {clusterInfo?.cpuPwrUnit} + +
    + Total CPU Power +
    + {/if} -
    {clusterInfo?.runningJobs} Running Jobs{clusterInfo?.activeUsers} Active Users
    - Flop Rate (Any) - - Memory BW Rate -
    - {clusterInfo?.flopRate} - {clusterInfo?.flopRateUnit} - - {clusterInfo?.memBwRate} - {clusterInfo?.memBwRateUnit} -
    Allocated Nodes
    - -
    {clusterInfo?.allocatedNodes} / {clusterInfo?.totalNodes} - Nodes
    Allocated Cores
    - -
    {formatNumber(clusterInfo?.allocatedCores)} / {formatNumber(clusterInfo?.totalCores)} - Cores
    Allocated Accelerators
    - -
    {clusterInfo?.allocatedAccs} / {clusterInfo?.totalAccs} - Accelerators
    +
    + + + Active Cores + + + + {formatNumber(clusterInfo?.allocatedCores)} + {formatNumber(clusterInfo?.idleCores)} + + + + Idle Cores + + + {#if clusterInfo?.totalAccs !== 0} + + + Active GPU + + + + {formatNumber(clusterInfo?.allocatedAccs)} + {formatNumber(clusterInfo?.idleAccs)} + + + + Idle GPU + + + {/if}
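The one-character change to generic/units.js below swaps the kilo prefix from 'K' to lowercase 'k', matching SI notation (uppercase 'K' is the symbol for Kelvin). For illustration, the same scale-and-prefix lookup rendered as a Go sketch; the JavaScript in units.js remains the authoritative version, and the rounding here is simplified:

package main

import "fmt"

var (
	power  = []float64{1, 1e3, 1e6, 1e9, 1e12, 1e15, 1e18, 1e21}
	prefix = []string{"", "k", "M", "G", "T", "P", "E"}
)

// formatNumber picks the largest SI prefix whose scale does not
// exceed x, mirroring the frontend helper.
func formatNumber(x float64) string {
	for i := len(prefix) - 1; i >= 0; i-- {
		if x >= power[i] {
			return fmt.Sprintf("%.2f%s", x/power[i], prefix[i])
		}
	}
	return fmt.Sprintf("%g", x)
}

func main() {
	fmt.Println(formatNumber(1250))  // 1.25k
	fmt.Println(formatNumber(3.2e9)) // 3.20G
}
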
    diff --git a/web/frontend/src/generic/units.js b/web/frontend/src/generic/units.js index 1737b977..3e251fbf 100644 --- a/web/frontend/src/generic/units.js +++ b/web/frontend/src/generic/units.js @@ -3,7 +3,7 @@ */ const power = [1, 1e3, 1e6, 1e9, 1e12, 1e15, 1e18, 1e21] -const prefix = ['', 'K', 'M', 'G', 'T', 'P', 'E'] +const prefix = ['', 'k', 'M', 'G', 'T', 'P', 'E'] export function formatNumber(x) { if ( isNaN(x) || x == null) { From 0bc26aa1943cf281165084f46a2273c95ffe8d90 Mon Sep 17 00:00:00 2001 From: Jan Eitzinger Date: Tue, 23 Dec 2025 05:56:46 +0100 Subject: [PATCH 14/59] Add error check --- internal/api/nats.go | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/internal/api/nats.go b/internal/api/nats.go index 1bfe9051..61cbd979 100644 --- a/internal/api/nats.go +++ b/internal/api/nats.go @@ -224,7 +224,10 @@ func (api *NatsAPI) handleNodeState(subject string, data []byte) { JobsRunning: node.JobsRunning, } - repo.UpdateNodeState(node.Hostname, req.Cluster, &nodeState) + if err := repo.UpdateNodeState(node.Hostname, req.Cluster, &nodeState); err != nil { + cclog.Errorf("NATS %s: updating node state for %s on %s failed: %v", + subject, node.Hostname, req.Cluster, err) + } } cclog.Debugf("NATS %s: updated %d node states for cluster %s", subject, len(req.Nodes), req.Cluster) From c1135531ba26d3267791113b77c0f4bcc4f71234 Mon Sep 17 00:00:00 2001 From: Jan Eitzinger Date: Tue, 23 Dec 2025 07:56:13 +0100 Subject: [PATCH 15/59] Port NATS api to ccMessages --- go.mod | 5 +++ go.sum | 4 ++ internal/api/nats.go | 94 +++++++++++++++++++++++++++------------ internal/config/config.go | 3 +- pkg/nats/influxDecoder.go | 59 ++++++++++++++++++++++++ 5 files changed, 134 insertions(+), 31 deletions(-) create mode 100644 pkg/nats/influxDecoder.go diff --git a/go.mod b/go.mod index eb061de7..b821f7bf 100644 --- a/go.mod +++ b/go.mod @@ -50,6 +50,7 @@ require ( github.com/Azure/go-ntlmssp v0.0.0-20221128193559-754e69321358 // indirect github.com/KyleBanks/depth v1.2.1 // indirect github.com/agnivade/levenshtein v1.2.1 // indirect + github.com/apapsch/go-jsonmerge/v2 v2.0.0 // indirect github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.7.3 // indirect github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.18.13 // indirect github.com/aws/aws-sdk-go-v2/internal/configsources v1.4.13 // indirect @@ -89,6 +90,8 @@ require ( github.com/gorilla/securecookie v1.1.2 // indirect github.com/gorilla/websocket v1.5.3 // indirect github.com/hashicorp/golang-lru/v2 v2.0.7 // indirect + github.com/influxdata/influxdb-client-go/v2 v2.14.0 // indirect + github.com/influxdata/line-protocol v0.0.0-20210922203350-b1ad95c89adf // indirect github.com/jonboulle/clockwork v0.5.0 // indirect github.com/jpillora/backoff v1.0.0 // indirect github.com/json-iterator/go v1.1.12 // indirect @@ -101,6 +104,7 @@ require ( github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f // indirect github.com/nats-io/nkeys v0.4.11 // indirect github.com/nats-io/nuid v1.0.1 // indirect + github.com/oapi-codegen/runtime v1.1.1 // indirect github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect github.com/prometheus/client_model v0.6.2 // indirect github.com/prometheus/procfs v0.16.1 // indirect @@ -114,6 +118,7 @@ require ( github.com/xrash/smetrics v0.0.0-20250705151800-55b8f293f342 // indirect go.yaml.in/yaml/v2 v2.4.3 // indirect go.yaml.in/yaml/v3 v3.0.4 // indirect + golang.org/x/exp v0.0.0-20250620022241-b7579e27df2b // indirect golang.org/x/mod v0.30.0 // indirect golang.org/x/net 
v0.47.0 // indirect golang.org/x/sync v0.18.0 // indirect diff --git a/go.sum b/go.sum index fd4980da..04e2514b 100644 --- a/go.sum +++ b/go.sum @@ -14,6 +14,7 @@ github.com/NVIDIA/go-nvml v0.13.0-1 h1:OLX8Jq3dONuPOQPC7rndB6+iDmDakw0XTYgzMxObk github.com/NVIDIA/go-nvml v0.13.0-1/go.mod h1:+KNA7c7gIBH7SKSJ1ntlwkfN80zdx8ovl4hrK3LmPt4= github.com/PuerkitoBio/goquery v1.11.0 h1:jZ7pwMQXIITcUXNH83LLk+txlaEy6NVOfTuP43xxfqw= github.com/PuerkitoBio/goquery v1.11.0/go.mod h1:wQHgxUOU3JGuj3oD/QFfxUdlzW6xPHfqyHre6VMY4DQ= +github.com/RaveNoX/go-jsoncommentstrip v1.0.0/go.mod h1:78ihd09MekBnJnxpICcwzCMzGrKSKYe4AqU6PDYYpjk= github.com/agnivade/levenshtein v1.2.1 h1:EHBY3UOn1gwdy/VbFwgo4cxecRznFk7fKWN1KOX7eoM= github.com/agnivade/levenshtein v1.2.1/go.mod h1:QVVI16kDrtSuwcpd0p1+xMC6Z/VfhtCyDIjcwga4/DU= github.com/alexbrainman/sspi v0.0.0-20250919150558-7d374ff0d59e h1:4dAU9FXIyQktpoUAgOJK3OTFc/xug0PCXYCqU0FgDKI= @@ -64,6 +65,7 @@ github.com/aws/smithy-go v1.24.0 h1:LpilSUItNPFr1eY85RYgTIg5eIEPtvFbskaFcmmIUnk= github.com/aws/smithy-go v1.24.0/go.mod h1:LEj2LM3rBRQJxPZTB4KuzZkaZYnZPnvgIhb4pu07mx0= github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw= +github.com/bmatcuk/doublestar v1.1.1/go.mod h1:UD6OnuiIn0yFxxA2le/rnRU1G4RaI4UvFv1sNto9p6w= github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs= github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= github.com/coreos/go-oidc/v3 v3.16.0 h1:qRQUCFstKpXwmEjDQTIbyY/5jF00+asXzSkmkoa/mow= @@ -194,6 +196,7 @@ github.com/jpillora/backoff v1.0.0 h1:uvFg412JmmHBHw7iwprIxkPMI+sGQ4kzOWsMeHnm2E github.com/jpillora/backoff v1.0.0/go.mod h1:J/6gKK9jxlEcS3zixgDgUAsiuZ7yrSoa/FX5e0EB2j4= github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM= github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo= +github.com/juju/gnuflag v0.0.0-20171113085948-2ce1bb71843d/go.mod h1:2PavIy+JPciBPrBUjwbNvtwB6RQlve+hkpll6QSNmOE= github.com/klauspost/compress v1.18.1 h1:bcSGx7UbpBqMChDtsF28Lw6v/G94LPrrbMbdC3JH2co= github.com/klauspost/compress v1.18.1/go.mod h1:ZQFFVG+MdnR0P+l6wpXgIL4NTtwiKIdBnrBd8Nrxr+0= github.com/kr/pretty v0.2.1/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI= @@ -260,6 +263,7 @@ github.com/sergi/go-diff v1.3.1 h1:xkr+Oxo4BOQKmkn/B9eMK0g5Kg/983T9DqqPHwYqD+8= github.com/sergi/go-diff v1.3.1/go.mod h1:aMJSSKb2lpPvRNec0+w3fl7LP9IOFzdc9Pa4NFbPK1I= github.com/sosodev/duration v1.3.1 h1:qtHBDMQ6lvMQsL15g4aopM4HEfOaYuhWBw3NPTtlqq4= github.com/sosodev/duration v1.3.1/go.mod h1:RQIBBX0+fMLc/D9+Jb/fwvVmo0eZvDDEERAikUR6SDg= +github.com/spkg/bom v0.0.0-20160624110644-59b7046e48ad/go.mod h1:qLr4V1qq6nMqFKkMo8ZTx3f+BZEkzsRUY10Xsm2mwU0= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= github.com/stretchr/objx v0.5.2 h1:xuMeJ0Sdp5ZMRXx/aWO6RZxdr3beISkG5/G/aIRr3pY= diff --git a/internal/api/nats.go b/internal/api/nats.go index 1bfe9051..745e7acb 100644 --- a/internal/api/nats.go +++ b/internal/api/nats.go @@ -9,6 +9,7 @@ import ( "bytes" "database/sql" "encoding/json" + "strings" "sync" "time" @@ -18,7 +19,9 @@ import ( "github.com/ClusterCockpit/cc-backend/internal/repository" "github.com/ClusterCockpit/cc-backend/pkg/nats" cclog "github.com/ClusterCockpit/cc-lib/ccLogger" + lp "github.com/ClusterCockpit/cc-lib/ccMessage" 
"github.com/ClusterCockpit/cc-lib/schema" + influx "github.com/influxdata/line-protocol/v2/lineprotocol" ) // NatsAPI provides NATS subscription-based handlers for Job and Node operations. @@ -50,11 +53,7 @@ func (api *NatsAPI) StartSubscriptions() error { s := config.Keys.APISubjects - if err := client.Subscribe(s.SubjectJobStart, api.handleStartJob); err != nil { - return err - } - - if err := client.Subscribe(s.SubjectJobStop, api.handleStopJob); err != nil { + if err := client.Subscribe(s.SubjectJobEvent, api.handleJobEvent); err != nil { return err } @@ -67,26 +66,63 @@ func (api *NatsAPI) StartSubscriptions() error { return nil } +func (api *NatsAPI) processJobEvent(msg lp.CCMessage) { + function, ok := msg.GetTag("function") + if !ok { + cclog.Errorf("Job event is missing tag 'function': %+v", msg) + return + } + + switch function { + case "start_job": + api.handleStartJob(msg.GetEventValue()) + + case "stop_job": + api.handleStopJob(msg.GetEventValue()) + default: + cclog.Warnf("Unimplemented job event: %+v", msg) + } +} + +func (api *NatsAPI) handleJobEvent(subject string, data []byte) { + d := influx.NewDecoderWithBytes(data) + + for d.Next() { + m, err := nats.DecodeInfluxMessage(d) + if err != nil { + cclog.Errorf("NATS %s: Failed to decode message: %v", subject, err) + return + } + + if m.IsEvent() { + if m.Name() == "job" { + api.processJobEvent(m) + } + } + + } +} + // handleStartJob processes job start messages received via NATS. // Expected JSON payload follows the schema.Job structure. -func (api *NatsAPI) handleStartJob(subject string, data []byte) { +func (api *NatsAPI) handleStartJob(payload string) { req := schema.Job{ Shared: "none", MonitoringStatus: schema.MonitoringStatusRunningOrArchiving, } - dec := json.NewDecoder(bytes.NewReader(data)) + dec := json.NewDecoder(strings.NewReader(payload)) dec.DisallowUnknownFields() if err := dec.Decode(&req); err != nil { - cclog.Errorf("NATS %s: parsing request failed: %v", subject, err) + cclog.Errorf("NATS start job: parsing request failed: %v", err) return } - cclog.Debugf("NATS %s: %s", subject, req.GoString()) + cclog.Debugf("NATS start job: %s", req.GoString()) req.State = schema.JobStateRunning if err := importer.SanityChecks(&req); err != nil { - cclog.Errorf("NATS %s: sanity check failed: %v", subject, err) + cclog.Errorf("NATS start job: sanity check failed: %v", err) return } @@ -96,14 +132,14 @@ func (api *NatsAPI) handleStartJob(subject string, data []byte) { jobs, err := api.JobRepository.FindAll(&req.JobID, &req.Cluster, nil) if err != nil && err != sql.ErrNoRows { - cclog.Errorf("NATS %s: checking for duplicate failed: %v", subject, err) + cclog.Errorf("NATS start job: checking for duplicate failed: %v", err) return } if err == nil { for _, job := range jobs { if (req.StartTime - job.StartTime) < secondsPerDay { - cclog.Errorf("NATS %s: job with jobId %d, cluster %s already exists (dbid: %d)", - subject, req.JobID, req.Cluster, job.ID) + cclog.Errorf("NATS start job: job with jobId %d, cluster %s already exists (dbid: %d)", + req.JobID, req.Cluster, job.ID) return } } @@ -111,14 +147,14 @@ func (api *NatsAPI) handleStartJob(subject string, data []byte) { id, err := api.JobRepository.Start(&req) if err != nil { - cclog.Errorf("NATS %s: insert into database failed: %v", subject, err) + cclog.Errorf("NATS start job: insert into database failed: %v", err) return } unlockOnce.Do(api.RepositoryMutex.Unlock) for _, tag := range req.Tags { if _, err := api.JobRepository.AddTagOrCreate(nil, id, tag.Type, tag.Name, 
tag.Scope); err != nil { - cclog.Errorf("NATS %s: adding tag to new job %d failed: %v", subject, id, err) + cclog.Errorf("NATS start job: adding tag to new job %d failed: %v", id, err) return } } @@ -129,18 +165,18 @@ func (api *NatsAPI) handleStartJob(subject string, data []byte) { // handleStopJob processes job stop messages received via NATS. // Expected JSON payload follows the StopJobAPIRequest structure. -func (api *NatsAPI) handleStopJob(subject string, data []byte) { +func (api *NatsAPI) handleStopJob(payload string) { var req StopJobAPIRequest - dec := json.NewDecoder(bytes.NewReader(data)) + dec := json.NewDecoder(strings.NewReader(payload)) dec.DisallowUnknownFields() if err := dec.Decode(&req); err != nil { - cclog.Errorf("NATS %s: parsing request failed: %v", subject, err) + cclog.Errorf("NATS job stop: parsing request failed: %v", err) return } if req.JobID == nil { - cclog.Errorf("NATS %s: the field 'jobId' is required", subject) + cclog.Errorf("NATS job stop: the field 'jobId' is required") return } @@ -148,28 +184,28 @@ func (api *NatsAPI) handleStopJob(subject string, data []byte) { if err != nil { cachedJob, cachedErr := api.JobRepository.FindCached(req.JobID, req.Cluster, req.StartTime) if cachedErr != nil { - cclog.Errorf("NATS %s: finding job failed: %v (cached lookup also failed: %v)", - subject, err, cachedErr) + cclog.Errorf("NATS job stop: finding job failed: %v (cached lookup also failed: %v)", + err, cachedErr) return } job = cachedJob } if job.State != schema.JobStateRunning { - cclog.Errorf("NATS %s: jobId %d (id %d) on %s: job has already been stopped (state is: %s)", - subject, job.JobID, job.ID, job.Cluster, job.State) + cclog.Errorf("NATS job stop: jobId %d (id %d) on %s: job has already been stopped (state is: %s)", + job.JobID, job.ID, job.Cluster, job.State) return } if job.StartTime > req.StopTime { - cclog.Errorf("NATS %s: jobId %d (id %d) on %s: stopTime %d must be >= startTime %d", - subject, job.JobID, job.ID, job.Cluster, req.StopTime, job.StartTime) + cclog.Errorf("NATS job stop: jobId %d (id %d) on %s: stopTime %d must be >= startTime %d", + job.JobID, job.ID, job.Cluster, req.StopTime, job.StartTime) return } if req.State != "" && !req.State.Valid() { - cclog.Errorf("NATS %s: jobId %d (id %d) on %s: invalid job state: %#v", - subject, job.JobID, job.ID, job.Cluster, req.State) + cclog.Errorf("NATS job stop: jobId %d (id %d) on %s: invalid job state: %#v", + job.JobID, job.ID, job.Cluster, req.State) return } else if req.State == "" { req.State = schema.JobStateCompleted @@ -182,8 +218,8 @@ func (api *NatsAPI) handleStopJob(subject string, data []byte) { if err := api.JobRepository.Stop(*job.ID, job.Duration, job.State, job.MonitoringStatus); err != nil { if err := api.JobRepository.StopCached(*job.ID, job.Duration, job.State, job.MonitoringStatus); err != nil { - cclog.Errorf("NATS %s: jobId %d (id %d) on %s: marking job as '%s' failed: %v", - subject, job.JobID, job.ID, job.Cluster, job.State, err) + cclog.Errorf("NATS job stop: jobId %d (id %d) on %s: marking job as '%s' failed: %v", + job.JobID, job.ID, job.Cluster, job.State, err) return } } diff --git a/internal/config/config.go b/internal/config/config.go index b7b8ed06..3c88bcfd 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -90,8 +90,7 @@ type ResampleConfig struct { } type NATSConfig struct { - SubjectJobStart string `json:"subjectJobStart"` - SubjectJobStop string `json:"subjectJobStop"` + SubjectJobEvent string `json:"subjectJobEvent"` SubjectNodeState 
string `json:"subjectNodeState"`
 }
 
diff --git a/pkg/nats/influxDecoder.go b/pkg/nats/influxDecoder.go
new file mode 100644
index 00000000..412f85e9
--- /dev/null
+++ b/pkg/nats/influxDecoder.go
@@ -0,0 +1,59 @@
+// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
+// All rights reserved. This file is part of cc-backend.
+// Use of this source code is governed by a MIT-style
+// license that can be found in the LICENSE file.
+
+package nats
+
+import (
+	"time"
+
+	lp "github.com/ClusterCockpit/cc-lib/ccMessage"
+	influx "github.com/influxdata/line-protocol/v2/lineprotocol"
+)
+
+// DecodeInfluxMessage decodes a single InfluxDB line protocol message from the decoder.
+// It returns the decoded CCMessage, or an error if decoding fails.
+func DecodeInfluxMessage(d *influx.Decoder) (lp.CCMessage, error) {
+	measurement, err := d.Measurement()
+	if err != nil {
+		return nil, err
+	}
+
+	tags := make(map[string]string)
+	for {
+		key, value, err := d.NextTag()
+		if err != nil {
+			return nil, err
+		}
+		if key == nil {
+			break
+		}
+		tags[string(key)] = string(value)
+	}
+
+	fields := make(map[string]interface{})
+	for {
+		key, value, err := d.NextField()
+		if err != nil {
+			return nil, err
+		}
+		if key == nil {
+			break
+		}
+		fields[string(key)] = value.Interface()
+	}
+
+	t, err := d.Time(influx.Nanosecond, time.Time{})
+	if err != nil {
+		return nil, err
+	}
+
+	return lp.NewMessage(
+		string(measurement),
+		tags,
+		nil,
+		fields,
+		t,
+	)
+}
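The decoder above is the receiving half of the reworked contract: job start and stop requests now arrive as a single line-protocol event on the configured subjectJobEvent, with the old JSON body carried in the event field and the operation selected by the function tag. A sketch of a matching publisher using the same lineprotocol/v2 encoder; the payload values are illustrative, and the convention that cc-lib events carry their payload in a string field named "event" is inferred from the GetEventValue() usage above:

package main

import (
	"fmt"
	"time"

	influx "github.com/influxdata/line-protocol/v2/lineprotocol"
)

func main() {
	// The same JSON body that was previously published on the dedicated
	// job-start subject; fields follow schema.Job.
	payload := `{"jobId": 1001, "cluster": "testcluster", "startTime": 1234567890}`

	var enc influx.Encoder
	enc.SetPrecision(influx.Nanosecond)
	enc.StartLine("job")                // measurement checked by handleJobEvent
	enc.AddTag("function", "start_job") // dispatched in processJobEvent
	enc.AddField("event", influx.MustNewValue(payload))
	enc.EndLine(time.Now())
	if err := enc.Err(); err != nil {
		panic(err)
	}

	// enc.Bytes() is what a client would publish on subjectJobEvent;
	// printing it here stands in for the NATS publish call.
	fmt.Printf("%s\n", enc.Bytes())
}
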
From 64fef9774cce32caf35b8e20adbef51896205315 Mon Sep 17 00:00:00 2001
From: Jan Eitzinger
Date: Tue, 23 Dec 2025 09:22:57 +0100
Subject: [PATCH 16/59] Add unit test for NATS API

---
 internal/api/api_test.go            |  10 +-
 internal/api/nats_test.go           | 892 ++++++++++++++++++++++++++++
 internal/repository/dbConnection.go |  23 +
 3 files changed, 920 insertions(+), 5 deletions(-)
 create mode 100644 internal/api/nats_test.go

diff --git a/internal/api/api_test.go b/internal/api/api_test.go
index d311767c..3030b1c1 100644
--- a/internal/api/api_test.go
+++ b/internal/api/api_test.go
@@ -36,6 +36,8 @@ import (
 )
 
 func setup(t *testing.T) *api.RestAPI {
+	repository.ResetConnection()
+
 	const testconfig = `{
 	"main": {
     "addr": "0.0.0.0:8080",
@@ -190,11 +192,9 @@ func setup(t *testing.T) *api.RestAPI {
 }
 
 func cleanup() {
-	// Gracefully shutdown archiver with timeout
 	if err := archiver.Shutdown(5 * time.Second); err != nil {
 		cclog.Warnf("Archiver shutdown timeout in tests: %v", err)
 	}
-	// TODO: Clear all caches, reset all modules, etc...
 }
 
 /*
@@ -230,7 +230,7 @@ func TestRestApi(t *testing.T) {
 	r.StrictSlash(true)
 	restapi.MountAPIRoutes(r)
 
-	var TestJobId int64 = 123
+	var TestJobID int64 = 123
 	TestClusterName := "testcluster"
 	var TestStartTime int64 = 123456789
 
@@ -280,7 +280,7 @@ func TestRestApi(t *testing.T) {
 	}
 	// resolver := graph.GetResolverInstance()
 	restapi.JobRepository.SyncJobs()
-	job, err := restapi.JobRepository.Find(&TestJobId, &TestClusterName, &TestStartTime)
+	job, err := restapi.JobRepository.Find(&TestJobID, &TestClusterName, &TestStartTime)
 	if err != nil {
 		t.Fatal(err)
 	}
@@ -338,7 +338,7 @@ func TestRestApi(t *testing.T) {
 	}
 
 	// Archiving happens asynchronously, will be completed in cleanup
-	job, err := restapi.JobRepository.Find(&TestJobId, &TestClusterName, &TestStartTime)
+	job, err := restapi.JobRepository.Find(&TestJobID, &TestClusterName, &TestStartTime)
 	if err != nil {
 		t.Fatal(err)
 	}
diff --git a/internal/api/nats_test.go b/internal/api/nats_test.go
new file mode 100644
index 00000000..420a359c
--- /dev/null
+++ b/internal/api/nats_test.go
@@ -0,0 +1,892 @@
+// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
+// All rights reserved. This file is part of cc-backend.
+// Use of this source code is governed by a MIT-style
+// license that can be found in the LICENSE file.
+package api
+
+import (
+	"context"
+	"database/sql"
+	"encoding/json"
+	"fmt"
+	"os"
+	"path/filepath"
+	"testing"
+	"time"
+
+	"github.com/ClusterCockpit/cc-backend/internal/archiver"
+	"github.com/ClusterCockpit/cc-backend/internal/auth"
+	"github.com/ClusterCockpit/cc-backend/internal/config"
+	"github.com/ClusterCockpit/cc-backend/internal/graph"
+	"github.com/ClusterCockpit/cc-backend/internal/metricdata"
+	"github.com/ClusterCockpit/cc-backend/internal/repository"
+	"github.com/ClusterCockpit/cc-backend/pkg/archive"
+	ccconf "github.com/ClusterCockpit/cc-lib/ccConfig"
+	cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
+	lp "github.com/ClusterCockpit/cc-lib/ccMessage"
+	"github.com/ClusterCockpit/cc-lib/schema"
+
+	_ "github.com/mattn/go-sqlite3"
+)
+
+func setupNatsTest(t *testing.T) *NatsAPI {
+	repository.ResetConnection()
+
+	const testconfig = `{
+	"main": {
+    "addr": "0.0.0.0:8080",
+	"validate": false,
+	"apiAllowedIPs": [
+		"*"
+	]
+	},
+	"archive": {
+		"kind": "file",
+		"path": "./var/job-archive"
+	},
+	"auth": {
+	"jwts": {
+		"max-age": "2m"
+	}
+	},
+	"clusters": [
+	{
+	   "name": "testcluster",
+	   "metricDataRepository": {"kind": "test", "url": "bla:8081"},
+	   "filterRanges": {
+		"numNodes": { "from": 1, "to": 64 },
+		"duration": { "from": 0, "to": 86400 },
+		"startTime": { "from": "2022-01-01T00:00:00Z", "to": null }
+	   }
+	}
+	]
+}`
+	const testclusterJSON = `{
+         "name": "testcluster",
+	   "subClusters": [
+	      {
+		 "name": "sc1",
+		 "nodes": "host123,host124,host125",
+		 "processorType": "Intel Core i7-4770",
+		 "socketsPerNode": 1,
+		 "coresPerSocket": 4,
+		 "threadsPerCore": 2,
+		 "flopRateScalar": {
+		    "unit": {
+		       "prefix": "G",
+		       "base": "F/s"
+		    },
+		    "value": 14
+		 },
+		 "flopRateSimd": {
+		    "unit": {
+		       "prefix": "G",
+		       "base": "F/s"
+		    },
+		    "value": 112
+		 },
+		 "memoryBandwidth": {
+		    "unit": {
+		       "prefix": "G",
+		       "base": "B/s"
+		    },
+		    "value": 24
+		 },
+		 "numberOfNodes": 70,
+		 "topology": {
+		    "node": [0, 1, 2, 3, 4, 5, 6, 7],
+		    "socket": [[0, 1, 2, 3, 4, 5, 6, 7]],
+		    "memoryDomain": [[0, 1, 2, 3, 4, 5, 6, 7]],
+		    "die": [[0, 1, 2, 3, 4, 5, 6, 7]],
+		    "core": [[0], [1], [2], [3], [4], [5], [6], [7]]
+		 }
+	      }
+	   ],
+	   "metricConfig": [
+	      {
+		 "name": "load_one",
+		 "unit": { "base": ""},
+		 "scope": "node",
+		 "timestep": 60,
+		 
"aggregation": "avg", + "peak": 8, + "normal": 0, + "caution": 0, + "alert": 0 + } + ] + }` + + cclog.Init("info", true) + tmpdir := t.TempDir() + jobarchive := filepath.Join(tmpdir, "job-archive") + if err := os.Mkdir(jobarchive, 0o777); err != nil { + t.Fatal(err) + } + + if err := os.WriteFile(filepath.Join(jobarchive, "version.txt"), fmt.Appendf(nil, "%d", 3), 0o666); err != nil { + t.Fatal(err) + } + + if err := os.Mkdir(filepath.Join(jobarchive, "testcluster"), 0o777); err != nil { + t.Fatal(err) + } + + if err := os.WriteFile(filepath.Join(jobarchive, "testcluster", "cluster.json"), []byte(testclusterJSON), 0o666); err != nil { + t.Fatal(err) + } + + dbfilepath := filepath.Join(tmpdir, "test.db") + err := repository.MigrateDB(dbfilepath) + if err != nil { + t.Fatal(err) + } + + cfgFilePath := filepath.Join(tmpdir, "config.json") + if err := os.WriteFile(cfgFilePath, []byte(testconfig), 0o666); err != nil { + t.Fatal(err) + } + + ccconf.Init(cfgFilePath) + + // Load and check main configuration + if cfg := ccconf.GetPackageConfig("main"); cfg != nil { + if clustercfg := ccconf.GetPackageConfig("clusters"); clustercfg != nil { + config.Init(cfg, clustercfg) + } else { + cclog.Abort("Cluster configuration must be present") + } + } else { + cclog.Abort("Main configuration must be present") + } + archiveCfg := fmt.Sprintf("{\"kind\": \"file\",\"path\": \"%s\"}", jobarchive) + + repository.Connect("sqlite3", dbfilepath) + + if err := archive.Init(json.RawMessage(archiveCfg), config.Keys.DisableArchive); err != nil { + t.Fatal(err) + } + + if err := metricdata.Init(); err != nil { + t.Fatal(err) + } + + archiver.Start(repository.GetJobRepository(), context.Background()) + + if cfg := ccconf.GetPackageConfig("auth"); cfg != nil { + auth.Init(&cfg) + } else { + cclog.Warn("Authentication disabled due to missing configuration") + auth.Init(nil) + } + + graph.Init() + + return NewNatsAPI() +} + +func cleanupNatsTest() { + if err := archiver.Shutdown(5 * time.Second); err != nil { + cclog.Warnf("Archiver shutdown timeout in tests: %v", err) + } +} + +func TestNatsHandleStartJob(t *testing.T) { + natsAPI := setupNatsTest(t) + t.Cleanup(cleanupNatsTest) + + tests := []struct { + name string + payload string + expectError bool + validateJob func(t *testing.T, job *schema.Job) + shouldFindJob bool + }{ + { + name: "valid job start", + payload: `{ + "jobId": 1001, + "user": "testuser1", + "project": "testproj1", + "cluster": "testcluster", + "partition": "main", + "walltime": 7200, + "numNodes": 1, + "numHwthreads": 8, + "numAcc": 0, + "shared": "none", + "monitoringStatus": 1, + "smt": 1, + "resources": [ + { + "hostname": "host123", + "hwthreads": [0, 1, 2, 3, 4, 5, 6, 7] + } + ], + "startTime": 1234567890 + }`, + expectError: false, + shouldFindJob: true, + validateJob: func(t *testing.T, job *schema.Job) { + if job.JobID != 1001 { + t.Errorf("expected JobID 1001, got %d", job.JobID) + } + if job.User != "testuser1" { + t.Errorf("expected user testuser1, got %s", job.User) + } + if job.State != schema.JobStateRunning { + t.Errorf("expected state running, got %s", job.State) + } + }, + }, + { + name: "invalid JSON", + payload: `{ + "jobId": "not a number", + "user": "testuser2" + }`, + expectError: true, + shouldFindJob: false, + }, + { + name: "missing required fields", + payload: `{ + "jobId": 1002 + }`, + expectError: true, + shouldFindJob: false, + }, + { + name: "job with unknown fields (should fail due to DisallowUnknownFields)", + payload: `{ + "jobId": 1003, + "user": "testuser3", + 
"project": "testproj3", + "cluster": "testcluster", + "partition": "main", + "walltime": 3600, + "numNodes": 1, + "numHwthreads": 8, + "unknownField": "should cause error", + "startTime": 1234567900 + }`, + expectError: true, + shouldFindJob: false, + }, + { + name: "job with tags", + payload: `{ + "jobId": 1004, + "user": "testuser4", + "project": "testproj4", + "cluster": "testcluster", + "partition": "main", + "walltime": 3600, + "numNodes": 1, + "numHwthreads": 8, + "numAcc": 0, + "shared": "none", + "monitoringStatus": 1, + "smt": 1, + "resources": [ + { + "hostname": "host123", + "hwthreads": [0, 1, 2, 3] + } + ], + "tags": [ + { + "type": "test", + "name": "testtag", + "scope": "testuser4" + } + ], + "startTime": 1234567910 + }`, + expectError: false, + shouldFindJob: true, + validateJob: func(t *testing.T, job *schema.Job) { + if job.JobID != 1004 { + t.Errorf("expected JobID 1004, got %d", job.JobID) + } + }, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + natsAPI.handleStartJob(tt.payload) + natsAPI.JobRepository.SyncJobs() + + // Allow some time for async operations + time.Sleep(100 * time.Millisecond) + + if tt.shouldFindJob { + // Extract jobId from payload + var payloadMap map[string]any + json.Unmarshal([]byte(tt.payload), &payloadMap) + jobID := int64(payloadMap["jobId"].(float64)) + cluster := payloadMap["cluster"].(string) + startTime := int64(payloadMap["startTime"].(float64)) + + job, err := natsAPI.JobRepository.Find(&jobID, &cluster, &startTime) + if err != nil { + if !tt.expectError { + t.Fatalf("expected to find job, but got error: %v", err) + } + return + } + + if tt.validateJob != nil { + tt.validateJob(t, job) + } + } + }) + } +} + +func TestNatsHandleStopJob(t *testing.T) { + natsAPI := setupNatsTest(t) + t.Cleanup(cleanupNatsTest) + + // First, create a running job + startPayload := `{ + "jobId": 2001, + "user": "testuser", + "project": "testproj", + "cluster": "testcluster", + "partition": "main", + "walltime": 3600, + "numNodes": 1, + "numHwthreads": 8, + "numAcc": 0, + "shared": "none", + "monitoringStatus": 1, + "smt": 1, + "resources": [ + { + "hostname": "host123", + "hwthreads": [0, 1, 2, 3, 4, 5, 6, 7] + } + ], + "startTime": 1234567890 + }` + + natsAPI.handleStartJob(startPayload) + natsAPI.JobRepository.SyncJobs() + time.Sleep(100 * time.Millisecond) + + tests := []struct { + name string + payload string + expectError bool + validateJob func(t *testing.T, job *schema.Job) + setupJobFunc func() // Optional: create specific test job + }{ + { + name: "valid job stop - completed", + payload: `{ + "jobId": 2001, + "cluster": "testcluster", + "startTime": 1234567890, + "jobState": "completed", + "stopTime": 1234571490 + }`, + expectError: false, + validateJob: func(t *testing.T, job *schema.Job) { + if job.State != schema.JobStateCompleted { + t.Errorf("expected state completed, got %s", job.State) + } + expectedDuration := int32(1234571490 - 1234567890) + if job.Duration != expectedDuration { + t.Errorf("expected duration %d, got %d", expectedDuration, job.Duration) + } + }, + }, + { + name: "valid job stop - failed", + setupJobFunc: func() { + startPayloadFailed := `{ + "jobId": 2002, + "user": "testuser", + "project": "testproj", + "cluster": "testcluster", + "partition": "main", + "walltime": 3600, + "numNodes": 1, + "numHwthreads": 8, + "numAcc": 0, + "shared": "none", + "monitoringStatus": 1, + "smt": 1, + "resources": [ + { + "hostname": "host123", + "hwthreads": [0, 1, 2, 3] + } + ], + "startTime": 1234567900 + }` + 
natsAPI.handleStartJob(startPayloadFailed) + natsAPI.JobRepository.SyncJobs() + time.Sleep(100 * time.Millisecond) + }, + payload: `{ + "jobId": 2002, + "cluster": "testcluster", + "startTime": 1234567900, + "jobState": "failed", + "stopTime": 1234569900 + }`, + expectError: false, + validateJob: func(t *testing.T, job *schema.Job) { + if job.State != schema.JobStateFailed { + t.Errorf("expected state failed, got %s", job.State) + } + }, + }, + { + name: "invalid JSON", + payload: `{ + "jobId": "not a number" + }`, + expectError: true, + }, + { + name: "missing jobId", + payload: `{ + "cluster": "testcluster", + "jobState": "completed", + "stopTime": 1234571490 + }`, + expectError: true, + }, + { + name: "invalid job state", + setupJobFunc: func() { + startPayloadInvalid := `{ + "jobId": 2003, + "user": "testuser", + "project": "testproj", + "cluster": "testcluster", + "partition": "main", + "walltime": 3600, + "numNodes": 1, + "numHwthreads": 8, + "numAcc": 0, + "shared": "none", + "monitoringStatus": 1, + "smt": 1, + "resources": [ + { + "hostname": "host123", + "hwthreads": [0, 1] + } + ], + "startTime": 1234567910 + }` + natsAPI.handleStartJob(startPayloadInvalid) + natsAPI.JobRepository.SyncJobs() + time.Sleep(100 * time.Millisecond) + }, + payload: `{ + "jobId": 2003, + "cluster": "testcluster", + "startTime": 1234567910, + "jobState": "invalid_state", + "stopTime": 1234571510 + }`, + expectError: true, + }, + { + name: "stopTime before startTime", + setupJobFunc: func() { + startPayloadTime := `{ + "jobId": 2004, + "user": "testuser", + "project": "testproj", + "cluster": "testcluster", + "partition": "main", + "walltime": 3600, + "numNodes": 1, + "numHwthreads": 8, + "numAcc": 0, + "shared": "none", + "monitoringStatus": 1, + "smt": 1, + "resources": [ + { + "hostname": "host123", + "hwthreads": [0] + } + ], + "startTime": 1234567920 + }` + natsAPI.handleStartJob(startPayloadTime) + natsAPI.JobRepository.SyncJobs() + time.Sleep(100 * time.Millisecond) + }, + payload: `{ + "jobId": 2004, + "cluster": "testcluster", + "startTime": 1234567920, + "jobState": "completed", + "stopTime": 1234567900 + }`, + expectError: true, + }, + { + name: "job not found", + payload: `{ + "jobId": 99999, + "cluster": "testcluster", + "startTime": 1234567890, + "jobState": "completed", + "stopTime": 1234571490 + }`, + expectError: true, + }, + } + + testData := schema.JobData{ + "load_one": map[schema.MetricScope]*schema.JobMetric{ + schema.MetricScopeNode: { + Unit: schema.Unit{Base: "load"}, + Timestep: 60, + Series: []schema.Series{ + { + Hostname: "host123", + Statistics: schema.MetricStatistics{Min: 0.1, Avg: 0.2, Max: 0.3}, + Data: []schema.Float{0.1, 0.1, 0.1, 0.2, 0.2, 0.2, 0.3, 0.3, 0.3}, + }, + }, + }, + }, + } + + metricdata.TestLoadDataCallback = func(job *schema.Job, metrics []string, scopes []schema.MetricScope, ctx context.Context, resolution int) (schema.JobData, error) { + return testData, nil + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if tt.setupJobFunc != nil { + tt.setupJobFunc() + } + + natsAPI.handleStopJob(tt.payload) + + // Allow some time for async operations + time.Sleep(100 * time.Millisecond) + + if !tt.expectError && tt.validateJob != nil { + // Extract job details from payload + var payloadMap map[string]any + json.Unmarshal([]byte(tt.payload), &payloadMap) + jobID := int64(payloadMap["jobId"].(float64)) + cluster := payloadMap["cluster"].(string) + + var startTime *int64 + if st, ok := payloadMap["startTime"]; ok { + t := int64(st.(float64)) 
+ startTime = &t + } + + job, err := natsAPI.JobRepository.Find(&jobID, &cluster, startTime) + if err != nil { + t.Fatalf("expected to find job, but got error: %v", err) + } + + tt.validateJob(t, job) + } + }) + } +} + +func TestNatsHandleNodeState(t *testing.T) { + natsAPI := setupNatsTest(t) + t.Cleanup(cleanupNatsTest) + + tests := []struct { + name string + payload string + expectError bool + validateFn func(t *testing.T) + }{ + { + name: "valid node state update", + payload: `{ + "cluster": "testcluster", + "nodes": [ + { + "hostname": "host123", + "states": ["allocated"], + "cpusAllocated": 8, + "memoryAllocated": 16384, + "gpusAllocated": 0, + "jobsRunning": 1 + } + ] + }`, + expectError: false, + validateFn: func(t *testing.T) { + // In a full test, we would verify the node state was updated in the database + // For now, just ensure no error occurred + }, + }, + { + name: "multiple nodes", + payload: `{ + "cluster": "testcluster", + "nodes": [ + { + "hostname": "host123", + "states": ["idle"], + "cpusAllocated": 0, + "memoryAllocated": 0, + "gpusAllocated": 0, + "jobsRunning": 0 + }, + { + "hostname": "host124", + "states": ["allocated"], + "cpusAllocated": 4, + "memoryAllocated": 8192, + "gpusAllocated": 1, + "jobsRunning": 1 + } + ] + }`, + expectError: false, + }, + { + name: "invalid JSON", + payload: `{ + "cluster": "testcluster", + "nodes": "not an array" + }`, + expectError: true, + }, + { + name: "empty nodes array", + payload: `{ + "cluster": "testcluster", + "nodes": [] + }`, + expectError: false, // Empty array should not cause error + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + natsAPI.handleNodeState("test.subject", []byte(tt.payload)) + + // Allow some time for async operations + time.Sleep(50 * time.Millisecond) + + if tt.validateFn != nil { + tt.validateFn(t) + } + }) + } +} + +func TestNatsProcessJobEvent(t *testing.T) { + natsAPI := setupNatsTest(t) + t.Cleanup(cleanupNatsTest) + + msgStartJob, err := lp.NewMessage( + "job", + map[string]string{"function": "start_job"}, + nil, + map[string]any{ + "event": `{ + "jobId": 3001, + "user": "testuser", + "project": "testproj", + "cluster": "testcluster", + "partition": "main", + "walltime": 3600, + "numNodes": 1, + "numHwthreads": 8, + "numAcc": 0, + "shared": "none", + "monitoringStatus": 1, + "smt": 1, + "resources": [ + { + "hostname": "host123", + "hwthreads": [0, 1, 2, 3] + } + ], + "startTime": 1234567890 + }`, + }, + time.Now(), + ) + if err != nil { + t.Fatalf("failed to create test message: %v", err) + } + + msgMissingTag, err := lp.NewMessage( + "job", + map[string]string{}, + nil, + map[string]any{ + "event": `{}`, + }, + time.Now(), + ) + if err != nil { + t.Fatalf("failed to create test message: %v", err) + } + + msgUnknownFunc, err := lp.NewMessage( + "job", + map[string]string{"function": "unknown_function"}, + nil, + map[string]any{ + "event": `{}`, + }, + time.Now(), + ) + if err != nil { + t.Fatalf("failed to create test message: %v", err) + } + + tests := []struct { + name string + message lp.CCMessage + expectError bool + }{ + { + name: "start_job function", + message: msgStartJob, + expectError: false, + }, + { + name: "missing function tag", + message: msgMissingTag, + expectError: true, + }, + { + name: "unknown function", + message: msgUnknownFunc, + expectError: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + natsAPI.processJobEvent(tt.message) + time.Sleep(50 * time.Millisecond) + }) + } +} + +func TestNatsHandleJobEvent(t 
*testing.T) { + natsAPI := setupNatsTest(t) + t.Cleanup(cleanupNatsTest) + + tests := []struct { + name string + data []byte + expectError bool + }{ + { + name: "valid influx line protocol", + data: []byte(`job,function=start_job event="{\"jobId\":4001,\"user\":\"testuser\",\"project\":\"testproj\",\"cluster\":\"testcluster\",\"partition\":\"main\",\"walltime\":3600,\"numNodes\":1,\"numHwthreads\":8,\"numAcc\":0,\"shared\":\"none\",\"monitoringStatus\":1,\"smt\":1,\"resources\":[{\"hostname\":\"host123\",\"hwthreads\":[0,1,2,3]}],\"startTime\":1234567890}"`), + expectError: false, + }, + { + name: "invalid influx line protocol", + data: []byte(`invalid line protocol format`), + expectError: true, + }, + { + name: "empty data", + data: []byte(``), + expectError: false, // Decoder should handle empty input gracefully + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + // HandleJobEvent doesn't return errors, it logs them + // We're just ensuring it doesn't panic + natsAPI.handleJobEvent("test.subject", tt.data) + time.Sleep(50 * time.Millisecond) + }) + } +} + +func TestNatsHandleStartJobDuplicatePrevention(t *testing.T) { + natsAPI := setupNatsTest(t) + t.Cleanup(cleanupNatsTest) + + // Start a job + payload := `{ + "jobId": 5001, + "user": "testuser", + "project": "testproj", + "cluster": "testcluster", + "partition": "main", + "walltime": 3600, + "numNodes": 1, + "numHwthreads": 8, + "numAcc": 0, + "shared": "none", + "monitoringStatus": 1, + "smt": 1, + "resources": [ + { + "hostname": "host123", + "hwthreads": [0, 1, 2, 3] + } + ], + "startTime": 1234567890 + }` + + natsAPI.handleStartJob(payload) + natsAPI.JobRepository.SyncJobs() + time.Sleep(100 * time.Millisecond) + + // Try to start the same job again (within 24 hours) + duplicatePayload := `{ + "jobId": 5001, + "user": "testuser", + "project": "testproj", + "cluster": "testcluster", + "partition": "main", + "walltime": 3600, + "numNodes": 1, + "numHwthreads": 8, + "numAcc": 0, + "shared": "none", + "monitoringStatus": 1, + "smt": 1, + "resources": [ + { + "hostname": "host123", + "hwthreads": [0, 1, 2, 3] + } + ], + "startTime": 1234567900 + }` + + natsAPI.handleStartJob(duplicatePayload) + natsAPI.JobRepository.SyncJobs() + time.Sleep(100 * time.Millisecond) + + // Verify only one job exists + jobID := int64(5001) + cluster := "testcluster" + jobs, err := natsAPI.JobRepository.FindAll(&jobID, &cluster, nil) + if err != nil && err != sql.ErrNoRows { + t.Fatalf("unexpected error: %v", err) + } + + if len(jobs) != 1 { + t.Errorf("expected 1 job, got %d", len(jobs)) + } +} diff --git a/internal/repository/dbConnection.go b/internal/repository/dbConnection.go index 0f7536b7..be0b161b 100644 --- a/internal/repository/dbConnection.go +++ b/internal/repository/dbConnection.go @@ -115,3 +115,26 @@ func GetConnection() *DBConnection { return dbConnInstance } + +// ResetConnection closes the current database connection and resets the connection state. +// This function is intended for testing purposes only to allow test isolation. 
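+//
+// Typical usage from a test (a sketch mirroring setupNatsTest in
+// internal/api/nats_test.go; dbfilepath stands for whatever temporary
+// database path the test uses):
+//
+//	repository.ResetConnection()
+//	repository.MigrateDB(dbfilepath)
+//	repository.Connect("sqlite3", dbfilepath)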
+func ResetConnection() error { + if dbConnInstance != nil && dbConnInstance.DB != nil { + if err := dbConnInstance.DB.Close(); err != nil { + return fmt.Errorf("failed to close database connection: %w", err) + } + } + + dbConnInstance = nil + dbConnOnce = sync.Once{} + jobRepoInstance = nil + jobRepoOnce = sync.Once{} + nodeRepoInstance = nil + nodeRepoOnce = sync.Once{} + userRepoInstance = nil + userRepoOnce = sync.Once{} + userCfgRepoInstance = nil + userCfgRepoOnce = sync.Once{} + + return nil +} From 9bf5c5dc1a0b5eae739a8ef8971d470059def827 Mon Sep 17 00:00:00 2001 From: Jan Eitzinger Date: Tue, 23 Dec 2025 09:34:09 +0100 Subject: [PATCH 17/59] Update README and config schema --- CLAUDE.md | 94 ++++++++++++++++++++++++++++++++++++++- README.md | 62 +++++++++++++++++++++----- configs/config-demo.json | 12 ++--- configs/config.json | 4 ++ internal/config/schema.go | 15 +++++++ 5 files changed, 165 insertions(+), 22 deletions(-) diff --git a/CLAUDE.md b/CLAUDE.md index 379b4dbb..67412a76 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -100,11 +100,15 @@ The backend follows a layered architecture with clear separation of concerns: - Pluggable backends: cc-metric-store, Prometheus, InfluxDB - Each cluster can have a different metric data backend - **internal/archiver**: Job archiving to file-based archive +- **internal/api/nats.go**: NATS-based API for job and node operations + - Subscribes to NATS subjects for job events (start/stop) + - Handles node state updates via NATS + - Uses InfluxDB line protocol message format - **pkg/archive**: Job archive backend implementations - File system backend (default) - S3 backend - SQLite backend (experimental) -- **pkg/nats**: NATS integration for metric ingestion +- **pkg/nats**: NATS client and message decoding utilities ### Frontend Structure @@ -146,6 +150,14 @@ applied automatically on startup. Version tracking in `version` table. ## Configuration - **config.json**: Main configuration (clusters, metric repositories, archive settings) + - `main.apiSubjects`: NATS subject configuration (optional) + - `subjectJobEvent`: Subject for job start/stop events (e.g., "cc.job.event") + - `subjectNodeState`: Subject for node state updates (e.g., "cc.node.state") + - `nats`: NATS client connection configuration (optional) + - `address`: NATS server address (e.g., "nats://localhost:4222") + - `username`: Authentication username (optional) + - `password`: Authentication password (optional) + - `creds-file-path`: Path to NATS credentials file (optional) - **.env**: Environment variables (secrets like JWT keys) - Copy from `configs/env-template.txt` - NEVER commit this file @@ -207,9 +219,87 @@ applied automatically on startup. Version tracking in `version` table. 2. Increment `repository.Version` 3. Test with fresh database and existing database +## NATS API + +The backend supports a NATS-based API as an alternative to the REST API for job and node operations. + +### Setup + +1. Configure NATS client connection in `config.json`: + ```json + { + "nats": { + "address": "nats://localhost:4222", + "username": "user", + "password": "pass" + } + } + ``` + +2. 
Configure API subjects in `config.json` under `main`: + ```json + { + "main": { + "apiSubjects": { + "subjectJobEvent": "cc.job.event", + "subjectNodeState": "cc.node.state" + } + } + } + ``` + +### Message Format + +Messages use **InfluxDB line protocol** format with the following structure: + +#### Job Events + +**Start Job:** +``` +job,function=start_job event="{\"jobId\":123,\"user\":\"alice\",\"cluster\":\"test\", ...}" 1234567890000000000 +``` + +**Stop Job:** +``` +job,function=stop_job event="{\"jobId\":123,\"cluster\":\"test\",\"startTime\":1234567890,\"stopTime\":1234571490,\"jobState\":\"completed\"}" 1234571490000000000 +``` + +**Tags:** +- `function`: Either `start_job` or `stop_job` + +**Fields:** +- `event`: JSON payload containing job data (see REST API documentation for schema) + +#### Node State Updates + +```json +{ + "cluster": "testcluster", + "nodes": [ + { + "hostname": "node001", + "states": ["allocated"], + "cpusAllocated": 8, + "memoryAllocated": 16384, + "gpusAllocated": 0, + "jobsRunning": 1 + } + ] +} +``` + +### Implementation Notes + +- NATS API mirrors REST API functionality but uses messaging +- Job start/stop events are processed asynchronously +- Duplicate job detection is handled (same as REST API) +- All validation rules from REST API apply +- Messages are logged; no responses are sent back to publishers +- If NATS client is unavailable, API subscriptions are skipped (logged as warning) + ## Dependencies - Go 1.24.0+ (check go.mod for exact version) - Node.js (for frontend builds) - SQLite 3 (only supported database) -- Optional: NATS server for metric ingestion +- Optional: NATS server for NATS API integration diff --git a/README.md b/README.md index a0352d17..468a12ad 100644 --- a/README.md +++ b/README.md @@ -22,11 +22,12 @@ switching from PHP Symfony to a Golang based solution are explained ## Overview This is a Golang web backend for the ClusterCockpit job-specific performance -monitoring framework. It provides a REST API for integrating ClusterCockpit with -an HPC cluster batch system and external analysis scripts. Data exchange between -the web front-end and the back-end is based on a GraphQL API. The web frontend -is also served by the backend using [Svelte](https://svelte.dev/) components. -Layout and styling are based on [Bootstrap 5](https://getbootstrap.com/) using +monitoring framework. It provides a REST API and an optional NATS-based messaging +API for integrating ClusterCockpit with an HPC cluster batch system and external +analysis scripts. Data exchange between the web front-end and the back-end is +based on a GraphQL API. The web frontend is also served by the backend using +[Svelte](https://svelte.dev/) components. Layout and styling are based on +[Bootstrap 5](https://getbootstrap.com/) using [Bootstrap Icons](https://icons.getbootstrap.com/). The backend uses [SQLite 3](https://sqlite.org/) as the relational SQL database. @@ -35,6 +36,10 @@ databases, the only tested and supported setup is to use cc-metric-store as the metric data backend. Documentation on how to integrate ClusterCockpit with other time series databases will be added in the future. +For real-time integration with HPC systems, the backend can subscribe to +[NATS](https://nats.io/) subjects to receive job start/stop events and node +state updates, providing an alternative to REST API polling. + Completed batch jobs are stored in a file-based job archive according to [this specification](https://github.com/ClusterCockpit/cc-specifications/tree/master/job-archive). 
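+
+To make the NATS integration described above concrete, the following sketch
+shows how an external agent could publish a job start event. It is an
+illustrative example, not a supported tool: it assumes the
+`github.com/nats-io/nats.go` client, the `cc.job.event` subject from the
+sample configuration, and a JSON payload following the REST API start-job
+schema.
+
+```go
+package main
+
+import (
+	"fmt"
+	"strings"
+	"time"
+
+	"github.com/nats-io/nats.go"
+)
+
+// escapeLP escapes backslashes and double quotes so the JSON payload can be
+// embedded as an InfluxDB line protocol string field value.
+func escapeLP(s string) string {
+	s = strings.ReplaceAll(s, `\`, `\\`)
+	return strings.ReplaceAll(s, `"`, `\"`)
+}
+
+func main() {
+	nc, err := nats.Connect("nats://localhost:4222")
+	if err != nil {
+		panic(err)
+	}
+	defer nc.Close()
+
+	// Job metadata as JSON, following the REST API start-job schema.
+	event := `{"jobId":123,"user":"alice","project":"p1","cluster":"testcluster",` +
+		`"numNodes":1,"resources":[{"hostname":"node001"}],"startTime":1234567890}`
+
+	// Measurement "job", tag function=start_job, string field "event", and a
+	// nanosecond timestamp: the layout the backend's job event handler expects.
+	line := fmt.Sprintf(`job,function=start_job event="%s" %d`,
+		escapeLP(event), time.Now().UnixNano())
+
+	if err := nc.Publish("cc.job.event", []byte(line)); err != nil {
+		panic(err)
+	}
+}
+```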
The backend supports authentication via local accounts, an external LDAP @@ -130,27 +135,60 @@ ln -s ./var/job-archive ## Project file structure +- [`.github/`](https://github.com/ClusterCockpit/cc-backend/tree/master/.github) + GitHub Actions workflows and dependabot configuration for CI/CD. - [`api/`](https://github.com/ClusterCockpit/cc-backend/tree/master/api) contains the API schema files for the REST and GraphQL APIs. The REST API is documented in the OpenAPI 3.0 format in - [./api/openapi.yaml](./api/openapi.yaml). + [./api/swagger.yaml](./api/swagger.yaml). The GraphQL schema is in + [./api/schema.graphqls](./api/schema.graphqls). - [`cmd/cc-backend`](https://github.com/ClusterCockpit/cc-backend/tree/master/cmd/cc-backend) - contains `main.go` for the main application. + contains the main application entry point and CLI implementation. - [`configs/`](https://github.com/ClusterCockpit/cc-backend/tree/master/configs) contains documentation about configuration and command line options and required - environment variables. A sample configuration file is provided. -- [`docs/`](https://github.com/ClusterCockpit/cc-backend/tree/master/docs) - contains more in-depth documentation. + environment variables. Sample configuration files are provided. - [`init/`](https://github.com/ClusterCockpit/cc-backend/tree/master/init) contains an example of setting up systemd for production use. - [`internal/`](https://github.com/ClusterCockpit/cc-backend/tree/master/internal) contains library source code that is not intended for use by others. + - [`api`](https://github.com/ClusterCockpit/cc-backend/tree/master/internal/api) + REST API handlers and NATS integration + - [`archiver`](https://github.com/ClusterCockpit/cc-backend/tree/master/internal/archiver) + Job archiving functionality + - [`auth`](https://github.com/ClusterCockpit/cc-backend/tree/master/internal/auth) + Authentication (local, LDAP, OIDC) and JWT token handling + - [`config`](https://github.com/ClusterCockpit/cc-backend/tree/master/internal/config) + Configuration management and validation + - [`graph`](https://github.com/ClusterCockpit/cc-backend/tree/master/internal/graph) + GraphQL schema and resolvers + - [`importer`](https://github.com/ClusterCockpit/cc-backend/tree/master/internal/importer) + Job data import and database initialization + - [`memorystore`](https://github.com/ClusterCockpit/cc-backend/tree/master/internal/memorystore) + In-memory metric data store with checkpointing + - [`metricdata`](https://github.com/ClusterCockpit/cc-backend/tree/master/internal/metricdata) + Metric data repository implementations (cc-metric-store, Prometheus) + - [`metricDataDispatcher`](https://github.com/ClusterCockpit/cc-backend/tree/master/internal/metricDataDispatcher) + Dispatches metric data loading to appropriate backends + - [`repository`](https://github.com/ClusterCockpit/cc-backend/tree/master/internal/repository) + Database repository layer for jobs and metadata + - [`routerConfig`](https://github.com/ClusterCockpit/cc-backend/tree/master/internal/routerConfig) + HTTP router configuration and middleware + - [`tagger`](https://github.com/ClusterCockpit/cc-backend/tree/master/internal/tagger) + Job classification and application detection + - [`taskmanager`](https://github.com/ClusterCockpit/cc-backend/tree/master/internal/taskmanager) + Background task management and scheduled jobs - [`pkg/`](https://github.com/ClusterCockpit/cc-backend/tree/master/pkg) contains Go packages that can be used by other projects. 
+ - [`archive`](https://github.com/ClusterCockpit/cc-backend/tree/master/pkg/archive) + Job archive backend implementations (filesystem, S3) + - [`nats`](https://github.com/ClusterCockpit/cc-backend/tree/master/pkg/nats) + NATS client and message handling - [`tools/`](https://github.com/ClusterCockpit/cc-backend/tree/master/tools) Additional command line helper tools. - [`archive-manager`](https://github.com/ClusterCockpit/cc-backend/tree/master/tools/archive-manager) - Commands for getting infos about and existing job archive. + Commands for getting infos about an existing job archive. + - [`archive-migration`](https://github.com/ClusterCockpit/cc-backend/tree/master/tools/archive-migration) + Tool for migrating job archives between formats. - [`convert-pem-pubkey`](https://github.com/ClusterCockpit/cc-backend/tree/master/tools/convert-pem-pubkey) Tool to convert external pubkey for use in `cc-backend`. - [`gen-keypair`](https://github.com/ClusterCockpit/cc-backend/tree/master/tools/gen-keypair) @@ -162,7 +200,7 @@ ln -s ./var/job-archive - [`frontend`](https://github.com/ClusterCockpit/cc-backend/tree/master/web/frontend) Svelte components and static assets for the frontend UI - [`templates`](https://github.com/ClusterCockpit/cc-backend/tree/master/web/templates) - Server-side Go templates + Server-side Go templates, including monitoring views - [`gqlgen.yml`](https://github.com/ClusterCockpit/cc-backend/blob/master/gqlgen.yml) Configures the behaviour and generation of [gqlgen](https://github.com/99designs/gqlgen). diff --git a/configs/config-demo.json b/configs/config-demo.json index 58366fb5..aa388316 100644 --- a/configs/config-demo.json +++ b/configs/config-demo.json @@ -5,14 +5,9 @@ "resampling": { "minimumPoints": 600, "trigger": 180, - "resolutions": [ - 240, - 60 - ] + "resolutions": [240, 60] }, - "apiAllowedIPs": [ - "*" - ], + "apiAllowedIPs": ["*"], "emission-constant": 317 }, "cron": { @@ -103,4 +98,5 @@ } ] } -} \ No newline at end of file +} + diff --git a/configs/config.json b/configs/config.json index 88a9e930..41d8ecac 100644 --- a/configs/config.json +++ b/configs/config.json @@ -15,6 +15,10 @@ 240, 60 ] + }, + "apiSubjects": { + "subjectJobEvent": "cc.job.event", + "subjectNodeState": "cc.node.state" } }, "cron": { diff --git a/internal/config/schema.go b/internal/config/schema.go index b171f96a..ff8d0c92 100644 --- a/internal/config/schema.go +++ b/internal/config/schema.go @@ -119,6 +119,21 @@ var configSchema = ` } }, "required": ["trigger", "resolutions"] + }, + "apiSubjects": { + "description": "NATS subjects configuration for subscribing to job and node events.", + "type": "object", + "properties": { + "subjectJobEvent": { + "description": "NATS subject for job events (start_job, stop_job)", + "type": "string" + }, + "subjectNodeState": { + "description": "NATS subject for node state updates", + "type": "string" + } + }, + "required": ["subjectJobEvent", "subjectNodeState"] } }, "required": ["apiAllowedIPs"] From 8576ae458d11d5df74faca4088f9ff4d0a0c7774 Mon Sep 17 00:00:00 2001 From: Jan Eitzinger Date: Wed, 24 Dec 2025 09:24:18 +0100 Subject: [PATCH 18/59] Switch to cc-lib v2 --- cmd/cc-backend/init.go | 4 +- cmd/cc-backend/main.go | 14 ++--- cmd/cc-backend/server.go | 6 +- go.mod | 16 ++--- go.sum | 32 +++++----- gqlgen.yml | 52 +++++++-------- internal/api/api_test.go | 6 +- internal/api/cluster.go | 2 +- internal/api/job.go | 4 +- internal/api/memorystore.go | 2 +- internal/api/nats.go | 21 +++++-- internal/api/nats_test.go | 8 +-- internal/api/node.go | 2 
+- internal/api/rest.go | 6 +- internal/api/user.go | 4 +- internal/archiver/archiveWorker.go | 4 +- internal/archiver/archiver.go | 4 +- internal/auth/auth.go | 6 +- internal/auth/jwt.go | 4 +- internal/auth/jwtCookieSession.go | 4 +- internal/auth/jwtHelpers.go | 4 +- internal/auth/jwtHelpers_test.go | 2 +- internal/auth/jwtSession.go | 4 +- internal/auth/ldap.go | 4 +- internal/auth/local.go | 4 +- internal/auth/oidc.go | 4 +- internal/config/config.go | 4 +- internal/config/config_test.go | 4 +- internal/config/validate.go | 2 +- internal/graph/generated/generated.go | 2 +- internal/graph/model/models_gen.go | 2 +- internal/graph/resolver.go | 2 +- internal/graph/schema.resolvers.go | 4 +- internal/graph/util.go | 4 +- internal/importer/handleImport.go | 4 +- internal/importer/importer_test.go | 4 +- internal/importer/initDB.go | 4 +- internal/importer/normalize.go | 2 +- internal/importer/normalize_test.go | 2 +- internal/memorystore/api.go | 4 +- internal/memorystore/archive.go | 2 +- internal/memorystore/avroCheckpoint.go | 4 +- internal/memorystore/avroHelper.go | 2 +- internal/memorystore/avroStruct.go | 2 +- internal/memorystore/buffer.go | 2 +- internal/memorystore/checkpoint.go | 4 +- internal/memorystore/level.go | 2 +- internal/memorystore/lineprotocol.go | 4 +- internal/memorystore/memorystore.go | 8 +-- internal/memorystore/memorystore_test.go | 2 +- internal/memorystore/stats.go | 2 +- internal/metricDataDispatcher/dataLoader.go | 8 +-- .../metricdata/cc-metric-store-internal.go | 4 +- internal/metricdata/cc-metric-store.go | 4 +- internal/metricdata/metricdata.go | 4 +- internal/metricdata/prometheus.go | 4 +- internal/metricdata/utils.go | 2 +- internal/repository/dbConnection.go | 2 +- internal/repository/hooks.go | 2 +- internal/repository/job.go | 6 +- internal/repository/jobCreate.go | 4 +- internal/repository/jobFind.go | 4 +- internal/repository/jobHooks.go | 2 +- internal/repository/jobQuery.go | 4 +- internal/repository/job_test.go | 2 +- internal/repository/migration.go | 2 +- internal/repository/node.go | 6 +- internal/repository/node_test.go | 6 +- internal/repository/repository_test.go | 4 +- internal/repository/stats.go | 4 +- internal/repository/tags.go | 4 +- internal/repository/testdata/job.db | Bin 987136 -> 987136 bytes internal/repository/user.go | 4 +- internal/repository/userConfig.go | 6 +- internal/repository/userConfig_test.go | 6 +- internal/routerConfig/routes.go | 6 +- internal/tagger/classifyJob.go | 6 +- internal/tagger/classifyJob_test.go | 2 +- internal/tagger/detectApp.go | 6 +- internal/tagger/detectApp_test.go | 2 +- internal/tagger/tagger.go | 4 +- internal/tagger/tagger_test.go | 2 +- internal/taskmanager/commitJobService.go | 2 +- internal/taskmanager/compressionService.go | 4 +- internal/taskmanager/ldapSyncService.go | 2 +- internal/taskmanager/retentionService.go | 2 +- internal/taskmanager/stopJobsExceedTime.go | 2 +- internal/taskmanager/taskManager.go | 2 +- internal/taskmanager/updateDurationService.go | 2 +- .../taskmanager/updateFootprintService.go | 4 +- pkg/archive/archive.go | 6 +- pkg/archive/archive_test.go | 4 +- pkg/archive/clusterConfig.go | 4 +- pkg/archive/fsBackend.go | 6 +- pkg/archive/fsBackend_test.go | 4 +- pkg/archive/json.go | 4 +- pkg/archive/nodelist.go | 2 +- pkg/archive/s3Backend.go | 6 +- pkg/archive/s3Backend_test.go | 2 +- pkg/archive/sqliteBackend.go | 6 +- pkg/archive/sqliteBackend_test.go | 2 +- pkg/nats/client.go | 2 +- pkg/nats/config.go | 2 +- pkg/nats/influxDecoder.go | 59 ------------------ 
tools/archive-manager/import_test.go | 4 +- tools/archive-manager/main.go | 4 +- tools/archive-migration/main.go | 2 +- tools/archive-migration/transforms.go | 2 +- web/web.go | 6 +- web/webConfig_test.go | 2 +- 110 files changed, 261 insertions(+), 311 deletions(-) delete mode 100644 pkg/nats/influxDecoder.go diff --git a/cmd/cc-backend/init.go b/cmd/cc-backend/init.go index 151eee9e..025396be 100644 --- a/cmd/cc-backend/init.go +++ b/cmd/cc-backend/init.go @@ -15,8 +15,8 @@ import ( "github.com/ClusterCockpit/cc-backend/internal/config" "github.com/ClusterCockpit/cc-backend/internal/repository" "github.com/ClusterCockpit/cc-backend/pkg/archive" - cclog "github.com/ClusterCockpit/cc-lib/ccLogger" - "github.com/ClusterCockpit/cc-lib/util" + cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger" + "github.com/ClusterCockpit/cc-lib/v2/util" ) const envString = ` diff --git a/cmd/cc-backend/main.go b/cmd/cc-backend/main.go index 9464ccf4..f8b4aea1 100644 --- a/cmd/cc-backend/main.go +++ b/cmd/cc-backend/main.go @@ -32,11 +32,11 @@ import ( "github.com/ClusterCockpit/cc-backend/pkg/archive" "github.com/ClusterCockpit/cc-backend/pkg/nats" "github.com/ClusterCockpit/cc-backend/web" - ccconf "github.com/ClusterCockpit/cc-lib/ccConfig" - cclog "github.com/ClusterCockpit/cc-lib/ccLogger" - "github.com/ClusterCockpit/cc-lib/runtimeEnv" - "github.com/ClusterCockpit/cc-lib/schema" - "github.com/ClusterCockpit/cc-lib/util" + ccconf "github.com/ClusterCockpit/cc-lib/v2/ccConfig" + cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger" + "github.com/ClusterCockpit/cc-lib/v2/runtime" + "github.com/ClusterCockpit/cc-lib/v2/schema" + "github.com/ClusterCockpit/cc-lib/v2/util" "github.com/google/gops/agent" "github.com/joho/godotenv" @@ -371,7 +371,7 @@ func runServer(ctx context.Context) error { case <-ctx.Done(): } - runtimeEnv.SystemdNotifiy(false, "Shutting down ...") + runtime.SystemdNotify(false, "Shutting down ...") srv.Shutdown(ctx) util.FsWatcherShutdown() taskmanager.Shutdown() @@ -381,7 +381,7 @@ func runServer(ctx context.Context) error { if os.Getenv(envGOGC) == "" { debug.SetGCPercent(25) } - runtimeEnv.SystemdNotifiy(true, "running") + runtime.SystemdNotify(true, "running") // Wait for completion or error go func() { diff --git a/cmd/cc-backend/server.go b/cmd/cc-backend/server.go index 4ed79622..53e24c88 100644 --- a/cmd/cc-backend/server.go +++ b/cmd/cc-backend/server.go @@ -33,8 +33,8 @@ import ( "github.com/ClusterCockpit/cc-backend/internal/routerConfig" "github.com/ClusterCockpit/cc-backend/pkg/nats" "github.com/ClusterCockpit/cc-backend/web" - cclog "github.com/ClusterCockpit/cc-lib/ccLogger" - "github.com/ClusterCockpit/cc-lib/runtimeEnv" + cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger" + "github.com/ClusterCockpit/cc-lib/v2/runtime" "github.com/gorilla/handlers" "github.com/gorilla/mux" httpSwagger "github.com/swaggo/http-swagger" @@ -347,7 +347,7 @@ func (s *Server) Start(ctx context.Context) error { // Because this program will want to bind to a privileged port (like 80), the listener must // be established first, then the user can be changed, and after that, // the actual http server can be started. 
- if err := runtimeEnv.DropPrivileges(config.Keys.Group, config.Keys.User); err != nil { + if err := runtime.DropPrivileges(config.Keys.Group, config.Keys.User); err != nil { return fmt.Errorf("dropping privileges: %w", err) } diff --git a/go.mod b/go.mod index b821f7bf..36ce47b9 100644 --- a/go.mod +++ b/go.mod @@ -11,14 +11,14 @@ tool ( require ( github.com/99designs/gqlgen v0.17.84 - github.com/ClusterCockpit/cc-lib v1.0.2 + github.com/ClusterCockpit/cc-lib/v2 v2.0.0 github.com/Masterminds/squirrel v1.5.4 github.com/aws/aws-sdk-go-v2 v1.41.0 github.com/aws/aws-sdk-go-v2/config v1.31.20 github.com/aws/aws-sdk-go-v2/credentials v1.18.24 github.com/aws/aws-sdk-go-v2/service/s3 v1.90.2 github.com/coreos/go-oidc/v3 v3.16.0 - github.com/expr-lang/expr v1.17.6 + github.com/expr-lang/expr v1.17.7 github.com/go-co-op/gocron/v2 v2.18.2 github.com/go-ldap/ldap/v3 v3.4.12 github.com/golang-jwt/jwt/v5 v5.3.0 @@ -41,7 +41,7 @@ require ( github.com/swaggo/http-swagger v1.3.4 github.com/swaggo/swag v1.16.6 github.com/vektah/gqlparser/v2 v2.5.31 - golang.org/x/crypto v0.45.0 + golang.org/x/crypto v0.46.0 golang.org/x/oauth2 v0.32.0 golang.org/x/time v0.14.0 ) @@ -95,14 +95,14 @@ require ( github.com/jonboulle/clockwork v0.5.0 // indirect github.com/jpillora/backoff v1.0.0 // indirect github.com/json-iterator/go v1.1.12 // indirect - github.com/klauspost/compress v1.18.1 // indirect + github.com/klauspost/compress v1.18.2 // indirect github.com/lann/builder v0.0.0-20180802200727-47ae307949d0 // indirect github.com/lann/ps v0.0.0-20150810152359-62de8c46ede0 // indirect github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect github.com/modern-go/reflect2 v1.0.2 // indirect github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f // indirect - github.com/nats-io/nkeys v0.4.11 // indirect + github.com/nats-io/nkeys v0.4.12 // indirect github.com/nats-io/nuid v1.0.1 // indirect github.com/oapi-codegen/runtime v1.1.1 // indirect github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect @@ -121,9 +121,9 @@ require ( golang.org/x/exp v0.0.0-20250620022241-b7579e27df2b // indirect golang.org/x/mod v0.30.0 // indirect golang.org/x/net v0.47.0 // indirect - golang.org/x/sync v0.18.0 // indirect - golang.org/x/sys v0.38.0 // indirect - golang.org/x/text v0.31.0 // indirect + golang.org/x/sync v0.19.0 // indirect + golang.org/x/sys v0.39.0 // indirect + golang.org/x/text v0.32.0 // indirect golang.org/x/tools v0.39.0 // indirect google.golang.org/protobuf v1.36.10 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect diff --git a/go.sum b/go.sum index 04e2514b..9038d960 100644 --- a/go.sum +++ b/go.sum @@ -4,8 +4,8 @@ github.com/99designs/gqlgen v0.17.84 h1:iVMdiStgUVx/BFkMb0J5GAXlqfqtQ7bqMCYK6v52 github.com/99designs/gqlgen v0.17.84/go.mod h1:qjoUqzTeiejdo+bwUg8unqSpeYG42XrcrQboGIezmFA= github.com/Azure/go-ntlmssp v0.0.0-20221128193559-754e69321358 h1:mFRzDkZVAjdal+s7s0MwaRv9igoPqLRdzOLzw/8Xvq8= github.com/Azure/go-ntlmssp v0.0.0-20221128193559-754e69321358/go.mod h1:chxPXzSsl7ZWRAuOIE23GDNzjWuZquvFlgA8xmpunjU= -github.com/ClusterCockpit/cc-lib v1.0.2 h1:ZWn3oZkXgxrr3zSigBdlOOfayZ4Om4xL20DhmritPPg= -github.com/ClusterCockpit/cc-lib v1.0.2/go.mod h1:UGdOvXEnjFqlnPSxtvtFwO6BtXYW6NnXFoud9FtN93k= +github.com/ClusterCockpit/cc-lib/v2 v2.0.0 h1:OjDADx8mf9SflqeeKUuhy5pamu4YDucae6wUX6vvNNA= +github.com/ClusterCockpit/cc-lib/v2 v2.0.0/go.mod h1:JuxMAuEOaLLNEnnL9U3ejha8kMvsSatLdKPZEgJw6iw= 
github.com/KyleBanks/depth v1.2.1 h1:5h8fQADFrWtarTdtDudMmGsC7GPbOAu6RVB3ffsVFHc= github.com/KyleBanks/depth v1.2.1/go.mod h1:jzSb9d0L43HxTQfT+oSA1EEp2q+ne2uh6XgeJcm8brE= github.com/Masterminds/squirrel v1.5.4 h1:uUcX/aBc8O7Fg9kaISIUsHXdKuqehiXAMQTYX8afzqM= @@ -79,8 +79,8 @@ github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1 github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/dgryski/trifles v0.0.0-20230903005119-f50d829f2e54 h1:SG7nF6SRlWhcT7cNTs5R6Hk4V2lcmLz2NsG2VnInyNo= github.com/dgryski/trifles v0.0.0-20230903005119-f50d829f2e54/go.mod h1:if7Fbed8SFyPtHLHbg49SI7NAdJiC5WIA09pe59rfAA= -github.com/expr-lang/expr v1.17.6 h1:1h6i8ONk9cexhDmowO/A64VPxHScu7qfSl2k8OlINec= -github.com/expr-lang/expr v1.17.6/go.mod h1:8/vRC7+7HBzESEqt5kKpYXxrxkr31SaO8r40VO/1IT4= +github.com/expr-lang/expr v1.17.7 h1:Q0xY/e/2aCIp8g9s/LGvMDCC5PxYlvHgDZRQ4y16JX8= +github.com/expr-lang/expr v1.17.7/go.mod h1:8/vRC7+7HBzESEqt5kKpYXxrxkr31SaO8r40VO/1IT4= github.com/felixge/httpsnoop v1.0.4 h1:NFTV2Zj1bL4mc9sqWACXbQFVBBg2W3GPvqp8/ESS2Wg= github.com/felixge/httpsnoop v1.0.4/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U= github.com/frankban/quicktest v1.11.0/go.mod h1:K+q6oSqb0W0Ininfk863uOk1lMy69l/P6txr3mVT54s= @@ -197,8 +197,8 @@ github.com/jpillora/backoff v1.0.0/go.mod h1:J/6gKK9jxlEcS3zixgDgUAsiuZ7yrSoa/FX github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM= github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo= github.com/juju/gnuflag v0.0.0-20171113085948-2ce1bb71843d/go.mod h1:2PavIy+JPciBPrBUjwbNvtwB6RQlve+hkpll6QSNmOE= -github.com/klauspost/compress v1.18.1 h1:bcSGx7UbpBqMChDtsF28Lw6v/G94LPrrbMbdC3JH2co= -github.com/klauspost/compress v1.18.1/go.mod h1:ZQFFVG+MdnR0P+l6wpXgIL4NTtwiKIdBnrBd8Nrxr+0= +github.com/klauspost/compress v1.18.2 h1:iiPHWW0YrcFgpBYhsA6D1+fqHssJscY/Tm/y2Uqnapk= +github.com/klauspost/compress v1.18.2/go.mod h1:R0h/fSBs8DE4ENlcrlib3PsXS61voFxhIs2DeRhCvJ4= github.com/kr/pretty v0.2.1/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI= github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk= @@ -230,8 +230,8 @@ github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f h1:KUppIJq7/+ github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f/go.mod h1:qRWi+5nqEBWmkhHvq77mSJWrCKwh8bxhgT7d/eI7P4U= github.com/nats-io/nats.go v1.47.0 h1:YQdADw6J/UfGUd2Oy6tn4Hq6YHxCaJrVKayxxFqYrgM= github.com/nats-io/nats.go v1.47.0/go.mod h1:iRWIPokVIFbVijxuMQq4y9ttaBTMe0SFdlZfMDd+33g= -github.com/nats-io/nkeys v0.4.11 h1:q44qGV008kYd9W1b1nEBkNzvnWxtRSQ7A8BoqRrcfa0= -github.com/nats-io/nkeys v0.4.11/go.mod h1:szDimtgmfOi9n25JpfIdGw12tZFYXqhGxjhVxsatHVE= +github.com/nats-io/nkeys v0.4.12 h1:nssm7JKOG9/x4J8II47VWCL1Ds29avyiQDRn0ckMvDc= +github.com/nats-io/nkeys v0.4.12/go.mod h1:MT59A1HYcjIcyQDJStTfaOY6vhy9XTUjOFo+SVsvpBg= github.com/nats-io/nuid v1.0.1 h1:5iA8DT8V7q8WK2EScv2padNa/rTESc1KdnPw4TC2paw= github.com/nats-io/nuid v1.0.1/go.mod h1:19wcPz3Ph3q0Jbyiqsd0kePYG7A95tJPxeL+1OSON2c= github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e/go.mod h1:zD1mROLANZcx1PVRCS0qkT7pwLkGfwJo4zjcN/Tysno= @@ -298,8 +298,8 @@ go.yaml.in/yaml/v3 v3.0.4 h1:tfq32ie2Jv2UxXFdLJdh3jXuOzWiL1fo0bu/FbuKpbc= go.yaml.in/yaml/v3 v3.0.4/go.mod h1:DhzuOOF2ATzADvBadXxruRBLzYTpT36CKvDb3+aBEFg= golang.org/x/crypto 
v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= -golang.org/x/crypto v0.45.0 h1:jMBrvKuj23MTlT0bQEOBcAE0mjg8mK9RXFhRH6nyF3Q= -golang.org/x/crypto v0.45.0/go.mod h1:XTGrrkGJve7CYK7J8PEww4aY7gM3qMCElcJQ8n8JdX4= +golang.org/x/crypto v0.46.0 h1:cKRW/pmt1pKAfetfu+RCEvjvZkA9RimPbh7bhFjGVBU= +golang.org/x/crypto v0.46.0/go.mod h1:Evb/oLKmMraqjZ2iQTwDwvCtJkczlDuTmdJXoZVzqU0= golang.org/x/exp v0.0.0-20250620022241-b7579e27df2b h1:M2rDM6z3Fhozi9O7NWsxAkg/yqS/lQJ6PmkyIV3YP+o= golang.org/x/exp v0.0.0-20250620022241-b7579e27df2b/go.mod h1:3//PLf8L/X+8b4vuAfHzxeRUl04Adcb341+IGKfnqS8= golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4= @@ -315,16 +315,16 @@ golang.org/x/oauth2 v0.32.0 h1:jsCblLleRMDrxMN29H3z/k1KliIvpLgCkE6R8FXXNgY= golang.org/x/oauth2 v0.32.0/go.mod h1:lzm5WQJQwKZ3nwavOZ3IS5Aulzxi68dUSgRHujetwEA= golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.18.0 h1:kr88TuHDroi+UVf+0hZnirlk8o8T+4MrK6mr60WkH/I= -golang.org/x/sync v0.18.0/go.mod h1:9KTHXmSnoGruLpwFjVSX0lNNA75CykiMECbovNTZqGI= +golang.org/x/sync v0.19.0 h1:vV+1eWNmZ5geRlYjzm2adRgW2/mcpevXNg50YZtPCE4= +golang.org/x/sync v0.19.0/go.mod h1:9KTHXmSnoGruLpwFjVSX0lNNA75CykiMECbovNTZqGI= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.38.0 h1:3yZWxaJjBmCWXqhN1qh02AkOnCQ1poK6oF+a7xWL6Gc= -golang.org/x/sys v0.38.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks= +golang.org/x/sys v0.39.0 h1:CvCKL8MeisomCi6qNZ+wbb0DN9E5AATixKsvNtMoMFk= +golang.org/x/sys v0.39.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k= @@ -332,8 +332,8 @@ golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= -golang.org/x/text v0.31.0 h1:aC8ghyu4JhP8VojJ2lEHBnochRno1sgL6nEi9WGFGMM= -golang.org/x/text v0.31.0/go.mod h1:tKRAlv61yKIjGGHX/4tP1LTbc13YSec1pxVEWXzfoeM= +golang.org/x/text v0.32.0 h1:ZD01bjUt1FQ9WJ0ClOL5vxgxOI/sVCNgX1YtKwcY0mU= +golang.org/x/text v0.32.0/go.mod h1:o/rUWzghvpD5TXrTIBuJU77MTaN0ljMWE47kxGJQ7jY= golang.org/x/time v0.14.0 h1:MRx4UaLrDotUKUdCIqzPC48t1Y9hANFKIRpNx+Te8PI= golang.org/x/time v0.14.0/go.mod 
h1:eL/Oa2bBBK0TkX57Fyni+NgnyQQN4LitPmob2Hjnqw4= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= diff --git a/gqlgen.yml b/gqlgen.yml index 5f5272b4..40410b48 100644 --- a/gqlgen.yml +++ b/gqlgen.yml @@ -52,51 +52,51 @@ models: - github.com/99designs/gqlgen/graphql.Int64 - github.com/99designs/gqlgen/graphql.Int32 Job: - model: "github.com/ClusterCockpit/cc-lib/schema.Job" + model: "github.com/ClusterCockpit/cc-lib/v2/schema.Job" fields: tags: resolver: true metaData: resolver: true Cluster: - model: "github.com/ClusterCockpit/cc-lib/schema.Cluster" + model: "github.com/ClusterCockpit/cc-lib/v2/schema.Cluster" fields: partitions: resolver: true # Node: - # model: "github.com/ClusterCockpit/cc-lib/schema.Node" + # model: "github.com/ClusterCockpit/cc-lib/v2/schema.Node" # fields: # metaData: # resolver: true - NullableFloat: { model: "github.com/ClusterCockpit/cc-lib/schema.Float" } - MetricScope: { model: "github.com/ClusterCockpit/cc-lib/schema.MetricScope" } - MetricValue: { model: "github.com/ClusterCockpit/cc-lib/schema.MetricValue" } + NullableFloat: { model: "github.com/ClusterCockpit/cc-lib/v2/schema.Float" } + MetricScope: { model: "github.com/ClusterCockpit/cc-lib/v2/schema.MetricScope" } + MetricValue: { model: "github.com/ClusterCockpit/cc-lib/v2/schema.MetricValue" } JobStatistics: - { model: "github.com/ClusterCockpit/cc-lib/schema.JobStatistics" } + { model: "github.com/ClusterCockpit/cc-lib/v2/schema.JobStatistics" } GlobalMetricListItem: - { model: "github.com/ClusterCockpit/cc-lib/schema.GlobalMetricListItem" } + { model: "github.com/ClusterCockpit/cc-lib/v2/schema.GlobalMetricListItem" } ClusterSupport: - { model: "github.com/ClusterCockpit/cc-lib/schema.ClusterSupport" } - Tag: { model: "github.com/ClusterCockpit/cc-lib/schema.Tag" } - Resource: { model: "github.com/ClusterCockpit/cc-lib/schema.Resource" } - JobState: { model: "github.com/ClusterCockpit/cc-lib/schema.JobState" } - Node: { model: "github.com/ClusterCockpit/cc-lib/schema.Node" } + { model: "github.com/ClusterCockpit/cc-lib/v2/schema.ClusterSupport" } + Tag: { model: "github.com/ClusterCockpit/cc-lib/v2/schema.Tag" } + Resource: { model: "github.com/ClusterCockpit/cc-lib/v2/schema.Resource" } + JobState: { model: "github.com/ClusterCockpit/cc-lib/v2/schema.JobState" } + Node: { model: "github.com/ClusterCockpit/cc-lib/v2/schema.Node" } SchedulerState: - { model: "github.com/ClusterCockpit/cc-lib/schema.SchedulerState" } + { model: "github.com/ClusterCockpit/cc-lib/v2/schema.SchedulerState" } HealthState: - { model: "github.com/ClusterCockpit/cc-lib/schema.MonitoringState" } - JobMetric: { model: "github.com/ClusterCockpit/cc-lib/schema.JobMetric" } - Series: { model: "github.com/ClusterCockpit/cc-lib/schema.Series" } + { model: "github.com/ClusterCockpit/cc-lib/v2/schema.MonitoringState" } + JobMetric: { model: "github.com/ClusterCockpit/cc-lib/v2/schema.JobMetric" } + Series: { model: "github.com/ClusterCockpit/cc-lib/v2/schema.Series" } MetricStatistics: - { model: "github.com/ClusterCockpit/cc-lib/schema.MetricStatistics" } + { model: "github.com/ClusterCockpit/cc-lib/v2/schema.MetricStatistics" } MetricConfig: - { model: "github.com/ClusterCockpit/cc-lib/schema.MetricConfig" } + { model: "github.com/ClusterCockpit/cc-lib/v2/schema.MetricConfig" } SubClusterConfig: - { model: "github.com/ClusterCockpit/cc-lib/schema.SubClusterConfig" } - Accelerator: { model: "github.com/ClusterCockpit/cc-lib/schema.Accelerator" } - Topology: { model: 
"github.com/ClusterCockpit/cc-lib/schema.Topology" } + { model: "github.com/ClusterCockpit/cc-lib/v2/schema.SubClusterConfig" } + Accelerator: { model: "github.com/ClusterCockpit/cc-lib/v2/schema.Accelerator" } + Topology: { model: "github.com/ClusterCockpit/cc-lib/v2/schema.Topology" } FilterRanges: - { model: "github.com/ClusterCockpit/cc-lib/schema.FilterRanges" } - SubCluster: { model: "github.com/ClusterCockpit/cc-lib/schema.SubCluster" } - StatsSeries: { model: "github.com/ClusterCockpit/cc-lib/schema.StatsSeries" } - Unit: { model: "github.com/ClusterCockpit/cc-lib/schema.Unit" } + { model: "github.com/ClusterCockpit/cc-lib/v2/schema.FilterRanges" } + SubCluster: { model: "github.com/ClusterCockpit/cc-lib/v2/schema.SubCluster" } + StatsSeries: { model: "github.com/ClusterCockpit/cc-lib/v2/schema.StatsSeries" } + Unit: { model: "github.com/ClusterCockpit/cc-lib/v2/schema.Unit" } diff --git a/internal/api/api_test.go b/internal/api/api_test.go index 3030b1c1..50605f7b 100644 --- a/internal/api/api_test.go +++ b/internal/api/api_test.go @@ -27,9 +27,9 @@ import ( "github.com/ClusterCockpit/cc-backend/internal/metricdata" "github.com/ClusterCockpit/cc-backend/internal/repository" "github.com/ClusterCockpit/cc-backend/pkg/archive" - ccconf "github.com/ClusterCockpit/cc-lib/ccConfig" - cclog "github.com/ClusterCockpit/cc-lib/ccLogger" - "github.com/ClusterCockpit/cc-lib/schema" + ccconf "github.com/ClusterCockpit/cc-lib/v2/ccConfig" + cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger" + "github.com/ClusterCockpit/cc-lib/v2/schema" "github.com/gorilla/mux" _ "github.com/mattn/go-sqlite3" diff --git a/internal/api/cluster.go b/internal/api/cluster.go index 28d7c109..b6f41244 100644 --- a/internal/api/cluster.go +++ b/internal/api/cluster.go @@ -13,7 +13,7 @@ import ( "github.com/ClusterCockpit/cc-backend/internal/repository" "github.com/ClusterCockpit/cc-backend/pkg/archive" - "github.com/ClusterCockpit/cc-lib/schema" + "github.com/ClusterCockpit/cc-lib/v2/schema" ) // GetClustersAPIResponse model diff --git a/internal/api/job.go b/internal/api/job.go index 919772f4..9b970c2e 100644 --- a/internal/api/job.go +++ b/internal/api/job.go @@ -25,8 +25,8 @@ import ( "github.com/ClusterCockpit/cc-backend/internal/metricDataDispatcher" "github.com/ClusterCockpit/cc-backend/internal/repository" "github.com/ClusterCockpit/cc-backend/pkg/archive" - cclog "github.com/ClusterCockpit/cc-lib/ccLogger" - "github.com/ClusterCockpit/cc-lib/schema" + cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger" + "github.com/ClusterCockpit/cc-lib/v2/schema" "github.com/gorilla/mux" ) diff --git a/internal/api/memorystore.go b/internal/api/memorystore.go index 1b883792..56c396e2 100644 --- a/internal/api/memorystore.go +++ b/internal/api/memorystore.go @@ -16,7 +16,7 @@ import ( "strings" "github.com/ClusterCockpit/cc-backend/internal/memorystore" - cclog "github.com/ClusterCockpit/cc-lib/ccLogger" + cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger" "github.com/influxdata/line-protocol/v2/lineprotocol" ) diff --git a/internal/api/nats.go b/internal/api/nats.go index a309a915..efd04406 100644 --- a/internal/api/nats.go +++ b/internal/api/nats.go @@ -18,9 +18,10 @@ import ( "github.com/ClusterCockpit/cc-backend/internal/importer" "github.com/ClusterCockpit/cc-backend/internal/repository" "github.com/ClusterCockpit/cc-backend/pkg/nats" - cclog "github.com/ClusterCockpit/cc-lib/ccLogger" - lp "github.com/ClusterCockpit/cc-lib/ccMessage" - "github.com/ClusterCockpit/cc-lib/schema" + cclog 
"github.com/ClusterCockpit/cc-lib/v2/ccLogger" + lp "github.com/ClusterCockpit/cc-lib/v2/ccMessage" + "github.com/ClusterCockpit/cc-lib/v2/receivers" + "github.com/ClusterCockpit/cc-lib/v2/schema" influx "github.com/influxdata/line-protocol/v2/lineprotocol" ) @@ -75,10 +76,18 @@ func (api *NatsAPI) processJobEvent(msg lp.CCMessage) { switch function { case "start_job": - api.handleStartJob(msg.GetEventValue()) + v, ok := msg.GetEventValue() + if !ok { + cclog.Errorf("Job event is missing event value: %+v", msg) + } + api.handleStartJob(v) case "stop_job": - api.handleStopJob(msg.GetEventValue()) + v, ok := msg.GetEventValue() + if !ok { + cclog.Errorf("Job event is missing event value: %+v", msg) + } + api.handleStopJob(v) default: cclog.Warnf("Unimplemented job event: %+v", msg) } @@ -88,7 +97,7 @@ func (api *NatsAPI) handleJobEvent(subject string, data []byte) { d := influx.NewDecoderWithBytes(data) for d.Next() { - m, err := nats.DecodeInfluxMessage(d) + m, err := receivers.DecodeInfluxMessage(d) if err != nil { cclog.Errorf("NATS %s: Failed to decode message: %v", subject, err) return diff --git a/internal/api/nats_test.go b/internal/api/nats_test.go index 420a359c..c9415afc 100644 --- a/internal/api/nats_test.go +++ b/internal/api/nats_test.go @@ -21,10 +21,10 @@ import ( "github.com/ClusterCockpit/cc-backend/internal/metricdata" "github.com/ClusterCockpit/cc-backend/internal/repository" "github.com/ClusterCockpit/cc-backend/pkg/archive" - ccconf "github.com/ClusterCockpit/cc-lib/ccConfig" - cclog "github.com/ClusterCockpit/cc-lib/ccLogger" - lp "github.com/ClusterCockpit/cc-lib/ccMessage" - "github.com/ClusterCockpit/cc-lib/schema" + ccconf "github.com/ClusterCockpit/cc-lib/v2/ccConfig" + cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger" + lp "github.com/ClusterCockpit/cc-lib/v2/ccMessage" + "github.com/ClusterCockpit/cc-lib/v2/schema" _ "github.com/mattn/go-sqlite3" ) diff --git a/internal/api/node.go b/internal/api/node.go index 8953e5b9..350f097d 100644 --- a/internal/api/node.go +++ b/internal/api/node.go @@ -12,7 +12,7 @@ import ( "time" "github.com/ClusterCockpit/cc-backend/internal/repository" - "github.com/ClusterCockpit/cc-lib/schema" + "github.com/ClusterCockpit/cc-lib/v2/schema" ) type UpdateNodeStatesRequest struct { diff --git a/internal/api/rest.go b/internal/api/rest.go index ebcf31ed..195de826 100644 --- a/internal/api/rest.go +++ b/internal/api/rest.go @@ -22,9 +22,9 @@ import ( "github.com/ClusterCockpit/cc-backend/internal/auth" "github.com/ClusterCockpit/cc-backend/internal/config" "github.com/ClusterCockpit/cc-backend/internal/repository" - cclog "github.com/ClusterCockpit/cc-lib/ccLogger" - "github.com/ClusterCockpit/cc-lib/schema" - "github.com/ClusterCockpit/cc-lib/util" + cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger" + "github.com/ClusterCockpit/cc-lib/v2/schema" + "github.com/ClusterCockpit/cc-lib/v2/util" "github.com/gorilla/mux" ) diff --git a/internal/api/user.go b/internal/api/user.go index f9ddee33..1821b69b 100644 --- a/internal/api/user.go +++ b/internal/api/user.go @@ -11,8 +11,8 @@ import ( "net/http" "github.com/ClusterCockpit/cc-backend/internal/repository" - cclog "github.com/ClusterCockpit/cc-lib/ccLogger" - "github.com/ClusterCockpit/cc-lib/schema" + cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger" + "github.com/ClusterCockpit/cc-lib/v2/schema" "github.com/gorilla/mux" ) diff --git a/internal/archiver/archiveWorker.go b/internal/archiver/archiveWorker.go index 0434844d..ecdd1756 100644 --- a/internal/archiver/archiveWorker.go +++ 
b/internal/archiver/archiveWorker.go @@ -54,8 +54,8 @@ import ( "time" "github.com/ClusterCockpit/cc-backend/internal/repository" - cclog "github.com/ClusterCockpit/cc-lib/ccLogger" - "github.com/ClusterCockpit/cc-lib/schema" + cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger" + "github.com/ClusterCockpit/cc-lib/v2/schema" sq "github.com/Masterminds/squirrel" ) diff --git a/internal/archiver/archiver.go b/internal/archiver/archiver.go index b88199aa..46ce8126 100644 --- a/internal/archiver/archiver.go +++ b/internal/archiver/archiver.go @@ -12,8 +12,8 @@ import ( "github.com/ClusterCockpit/cc-backend/internal/config" "github.com/ClusterCockpit/cc-backend/internal/metricDataDispatcher" "github.com/ClusterCockpit/cc-backend/pkg/archive" - cclog "github.com/ClusterCockpit/cc-lib/ccLogger" - "github.com/ClusterCockpit/cc-lib/schema" + cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger" + "github.com/ClusterCockpit/cc-lib/v2/schema" ) // ArchiveJob archives a completed job's metric data to the configured archive backend. diff --git a/internal/auth/auth.go b/internal/auth/auth.go index 5d947353..3be1768e 100644 --- a/internal/auth/auth.go +++ b/internal/auth/auth.go @@ -25,9 +25,9 @@ import ( "github.com/ClusterCockpit/cc-backend/internal/config" "github.com/ClusterCockpit/cc-backend/internal/repository" - cclog "github.com/ClusterCockpit/cc-lib/ccLogger" - "github.com/ClusterCockpit/cc-lib/schema" - "github.com/ClusterCockpit/cc-lib/util" + cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger" + "github.com/ClusterCockpit/cc-lib/v2/schema" + "github.com/ClusterCockpit/cc-lib/v2/util" "github.com/gorilla/sessions" ) diff --git a/internal/auth/jwt.go b/internal/auth/jwt.go index 4f1f3f54..be642219 100644 --- a/internal/auth/jwt.go +++ b/internal/auth/jwt.go @@ -14,8 +14,8 @@ import ( "strings" "time" - cclog "github.com/ClusterCockpit/cc-lib/ccLogger" - "github.com/ClusterCockpit/cc-lib/schema" + cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger" + "github.com/ClusterCockpit/cc-lib/v2/schema" "github.com/golang-jwt/jwt/v5" ) diff --git a/internal/auth/jwtCookieSession.go b/internal/auth/jwtCookieSession.go index 44c64a0c..42f7439e 100644 --- a/internal/auth/jwtCookieSession.go +++ b/internal/auth/jwtCookieSession.go @@ -12,8 +12,8 @@ import ( "net/http" "os" - cclog "github.com/ClusterCockpit/cc-lib/ccLogger" - "github.com/ClusterCockpit/cc-lib/schema" + cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger" + "github.com/ClusterCockpit/cc-lib/v2/schema" "github.com/golang-jwt/jwt/v5" ) diff --git a/internal/auth/jwtHelpers.go b/internal/auth/jwtHelpers.go index 792722a8..5bfc91ef 100644 --- a/internal/auth/jwtHelpers.go +++ b/internal/auth/jwtHelpers.go @@ -11,8 +11,8 @@ import ( "fmt" "github.com/ClusterCockpit/cc-backend/internal/repository" - cclog "github.com/ClusterCockpit/cc-lib/ccLogger" - "github.com/ClusterCockpit/cc-lib/schema" + cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger" + "github.com/ClusterCockpit/cc-lib/v2/schema" "github.com/golang-jwt/jwt/v5" ) diff --git a/internal/auth/jwtHelpers_test.go b/internal/auth/jwtHelpers_test.go index 5cee1df5..84a1f2e0 100644 --- a/internal/auth/jwtHelpers_test.go +++ b/internal/auth/jwtHelpers_test.go @@ -8,7 +8,7 @@ package auth import ( "testing" - "github.com/ClusterCockpit/cc-lib/schema" + "github.com/ClusterCockpit/cc-lib/v2/schema" "github.com/golang-jwt/jwt/v5" ) diff --git a/internal/auth/jwtSession.go b/internal/auth/jwtSession.go index 15e58347..107afcb8 100644 --- a/internal/auth/jwtSession.go +++ 
b/internal/auth/jwtSession.go @@ -13,8 +13,8 @@ import ( "os" "strings" - cclog "github.com/ClusterCockpit/cc-lib/ccLogger" - "github.com/ClusterCockpit/cc-lib/schema" + cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger" + "github.com/ClusterCockpit/cc-lib/v2/schema" "github.com/golang-jwt/jwt/v5" ) diff --git a/internal/auth/ldap.go b/internal/auth/ldap.go index e96e732b..4cbb80c5 100644 --- a/internal/auth/ldap.go +++ b/internal/auth/ldap.go @@ -13,8 +13,8 @@ import ( "strings" "github.com/ClusterCockpit/cc-backend/internal/repository" - cclog "github.com/ClusterCockpit/cc-lib/ccLogger" - "github.com/ClusterCockpit/cc-lib/schema" + cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger" + "github.com/ClusterCockpit/cc-lib/v2/schema" "github.com/go-ldap/ldap/v3" ) diff --git a/internal/auth/local.go b/internal/auth/local.go index 1c9b0372..b1a7362c 100644 --- a/internal/auth/local.go +++ b/internal/auth/local.go @@ -9,8 +9,8 @@ import ( "fmt" "net/http" - cclog "github.com/ClusterCockpit/cc-lib/ccLogger" - "github.com/ClusterCockpit/cc-lib/schema" + cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger" + "github.com/ClusterCockpit/cc-lib/v2/schema" "golang.org/x/crypto/bcrypt" ) diff --git a/internal/auth/oidc.go b/internal/auth/oidc.go index 9e361302..a3fc09cc 100644 --- a/internal/auth/oidc.go +++ b/internal/auth/oidc.go @@ -15,8 +15,8 @@ import ( "time" "github.com/ClusterCockpit/cc-backend/internal/repository" - cclog "github.com/ClusterCockpit/cc-lib/ccLogger" - "github.com/ClusterCockpit/cc-lib/schema" + cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger" + "github.com/ClusterCockpit/cc-lib/v2/schema" "github.com/coreos/go-oidc/v3/oidc" "github.com/gorilla/mux" "golang.org/x/oauth2" diff --git a/internal/config/config.go b/internal/config/config.go index 3c88bcfd..af8ec944 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -11,8 +11,8 @@ import ( "encoding/json" "time" - cclog "github.com/ClusterCockpit/cc-lib/ccLogger" - "github.com/ClusterCockpit/cc-lib/resampler" + cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger" + "github.com/ClusterCockpit/cc-lib/v2/resampler" ) type ProgramConfig struct { diff --git a/internal/config/config_test.go b/internal/config/config_test.go index 35e1c65e..396a80a1 100644 --- a/internal/config/config_test.go +++ b/internal/config/config_test.go @@ -8,8 +8,8 @@ package config import ( "testing" - ccconf "github.com/ClusterCockpit/cc-lib/ccConfig" - cclog "github.com/ClusterCockpit/cc-lib/ccLogger" + ccconf "github.com/ClusterCockpit/cc-lib/v2/ccConfig" + cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger" ) func TestInit(t *testing.T) { diff --git a/internal/config/validate.go b/internal/config/validate.go index 6ac67f5e..af8591ca 100644 --- a/internal/config/validate.go +++ b/internal/config/validate.go @@ -8,7 +8,7 @@ package config import ( "encoding/json" - cclog "github.com/ClusterCockpit/cc-lib/ccLogger" + cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger" "github.com/santhosh-tekuri/jsonschema/v5" ) diff --git a/internal/graph/generated/generated.go b/internal/graph/generated/generated.go index 1cb348e5..d96ccf1d 100644 --- a/internal/graph/generated/generated.go +++ b/internal/graph/generated/generated.go @@ -16,7 +16,7 @@ import ( "github.com/99designs/gqlgen/graphql/introspection" "github.com/ClusterCockpit/cc-backend/internal/config" "github.com/ClusterCockpit/cc-backend/internal/graph/model" - "github.com/ClusterCockpit/cc-lib/schema" + "github.com/ClusterCockpit/cc-lib/v2/schema" gqlparser 
"github.com/vektah/gqlparser/v2" "github.com/vektah/gqlparser/v2/ast" ) diff --git a/internal/graph/model/models_gen.go b/internal/graph/model/models_gen.go index 63b2da5d..31ba03ab 100644 --- a/internal/graph/model/models_gen.go +++ b/internal/graph/model/models_gen.go @@ -10,7 +10,7 @@ import ( "time" "github.com/ClusterCockpit/cc-backend/internal/config" - "github.com/ClusterCockpit/cc-lib/schema" + "github.com/ClusterCockpit/cc-lib/v2/schema" ) type ClusterMetricWithName struct { diff --git a/internal/graph/resolver.go b/internal/graph/resolver.go index 990014c7..d1b04de6 100644 --- a/internal/graph/resolver.go +++ b/internal/graph/resolver.go @@ -4,7 +4,7 @@ import ( "sync" "github.com/ClusterCockpit/cc-backend/internal/repository" - cclog "github.com/ClusterCockpit/cc-lib/ccLogger" + cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger" "github.com/jmoiron/sqlx" ) diff --git a/internal/graph/schema.resolvers.go b/internal/graph/schema.resolvers.go index cd4af057..32499b8c 100644 --- a/internal/graph/schema.resolvers.go +++ b/internal/graph/schema.resolvers.go @@ -22,8 +22,8 @@ import ( "github.com/ClusterCockpit/cc-backend/internal/metricDataDispatcher" "github.com/ClusterCockpit/cc-backend/internal/repository" "github.com/ClusterCockpit/cc-backend/pkg/archive" - cclog "github.com/ClusterCockpit/cc-lib/ccLogger" - "github.com/ClusterCockpit/cc-lib/schema" + cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger" + "github.com/ClusterCockpit/cc-lib/v2/schema" ) // Partitions is the resolver for the partitions field. diff --git a/internal/graph/util.go b/internal/graph/util.go index 220c3a84..42a1d2fb 100644 --- a/internal/graph/util.go +++ b/internal/graph/util.go @@ -14,8 +14,8 @@ import ( "github.com/99designs/gqlgen/graphql" "github.com/ClusterCockpit/cc-backend/internal/graph/model" "github.com/ClusterCockpit/cc-backend/internal/metricDataDispatcher" - cclog "github.com/ClusterCockpit/cc-lib/ccLogger" - "github.com/ClusterCockpit/cc-lib/schema" + cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger" + "github.com/ClusterCockpit/cc-lib/v2/schema" ) const MAX_JOBS_FOR_ANALYSIS = 500 diff --git a/internal/importer/handleImport.go b/internal/importer/handleImport.go index 482b328c..4b217475 100644 --- a/internal/importer/handleImport.go +++ b/internal/importer/handleImport.go @@ -14,8 +14,8 @@ import ( "github.com/ClusterCockpit/cc-backend/internal/config" "github.com/ClusterCockpit/cc-backend/internal/repository" "github.com/ClusterCockpit/cc-backend/pkg/archive" - cclog "github.com/ClusterCockpit/cc-lib/ccLogger" - "github.com/ClusterCockpit/cc-lib/schema" + cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger" + "github.com/ClusterCockpit/cc-lib/v2/schema" ) // HandleImportFlag imports jobs from file pairs specified in a comma-separated flag string. diff --git a/internal/importer/importer_test.go b/internal/importer/importer_test.go index 470f7603..bffb8bf6 100644 --- a/internal/importer/importer_test.go +++ b/internal/importer/importer_test.go @@ -16,8 +16,8 @@ import ( "github.com/ClusterCockpit/cc-backend/internal/importer" "github.com/ClusterCockpit/cc-backend/internal/repository" "github.com/ClusterCockpit/cc-backend/pkg/archive" - ccconf "github.com/ClusterCockpit/cc-lib/ccConfig" - cclog "github.com/ClusterCockpit/cc-lib/ccLogger" + ccconf "github.com/ClusterCockpit/cc-lib/v2/ccConfig" + cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger" ) // copyFile copies a file from source path to destination path. 
diff --git a/internal/importer/initDB.go b/internal/importer/initDB.go index 12f49010..d88be7c7 100644 --- a/internal/importer/initDB.go +++ b/internal/importer/initDB.go @@ -22,8 +22,8 @@ import ( "github.com/ClusterCockpit/cc-backend/internal/repository" "github.com/ClusterCockpit/cc-backend/pkg/archive" - cclog "github.com/ClusterCockpit/cc-lib/ccLogger" - "github.com/ClusterCockpit/cc-lib/schema" + cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger" + "github.com/ClusterCockpit/cc-lib/v2/schema" ) const ( diff --git a/internal/importer/normalize.go b/internal/importer/normalize.go index 943ceb26..c6e84d4b 100644 --- a/internal/importer/normalize.go +++ b/internal/importer/normalize.go @@ -7,7 +7,7 @@ package importer import ( "math" - ccunits "github.com/ClusterCockpit/cc-lib/ccUnits" + ccunits "github.com/ClusterCockpit/cc-lib/v2/ccUnits" ) // getNormalizationFactor calculates the scaling factor needed to normalize a value diff --git a/internal/importer/normalize_test.go b/internal/importer/normalize_test.go index 6aa1ed2e..039a3cfc 100644 --- a/internal/importer/normalize_test.go +++ b/internal/importer/normalize_test.go @@ -8,7 +8,7 @@ import ( "fmt" "testing" - ccunits "github.com/ClusterCockpit/cc-lib/ccUnits" + ccunits "github.com/ClusterCockpit/cc-lib/v2/ccUnits" ) // TestNormalizeFactor tests the normalization of large byte values to gigabyte prefix. diff --git a/internal/memorystore/api.go b/internal/memorystore/api.go index b96dc1fd..41c53a18 100644 --- a/internal/memorystore/api.go +++ b/internal/memorystore/api.go @@ -9,8 +9,8 @@ import ( "errors" "math" - "github.com/ClusterCockpit/cc-lib/schema" - "github.com/ClusterCockpit/cc-lib/util" + "github.com/ClusterCockpit/cc-lib/v2/schema" + "github.com/ClusterCockpit/cc-lib/v2/util" ) var ( diff --git a/internal/memorystore/archive.go b/internal/memorystore/archive.go index 5019ee7a..fc46dac6 100644 --- a/internal/memorystore/archive.go +++ b/internal/memorystore/archive.go @@ -18,7 +18,7 @@ import ( "sync/atomic" "time" - cclog "github.com/ClusterCockpit/cc-lib/ccLogger" + cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger" ) func Archiving(wg *sync.WaitGroup, ctx context.Context) { diff --git a/internal/memorystore/avroCheckpoint.go b/internal/memorystore/avroCheckpoint.go index 42e5f623..b0b0cf42 100644 --- a/internal/memorystore/avroCheckpoint.go +++ b/internal/memorystore/avroCheckpoint.go @@ -19,8 +19,8 @@ import ( "sync/atomic" "time" - cclog "github.com/ClusterCockpit/cc-lib/ccLogger" - "github.com/ClusterCockpit/cc-lib/schema" + cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger" + "github.com/ClusterCockpit/cc-lib/v2/schema" "github.com/linkedin/goavro/v2" ) diff --git a/internal/memorystore/avroHelper.go b/internal/memorystore/avroHelper.go index a6f6c9bf..93a293bd 100644 --- a/internal/memorystore/avroHelper.go +++ b/internal/memorystore/avroHelper.go @@ -11,7 +11,7 @@ import ( "strconv" "sync" - cclog "github.com/ClusterCockpit/cc-lib/ccLogger" + cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger" ) func DataStaging(wg *sync.WaitGroup, ctx context.Context) { diff --git a/internal/memorystore/avroStruct.go b/internal/memorystore/avroStruct.go index bde9e02b..2643a9a7 100644 --- a/internal/memorystore/avroStruct.go +++ b/internal/memorystore/avroStruct.go @@ -8,7 +8,7 @@ package memorystore import ( "sync" - "github.com/ClusterCockpit/cc-lib/schema" + "github.com/ClusterCockpit/cc-lib/v2/schema" ) var ( diff --git a/internal/memorystore/buffer.go b/internal/memorystore/buffer.go index 55be2ada..15e29b3a 100644 
--- a/internal/memorystore/buffer.go +++ b/internal/memorystore/buffer.go @@ -9,7 +9,7 @@ import ( "errors" "sync" - "github.com/ClusterCockpit/cc-lib/schema" + "github.com/ClusterCockpit/cc-lib/v2/schema" ) // BufferCap is the default buffer capacity. diff --git a/internal/memorystore/checkpoint.go b/internal/memorystore/checkpoint.go index c676977c..c48c2fd8 100644 --- a/internal/memorystore/checkpoint.go +++ b/internal/memorystore/checkpoint.go @@ -23,8 +23,8 @@ import ( "sync/atomic" "time" - cclog "github.com/ClusterCockpit/cc-lib/ccLogger" - "github.com/ClusterCockpit/cc-lib/schema" + cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger" + "github.com/ClusterCockpit/cc-lib/v2/schema" "github.com/linkedin/goavro/v2" ) diff --git a/internal/memorystore/level.go b/internal/memorystore/level.go index f3b3d3f5..bce2a7a6 100644 --- a/internal/memorystore/level.go +++ b/internal/memorystore/level.go @@ -9,7 +9,7 @@ import ( "sync" "unsafe" - "github.com/ClusterCockpit/cc-lib/util" + "github.com/ClusterCockpit/cc-lib/v2/util" ) // Could also be called "node" as this forms a node in a tree structure. diff --git a/internal/memorystore/lineprotocol.go b/internal/memorystore/lineprotocol.go index 6404361f..ca8cc811 100644 --- a/internal/memorystore/lineprotocol.go +++ b/internal/memorystore/lineprotocol.go @@ -12,8 +12,8 @@ import ( "time" "github.com/ClusterCockpit/cc-backend/pkg/nats" - cclog "github.com/ClusterCockpit/cc-lib/ccLogger" - "github.com/ClusterCockpit/cc-lib/schema" + cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger" + "github.com/ClusterCockpit/cc-lib/v2/schema" "github.com/influxdata/line-protocol/v2/lineprotocol" ) diff --git a/internal/memorystore/memorystore.go b/internal/memorystore/memorystore.go index 259a86ed..7c5ea0eb 100644 --- a/internal/memorystore/memorystore.go +++ b/internal/memorystore/memorystore.go @@ -30,10 +30,10 @@ import ( "github.com/ClusterCockpit/cc-backend/internal/config" "github.com/ClusterCockpit/cc-backend/pkg/archive" - cclog "github.com/ClusterCockpit/cc-lib/ccLogger" - "github.com/ClusterCockpit/cc-lib/resampler" - "github.com/ClusterCockpit/cc-lib/schema" - "github.com/ClusterCockpit/cc-lib/util" + cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger" + "github.com/ClusterCockpit/cc-lib/v2/resampler" + "github.com/ClusterCockpit/cc-lib/v2/schema" + "github.com/ClusterCockpit/cc-lib/v2/util" ) var ( diff --git a/internal/memorystore/memorystore_test.go b/internal/memorystore/memorystore_test.go index b8ab090a..57ea6938 100644 --- a/internal/memorystore/memorystore_test.go +++ b/internal/memorystore/memorystore_test.go @@ -8,7 +8,7 @@ package memorystore import ( "testing" - "github.com/ClusterCockpit/cc-lib/schema" + "github.com/ClusterCockpit/cc-lib/v2/schema" ) func TestAssignAggregationStrategy(t *testing.T) { diff --git a/internal/memorystore/stats.go b/internal/memorystore/stats.go index b2cb539a..c931ab35 100644 --- a/internal/memorystore/stats.go +++ b/internal/memorystore/stats.go @@ -9,7 +9,7 @@ import ( "errors" "math" - "github.com/ClusterCockpit/cc-lib/util" + "github.com/ClusterCockpit/cc-lib/v2/util" ) type Stats struct { diff --git a/internal/metricDataDispatcher/dataLoader.go b/internal/metricDataDispatcher/dataLoader.go index 780eb73e..6d1338fa 100644 --- a/internal/metricDataDispatcher/dataLoader.go +++ b/internal/metricDataDispatcher/dataLoader.go @@ -13,10 +13,10 @@ import ( "github.com/ClusterCockpit/cc-backend/internal/config" "github.com/ClusterCockpit/cc-backend/internal/metricdata" 
"github.com/ClusterCockpit/cc-backend/pkg/archive" - cclog "github.com/ClusterCockpit/cc-lib/ccLogger" - "github.com/ClusterCockpit/cc-lib/lrucache" - "github.com/ClusterCockpit/cc-lib/resampler" - "github.com/ClusterCockpit/cc-lib/schema" + cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger" + "github.com/ClusterCockpit/cc-lib/v2/lrucache" + "github.com/ClusterCockpit/cc-lib/v2/resampler" + "github.com/ClusterCockpit/cc-lib/v2/schema" ) var cache *lrucache.Cache = lrucache.New(128 * 1024 * 1024) diff --git a/internal/metricdata/cc-metric-store-internal.go b/internal/metricdata/cc-metric-store-internal.go index 9f0cd74a..741ce358 100644 --- a/internal/metricdata/cc-metric-store-internal.go +++ b/internal/metricdata/cc-metric-store-internal.go @@ -15,8 +15,8 @@ import ( "github.com/ClusterCockpit/cc-backend/internal/memorystore" "github.com/ClusterCockpit/cc-backend/pkg/archive" - cclog "github.com/ClusterCockpit/cc-lib/ccLogger" - "github.com/ClusterCockpit/cc-lib/schema" + cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger" + "github.com/ClusterCockpit/cc-lib/v2/schema" ) // Bloat Code diff --git a/internal/metricdata/cc-metric-store.go b/internal/metricdata/cc-metric-store.go index be2e956e..6c146f22 100644 --- a/internal/metricdata/cc-metric-store.go +++ b/internal/metricdata/cc-metric-store.go @@ -15,8 +15,8 @@ import ( "time" "github.com/ClusterCockpit/cc-backend/pkg/archive" - cclog "github.com/ClusterCockpit/cc-lib/ccLogger" - "github.com/ClusterCockpit/cc-lib/schema" + cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger" + "github.com/ClusterCockpit/cc-lib/v2/schema" ) type CCMetricStoreConfig struct { diff --git a/internal/metricdata/metricdata.go b/internal/metricdata/metricdata.go index 0748a8d5..ab0e19fb 100644 --- a/internal/metricdata/metricdata.go +++ b/internal/metricdata/metricdata.go @@ -13,8 +13,8 @@ import ( "github.com/ClusterCockpit/cc-backend/internal/config" "github.com/ClusterCockpit/cc-backend/internal/memorystore" - cclog "github.com/ClusterCockpit/cc-lib/ccLogger" - "github.com/ClusterCockpit/cc-lib/schema" + cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger" + "github.com/ClusterCockpit/cc-lib/v2/schema" ) type MetricDataRepository interface { diff --git a/internal/metricdata/prometheus.go b/internal/metricdata/prometheus.go index 66c5bc1e..3fb94d51 100644 --- a/internal/metricdata/prometheus.go +++ b/internal/metricdata/prometheus.go @@ -21,8 +21,8 @@ import ( "time" "github.com/ClusterCockpit/cc-backend/pkg/archive" - cclog "github.com/ClusterCockpit/cc-lib/ccLogger" - "github.com/ClusterCockpit/cc-lib/schema" + cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger" + "github.com/ClusterCockpit/cc-lib/v2/schema" promapi "github.com/prometheus/client_golang/api" promv1 "github.com/prometheus/client_golang/api/prometheus/v1" promcfg "github.com/prometheus/common/config" diff --git a/internal/metricdata/utils.go b/internal/metricdata/utils.go index 0b2bb7ec..21dfbcac 100644 --- a/internal/metricdata/utils.go +++ b/internal/metricdata/utils.go @@ -10,7 +10,7 @@ import ( "encoding/json" "time" - "github.com/ClusterCockpit/cc-lib/schema" + "github.com/ClusterCockpit/cc-lib/v2/schema" ) var TestLoadDataCallback func(job *schema.Job, metrics []string, scopes []schema.MetricScope, ctx context.Context, resolution int) (schema.JobData, error) = func(job *schema.Job, metrics []string, scopes []schema.MetricScope, ctx context.Context, resolution int) (schema.JobData, error) { diff --git a/internal/repository/dbConnection.go b/internal/repository/dbConnection.go index 
be0b161b..3141cf86 100644 --- a/internal/repository/dbConnection.go +++ b/internal/repository/dbConnection.go @@ -12,7 +12,7 @@ import ( "sync" "time" - cclog "github.com/ClusterCockpit/cc-lib/ccLogger" + cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger" "github.com/jmoiron/sqlx" "github.com/mattn/go-sqlite3" "github.com/qustavo/sqlhooks/v2" diff --git a/internal/repository/hooks.go b/internal/repository/hooks.go index 54330723..c916b57e 100644 --- a/internal/repository/hooks.go +++ b/internal/repository/hooks.go @@ -8,7 +8,7 @@ import ( "context" "time" - cclog "github.com/ClusterCockpit/cc-lib/ccLogger" + cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger" ) // Hooks satisfies the sqlhook.Hooks interface diff --git a/internal/repository/job.go b/internal/repository/job.go index 47959379..99970ce1 100644 --- a/internal/repository/job.go +++ b/internal/repository/job.go @@ -71,9 +71,9 @@ import ( "time" "github.com/ClusterCockpit/cc-backend/pkg/archive" - cclog "github.com/ClusterCockpit/cc-lib/ccLogger" - "github.com/ClusterCockpit/cc-lib/lrucache" - "github.com/ClusterCockpit/cc-lib/schema" + cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger" + "github.com/ClusterCockpit/cc-lib/v2/lrucache" + "github.com/ClusterCockpit/cc-lib/v2/schema" sq "github.com/Masterminds/squirrel" "github.com/jmoiron/sqlx" ) diff --git a/internal/repository/jobCreate.go b/internal/repository/jobCreate.go index efd262b8..6114ae5e 100644 --- a/internal/repository/jobCreate.go +++ b/internal/repository/jobCreate.go @@ -9,8 +9,8 @@ import ( "encoding/json" "fmt" - cclog "github.com/ClusterCockpit/cc-lib/ccLogger" - "github.com/ClusterCockpit/cc-lib/schema" + cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger" + "github.com/ClusterCockpit/cc-lib/v2/schema" sq "github.com/Masterminds/squirrel" ) diff --git a/internal/repository/jobFind.go b/internal/repository/jobFind.go index c4051e7f..ff2c27aa 100644 --- a/internal/repository/jobFind.go +++ b/internal/repository/jobFind.go @@ -12,8 +12,8 @@ import ( "time" "github.com/ClusterCockpit/cc-backend/internal/graph/model" - cclog "github.com/ClusterCockpit/cc-lib/ccLogger" - "github.com/ClusterCockpit/cc-lib/schema" + cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger" + "github.com/ClusterCockpit/cc-lib/v2/schema" sq "github.com/Masterminds/squirrel" ) diff --git a/internal/repository/jobHooks.go b/internal/repository/jobHooks.go index 824b5cde..c449d308 100644 --- a/internal/repository/jobHooks.go +++ b/internal/repository/jobHooks.go @@ -7,7 +7,7 @@ package repository import ( "sync" - "github.com/ClusterCockpit/cc-lib/schema" + "github.com/ClusterCockpit/cc-lib/v2/schema" ) type JobHook interface { diff --git a/internal/repository/jobQuery.go b/internal/repository/jobQuery.go index 00dabea3..8c341afb 100644 --- a/internal/repository/jobQuery.go +++ b/internal/repository/jobQuery.go @@ -14,8 +14,8 @@ import ( "github.com/ClusterCockpit/cc-backend/internal/config" "github.com/ClusterCockpit/cc-backend/internal/graph/model" - cclog "github.com/ClusterCockpit/cc-lib/ccLogger" - "github.com/ClusterCockpit/cc-lib/schema" + cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger" + "github.com/ClusterCockpit/cc-lib/v2/schema" sq "github.com/Masterminds/squirrel" ) diff --git a/internal/repository/job_test.go b/internal/repository/job_test.go index c89225b3..17766c69 100644 --- a/internal/repository/job_test.go +++ b/internal/repository/job_test.go @@ -10,7 +10,7 @@ import ( "testing" "time" - "github.com/ClusterCockpit/cc-lib/schema" + 
"github.com/ClusterCockpit/cc-lib/v2/schema" _ "github.com/mattn/go-sqlite3" ) diff --git a/internal/repository/migration.go b/internal/repository/migration.go index 43e913cc..a47f9fcd 100644 --- a/internal/repository/migration.go +++ b/internal/repository/migration.go @@ -10,7 +10,7 @@ import ( "embed" "fmt" - cclog "github.com/ClusterCockpit/cc-lib/ccLogger" + cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger" "github.com/golang-migrate/migrate/v4" "github.com/golang-migrate/migrate/v4/database/sqlite3" "github.com/golang-migrate/migrate/v4/source/iofs" diff --git a/internal/repository/node.go b/internal/repository/node.go index 3b597eda..752a36fa 100644 --- a/internal/repository/node.go +++ b/internal/repository/node.go @@ -17,9 +17,9 @@ import ( "github.com/ClusterCockpit/cc-backend/internal/graph/model" "github.com/ClusterCockpit/cc-backend/pkg/archive" - cclog "github.com/ClusterCockpit/cc-lib/ccLogger" - "github.com/ClusterCockpit/cc-lib/lrucache" - "github.com/ClusterCockpit/cc-lib/schema" + cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger" + "github.com/ClusterCockpit/cc-lib/v2/lrucache" + "github.com/ClusterCockpit/cc-lib/v2/schema" sq "github.com/Masterminds/squirrel" "github.com/jmoiron/sqlx" ) diff --git a/internal/repository/node_test.go b/internal/repository/node_test.go index 466f51ee..e1d6ca93 100644 --- a/internal/repository/node_test.go +++ b/internal/repository/node_test.go @@ -15,9 +15,9 @@ import ( "github.com/ClusterCockpit/cc-backend/internal/config" "github.com/ClusterCockpit/cc-backend/pkg/archive" - ccconf "github.com/ClusterCockpit/cc-lib/ccConfig" - cclog "github.com/ClusterCockpit/cc-lib/ccLogger" - "github.com/ClusterCockpit/cc-lib/schema" + ccconf "github.com/ClusterCockpit/cc-lib/v2/ccConfig" + cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger" + "github.com/ClusterCockpit/cc-lib/v2/schema" _ "github.com/mattn/go-sqlite3" ) diff --git a/internal/repository/repository_test.go b/internal/repository/repository_test.go index e3dec7fc..9d07b026 100644 --- a/internal/repository/repository_test.go +++ b/internal/repository/repository_test.go @@ -9,8 +9,8 @@ import ( "testing" "github.com/ClusterCockpit/cc-backend/internal/graph/model" - cclog "github.com/ClusterCockpit/cc-lib/ccLogger" - "github.com/ClusterCockpit/cc-lib/schema" + cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger" + "github.com/ClusterCockpit/cc-lib/v2/schema" _ "github.com/mattn/go-sqlite3" ) diff --git a/internal/repository/stats.go b/internal/repository/stats.go index c92f5193..d1e16eb8 100644 --- a/internal/repository/stats.go +++ b/internal/repository/stats.go @@ -14,8 +14,8 @@ import ( "github.com/ClusterCockpit/cc-backend/internal/graph/model" "github.com/ClusterCockpit/cc-backend/internal/metricDataDispatcher" "github.com/ClusterCockpit/cc-backend/pkg/archive" - cclog "github.com/ClusterCockpit/cc-lib/ccLogger" - "github.com/ClusterCockpit/cc-lib/schema" + cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger" + "github.com/ClusterCockpit/cc-lib/v2/schema" sq "github.com/Masterminds/squirrel" ) diff --git a/internal/repository/tags.go b/internal/repository/tags.go index 8a076e8a..9bc9abae 100644 --- a/internal/repository/tags.go +++ b/internal/repository/tags.go @@ -11,8 +11,8 @@ import ( "strings" "github.com/ClusterCockpit/cc-backend/pkg/archive" - cclog "github.com/ClusterCockpit/cc-lib/ccLogger" - "github.com/ClusterCockpit/cc-lib/schema" + cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger" + "github.com/ClusterCockpit/cc-lib/v2/schema" sq "github.com/Masterminds/squirrel" ) 
diff --git a/internal/repository/testdata/job.db b/internal/repository/testdata/job.db index 5c5a692585a4736a9e0456d1caf7b4878c936f74..729cac965265a81bf187475d23d21de268a42ccd 100644 GIT binary patch delta 914 zcma)4O=}ZT6n$^r?U+fEUfYnG7-A{`$-=;VDY$Se z#O{lKKrvu9Lfwix7t*>hG!#i8Vy#+Rvhcp7q_I_Sao(HZoO3@8Z)I(IWo`Ne=qkq+ z!&YNl-@;^st1J=^u*GuKHO!P-s=7sIsp^&tQH3)RX%XGxuv%9-iDZdN(d%W&P}W~r=gXVQjc+vXq5P8X?f-n!}otTX(EukkI` zX^%%(OF(pv|Hl&GgFuiUPb&WDIyH^rhd@{(S7q5$co=8{2qDyS~Q@D(N|Zhe~oPDSH2k z(m1S*PSJa4Gi;Oa6JDN#NL)Mn807dIn+%&I`+!rIxR+5$#S;r^_ijFm-{8%HQv47>Ofzv3<_KjOrQkU7dz2ze^XWjO@`bTufF zk`+7x;!9?pcyAMrHt{H>AfZ%gLpycl{cSQdG?h>8b^Fa(7>4WBe7R@~AtV=q3*%{d z=Y6sCYm!UBrSW3*;H-??!M`U~mQNQN8^YzEQ$;$VKw{zhAF*{nmXUt3c}LO-&}0SM fUsDxRTboO$KotlS)SwOxXo3SSw4iNl-l6FS=Yv%5 diff --git a/internal/repository/user.go b/internal/repository/user.go index 5cab2b0d..770915b6 100644 --- a/internal/repository/user.go +++ b/internal/repository/user.go @@ -15,8 +15,8 @@ import ( "github.com/ClusterCockpit/cc-backend/internal/config" "github.com/ClusterCockpit/cc-backend/internal/graph/model" - cclog "github.com/ClusterCockpit/cc-lib/ccLogger" - "github.com/ClusterCockpit/cc-lib/schema" + cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger" + "github.com/ClusterCockpit/cc-lib/v2/schema" sq "github.com/Masterminds/squirrel" "github.com/jmoiron/sqlx" "golang.org/x/crypto/bcrypt" diff --git a/internal/repository/userConfig.go b/internal/repository/userConfig.go index beeffbf5..75e7119f 100644 --- a/internal/repository/userConfig.go +++ b/internal/repository/userConfig.go @@ -12,9 +12,9 @@ import ( "github.com/ClusterCockpit/cc-backend/internal/config" "github.com/ClusterCockpit/cc-backend/web" - cclog "github.com/ClusterCockpit/cc-lib/ccLogger" - "github.com/ClusterCockpit/cc-lib/lrucache" - "github.com/ClusterCockpit/cc-lib/schema" + cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger" + "github.com/ClusterCockpit/cc-lib/v2/lrucache" + "github.com/ClusterCockpit/cc-lib/v2/schema" "github.com/jmoiron/sqlx" ) diff --git a/internal/repository/userConfig_test.go b/internal/repository/userConfig_test.go index b6f68430..02c70d0f 100644 --- a/internal/repository/userConfig_test.go +++ b/internal/repository/userConfig_test.go @@ -10,9 +10,9 @@ import ( "testing" "github.com/ClusterCockpit/cc-backend/internal/config" - ccconf "github.com/ClusterCockpit/cc-lib/ccConfig" - cclog "github.com/ClusterCockpit/cc-lib/ccLogger" - "github.com/ClusterCockpit/cc-lib/schema" + ccconf "github.com/ClusterCockpit/cc-lib/v2/ccConfig" + cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger" + "github.com/ClusterCockpit/cc-lib/v2/schema" _ "github.com/mattn/go-sqlite3" ) diff --git a/internal/routerConfig/routes.go b/internal/routerConfig/routes.go index 4466034d..436031ef 100644 --- a/internal/routerConfig/routes.go +++ b/internal/routerConfig/routes.go @@ -17,9 +17,9 @@ import ( "github.com/ClusterCockpit/cc-backend/internal/graph/model" "github.com/ClusterCockpit/cc-backend/internal/repository" "github.com/ClusterCockpit/cc-backend/web" - cclog "github.com/ClusterCockpit/cc-lib/ccLogger" - "github.com/ClusterCockpit/cc-lib/schema" - "github.com/ClusterCockpit/cc-lib/util" + cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger" + "github.com/ClusterCockpit/cc-lib/v2/schema" + "github.com/ClusterCockpit/cc-lib/v2/util" "github.com/gorilla/mux" ) diff --git a/internal/tagger/classifyJob.go b/internal/tagger/classifyJob.go index 4e46f370..70399218 100644 --- a/internal/tagger/classifyJob.go +++ b/internal/tagger/classifyJob.go @@ -16,9 +16,9 @@ import ( 
"github.com/ClusterCockpit/cc-backend/internal/repository" "github.com/ClusterCockpit/cc-backend/pkg/archive" - cclog "github.com/ClusterCockpit/cc-lib/ccLogger" - "github.com/ClusterCockpit/cc-lib/schema" - "github.com/ClusterCockpit/cc-lib/util" + cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger" + "github.com/ClusterCockpit/cc-lib/v2/schema" + "github.com/ClusterCockpit/cc-lib/v2/util" "github.com/expr-lang/expr" "github.com/expr-lang/expr/vm" ) diff --git a/internal/tagger/classifyJob_test.go b/internal/tagger/classifyJob_test.go index 3795a60a..bed7a8f0 100644 --- a/internal/tagger/classifyJob_test.go +++ b/internal/tagger/classifyJob_test.go @@ -3,7 +3,7 @@ package tagger import ( "testing" - "github.com/ClusterCockpit/cc-lib/schema" + "github.com/ClusterCockpit/cc-lib/v2/schema" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/mock" ) diff --git a/internal/tagger/detectApp.go b/internal/tagger/detectApp.go index 4e8f858d..0b8e3e7e 100644 --- a/internal/tagger/detectApp.go +++ b/internal/tagger/detectApp.go @@ -16,9 +16,9 @@ import ( "strings" "github.com/ClusterCockpit/cc-backend/internal/repository" - cclog "github.com/ClusterCockpit/cc-lib/ccLogger" - "github.com/ClusterCockpit/cc-lib/schema" - "github.com/ClusterCockpit/cc-lib/util" + cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger" + "github.com/ClusterCockpit/cc-lib/v2/schema" + "github.com/ClusterCockpit/cc-lib/v2/util" ) //go:embed apps/* diff --git a/internal/tagger/detectApp_test.go b/internal/tagger/detectApp_test.go index 7145d04f..1c44f670 100644 --- a/internal/tagger/detectApp_test.go +++ b/internal/tagger/detectApp_test.go @@ -8,7 +8,7 @@ import ( "testing" "github.com/ClusterCockpit/cc-backend/internal/repository" - cclog "github.com/ClusterCockpit/cc-lib/ccLogger" + cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger" ) func setup(tb testing.TB) *repository.JobRepository { diff --git a/internal/tagger/tagger.go b/internal/tagger/tagger.go index 2ba18a14..0839603d 100644 --- a/internal/tagger/tagger.go +++ b/internal/tagger/tagger.go @@ -13,8 +13,8 @@ import ( "sync" "github.com/ClusterCockpit/cc-backend/internal/repository" - cclog "github.com/ClusterCockpit/cc-lib/ccLogger" - "github.com/ClusterCockpit/cc-lib/schema" + cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger" + "github.com/ClusterCockpit/cc-lib/v2/schema" ) // Tagger is the interface that must be implemented by all tagging components. 
diff --git a/internal/tagger/tagger_test.go b/internal/tagger/tagger_test.go index fb4bc54e..d24ad7f7 100644 --- a/internal/tagger/tagger_test.go +++ b/internal/tagger/tagger_test.go @@ -8,7 +8,7 @@ import ( "testing" "github.com/ClusterCockpit/cc-backend/internal/repository" - "github.com/ClusterCockpit/cc-lib/schema" + "github.com/ClusterCockpit/cc-lib/v2/schema" ) func TestInit(t *testing.T) { diff --git a/internal/taskmanager/commitJobService.go b/internal/taskmanager/commitJobService.go index 4f21c86b..4a070284 100644 --- a/internal/taskmanager/commitJobService.go +++ b/internal/taskmanager/commitJobService.go @@ -9,7 +9,7 @@ import ( "time" "github.com/ClusterCockpit/cc-backend/internal/repository" - cclog "github.com/ClusterCockpit/cc-lib/ccLogger" + cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger" "github.com/go-co-op/gocron/v2" ) diff --git a/internal/taskmanager/compressionService.go b/internal/taskmanager/compressionService.go index 1da2f68d..ab01ce8f 100644 --- a/internal/taskmanager/compressionService.go +++ b/internal/taskmanager/compressionService.go @@ -9,8 +9,8 @@ import ( "time" "github.com/ClusterCockpit/cc-backend/pkg/archive" - cclog "github.com/ClusterCockpit/cc-lib/ccLogger" - "github.com/ClusterCockpit/cc-lib/schema" + cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger" + "github.com/ClusterCockpit/cc-lib/v2/schema" "github.com/go-co-op/gocron/v2" ) diff --git a/internal/taskmanager/ldapSyncService.go b/internal/taskmanager/ldapSyncService.go index e410af9e..9e99a261 100644 --- a/internal/taskmanager/ldapSyncService.go +++ b/internal/taskmanager/ldapSyncService.go @@ -9,7 +9,7 @@ import ( "time" "github.com/ClusterCockpit/cc-backend/internal/auth" - cclog "github.com/ClusterCockpit/cc-lib/ccLogger" + cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger" "github.com/go-co-op/gocron/v2" ) diff --git a/internal/taskmanager/retentionService.go b/internal/taskmanager/retentionService.go index acd07307..5678cd14 100644 --- a/internal/taskmanager/retentionService.go +++ b/internal/taskmanager/retentionService.go @@ -9,7 +9,7 @@ import ( "time" "github.com/ClusterCockpit/cc-backend/pkg/archive" - cclog "github.com/ClusterCockpit/cc-lib/ccLogger" + cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger" "github.com/go-co-op/gocron/v2" ) diff --git a/internal/taskmanager/stopJobsExceedTime.go b/internal/taskmanager/stopJobsExceedTime.go index b763f561..ce9cfd77 100644 --- a/internal/taskmanager/stopJobsExceedTime.go +++ b/internal/taskmanager/stopJobsExceedTime.go @@ -9,7 +9,7 @@ import ( "runtime" "github.com/ClusterCockpit/cc-backend/internal/config" - cclog "github.com/ClusterCockpit/cc-lib/ccLogger" + cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger" "github.com/go-co-op/gocron/v2" ) diff --git a/internal/taskmanager/taskManager.go b/internal/taskmanager/taskManager.go index 57f2d883..06e4f28f 100644 --- a/internal/taskmanager/taskManager.go +++ b/internal/taskmanager/taskManager.go @@ -13,7 +13,7 @@ import ( "github.com/ClusterCockpit/cc-backend/internal/auth" "github.com/ClusterCockpit/cc-backend/internal/config" "github.com/ClusterCockpit/cc-backend/internal/repository" - cclog "github.com/ClusterCockpit/cc-lib/ccLogger" + cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger" "github.com/go-co-op/gocron/v2" ) diff --git a/internal/taskmanager/updateDurationService.go b/internal/taskmanager/updateDurationService.go index 9c52da79..f1dde74a 100644 --- a/internal/taskmanager/updateDurationService.go +++ b/internal/taskmanager/updateDurationService.go @@ -8,7 +8,7 @@ 
package taskmanager import ( "time" - cclog "github.com/ClusterCockpit/cc-lib/ccLogger" + cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger" "github.com/go-co-op/gocron/v2" ) diff --git a/internal/taskmanager/updateFootprintService.go b/internal/taskmanager/updateFootprintService.go index ae9512cd..979a6137 100644 --- a/internal/taskmanager/updateFootprintService.go +++ b/internal/taskmanager/updateFootprintService.go @@ -12,8 +12,8 @@ import ( "github.com/ClusterCockpit/cc-backend/internal/metricdata" "github.com/ClusterCockpit/cc-backend/pkg/archive" - cclog "github.com/ClusterCockpit/cc-lib/ccLogger" - "github.com/ClusterCockpit/cc-lib/schema" + cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger" + "github.com/ClusterCockpit/cc-lib/v2/schema" sq "github.com/Masterminds/squirrel" "github.com/go-co-op/gocron/v2" ) diff --git a/pkg/archive/archive.go b/pkg/archive/archive.go index 71933f2b..f9ce4314 100644 --- a/pkg/archive/archive.go +++ b/pkg/archive/archive.go @@ -85,9 +85,9 @@ import ( "sync" "github.com/ClusterCockpit/cc-backend/internal/config" - cclog "github.com/ClusterCockpit/cc-lib/ccLogger" - "github.com/ClusterCockpit/cc-lib/lrucache" - "github.com/ClusterCockpit/cc-lib/schema" + cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger" + "github.com/ClusterCockpit/cc-lib/v2/lrucache" + "github.com/ClusterCockpit/cc-lib/v2/schema" ) // Version is the current archive schema version. diff --git a/pkg/archive/archive_test.go b/pkg/archive/archive_test.go index 34ea831a..595315c3 100644 --- a/pkg/archive/archive_test.go +++ b/pkg/archive/archive_test.go @@ -11,8 +11,8 @@ import ( "testing" "github.com/ClusterCockpit/cc-backend/pkg/archive" - "github.com/ClusterCockpit/cc-lib/schema" - "github.com/ClusterCockpit/cc-lib/util" + "github.com/ClusterCockpit/cc-lib/v2/schema" + "github.com/ClusterCockpit/cc-lib/v2/util" ) var jobs []*schema.Job diff --git a/pkg/archive/clusterConfig.go b/pkg/archive/clusterConfig.go index 696601b7..6e4866eb 100644 --- a/pkg/archive/clusterConfig.go +++ b/pkg/archive/clusterConfig.go @@ -8,8 +8,8 @@ package archive import ( "fmt" - cclog "github.com/ClusterCockpit/cc-lib/ccLogger" - "github.com/ClusterCockpit/cc-lib/schema" + cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger" + "github.com/ClusterCockpit/cc-lib/v2/schema" ) var ( diff --git a/pkg/archive/fsBackend.go b/pkg/archive/fsBackend.go index b8d2a94b..020f2aa4 100644 --- a/pkg/archive/fsBackend.go +++ b/pkg/archive/fsBackend.go @@ -23,9 +23,9 @@ import ( "time" "github.com/ClusterCockpit/cc-backend/internal/config" - cclog "github.com/ClusterCockpit/cc-lib/ccLogger" - "github.com/ClusterCockpit/cc-lib/schema" - "github.com/ClusterCockpit/cc-lib/util" + cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger" + "github.com/ClusterCockpit/cc-lib/v2/schema" + "github.com/ClusterCockpit/cc-lib/v2/util" "github.com/santhosh-tekuri/jsonschema/v5" ) diff --git a/pkg/archive/fsBackend_test.go b/pkg/archive/fsBackend_test.go index a43a6c3a..05491f61 100644 --- a/pkg/archive/fsBackend_test.go +++ b/pkg/archive/fsBackend_test.go @@ -10,8 +10,8 @@ import ( "path/filepath" "testing" - "github.com/ClusterCockpit/cc-lib/schema" - "github.com/ClusterCockpit/cc-lib/util" + "github.com/ClusterCockpit/cc-lib/v2/schema" + "github.com/ClusterCockpit/cc-lib/v2/util" ) func TestInitEmptyPath(t *testing.T) { diff --git a/pkg/archive/json.go b/pkg/archive/json.go index 75c39531..cf1b0a38 100644 --- a/pkg/archive/json.go +++ b/pkg/archive/json.go @@ -10,8 +10,8 @@ import ( "io" "time" - cclog 
"github.com/ClusterCockpit/cc-lib/ccLogger" - "github.com/ClusterCockpit/cc-lib/schema" + cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger" + "github.com/ClusterCockpit/cc-lib/v2/schema" ) func DecodeJobData(r io.Reader, k string) (schema.JobData, error) { diff --git a/pkg/archive/nodelist.go b/pkg/archive/nodelist.go index ffb5f563..7a3784c3 100644 --- a/pkg/archive/nodelist.go +++ b/pkg/archive/nodelist.go @@ -10,7 +10,7 @@ import ( "strconv" "strings" - cclog "github.com/ClusterCockpit/cc-lib/ccLogger" + cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger" ) type NodeList [][]interface { diff --git a/pkg/archive/s3Backend.go b/pkg/archive/s3Backend.go index c874a320..a9933a9f 100644 --- a/pkg/archive/s3Backend.go +++ b/pkg/archive/s3Backend.go @@ -22,9 +22,9 @@ import ( "time" "github.com/ClusterCockpit/cc-backend/internal/config" - cclog "github.com/ClusterCockpit/cc-lib/ccLogger" - "github.com/ClusterCockpit/cc-lib/schema" - "github.com/ClusterCockpit/cc-lib/util" + cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger" + "github.com/ClusterCockpit/cc-lib/v2/schema" + "github.com/ClusterCockpit/cc-lib/v2/util" "github.com/aws/aws-sdk-go-v2/aws" awsconfig "github.com/aws/aws-sdk-go-v2/config" "github.com/aws/aws-sdk-go-v2/credentials" diff --git a/pkg/archive/s3Backend_test.go b/pkg/archive/s3Backend_test.go index 06324cd3..2b79db7f 100644 --- a/pkg/archive/s3Backend_test.go +++ b/pkg/archive/s3Backend_test.go @@ -13,7 +13,7 @@ import ( "strings" "testing" - "github.com/ClusterCockpit/cc-lib/schema" + "github.com/ClusterCockpit/cc-lib/v2/schema" "github.com/aws/aws-sdk-go-v2/aws" "github.com/aws/aws-sdk-go-v2/service/s3" "github.com/aws/aws-sdk-go-v2/service/s3/types" diff --git a/pkg/archive/sqliteBackend.go b/pkg/archive/sqliteBackend.go index 0b7a22d2..5bce9cea 100644 --- a/pkg/archive/sqliteBackend.go +++ b/pkg/archive/sqliteBackend.go @@ -21,9 +21,9 @@ import ( "time" "github.com/ClusterCockpit/cc-backend/internal/config" - cclog "github.com/ClusterCockpit/cc-lib/ccLogger" - "github.com/ClusterCockpit/cc-lib/schema" - "github.com/ClusterCockpit/cc-lib/util" + cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger" + "github.com/ClusterCockpit/cc-lib/v2/schema" + "github.com/ClusterCockpit/cc-lib/v2/util" _ "github.com/mattn/go-sqlite3" ) diff --git a/pkg/archive/sqliteBackend_test.go b/pkg/archive/sqliteBackend_test.go index b72b8f6c..5d05e14e 100644 --- a/pkg/archive/sqliteBackend_test.go +++ b/pkg/archive/sqliteBackend_test.go @@ -9,7 +9,7 @@ import ( "os" "testing" - "github.com/ClusterCockpit/cc-lib/schema" + "github.com/ClusterCockpit/cc-lib/v2/schema" ) func TestSqliteInitEmptyPath(t *testing.T) { diff --git a/pkg/nats/client.go b/pkg/nats/client.go index 822a7b26..a32ebdca 100644 --- a/pkg/nats/client.go +++ b/pkg/nats/client.go @@ -54,7 +54,7 @@ import ( "fmt" "sync" - cclog "github.com/ClusterCockpit/cc-lib/ccLogger" + cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger" "github.com/nats-io/nats.go" ) diff --git a/pkg/nats/config.go b/pkg/nats/config.go index 32a0bbda..c9ab48a5 100644 --- a/pkg/nats/config.go +++ b/pkg/nats/config.go @@ -9,7 +9,7 @@ import ( "bytes" "encoding/json" - cclog "github.com/ClusterCockpit/cc-lib/ccLogger" + cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger" ) // NatsConfig holds the configuration for connecting to a NATS server. 
diff --git a/pkg/nats/influxDecoder.go b/pkg/nats/influxDecoder.go deleted file mode 100644 index 412f85e9..00000000 --- a/pkg/nats/influxDecoder.go +++ /dev/null @@ -1,59 +0,0 @@ -// Copyright (C) NHR@FAU, University Erlangen-Nuremberg. -// All rights reserved. This file is part of cc-backend. -// Use of this source code is governed by a MIT-style -// license that can be found in the LICENSE file. - -package nats - -import ( - "time" - - lp "github.com/ClusterCockpit/cc-lib/ccMessage" - influx "github.com/influxdata/line-protocol/v2/lineprotocol" -) - -// DecodeInfluxMessage decodes a single InfluxDB line protocol message from the decoder -// Returns the decoded CCMessage or an error if decoding fails -func DecodeInfluxMessage(d *influx.Decoder) (lp.CCMessage, error) { - measurement, err := d.Measurement() - if err != nil { - return nil, err - } - - tags := make(map[string]string) - for { - key, value, err := d.NextTag() - if err != nil { - return nil, err - } - if key == nil { - break - } - tags[string(key)] = string(value) - } - - fields := make(map[string]interface{}) - for { - key, value, err := d.NextField() - if err != nil { - return nil, err - } - if key == nil { - break - } - fields[string(key)] = value.Interface() - } - - t, err := d.Time(influx.Nanosecond, time.Time{}) - if err != nil { - return nil, err - } - - return lp.NewMessage( - string(measurement), - tags, - nil, - fields, - t, - ) -} diff --git a/tools/archive-manager/import_test.go b/tools/archive-manager/import_test.go index b1032118..57294d50 100644 --- a/tools/archive-manager/import_test.go +++ b/tools/archive-manager/import_test.go @@ -12,8 +12,8 @@ import ( "testing" "github.com/ClusterCockpit/cc-backend/pkg/archive" - "github.com/ClusterCockpit/cc-lib/schema" - "github.com/ClusterCockpit/cc-lib/util" + "github.com/ClusterCockpit/cc-lib/v2/schema" + "github.com/ClusterCockpit/cc-lib/v2/util" ) // TestImportFileToSqlite tests importing jobs from file backend to SQLite backend diff --git a/tools/archive-manager/main.go b/tools/archive-manager/main.go index 4972fe96..f5f8b836 100644 --- a/tools/archive-manager/main.go +++ b/tools/archive-manager/main.go @@ -23,8 +23,8 @@ import ( "github.com/ClusterCockpit/cc-backend/internal/config" "github.com/ClusterCockpit/cc-backend/pkg/archive" - ccconf "github.com/ClusterCockpit/cc-lib/ccConfig" - cclog "github.com/ClusterCockpit/cc-lib/ccLogger" + ccconf "github.com/ClusterCockpit/cc-lib/v2/ccConfig" + cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger" ) func parseDate(in string) int64 { diff --git a/tools/archive-migration/main.go b/tools/archive-migration/main.go index 9bbed121..8375ee98 100644 --- a/tools/archive-migration/main.go +++ b/tools/archive-migration/main.go @@ -12,7 +12,7 @@ import ( "path/filepath" "strings" - cclog "github.com/ClusterCockpit/cc-lib/ccLogger" + cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger" ) func main() { diff --git a/tools/archive-migration/transforms.go b/tools/archive-migration/transforms.go index 6558e47a..ef4ba5eb 100644 --- a/tools/archive-migration/transforms.go +++ b/tools/archive-migration/transforms.go @@ -12,7 +12,7 @@ import ( "sync" "sync/atomic" - cclog "github.com/ClusterCockpit/cc-lib/ccLogger" + cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger" ) // transformExclusiveToShared converts the old 'exclusive' field to the new 'shared' field diff --git a/web/web.go b/web/web.go index 31d7002e..d2ae8700 100644 --- a/web/web.go +++ b/web/web.go @@ -16,9 +16,9 @@ import ( 
"github.com/ClusterCockpit/cc-backend/internal/config" "github.com/ClusterCockpit/cc-backend/pkg/archive" - cclog "github.com/ClusterCockpit/cc-lib/ccLogger" - "github.com/ClusterCockpit/cc-lib/schema" - "github.com/ClusterCockpit/cc-lib/util" + cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger" + "github.com/ClusterCockpit/cc-lib/v2/schema" + "github.com/ClusterCockpit/cc-lib/v2/util" ) type WebConfig struct { diff --git a/web/webConfig_test.go b/web/webConfig_test.go index 4bd84330..514fdabb 100644 --- a/web/webConfig_test.go +++ b/web/webConfig_test.go @@ -10,7 +10,7 @@ import ( "fmt" "testing" - ccconf "github.com/ClusterCockpit/cc-lib/ccConfig" + ccconf "github.com/ClusterCockpit/cc-lib/v2/ccConfig" ) func TestInit(t *testing.T) { From 11ec2267daaf67cbc6602e3ba395a16ede96503e Mon Sep 17 00:00:00 2001 From: Jan Eitzinger Date: Thu, 25 Dec 2025 08:42:54 +0100 Subject: [PATCH 19/59] Major refactor of metric data handling - make the internal memory store required and default - Rename memorystore to metricstore - Rename metricDataDispatcher to metricdispatch - Remove metricdata package - Introduce metricsync package for upstream metric data pull --- CLAUDE.md | 10 +- README.md | 8 +- cmd/cc-backend/main.go | 21 +- cmd/cc-backend/server.go | 10 +- internal/api/api_test.go | 12 +- internal/api/job.go | 6 +- .../api/{memorystore.go => metricstore.go} | 12 +- internal/api/nats_test.go | 9 +- internal/archiver/README.md | 4 +- internal/archiver/archiver.go | 4 +- internal/graph/schema.resolvers.go | 16 +- internal/graph/util.go | 6 +- internal/metricDataDispatcher/dataLoader.go | 381 ----- internal/metricdata/cc-metric-store.go | 1226 ----------------- internal/metricdata/metricdata.go | 88 -- internal/metricdata/prometheus.go | 587 -------- internal/metricdata/utils.go | 118 -- internal/metricdispatch/dataLoader.go | 490 +++++++ internal/metricdispatch/dataLoader_test.go | 125 ++ internal/{memorystore => metricstore}/api.go | 6 +- .../{memorystore => metricstore}/archive.go | 2 +- .../avroCheckpoint.go | 2 +- .../avroHelper.go | 2 +- .../avroStruct.go | 2 +- .../{memorystore => metricstore}/buffer.go | 2 +- .../checkpoint.go | 2 +- .../{memorystore => metricstore}/config.go | 4 +- .../configSchema.go | 2 +- .../{memorystore => metricstore}/debug.go | 2 +- .../healthcheck.go | 2 +- .../{memorystore => metricstore}/level.go | 2 +- .../lineprotocol.go | 2 +- .../memorystore.go | 4 +- .../memorystore_test.go | 2 +- .../query.go} | 146 +- .../{memorystore => metricstore}/stats.go | 2 +- internal/metricsync/metricdata.go | 60 + internal/repository/stats.go | 4 +- .../taskmanager/updateFootprintService.go | 10 +- 39 files changed, 815 insertions(+), 2578 deletions(-) rename internal/api/{memorystore.go => metricstore.go} (95%) delete mode 100644 internal/metricDataDispatcher/dataLoader.go delete mode 100644 internal/metricdata/cc-metric-store.go delete mode 100644 internal/metricdata/metricdata.go delete mode 100644 internal/metricdata/prometheus.go delete mode 100644 internal/metricdata/utils.go create mode 100644 internal/metricdispatch/dataLoader.go create mode 100644 internal/metricdispatch/dataLoader_test.go rename internal/{memorystore => metricstore}/api.go (98%) rename internal/{memorystore => metricstore}/archive.go (99%) rename internal/{memorystore => metricstore}/avroCheckpoint.go (99%) rename internal/{memorystore => metricstore}/avroHelper.go (99%) rename internal/{memorystore => metricstore}/avroStruct.go (99%) rename internal/{memorystore => metricstore}/buffer.go (99%) rename 
internal/{memorystore => metricstore}/checkpoint.go (99%) rename internal/{memorystore => metricstore}/config.go (98%) rename internal/{memorystore => metricstore}/configSchema.go (99%) rename internal/{memorystore => metricstore}/debug.go (99%) rename internal/{memorystore => metricstore}/healthcheck.go (99%) rename internal/{memorystore => metricstore}/level.go (99%) rename internal/{memorystore => metricstore}/lineprotocol.go (99%) rename internal/{memorystore => metricstore}/memorystore.go (99%) rename internal/{memorystore => metricstore}/memorystore_test.go (99%) rename internal/{metricdata/cc-metric-store-internal.go => metricstore/query.go} (87%) rename internal/{memorystore => metricstore}/stats.go (99%) create mode 100644 internal/metricsync/metricdata.go diff --git a/CLAUDE.md b/CLAUDE.md index 67412a76..f30c3923 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -96,9 +96,9 @@ The backend follows a layered architecture with clear separation of concerns: - **internal/auth**: Authentication layer - Supports local accounts, LDAP, OIDC, and JWT tokens - Implements rate limiting for login attempts -- **internal/metricdata**: Metric data repository abstraction - - Pluggable backends: cc-metric-store, Prometheus, InfluxDB - - Each cluster can have a different metric data backend +- **internal/metricstore**: Metric store with data loading API + - In-memory metric storage with checkpointing + - Query API for loading job metric data - **internal/archiver**: Job archiving to file-based archive - **internal/api/nats.go**: NATS-based API for job and node operations - Subscribes to NATS subjects for job events (start/stop) @@ -209,8 +209,8 @@ applied automatically on startup. Version tracking in `version` table. ### Adding a new metric data backend -1. Implement `MetricDataRepository` interface in `internal/metricdata/` -2. Register in `metricdata.Init()` switch statement +1. Implement metric loading functions in `internal/metricstore/query.go` +2. Add cluster configuration to metric store initialization 3. 
Update config.json schema documentation ### Modifying database schema diff --git a/README.md b/README.md index 468a12ad..00bcb119 100644 --- a/README.md +++ b/README.md @@ -163,11 +163,9 @@ ln -s ./var/job-archive GraphQL schema and resolvers - [`importer`](https://github.com/ClusterCockpit/cc-backend/tree/master/internal/importer) Job data import and database initialization - - [`memorystore`](https://github.com/ClusterCockpit/cc-backend/tree/master/internal/memorystore) - In-memory metric data store with checkpointing - - [`metricdata`](https://github.com/ClusterCockpit/cc-backend/tree/master/internal/metricdata) - Metric data repository implementations (cc-metric-store, Prometheus) - - [`metricDataDispatcher`](https://github.com/ClusterCockpit/cc-backend/tree/master/internal/metricDataDispatcher) + - [`metricstore`](https://github.com/ClusterCockpit/cc-backend/tree/master/internal/metricstore) + In-memory metric data store with checkpointing and metric loading + - [`metricdispatch`](https://github.com/ClusterCockpit/cc-backend/tree/master/internal/metricdispatch) Dispatches metric data loading to appropriate backends - [`repository`](https://github.com/ClusterCockpit/cc-backend/tree/master/internal/repository) Database repository layer for jobs and metadata diff --git a/cmd/cc-backend/main.go b/cmd/cc-backend/main.go index f8b4aea1..331df4f6 100644 --- a/cmd/cc-backend/main.go +++ b/cmd/cc-backend/main.go @@ -24,8 +24,7 @@ import ( "github.com/ClusterCockpit/cc-backend/internal/auth" "github.com/ClusterCockpit/cc-backend/internal/config" "github.com/ClusterCockpit/cc-backend/internal/importer" - "github.com/ClusterCockpit/cc-backend/internal/memorystore" - "github.com/ClusterCockpit/cc-backend/internal/metricdata" + "github.com/ClusterCockpit/cc-backend/internal/metricstore" "github.com/ClusterCockpit/cc-backend/internal/repository" "github.com/ClusterCockpit/cc-backend/internal/tagger" "github.com/ClusterCockpit/cc-backend/internal/taskmanager" @@ -283,10 +282,7 @@ func initSubsystems() error { return fmt.Errorf("initializing archive: %w", err) } - // Initialize metricdata - if err := metricdata.Init(); err != nil { - return fmt.Errorf("initializing metricdata repository: %w", err) - } + // Note: metricstore.Init() is called later in runServer() with proper configuration // Handle database re-initialization if flagReinitDB { @@ -322,13 +318,12 @@ func initSubsystems() error { func runServer(ctx context.Context) error { var wg sync.WaitGroup - // Start metric store if enabled - if memorystore.InternalCCMSFlag { - mscfg := ccconf.GetPackageConfig("metric-store") - if mscfg == nil { - return fmt.Errorf("metric store configuration must be present") - } - memorystore.Init(mscfg, &wg) + // Initialize metric store if configuration is provided + mscfg := ccconf.GetPackageConfig("metric-store") + if mscfg != nil { + metricstore.Init(mscfg, &wg) + } else { + cclog.Debug("Metric store configuration not found, skipping metricstore initialization") } // Start archiver and task manager diff --git a/cmd/cc-backend/server.go b/cmd/cc-backend/server.go index 53e24c88..8d700823 100644 --- a/cmd/cc-backend/server.go +++ b/cmd/cc-backend/server.go @@ -29,7 +29,7 @@ import ( "github.com/ClusterCockpit/cc-backend/internal/config" "github.com/ClusterCockpit/cc-backend/internal/graph" "github.com/ClusterCockpit/cc-backend/internal/graph/generated" - "github.com/ClusterCockpit/cc-backend/internal/memorystore" + "github.com/ClusterCockpit/cc-backend/internal/metricstore" 
"github.com/ClusterCockpit/cc-backend/internal/routerConfig" "github.com/ClusterCockpit/cc-backend/pkg/nats" "github.com/ClusterCockpit/cc-backend/web" @@ -253,9 +253,7 @@ func (s *Server) init() error { } } - if memorystore.InternalCCMSFlag { - s.restAPIHandle.MountMetricStoreAPIRoutes(metricstoreapi) - } + s.restAPIHandle.MountMetricStoreAPIRoutes(metricstoreapi) if config.Keys.EmbedStaticFiles { if i, err := os.Stat("./var/img"); err == nil { @@ -383,9 +381,7 @@ func (s *Server) Shutdown(ctx context.Context) { } // Archive all the metric store data - if memorystore.InternalCCMSFlag { - memorystore.Shutdown() - } + metricstore.Shutdown() // Shutdown archiver with 10 second timeout for fast shutdown if err := archiver.Shutdown(10 * time.Second); err != nil { diff --git a/internal/api/api_test.go b/internal/api/api_test.go index 50605f7b..a2283013 100644 --- a/internal/api/api_test.go +++ b/internal/api/api_test.go @@ -23,8 +23,8 @@ import ( "github.com/ClusterCockpit/cc-backend/internal/auth" "github.com/ClusterCockpit/cc-backend/internal/config" "github.com/ClusterCockpit/cc-backend/internal/graph" - "github.com/ClusterCockpit/cc-backend/internal/metricDataDispatcher" - "github.com/ClusterCockpit/cc-backend/internal/metricdata" + "github.com/ClusterCockpit/cc-backend/internal/metricdispatch" + "github.com/ClusterCockpit/cc-backend/internal/metricstore" "github.com/ClusterCockpit/cc-backend/internal/repository" "github.com/ClusterCockpit/cc-backend/pkg/archive" ccconf "github.com/ClusterCockpit/cc-lib/v2/ccConfig" @@ -173,9 +173,7 @@ func setup(t *testing.T) *api.RestAPI { t.Fatal(err) } - if err := metricdata.Init(); err != nil { - t.Fatal(err) - } + // metricstore initialization removed - it's initialized via callback in tests archiver.Start(repository.GetJobRepository(), context.Background()) @@ -221,7 +219,7 @@ func TestRestApi(t *testing.T) { }, } - metricdata.TestLoadDataCallback = func(job *schema.Job, metrics []string, scopes []schema.MetricScope, ctx context.Context, resolution int) (schema.JobData, error) { + metricstore.TestLoadDataCallback = func(job *schema.Job, metrics []string, scopes []schema.MetricScope, ctx context.Context, resolution int) (schema.JobData, error) { return testData, nil } @@ -366,7 +364,7 @@ func TestRestApi(t *testing.T) { } t.Run("CheckArchive", func(t *testing.T) { - data, err := metricDataDispatcher.LoadData(stoppedJob, []string{"load_one"}, []schema.MetricScope{schema.MetricScopeNode}, context.Background(), 60) + data, err := metricdispatch.LoadData(stoppedJob, []string{"load_one"}, []schema.MetricScope{schema.MetricScopeNode}, context.Background(), 60) if err != nil { t.Fatal(err) } diff --git a/internal/api/job.go b/internal/api/job.go index 9b970c2e..09f7b22c 100644 --- a/internal/api/job.go +++ b/internal/api/job.go @@ -22,7 +22,7 @@ import ( "github.com/ClusterCockpit/cc-backend/internal/graph" "github.com/ClusterCockpit/cc-backend/internal/graph/model" "github.com/ClusterCockpit/cc-backend/internal/importer" - "github.com/ClusterCockpit/cc-backend/internal/metricDataDispatcher" + "github.com/ClusterCockpit/cc-backend/internal/metricdispatch" "github.com/ClusterCockpit/cc-backend/internal/repository" "github.com/ClusterCockpit/cc-backend/pkg/archive" cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger" @@ -293,7 +293,7 @@ func (api *RestAPI) getCompleteJobByID(rw http.ResponseWriter, r *http.Request) } if r.URL.Query().Get("all-metrics") == "true" { - data, err = metricDataDispatcher.LoadData(job, nil, scopes, r.Context(), resolution) + data, 
err = metricdispatch.LoadData(job, nil, scopes, r.Context(), resolution) if err != nil { cclog.Warnf("REST: error while loading all-metrics job data for JobID %d on %s", job.JobID, job.Cluster) return @@ -389,7 +389,7 @@ func (api *RestAPI) getJobByID(rw http.ResponseWriter, r *http.Request) { resolution = max(resolution, mc.Timestep) } - data, err := metricDataDispatcher.LoadData(job, metrics, scopes, r.Context(), resolution) + data, err := metricdispatch.LoadData(job, metrics, scopes, r.Context(), resolution) if err != nil { cclog.Warnf("REST: error while loading job data for JobID %d on %s", job.JobID, job.Cluster) return diff --git a/internal/api/memorystore.go b/internal/api/metricstore.go similarity index 95% rename from internal/api/memorystore.go rename to internal/api/metricstore.go index 56c396e2..d4ab1dfe 100644 --- a/internal/api/memorystore.go +++ b/internal/api/metricstore.go @@ -15,7 +15,7 @@ import ( "strconv" "strings" - "github.com/ClusterCockpit/cc-backend/internal/memorystore" + "github.com/ClusterCockpit/cc-backend/internal/metricstore" cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger" "github.com/influxdata/line-protocol/v2/lineprotocol" @@ -58,7 +58,7 @@ func freeMetrics(rw http.ResponseWriter, r *http.Request) { return } - ms := memorystore.GetMemoryStore() + ms := metricstore.GetMemoryStore() n := 0 for _, sel := range selectors { bn, err := ms.Free(sel, to) @@ -97,9 +97,9 @@ func writeMetrics(rw http.ResponseWriter, r *http.Request) { return } - ms := memorystore.GetMemoryStore() + ms := metricstore.GetMemoryStore() dec := lineprotocol.NewDecoderWithBytes(bytes) - if err := memorystore.DecodeLine(dec, ms, r.URL.Query().Get("cluster")); err != nil { + if err := metricstore.DecodeLine(dec, ms, r.URL.Query().Get("cluster")); err != nil { cclog.Errorf("/api/write error: %s", err.Error()) handleError(err, http.StatusBadRequest, rw) return @@ -129,7 +129,7 @@ func debugMetrics(rw http.ResponseWriter, r *http.Request) { selector = strings.Split(raw, ":") } - ms := memorystore.GetMemoryStore() + ms := metricstore.GetMemoryStore() if err := ms.DebugDump(bufio.NewWriter(rw), selector); err != nil { handleError(err, http.StatusBadRequest, rw) return @@ -162,7 +162,7 @@ func metricsHealth(rw http.ResponseWriter, r *http.Request) { selector := []string{rawCluster, rawNode} - ms := memorystore.GetMemoryStore() + ms := metricstore.GetMemoryStore() if err := ms.HealthCheck(bufio.NewWriter(rw), selector); err != nil { handleError(err, http.StatusBadRequest, rw) return diff --git a/internal/api/nats_test.go b/internal/api/nats_test.go index c9415afc..9e1fa2b5 100644 --- a/internal/api/nats_test.go +++ b/internal/api/nats_test.go @@ -18,7 +18,8 @@ import ( "github.com/ClusterCockpit/cc-backend/internal/auth" "github.com/ClusterCockpit/cc-backend/internal/config" "github.com/ClusterCockpit/cc-backend/internal/graph" - "github.com/ClusterCockpit/cc-backend/internal/metricdata" + "github.com/ClusterCockpit/cc-backend/internal/importer" + "github.com/ClusterCockpit/cc-backend/internal/metricstore" "github.com/ClusterCockpit/cc-backend/internal/repository" "github.com/ClusterCockpit/cc-backend/pkg/archive" ccconf "github.com/ClusterCockpit/cc-lib/v2/ccConfig" @@ -167,9 +168,7 @@ func setupNatsTest(t *testing.T) *NatsAPI { t.Fatal(err) } - if err := metricdata.Init(); err != nil { - t.Fatal(err) - } + // metricstore initialization removed - it's initialized via callback in tests archiver.Start(repository.GetJobRepository(), context.Background()) @@ -564,7 +563,7 @@ func 
TestNatsHandleStopJob(t *testing.T) {
 		},
 	}
 
-	metricdata.TestLoadDataCallback = func(job *schema.Job, metrics []string, scopes []schema.MetricScope, ctx context.Context, resolution int) (schema.JobData, error) {
+	metricstore.TestLoadDataCallback = func(job *schema.Job, metrics []string, scopes []schema.MetricScope, ctx context.Context, resolution int) (schema.JobData, error) {
 		return testData, nil
 	}
 
diff --git a/internal/archiver/README.md b/internal/archiver/README.md
index 0fae04ea..48aed797 100644
--- a/internal/archiver/README.md
+++ b/internal/archiver/README.md
@@ -106,7 +106,7 @@ Data is archived at the highest available resolution (typically 60s intervals).
 
 ```go
 // In archiver.go ArchiveJob() function
-jobData, err := metricDataDispatcher.LoadData(job, allMetrics, scopes, ctx, 300)
+jobData, err := metricdispatch.LoadData(job, allMetrics, scopes, ctx, 300)
 // 0 = highest resolution
 // 300 = 5-minute resolution
 ```
@@ -185,6 +185,6 @@ Internal state is protected by:
 ## Dependencies
 
 - `internal/repository`: Database operations for job metadata
-- `internal/metricDataDispatcher`: Loading metric data from various backends
+- `internal/metricdispatch`: Loading metric data from various backends
 - `pkg/archive`: Archive backend abstraction (filesystem, S3, SQLite)
 - `cc-lib/schema`: Job and metric data structures
diff --git a/internal/archiver/archiver.go b/internal/archiver/archiver.go
index 46ce8126..4e0b6473 100644
--- a/internal/archiver/archiver.go
+++ b/internal/archiver/archiver.go
@@ -10,7 +10,7 @@ import (
 	"math"
 
 	"github.com/ClusterCockpit/cc-backend/internal/config"
-	"github.com/ClusterCockpit/cc-backend/internal/metricDataDispatcher"
+	"github.com/ClusterCockpit/cc-backend/internal/metricdispatch"
 	"github.com/ClusterCockpit/cc-backend/pkg/archive"
 	cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger"
 	"github.com/ClusterCockpit/cc-lib/v2/schema"
@@ -60,7 +60,7 @@ func ArchiveJob(job *schema.Job, ctx context.Context) (*schema.Job, error) {
 		scopes = append(scopes, schema.MetricScopeAccelerator)
 	}
-	jobData, err := metricDataDispatcher.LoadData(job, allMetrics, scopes, ctx, 0) // 0 Resulotion-Value retrieves highest res (60s)
+	jobData, err := metricdispatch.LoadData(job, allMetrics, scopes, ctx, 0) // 0 Resolution-Value retrieves highest res (60s)
 	if err != nil {
 		cclog.Error("Error wile loading job data for archiving")
 		return nil, err
 	}
diff --git a/internal/graph/schema.resolvers.go b/internal/graph/schema.resolvers.go
index 32499b8c..34bbf393 100644
--- a/internal/graph/schema.resolvers.go
+++ b/internal/graph/schema.resolvers.go
@@ -19,7 +19,7 @@ import (
 	"github.com/ClusterCockpit/cc-backend/internal/config"
 	"github.com/ClusterCockpit/cc-backend/internal/graph/generated"
 	"github.com/ClusterCockpit/cc-backend/internal/graph/model"
-	"github.com/ClusterCockpit/cc-backend/internal/metricDataDispatcher"
+	"github.com/ClusterCockpit/cc-backend/internal/metricdispatch"
 	"github.com/ClusterCockpit/cc-backend/internal/repository"
 	"github.com/ClusterCockpit/cc-backend/pkg/archive"
 	cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger"
@@ -484,7 +484,7 @@ func (r *queryResolver) JobMetrics(ctx context.Context, id string, metrics []str
 		return nil, err
 	}
 
-	data, err := metricDataDispatcher.LoadData(job, metrics, scopes, ctx, *resolution)
+	data, err := metricdispatch.LoadData(job, metrics, scopes, ctx, *resolution)
 	if err != nil {
 		cclog.Warn("Error while loading job data")
 		return nil, err
@@ -512,7 +512,7 @@ func (r *queryResolver) JobStats(ctx context.Context, id string, metrics []strin
return nil, err } - data, err := metricDataDispatcher.LoadJobStats(job, metrics, ctx) + data, err := metricdispatch.LoadJobStats(job, metrics, ctx) if err != nil { cclog.Warnf("Error while loading jobStats data for job id %s", id) return nil, err @@ -537,7 +537,7 @@ func (r *queryResolver) ScopedJobStats(ctx context.Context, id string, metrics [ return nil, err } - data, err := metricDataDispatcher.LoadScopedJobStats(job, metrics, scopes, ctx) + data, err := metricdispatch.LoadScopedJobStats(job, metrics, scopes, ctx) if err != nil { cclog.Warnf("Error while loading scopedJobStats data for job id %s", id) return nil, err @@ -702,7 +702,7 @@ func (r *queryResolver) JobsMetricStats(ctx context.Context, filter []*model.Job res := []*model.JobStats{} for _, job := range jobs { - data, err := metricDataDispatcher.LoadJobStats(job, metrics, ctx) + data, err := metricdispatch.LoadJobStats(job, metrics, ctx) if err != nil { cclog.Warnf("Error while loading comparison jobStats data for job id %d", job.JobID) continue @@ -759,7 +759,7 @@ func (r *queryResolver) NodeMetrics(ctx context.Context, cluster string, nodes [ } } - data, err := metricDataDispatcher.LoadNodeData(cluster, metrics, nodes, scopes, from, to, ctx) + data, err := metricdispatch.LoadNodeData(cluster, metrics, nodes, scopes, from, to, ctx) if err != nil { cclog.Warn("error while loading node data") return nil, err @@ -825,7 +825,7 @@ func (r *queryResolver) NodeMetricsList(ctx context.Context, cluster string, sub } } - data, err := metricDataDispatcher.LoadNodeListData(cluster, subCluster, nodes, metrics, scopes, *resolution, from, to, ctx) + data, err := metricdispatch.LoadNodeListData(cluster, subCluster, nodes, metrics, scopes, *resolution, from, to, ctx) if err != nil { cclog.Warn("error while loading node data (Resolver.NodeMetricsList") return nil, err @@ -880,7 +880,7 @@ func (r *queryResolver) ClusterMetrics(ctx context.Context, cluster string, metr // 'nodes' == nil -> Defaults to all nodes of cluster for existing query workflow scopes := []schema.MetricScope{"node"} - data, err := metricDataDispatcher.LoadNodeData(cluster, metrics, nil, scopes, from, to, ctx) + data, err := metricdispatch.LoadNodeData(cluster, metrics, nil, scopes, from, to, ctx) if err != nil { cclog.Warn("error while loading node data") return nil, err diff --git a/internal/graph/util.go b/internal/graph/util.go index 42a1d2fb..4135ca72 100644 --- a/internal/graph/util.go +++ b/internal/graph/util.go @@ -13,7 +13,7 @@ import ( "github.com/99designs/gqlgen/graphql" "github.com/ClusterCockpit/cc-backend/internal/graph/model" - "github.com/ClusterCockpit/cc-backend/internal/metricDataDispatcher" + "github.com/ClusterCockpit/cc-backend/internal/metricdispatch" cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger" "github.com/ClusterCockpit/cc-lib/v2/schema" ) @@ -55,7 +55,7 @@ func (r *queryResolver) rooflineHeatmap( // resolution = max(resolution, mc.Timestep) // } - jobdata, err := metricDataDispatcher.LoadData(job, []string{"flops_any", "mem_bw"}, []schema.MetricScope{schema.MetricScopeNode}, ctx, 0) + jobdata, err := metricdispatch.LoadData(job, []string{"flops_any", "mem_bw"}, []schema.MetricScope{schema.MetricScopeNode}, ctx, 0) if err != nil { cclog.Errorf("Error while loading roofline metrics for job %d", job.ID) return nil, err @@ -128,7 +128,7 @@ func (r *queryResolver) jobsFootprints(ctx context.Context, filter []*model.JobF continue } - if err := metricDataDispatcher.LoadAverages(job, metrics, avgs, ctx); err != nil { + if err := 
metricdispatch.LoadAverages(job, metrics, avgs, ctx); err != nil { cclog.Error("Error while loading averages for footprint") return nil, err } diff --git a/internal/metricDataDispatcher/dataLoader.go b/internal/metricDataDispatcher/dataLoader.go deleted file mode 100644 index 6d1338fa..00000000 --- a/internal/metricDataDispatcher/dataLoader.go +++ /dev/null @@ -1,381 +0,0 @@ -// Copyright (C) NHR@FAU, University Erlangen-Nuremberg. -// All rights reserved. This file is part of cc-backend. -// Use of this source code is governed by a MIT-style -// license that can be found in the LICENSE file. -package metricDataDispatcher - -import ( - "context" - "fmt" - "math" - "time" - - "github.com/ClusterCockpit/cc-backend/internal/config" - "github.com/ClusterCockpit/cc-backend/internal/metricdata" - "github.com/ClusterCockpit/cc-backend/pkg/archive" - cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger" - "github.com/ClusterCockpit/cc-lib/v2/lrucache" - "github.com/ClusterCockpit/cc-lib/v2/resampler" - "github.com/ClusterCockpit/cc-lib/v2/schema" -) - -var cache *lrucache.Cache = lrucache.New(128 * 1024 * 1024) - -func cacheKey( - job *schema.Job, - metrics []string, - scopes []schema.MetricScope, - resolution int, -) string { - // Duration and StartTime do not need to be in the cache key as StartTime is less unique than - // job.ID and the TTL of the cache entry makes sure it does not stay there forever. - return fmt.Sprintf("%d(%s):[%v],[%v]-%d", - job.ID, job.State, metrics, scopes, resolution) -} - -// Fetches the metric data for a job. -func LoadData(job *schema.Job, - metrics []string, - scopes []schema.MetricScope, - ctx context.Context, - resolution int, -) (schema.JobData, error) { - data := cache.Get(cacheKey(job, metrics, scopes, resolution), func() (_ any, ttl time.Duration, size int) { - var jd schema.JobData - var err error - - if job.State == schema.JobStateRunning || - job.MonitoringStatus == schema.MonitoringStatusRunningOrArchiving || - config.Keys.DisableArchive { - - repo, err := metricdata.GetMetricDataRepo(job.Cluster) - if err != nil { - return fmt.Errorf("METRICDATA/METRICDATA > no metric data repository configured for '%s'", job.Cluster), 0, 0 - } - - if scopes == nil { - scopes = append(scopes, schema.MetricScopeNode) - } - - if metrics == nil { - cluster := archive.GetCluster(job.Cluster) - for _, mc := range cluster.MetricConfig { - metrics = append(metrics, mc.Name) - } - } - - jd, err = repo.LoadData(job, metrics, scopes, ctx, resolution) - if err != nil { - if len(jd) != 0 { - cclog.Warnf("partial error: %s", err.Error()) - // return err, 0, 0 // Reactivating will block archiving on one partial error - } else { - cclog.Error("Error while loading job data from metric repository") - return err, 0, 0 - } - } - size = jd.Size() - } else { - var jd_temp schema.JobData - jd_temp, err = archive.GetHandle().LoadJobData(job) - if err != nil { - cclog.Error("Error while loading job data from archive") - return err, 0, 0 - } - - // Deep copy the cached archive hashmap - jd = metricdata.DeepCopy(jd_temp) - - // Resampling for archived data. - // Pass the resolution from frontend here. 
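The comment above closes the explanation of how archived job data is brought down to the resolution the frontend asked for. The deleted loop below amounts to the following pattern — a minimal sketch built around the `resampler.LargestTriangleThreeBucket` call visible in the removed code; the helper name `resampleMetric` is made up for illustration:

```go
import (
	"github.com/ClusterCockpit/cc-lib/v2/resampler"
	"github.com/ClusterCockpit/cc-lib/v2/schema"
)

// resampleMetric downsamples every series of one archived metric to the
// requested resolution (in seconds) and records the timestep actually
// produced by the resampler.
func resampleMetric(jm *schema.JobMetric, resolution int) error {
	for i := range jm.Series {
		data, timestep, err := resampler.LargestTriangleThreeBucket(
			jm.Series[i].Data, int64(jm.Timestep), int64(resolution))
		if err != nil {
			return err
		}
		jm.Series[i].Data = data
		jm.Timestep = int(timestep)
	}
	return nil
}
```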
- for _, v := range jd { - for _, v_ := range v { - timestep := int64(0) - for i := 0; i < len(v_.Series); i += 1 { - v_.Series[i].Data, timestep, err = resampler.LargestTriangleThreeBucket(v_.Series[i].Data, int64(v_.Timestep), int64(resolution)) - if err != nil { - return err, 0, 0 - } - } - v_.Timestep = int(timestep) - } - } - - // Avoid sending unrequested data to the client: - if metrics != nil || scopes != nil { - if metrics == nil { - metrics = make([]string, 0, len(jd)) - for k := range jd { - metrics = append(metrics, k) - } - } - - res := schema.JobData{} - for _, metric := range metrics { - if perscope, ok := jd[metric]; ok { - if len(perscope) > 1 { - subset := make(map[schema.MetricScope]*schema.JobMetric) - for _, scope := range scopes { - if jm, ok := perscope[scope]; ok { - subset[scope] = jm - } - } - - if len(subset) > 0 { - perscope = subset - } - } - - res[metric] = perscope - } - } - jd = res - } - size = jd.Size() - } - - ttl = 5 * time.Hour - if job.State == schema.JobStateRunning { - ttl = 2 * time.Minute - } - - // FIXME: Review: Is this really necessary or correct. - // Note: Lines 147-170 formerly known as prepareJobData(jobData, scopes) - // For /monitoring/job/ and some other places, flops_any and mem_bw need - // to be available at the scope 'node'. If a job has a lot of nodes, - // statisticsSeries should be available so that a min/median/max Graph can be - // used instead of a lot of single lines. - // NOTE: New StatsSeries will always be calculated as 'min/median/max' - // Existing (archived) StatsSeries can be 'min/mean/max'! - const maxSeriesSize int = 15 - for _, scopes := range jd { - for _, jm := range scopes { - if jm.StatisticsSeries != nil || len(jm.Series) <= maxSeriesSize { - continue - } - - jm.AddStatisticsSeries() - } - } - - nodeScopeRequested := false - for _, scope := range scopes { - if scope == schema.MetricScopeNode { - nodeScopeRequested = true - } - } - - if nodeScopeRequested { - jd.AddNodeScope("flops_any") - jd.AddNodeScope("mem_bw") - } - - // Round Resulting Stat Values - jd.RoundMetricStats() - - return jd, ttl, size - }) - - if err, ok := data.(error); ok { - cclog.Error("Error in returned dataset") - return nil, err - } - - return data.(schema.JobData), nil -} - -// Used for the jobsFootprint GraphQL-Query. TODO: Rename/Generalize. -func LoadAverages( - job *schema.Job, - metrics []string, - data [][]schema.Float, - ctx context.Context, -) error { - if job.State != schema.JobStateRunning && !config.Keys.DisableArchive { - return archive.LoadAveragesFromArchive(job, metrics, data) // #166 change also here? - } - - repo, err := metricdata.GetMetricDataRepo(job.Cluster) - if err != nil { - return fmt.Errorf("METRICDATA/METRICDATA > no metric data repository configured for '%s'", job.Cluster) - } - - stats, err := repo.LoadStats(job, metrics, ctx) // #166 how to handle stats for acc normalizazion? - if err != nil { - cclog.Errorf("Error while loading statistics for job %v (User %v, Project %v)", job.JobID, job.User, job.Project) - return err - } - - for i, m := range metrics { - nodes, ok := stats[m] - if !ok { - data[i] = append(data[i], schema.NaN) - continue - } - - sum := 0.0 - for _, node := range nodes { - sum += node.Avg - } - data[i] = append(data[i], schema.Float(sum)) - } - - return nil -} - -// Used for statsTable in frontend: Return scoped statistics by metric. 
-func LoadScopedJobStats( - job *schema.Job, - metrics []string, - scopes []schema.MetricScope, - ctx context.Context, -) (schema.ScopedJobStats, error) { - if job.State != schema.JobStateRunning && !config.Keys.DisableArchive { - return archive.LoadScopedStatsFromArchive(job, metrics, scopes) - } - - repo, err := metricdata.GetMetricDataRepo(job.Cluster) - if err != nil { - return nil, fmt.Errorf("job %d: no metric data repository configured for '%s'", job.JobID, job.Cluster) - } - - scopedStats, err := repo.LoadScopedStats(job, metrics, scopes, ctx) - if err != nil { - cclog.Errorf("error while loading scoped statistics for job %d (User %s, Project %s)", job.JobID, job.User, job.Project) - return nil, err - } - - return scopedStats, nil -} - -// Used for polar plots in frontend: Aggregates statistics for all nodes to single values for job per metric. -func LoadJobStats( - job *schema.Job, - metrics []string, - ctx context.Context, -) (map[string]schema.MetricStatistics, error) { - if job.State != schema.JobStateRunning && !config.Keys.DisableArchive { - return archive.LoadStatsFromArchive(job, metrics) - } - - data := make(map[string]schema.MetricStatistics, len(metrics)) - repo, err := metricdata.GetMetricDataRepo(job.Cluster) - if err != nil { - return data, fmt.Errorf("job %d: no metric data repository configured for '%s'", job.JobID, job.Cluster) - } - - stats, err := repo.LoadStats(job, metrics, ctx) - if err != nil { - cclog.Errorf("error while loading statistics for job %d (User %s, Project %s)", job.JobID, job.User, job.Project) - return data, err - } - - for _, m := range metrics { - sum, avg, min, max := 0.0, 0.0, 0.0, 0.0 - nodes, ok := stats[m] - if !ok { - data[m] = schema.MetricStatistics{Min: min, Avg: avg, Max: max} - continue - } - - for _, node := range nodes { - sum += node.Avg - min = math.Min(min, node.Min) - max = math.Max(max, node.Max) - } - - data[m] = schema.MetricStatistics{ - Avg: (math.Round((sum/float64(job.NumNodes))*100) / 100), - Min: (math.Round(min*100) / 100), - Max: (math.Round(max*100) / 100), - } - } - - return data, nil -} - -// Used for the classic node/system view. Returns a map of nodes to a map of metrics. 
-func LoadNodeData( - cluster string, - metrics, nodes []string, - scopes []schema.MetricScope, - from, to time.Time, - ctx context.Context, -) (map[string]map[string][]*schema.JobMetric, error) { - repo, err := metricdata.GetMetricDataRepo(cluster) - if err != nil { - return nil, fmt.Errorf("METRICDATA/METRICDATA > no metric data repository configured for '%s'", cluster) - } - - if metrics == nil { - for _, m := range archive.GetCluster(cluster).MetricConfig { - metrics = append(metrics, m.Name) - } - } - - data, err := repo.LoadNodeData(cluster, metrics, nodes, scopes, from, to, ctx) - if err != nil { - if len(data) != 0 { - cclog.Warnf("partial error: %s", err.Error()) - } else { - cclog.Error("Error while loading node data from metric repository") - return nil, err - } - } - - if data == nil { - return nil, fmt.Errorf("METRICDATA/METRICDATA > the metric data repository for '%s' does not support this query", cluster) - } - - return data, nil -} - -func LoadNodeListData( - cluster, subCluster string, - nodes []string, - metrics []string, - scopes []schema.MetricScope, - resolution int, - from, to time.Time, - ctx context.Context, -) (map[string]schema.JobData, error) { - repo, err := metricdata.GetMetricDataRepo(cluster) - if err != nil { - return nil, fmt.Errorf("METRICDATA/METRICDATA > no metric data repository configured for '%s'", cluster) - } - - if metrics == nil { - for _, m := range archive.GetCluster(cluster).MetricConfig { - metrics = append(metrics, m.Name) - } - } - - data, err := repo.LoadNodeListData(cluster, subCluster, nodes, metrics, scopes, resolution, from, to, ctx) - if err != nil { - if len(data) != 0 { - cclog.Warnf("partial error: %s", err.Error()) - } else { - cclog.Error("Error while loading node data from metric repository") - return nil, err - } - } - - // NOTE: New StatsSeries will always be calculated as 'min/median/max' - const maxSeriesSize int = 8 - for _, jd := range data { - for _, scopes := range jd { - for _, jm := range scopes { - if jm.StatisticsSeries != nil || len(jm.Series) < maxSeriesSize { - continue - } - jm.AddStatisticsSeries() - } - } - } - - if data == nil { - return nil, fmt.Errorf("METRICDATA/METRICDATA > the metric data repository for '%s' does not support this query", cluster) - } - - return data, nil -} diff --git a/internal/metricdata/cc-metric-store.go b/internal/metricdata/cc-metric-store.go deleted file mode 100644 index 6c146f22..00000000 --- a/internal/metricdata/cc-metric-store.go +++ /dev/null @@ -1,1226 +0,0 @@ -// Copyright (C) NHR@FAU, University Erlangen-Nuremberg. -// All rights reserved. -// Use of this source code is governed by a MIT-style -// license that can be found in the LICENSE file. -package metricdata - -import ( - "bufio" - "bytes" - "context" - "encoding/json" - "fmt" - "net/http" - "strings" - "time" - - "github.com/ClusterCockpit/cc-backend/pkg/archive" - cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger" - "github.com/ClusterCockpit/cc-lib/v2/schema" -) - -type CCMetricStoreConfig struct { - Kind string `json:"kind"` - Url string `json:"url"` - Token string `json:"token"` - - // If metrics are known to this MetricDataRepository under a different - // name than in the `metricConfig` section of the 'cluster.json', - // provide this optional mapping of local to remote name for this metric. 
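The renaming map described by this comment is inverted once at startup so the store can translate metric names in both directions (see `Init` below). A small standalone sketch of that pattern; the metric names are purely illustrative:

```go
// here2there maps local metric names to the names the remote
// cc-metric-store knows them by; there2here is the inverse lookup.
here2there := map[string]string{"mem_bw": "memory_bandwidth"} // example entry
there2here := make(map[string]string, len(here2there))
for local, remote := range here2there {
	there2here[remote] = local
}
```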
- Renamings map[string]string `json:"metricRenamings"` -} - -type CCMetricStore struct { - here2there map[string]string - there2here map[string]string - client http.Client - jwt string - url string - queryEndpoint string -} - -type ApiQueryRequest struct { - Cluster string `json:"cluster"` - Queries []ApiQuery `json:"queries"` - ForAllNodes []string `json:"for-all-nodes"` - From int64 `json:"from"` - To int64 `json:"to"` - WithStats bool `json:"with-stats"` - WithData bool `json:"with-data"` -} - -type ApiQuery struct { - Type *string `json:"type,omitempty"` - SubType *string `json:"subtype,omitempty"` - Metric string `json:"metric"` - Hostname string `json:"host"` - Resolution int `json:"resolution"` - TypeIds []string `json:"type-ids,omitempty"` - SubTypeIds []string `json:"subtype-ids,omitempty"` - Aggregate bool `json:"aggreg"` -} - -type ApiQueryResponse struct { - Queries []ApiQuery `json:"queries,omitempty"` - Results [][]ApiMetricData `json:"results"` -} - -type ApiMetricData struct { - Error *string `json:"error"` - Data []schema.Float `json:"data"` - From int64 `json:"from"` - To int64 `json:"to"` - Resolution int `json:"resolution"` - Avg schema.Float `json:"avg"` - Min schema.Float `json:"min"` - Max schema.Float `json:"max"` -} - -func (ccms *CCMetricStore) Init(rawConfig json.RawMessage) error { - var config CCMetricStoreConfig - if err := json.Unmarshal(rawConfig, &config); err != nil { - cclog.Warn("Error while unmarshaling raw json config") - return err - } - - ccms.url = config.Url - ccms.queryEndpoint = fmt.Sprintf("%s/api/query", config.Url) - ccms.jwt = config.Token - ccms.client = http.Client{ - Timeout: 10 * time.Second, - } - - if config.Renamings != nil { - ccms.here2there = config.Renamings - ccms.there2here = make(map[string]string, len(config.Renamings)) - for k, v := range ccms.here2there { - ccms.there2here[v] = k - } - } else { - ccms.here2there = make(map[string]string) - ccms.there2here = make(map[string]string) - } - - return nil -} - -func (ccms *CCMetricStore) toRemoteName(metric string) string { - if renamed, ok := ccms.here2there[metric]; ok { - return renamed - } - - return metric -} - -func (ccms *CCMetricStore) toLocalName(metric string) string { - if renamed, ok := ccms.there2here[metric]; ok { - return renamed - } - - return metric -} - -func (ccms *CCMetricStore) doRequest( - ctx context.Context, - body *ApiQueryRequest, -) (*ApiQueryResponse, error) { - buf := &bytes.Buffer{} - if err := json.NewEncoder(buf).Encode(body); err != nil { - cclog.Errorf("Error while encoding request body: %s", err.Error()) - return nil, err - } - - req, err := http.NewRequestWithContext(ctx, http.MethodGet, ccms.queryEndpoint, buf) - if err != nil { - cclog.Errorf("Error while building request body: %s", err.Error()) - return nil, err - } - if ccms.jwt != "" { - req.Header.Add("Authorization", fmt.Sprintf("Bearer %s", ccms.jwt)) - } - - // versioning the cc-metric-store query API. 
- // v2 = data with resampling - // v1 = data without resampling - q := req.URL.Query() - q.Add("version", "v2") - req.URL.RawQuery = q.Encode() - - res, err := ccms.client.Do(req) - if err != nil { - cclog.Errorf("Error while performing request: %s", err.Error()) - return nil, err - } - - if res.StatusCode != http.StatusOK { - return nil, fmt.Errorf("'%s': HTTP Status: %s", ccms.queryEndpoint, res.Status) - } - - var resBody ApiQueryResponse - if err := json.NewDecoder(bufio.NewReader(res.Body)).Decode(&resBody); err != nil { - cclog.Errorf("Error while decoding result body: %s", err.Error()) - return nil, err - } - - return &resBody, nil -} - -func (ccms *CCMetricStore) LoadData( - job *schema.Job, - metrics []string, - scopes []schema.MetricScope, - ctx context.Context, - resolution int, -) (schema.JobData, error) { - queries, assignedScope, err := ccms.buildQueries(job, metrics, scopes, resolution) - if err != nil { - cclog.Errorf("Error while building queries for jobId %d, Metrics %v, Scopes %v: %s", job.JobID, metrics, scopes, err.Error()) - return nil, err - } - - req := ApiQueryRequest{ - Cluster: job.Cluster, - From: job.StartTime, - To: job.StartTime + int64(job.Duration), - Queries: queries, - WithStats: true, - WithData: true, - } - - resBody, err := ccms.doRequest(ctx, &req) - if err != nil { - cclog.Errorf("Error while performing request: %s", err.Error()) - return nil, err - } - - var errors []string - jobData := make(schema.JobData) - for i, row := range resBody.Results { - query := req.Queries[i] - metric := ccms.toLocalName(query.Metric) - scope := assignedScope[i] - mc := archive.GetMetricConfig(job.Cluster, metric) - if _, ok := jobData[metric]; !ok { - jobData[metric] = make(map[schema.MetricScope]*schema.JobMetric) - } - - res := mc.Timestep - if len(row) > 0 { - res = row[0].Resolution - } - - jobMetric, ok := jobData[metric][scope] - if !ok { - jobMetric = &schema.JobMetric{ - Unit: mc.Unit, - Timestep: res, - Series: make([]schema.Series, 0), - } - jobData[metric][scope] = jobMetric - } - - for ndx, res := range row { - if res.Error != nil { - /* Build list for "partial errors", if any */ - errors = append(errors, fmt.Sprintf("failed to fetch '%s' from host '%s': %s", query.Metric, query.Hostname, *res.Error)) - continue - } - - id := (*string)(nil) - if query.Type != nil { - id = new(string) - *id = query.TypeIds[ndx] - } - - if res.Avg.IsNaN() || res.Min.IsNaN() || res.Max.IsNaN() { - // "schema.Float()" because regular float64 can not be JSONed when NaN. 
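The comment above refers to a real limitation of Go's standard JSON encoder: a plain float64 NaN makes `json.Marshal` fail, which is why the code zeroes the statistics instead of passing NaN through. A minimal demonstration:

```go
package main

import (
	"encoding/json"
	"fmt"
	"math"
)

func main() {
	// encoding/json rejects IEEE-754 NaN (and ±Inf) outright.
	_, err := json.Marshal(math.NaN())
	fmt.Println(err) // json: unsupported value: NaN
}
```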
- res.Avg = schema.Float(0) - res.Min = schema.Float(0) - res.Max = schema.Float(0) - } - - jobMetric.Series = append(jobMetric.Series, schema.Series{ - Hostname: query.Hostname, - Id: id, - Statistics: schema.MetricStatistics{ - Avg: float64(res.Avg), - Min: float64(res.Min), - Max: float64(res.Max), - }, - Data: res.Data, - }) - } - - // So that one can later check len(jobData): - if len(jobMetric.Series) == 0 { - delete(jobData[metric], scope) - if len(jobData[metric]) == 0 { - delete(jobData, metric) - } - } - } - - if len(errors) != 0 { - /* Returns list for "partial errors" */ - return jobData, fmt.Errorf("METRICDATA/CCMS > Errors: %s", strings.Join(errors, ", ")) - } - return jobData, nil -} - -func (ccms *CCMetricStore) buildQueries( - job *schema.Job, - metrics []string, - scopes []schema.MetricScope, - resolution int, -) ([]ApiQuery, []schema.MetricScope, error) { - queries := make([]ApiQuery, 0, len(metrics)*len(scopes)*len(job.Resources)) - assignedScope := []schema.MetricScope{} - - subcluster, scerr := archive.GetSubCluster(job.Cluster, job.SubCluster) - if scerr != nil { - return nil, nil, scerr - } - topology := subcluster.Topology - - for _, metric := range metrics { - remoteName := ccms.toRemoteName(metric) - mc := archive.GetMetricConfig(job.Cluster, metric) - if mc == nil { - // return nil, fmt.Errorf("METRICDATA/CCMS > metric '%s' is not specified for cluster '%s'", metric, job.Cluster) - cclog.Infof("metric '%s' is not specified for cluster '%s'", metric, job.Cluster) - continue - } - - // Skip if metric is removed for subcluster - if len(mc.SubClusters) != 0 { - isRemoved := false - for _, scConfig := range mc.SubClusters { - if scConfig.Name == job.SubCluster && scConfig.Remove { - isRemoved = true - break - } - } - if isRemoved { - continue - } - } - - // Avoid duplicates... 
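The deduplication the comment announces works by clamping each requested scope up to the metric's native scope and emitting queries only for effective scopes not seen before. The same idea as a compact sketch, using a map where the deleted code uses a small slice; `scopes` and `nativeScope` are the surrounding loop variables:

```go
// Each requested scope is clamped to the metric's native granularity;
// two requests that clamp to the same effective scope yield one query.
handled := make(map[schema.MetricScope]bool)
for _, requested := range scopes {
	effective := nativeScope.Max(requested) // never finer than native
	if handled[effective] {
		continue
	}
	handled[effective] = true
	// ... append one ApiQuery per host for `effective` ...
}
```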
- handledScopes := make([]schema.MetricScope, 0, 3) - - scopesLoop: - for _, requestedScope := range scopes { - nativeScope := mc.Scope - if nativeScope == schema.MetricScopeAccelerator && job.NumAcc == 0 { - continue - } - - scope := nativeScope.Max(requestedScope) - for _, s := range handledScopes { - if scope == s { - continue scopesLoop - } - } - handledScopes = append(handledScopes, scope) - - for _, host := range job.Resources { - hwthreads := host.HWThreads - if hwthreads == nil { - hwthreads = topology.Node - } - - // Accelerator -> Accelerator (Use "accelerator" scope if requested scope is lower than node) - if nativeScope == schema.MetricScopeAccelerator && scope.LT(schema.MetricScopeNode) { - if scope != schema.MetricScopeAccelerator { - // Skip all other catched cases - continue - } - - queries = append(queries, ApiQuery{ - Metric: remoteName, - Hostname: host.Hostname, - Aggregate: false, - Type: &acceleratorString, - TypeIds: host.Accelerators, - Resolution: resolution, - }) - assignedScope = append(assignedScope, schema.MetricScopeAccelerator) - continue - } - - // Accelerator -> Node - if nativeScope == schema.MetricScopeAccelerator && scope == schema.MetricScopeNode { - if len(host.Accelerators) == 0 { - continue - } - - queries = append(queries, ApiQuery{ - Metric: remoteName, - Hostname: host.Hostname, - Aggregate: true, - Type: &acceleratorString, - TypeIds: host.Accelerators, - Resolution: resolution, - }) - assignedScope = append(assignedScope, scope) - continue - } - - // HWThread -> HWThead - if nativeScope == schema.MetricScopeHWThread && scope == schema.MetricScopeHWThread { - queries = append(queries, ApiQuery{ - Metric: remoteName, - Hostname: host.Hostname, - Aggregate: false, - Type: &hwthreadString, - TypeIds: intToStringSlice(hwthreads), - Resolution: resolution, - }) - assignedScope = append(assignedScope, scope) - continue - } - - // HWThread -> Core - if nativeScope == schema.MetricScopeHWThread && scope == schema.MetricScopeCore { - cores, _ := topology.GetCoresFromHWThreads(hwthreads) - for _, core := range cores { - queries = append(queries, ApiQuery{ - Metric: remoteName, - Hostname: host.Hostname, - Aggregate: true, - Type: &hwthreadString, - TypeIds: intToStringSlice(topology.Core[core]), - Resolution: resolution, - }) - assignedScope = append(assignedScope, scope) - } - continue - } - - // HWThread -> Socket - if nativeScope == schema.MetricScopeHWThread && scope == schema.MetricScopeSocket { - sockets, _ := topology.GetSocketsFromHWThreads(hwthreads) - for _, socket := range sockets { - queries = append(queries, ApiQuery{ - Metric: remoteName, - Hostname: host.Hostname, - Aggregate: true, - Type: &hwthreadString, - TypeIds: intToStringSlice(topology.Socket[socket]), - Resolution: resolution, - }) - assignedScope = append(assignedScope, scope) - } - continue - } - - // HWThread -> Node - if nativeScope == schema.MetricScopeHWThread && scope == schema.MetricScopeNode { - queries = append(queries, ApiQuery{ - Metric: remoteName, - Hostname: host.Hostname, - Aggregate: true, - Type: &hwthreadString, - TypeIds: intToStringSlice(hwthreads), - Resolution: resolution, - }) - assignedScope = append(assignedScope, scope) - continue - } - - // Core -> Core - if nativeScope == schema.MetricScopeCore && scope == schema.MetricScopeCore { - cores, _ := topology.GetCoresFromHWThreads(hwthreads) - queries = append(queries, ApiQuery{ - Metric: remoteName, - Hostname: host.Hostname, - Aggregate: false, - Type: &coreString, - TypeIds: intToStringSlice(cores), - 
Resolution: resolution, - }) - assignedScope = append(assignedScope, scope) - continue - } - - // Core -> Socket - if nativeScope == schema.MetricScopeCore && scope == schema.MetricScopeSocket { - sockets, _ := topology.GetSocketsFromCores(hwthreads) - for _, socket := range sockets { - queries = append(queries, ApiQuery{ - Metric: remoteName, - Hostname: host.Hostname, - Aggregate: true, - Type: &coreString, - TypeIds: intToStringSlice(topology.Socket[socket]), - Resolution: resolution, - }) - assignedScope = append(assignedScope, scope) - } - continue - } - - // Core -> Node - if nativeScope == schema.MetricScopeCore && scope == schema.MetricScopeNode { - cores, _ := topology.GetCoresFromHWThreads(hwthreads) - queries = append(queries, ApiQuery{ - Metric: remoteName, - Hostname: host.Hostname, - Aggregate: true, - Type: &coreString, - TypeIds: intToStringSlice(cores), - Resolution: resolution, - }) - assignedScope = append(assignedScope, scope) - continue - } - - // MemoryDomain -> MemoryDomain - if nativeScope == schema.MetricScopeMemoryDomain && scope == schema.MetricScopeMemoryDomain { - sockets, _ := topology.GetMemoryDomainsFromHWThreads(hwthreads) - queries = append(queries, ApiQuery{ - Metric: remoteName, - Hostname: host.Hostname, - Aggregate: false, - Type: &memoryDomainString, - TypeIds: intToStringSlice(sockets), - Resolution: resolution, - }) - assignedScope = append(assignedScope, scope) - continue - } - - // MemoryDoman -> Node - if nativeScope == schema.MetricScopeMemoryDomain && scope == schema.MetricScopeNode { - sockets, _ := topology.GetMemoryDomainsFromHWThreads(hwthreads) - queries = append(queries, ApiQuery{ - Metric: remoteName, - Hostname: host.Hostname, - Aggregate: true, - Type: &memoryDomainString, - TypeIds: intToStringSlice(sockets), - Resolution: resolution, - }) - assignedScope = append(assignedScope, scope) - continue - } - - // Socket -> Socket - if nativeScope == schema.MetricScopeSocket && scope == schema.MetricScopeSocket { - sockets, _ := topology.GetSocketsFromHWThreads(hwthreads) - queries = append(queries, ApiQuery{ - Metric: remoteName, - Hostname: host.Hostname, - Aggregate: false, - Type: &socketString, - TypeIds: intToStringSlice(sockets), - Resolution: resolution, - }) - assignedScope = append(assignedScope, scope) - continue - } - - // Socket -> Node - if nativeScope == schema.MetricScopeSocket && scope == schema.MetricScopeNode { - sockets, _ := topology.GetSocketsFromHWThreads(hwthreads) - queries = append(queries, ApiQuery{ - Metric: remoteName, - Hostname: host.Hostname, - Aggregate: true, - Type: &socketString, - TypeIds: intToStringSlice(sockets), - Resolution: resolution, - }) - assignedScope = append(assignedScope, scope) - continue - } - - // Node -> Node - if nativeScope == schema.MetricScopeNode && scope == schema.MetricScopeNode { - queries = append(queries, ApiQuery{ - Metric: remoteName, - Hostname: host.Hostname, - Resolution: resolution, - }) - assignedScope = append(assignedScope, scope) - continue - } - - return nil, nil, fmt.Errorf("METRICDATA/CCMS > TODO: unhandled case: native-scope=%s, requested-scope=%s", nativeScope, requestedScope) - } - } - } - - return queries, assignedScope, nil -} - -func (ccms *CCMetricStore) LoadStats( - job *schema.Job, - metrics []string, - ctx context.Context, -) (map[string]map[string]schema.MetricStatistics, error) { - - queries, _, err := ccms.buildQueries(job, metrics, []schema.MetricScope{schema.MetricScopeNode}, 0) // #166 Add scope shere for analysis view accelerator normalization? 
- if err != nil { - cclog.Errorf("Error while building queries for jobId %d, Metrics %v: %s", job.JobID, metrics, err.Error()) - return nil, err - } - - req := ApiQueryRequest{ - Cluster: job.Cluster, - From: job.StartTime, - To: job.StartTime + int64(job.Duration), - Queries: queries, - WithStats: true, - WithData: false, - } - - resBody, err := ccms.doRequest(ctx, &req) - if err != nil { - cclog.Errorf("Error while performing request: %s", err.Error()) - return nil, err - } - - stats := make(map[string]map[string]schema.MetricStatistics, len(metrics)) - for i, res := range resBody.Results { - query := req.Queries[i] - metric := ccms.toLocalName(query.Metric) - data := res[0] - if data.Error != nil { - cclog.Errorf("fetching %s for node %s failed: %s", metric, query.Hostname, *data.Error) - continue - } - - metricdata, ok := stats[metric] - if !ok { - metricdata = make(map[string]schema.MetricStatistics, job.NumNodes) - stats[metric] = metricdata - } - - if data.Avg.IsNaN() || data.Min.IsNaN() || data.Max.IsNaN() { - cclog.Warnf("fetching %s for node %s failed: one of avg/min/max is NaN", metric, query.Hostname) - continue - } - - metricdata[query.Hostname] = schema.MetricStatistics{ - Avg: float64(data.Avg), - Min: float64(data.Min), - Max: float64(data.Max), - } - } - - return stats, nil -} - -// Used for Job-View Statistics Table -func (ccms *CCMetricStore) LoadScopedStats( - job *schema.Job, - metrics []string, - scopes []schema.MetricScope, - ctx context.Context, -) (schema.ScopedJobStats, error) { - queries, assignedScope, err := ccms.buildQueries(job, metrics, scopes, 0) - if err != nil { - cclog.Errorf("Error while building queries for jobId %d, Metrics %v, Scopes %v: %s", job.JobID, metrics, scopes, err.Error()) - return nil, err - } - - req := ApiQueryRequest{ - Cluster: job.Cluster, - From: job.StartTime, - To: job.StartTime + int64(job.Duration), - Queries: queries, - WithStats: true, - WithData: false, - } - - resBody, err := ccms.doRequest(ctx, &req) - if err != nil { - cclog.Errorf("Error while performing request: %s", err.Error()) - return nil, err - } - - var errors []string - scopedJobStats := make(schema.ScopedJobStats) - - for i, row := range resBody.Results { - query := req.Queries[i] - metric := ccms.toLocalName(query.Metric) - scope := assignedScope[i] - - if _, ok := scopedJobStats[metric]; !ok { - scopedJobStats[metric] = make(map[schema.MetricScope][]*schema.ScopedStats) - } - - if _, ok := scopedJobStats[metric][scope]; !ok { - scopedJobStats[metric][scope] = make([]*schema.ScopedStats, 0) - } - - for ndx, res := range row { - if res.Error != nil { - /* Build list for "partial errors", if any */ - errors = append(errors, fmt.Sprintf("failed to fetch '%s' from host '%s': %s", query.Metric, query.Hostname, *res.Error)) - continue - } - - id := (*string)(nil) - if query.Type != nil { - id = new(string) - *id = query.TypeIds[ndx] - } - - if res.Avg.IsNaN() || res.Min.IsNaN() || res.Max.IsNaN() { - // "schema.Float()" because regular float64 can not be JSONed when NaN. 
- res.Avg = schema.Float(0) - res.Min = schema.Float(0) - res.Max = schema.Float(0) - } - - scopedJobStats[metric][scope] = append(scopedJobStats[metric][scope], &schema.ScopedStats{ - Hostname: query.Hostname, - Id: id, - Data: &schema.MetricStatistics{ - Avg: float64(res.Avg), - Min: float64(res.Min), - Max: float64(res.Max), - }, - }) - } - - // So that one can later check len(scopedJobStats[metric][scope]): Remove from map if empty - if len(scopedJobStats[metric][scope]) == 0 { - delete(scopedJobStats[metric], scope) - if len(scopedJobStats[metric]) == 0 { - delete(scopedJobStats, metric) - } - } - } - - if len(errors) != 0 { - /* Returns list for "partial errors" */ - return scopedJobStats, fmt.Errorf("METRICDATA/CCMS > Errors: %s", strings.Join(errors, ", ")) - } - return scopedJobStats, nil -} - -// Used for Systems-View Node-Overview -func (ccms *CCMetricStore) LoadNodeData( - cluster string, - metrics, nodes []string, - scopes []schema.MetricScope, - from, to time.Time, - ctx context.Context, -) (map[string]map[string][]*schema.JobMetric, error) { - req := ApiQueryRequest{ - Cluster: cluster, - From: from.Unix(), - To: to.Unix(), - WithStats: true, - WithData: true, - } - - if nodes == nil { - for _, metric := range metrics { - req.ForAllNodes = append(req.ForAllNodes, ccms.toRemoteName(metric)) - } - } else { - for _, node := range nodes { - for _, metric := range metrics { - req.Queries = append(req.Queries, ApiQuery{ - Hostname: node, - Metric: ccms.toRemoteName(metric), - Resolution: 0, // Default for Node Queries: Will return metric $Timestep Resolution - }) - } - } - } - - resBody, err := ccms.doRequest(ctx, &req) - if err != nil { - cclog.Errorf("Error while performing request: %s", err.Error()) - return nil, err - } - - var errors []string - data := make(map[string]map[string][]*schema.JobMetric) - for i, res := range resBody.Results { - var query ApiQuery - if resBody.Queries != nil { - query = resBody.Queries[i] - } else { - query = req.Queries[i] - } - - metric := ccms.toLocalName(query.Metric) - qdata := res[0] - if qdata.Error != nil { - /* Build list for "partial errors", if any */ - errors = append(errors, fmt.Sprintf("fetching %s for node %s failed: %s", metric, query.Hostname, *qdata.Error)) - } - - if qdata.Avg.IsNaN() || qdata.Min.IsNaN() || qdata.Max.IsNaN() { - // return nil, fmt.Errorf("METRICDATA/CCMS > fetching %s for node %s failed: %s", metric, query.Hostname, "avg/min/max is NaN") - qdata.Avg, qdata.Min, qdata.Max = 0., 0., 0. 
- } - - hostdata, ok := data[query.Hostname] - if !ok { - hostdata = make(map[string][]*schema.JobMetric) - data[query.Hostname] = hostdata - } - - mc := archive.GetMetricConfig(cluster, metric) - if mc != nil { - hostdata[metric] = append(hostdata[metric], &schema.JobMetric{ - Unit: mc.Unit, - Timestep: mc.Timestep, - Series: []schema.Series{ - { - Hostname: query.Hostname, - Data: qdata.Data, - Statistics: schema.MetricStatistics{ - Avg: float64(qdata.Avg), - Min: float64(qdata.Min), - Max: float64(qdata.Max), - }, - }, - }, - }) - } else { - cclog.Warnf("Metric '%s' not configured for cluster '%s': Skipped in LoadNodeData() Return!", metric, cluster) - } - } - - if len(errors) != 0 { - /* Returns list of "partial errors" */ - return data, fmt.Errorf("METRICDATA/CCMS > Errors: %s", strings.Join(errors, ", ")) - } - - return data, nil -} - -// Used for Systems-View Node-List -func (ccms *CCMetricStore) LoadNodeListData( - cluster, subCluster string, - nodes []string, - metrics []string, - scopes []schema.MetricScope, - resolution int, - from, to time.Time, - ctx context.Context, -) (map[string]schema.JobData, error) { - - // Note: Order of node data is not guaranteed after this point - queries, assignedScope, err := ccms.buildNodeQueries(cluster, subCluster, nodes, metrics, scopes, resolution) - if err != nil { - cclog.Errorf("Error while building node queries for Cluster %s, SubCLuster %s, Metrics %v, Scopes %v: %s", cluster, subCluster, metrics, scopes, err.Error()) - return nil, err - } - - req := ApiQueryRequest{ - Cluster: cluster, - Queries: queries, - From: from.Unix(), - To: to.Unix(), - WithStats: true, - WithData: true, - } - - resBody, err := ccms.doRequest(ctx, &req) - if err != nil { - cclog.Errorf("Error while performing request: %s", err.Error()) - return nil, err - } - - var errors []string - data := make(map[string]schema.JobData) - for i, row := range resBody.Results { - var query ApiQuery - if resBody.Queries != nil { - query = resBody.Queries[i] - } else { - query = req.Queries[i] - } - // qdata := res[0] - metric := ccms.toLocalName(query.Metric) - scope := assignedScope[i] - mc := archive.GetMetricConfig(cluster, metric) - - res := mc.Timestep - if len(row) > 0 { - res = row[0].Resolution - } - - // Init Nested Map Data Structures If Not Found - hostData, ok := data[query.Hostname] - if !ok { - hostData = make(schema.JobData) - data[query.Hostname] = hostData - } - - metricData, ok := hostData[metric] - if !ok { - metricData = make(map[schema.MetricScope]*schema.JobMetric) - data[query.Hostname][metric] = metricData - } - - scopeData, ok := metricData[scope] - if !ok { - scopeData = &schema.JobMetric{ - Unit: mc.Unit, - Timestep: res, - Series: make([]schema.Series, 0), - } - data[query.Hostname][metric][scope] = scopeData - } - - for ndx, res := range row { - if res.Error != nil { - /* Build list for "partial errors", if any */ - errors = append(errors, fmt.Sprintf("failed to fetch '%s' from host '%s': %s", query.Metric, query.Hostname, *res.Error)) - continue - } - - id := (*string)(nil) - if query.Type != nil { - id = new(string) - *id = query.TypeIds[ndx] - } - - if res.Avg.IsNaN() || res.Min.IsNaN() || res.Max.IsNaN() { - // "schema.Float()" because regular float64 can not be JSONed when NaN. 
- res.Avg = schema.Float(0) - res.Min = schema.Float(0) - res.Max = schema.Float(0) - } - - scopeData.Series = append(scopeData.Series, schema.Series{ - Hostname: query.Hostname, - Id: id, - Statistics: schema.MetricStatistics{ - Avg: float64(res.Avg), - Min: float64(res.Min), - Max: float64(res.Max), - }, - Data: res.Data, - }) - } - } - - if len(errors) != 0 { - /* Returns list of "partial errors" */ - return data, fmt.Errorf("METRICDATA/CCMS > Errors: %s", strings.Join(errors, ", ")) - } - - return data, nil -} - -func (ccms *CCMetricStore) buildNodeQueries( - cluster string, - subCluster string, - nodes []string, - metrics []string, - scopes []schema.MetricScope, - resolution int, -) ([]ApiQuery, []schema.MetricScope, error) { - - queries := make([]ApiQuery, 0, len(metrics)*len(scopes)*len(nodes)) - assignedScope := []schema.MetricScope{} - - // Get Topol before loop if subCluster given - var subClusterTopol *schema.SubCluster - var scterr error - if subCluster != "" { - subClusterTopol, scterr = archive.GetSubCluster(cluster, subCluster) - if scterr != nil { - cclog.Errorf("could not load cluster %s subCluster %s topology: %s", cluster, subCluster, scterr.Error()) - return nil, nil, scterr - } - } - - for _, metric := range metrics { - remoteName := ccms.toRemoteName(metric) - mc := archive.GetMetricConfig(cluster, metric) - if mc == nil { - // return nil, fmt.Errorf("METRICDATA/CCMS > metric '%s' is not specified for cluster '%s'", metric, cluster) - cclog.Warnf("metric '%s' is not specified for cluster '%s'", metric, cluster) - continue - } - - // Skip if metric is removed for subcluster - if mc.SubClusters != nil { - isRemoved := false - for _, scConfig := range mc.SubClusters { - if scConfig.Name == subCluster && scConfig.Remove { - isRemoved = true - break - } - } - if isRemoved { - continue - } - } - - // Avoid duplicates... 
- handledScopes := make([]schema.MetricScope, 0, 3) - - scopesLoop: - for _, requestedScope := range scopes { - nativeScope := mc.Scope - - scope := nativeScope.Max(requestedScope) - for _, s := range handledScopes { - if scope == s { - continue scopesLoop - } - } - handledScopes = append(handledScopes, scope) - - for _, hostname := range nodes { - - // If no subCluster given, get it by node - if subCluster == "" { - subClusterName, scnerr := archive.GetSubClusterByNode(cluster, hostname) - if scnerr != nil { - return nil, nil, scnerr - } - subClusterTopol, scterr = archive.GetSubCluster(cluster, subClusterName) - if scterr != nil { - return nil, nil, scterr - } - } - - // Always full node hwthread id list, no partial queries expected -> Use "topology.Node" directly where applicable - // Always full accelerator id list, no partial queries expected -> Use "acceleratorIds" directly where applicable - topology := subClusterTopol.Topology - acceleratorIds := topology.GetAcceleratorIDs() - - // Moved check here if metric matches hardware specs - if nativeScope == schema.MetricScopeAccelerator && len(acceleratorIds) == 0 { - continue scopesLoop - } - - // Accelerator -> Accelerator (Use "accelerator" scope if requested scope is lower than node) - if nativeScope == schema.MetricScopeAccelerator && scope.LT(schema.MetricScopeNode) { - if scope != schema.MetricScopeAccelerator { - // Skip all other catched cases - continue - } - - queries = append(queries, ApiQuery{ - Metric: remoteName, - Hostname: hostname, - Aggregate: false, - Type: &acceleratorString, - TypeIds: acceleratorIds, - Resolution: resolution, - }) - assignedScope = append(assignedScope, schema.MetricScopeAccelerator) - continue - } - - // Accelerator -> Node - if nativeScope == schema.MetricScopeAccelerator && scope == schema.MetricScopeNode { - if len(acceleratorIds) == 0 { - continue - } - - queries = append(queries, ApiQuery{ - Metric: remoteName, - Hostname: hostname, - Aggregate: true, - Type: &acceleratorString, - TypeIds: acceleratorIds, - Resolution: resolution, - }) - assignedScope = append(assignedScope, scope) - continue - } - - // HWThread -> HWThead - if nativeScope == schema.MetricScopeHWThread && scope == schema.MetricScopeHWThread { - queries = append(queries, ApiQuery{ - Metric: remoteName, - Hostname: hostname, - Aggregate: false, - Type: &hwthreadString, - TypeIds: intToStringSlice(topology.Node), - Resolution: resolution, - }) - assignedScope = append(assignedScope, scope) - continue - } - - // HWThread -> Core - if nativeScope == schema.MetricScopeHWThread && scope == schema.MetricScopeCore { - cores, _ := topology.GetCoresFromHWThreads(topology.Node) - for _, core := range cores { - queries = append(queries, ApiQuery{ - Metric: remoteName, - Hostname: hostname, - Aggregate: true, - Type: &hwthreadString, - TypeIds: intToStringSlice(topology.Core[core]), - Resolution: resolution, - }) - assignedScope = append(assignedScope, scope) - } - continue - } - - // HWThread -> Socket - if nativeScope == schema.MetricScopeHWThread && scope == schema.MetricScopeSocket { - sockets, _ := topology.GetSocketsFromHWThreads(topology.Node) - for _, socket := range sockets { - queries = append(queries, ApiQuery{ - Metric: remoteName, - Hostname: hostname, - Aggregate: true, - Type: &hwthreadString, - TypeIds: intToStringSlice(topology.Socket[socket]), - Resolution: resolution, - }) - assignedScope = append(assignedScope, scope) - } - continue - } - - // HWThread -> Node - if nativeScope == schema.MetricScopeHWThread && scope == 
schema.MetricScopeNode { - queries = append(queries, ApiQuery{ - Metric: remoteName, - Hostname: hostname, - Aggregate: true, - Type: &hwthreadString, - TypeIds: intToStringSlice(topology.Node), - Resolution: resolution, - }) - assignedScope = append(assignedScope, scope) - continue - } - - // Core -> Core - if nativeScope == schema.MetricScopeCore && scope == schema.MetricScopeCore { - cores, _ := topology.GetCoresFromHWThreads(topology.Node) - queries = append(queries, ApiQuery{ - Metric: remoteName, - Hostname: hostname, - Aggregate: false, - Type: &coreString, - TypeIds: intToStringSlice(cores), - Resolution: resolution, - }) - assignedScope = append(assignedScope, scope) - continue - } - - // Core -> Socket - if nativeScope == schema.MetricScopeCore && scope == schema.MetricScopeSocket { - sockets, _ := topology.GetSocketsFromCores(topology.Node) - for _, socket := range sockets { - queries = append(queries, ApiQuery{ - Metric: remoteName, - Hostname: hostname, - Aggregate: true, - Type: &coreString, - TypeIds: intToStringSlice(topology.Socket[socket]), - Resolution: resolution, - }) - assignedScope = append(assignedScope, scope) - } - continue - } - - // Core -> Node - if nativeScope == schema.MetricScopeCore && scope == schema.MetricScopeNode { - cores, _ := topology.GetCoresFromHWThreads(topology.Node) - queries = append(queries, ApiQuery{ - Metric: remoteName, - Hostname: hostname, - Aggregate: true, - Type: &coreString, - TypeIds: intToStringSlice(cores), - Resolution: resolution, - }) - assignedScope = append(assignedScope, scope) - continue - } - - // MemoryDomain -> MemoryDomain - if nativeScope == schema.MetricScopeMemoryDomain && scope == schema.MetricScopeMemoryDomain { - sockets, _ := topology.GetMemoryDomainsFromHWThreads(topology.Node) - queries = append(queries, ApiQuery{ - Metric: remoteName, - Hostname: hostname, - Aggregate: false, - Type: &memoryDomainString, - TypeIds: intToStringSlice(sockets), - Resolution: resolution, - }) - assignedScope = append(assignedScope, scope) - continue - } - - // MemoryDoman -> Node - if nativeScope == schema.MetricScopeMemoryDomain && scope == schema.MetricScopeNode { - sockets, _ := topology.GetMemoryDomainsFromHWThreads(topology.Node) - queries = append(queries, ApiQuery{ - Metric: remoteName, - Hostname: hostname, - Aggregate: true, - Type: &memoryDomainString, - TypeIds: intToStringSlice(sockets), - Resolution: resolution, - }) - assignedScope = append(assignedScope, scope) - continue - } - - // Socket -> Socket - if nativeScope == schema.MetricScopeSocket && scope == schema.MetricScopeSocket { - sockets, _ := topology.GetSocketsFromHWThreads(topology.Node) - queries = append(queries, ApiQuery{ - Metric: remoteName, - Hostname: hostname, - Aggregate: false, - Type: &socketString, - TypeIds: intToStringSlice(sockets), - Resolution: resolution, - }) - assignedScope = append(assignedScope, scope) - continue - } - - // Socket -> Node - if nativeScope == schema.MetricScopeSocket && scope == schema.MetricScopeNode { - sockets, _ := topology.GetSocketsFromHWThreads(topology.Node) - queries = append(queries, ApiQuery{ - Metric: remoteName, - Hostname: hostname, - Aggregate: true, - Type: &socketString, - TypeIds: intToStringSlice(sockets), - Resolution: resolution, - }) - assignedScope = append(assignedScope, scope) - continue - } - - // Node -> Node - if nativeScope == schema.MetricScopeNode && scope == schema.MetricScopeNode { - queries = append(queries, ApiQuery{ - Metric: remoteName, - Hostname: hostname, - Resolution: resolution, - 
}) - assignedScope = append(assignedScope, scope) - continue - } - - return nil, nil, fmt.Errorf("METRICDATA/CCMS > TODO: unhandled case: native-scope=%s, requested-scope=%s", nativeScope, requestedScope) - } - } - } - - return queries, assignedScope, nil -} diff --git a/internal/metricdata/metricdata.go b/internal/metricdata/metricdata.go deleted file mode 100644 index ab0e19fb..00000000 --- a/internal/metricdata/metricdata.go +++ /dev/null @@ -1,88 +0,0 @@ -// Copyright (C) NHR@FAU, University Erlangen-Nuremberg. -// All rights reserved. This file is part of cc-backend. -// Use of this source code is governed by a MIT-style -// license that can be found in the LICENSE file. - -package metricdata - -import ( - "context" - "encoding/json" - "fmt" - "time" - - "github.com/ClusterCockpit/cc-backend/internal/config" - "github.com/ClusterCockpit/cc-backend/internal/memorystore" - cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger" - "github.com/ClusterCockpit/cc-lib/v2/schema" -) - -type MetricDataRepository interface { - // Initialize this MetricDataRepository. One instance of - // this interface will only ever be responsible for one cluster. - Init(rawConfig json.RawMessage) error - - // Return the JobData for the given job, only with the requested metrics. - LoadData(job *schema.Job, metrics []string, scopes []schema.MetricScope, ctx context.Context, resolution int) (schema.JobData, error) - - // Return a map of metrics to a map of nodes to the metric statistics of the job. node scope only. - LoadStats(job *schema.Job, metrics []string, ctx context.Context) (map[string]map[string]schema.MetricStatistics, error) - - // Return a map of metrics to a map of scopes to the scoped metric statistics of the job. - LoadScopedStats(job *schema.Job, metrics []string, scopes []schema.MetricScope, ctx context.Context) (schema.ScopedJobStats, error) - - // Return a map of hosts to a map of metrics at the requested scopes (currently only node) for that node. - LoadNodeData(cluster string, metrics, nodes []string, scopes []schema.MetricScope, from, to time.Time, ctx context.Context) (map[string]map[string][]*schema.JobMetric, error) - - // Return a map of hosts to a map of metrics to a map of scopes for multiple nodes. 
- LoadNodeListData(cluster, subCluster string, nodes, metrics []string, scopes []schema.MetricScope, resolution int, from, to time.Time, ctx context.Context) (map[string]schema.JobData, error) -} - -var metricDataRepos map[string]MetricDataRepository = map[string]MetricDataRepository{} - -func Init() error { - for _, cluster := range config.Clusters { - if cluster.MetricDataRepository != nil { - var kind struct { - Kind string `json:"kind"` - } - if err := json.Unmarshal(cluster.MetricDataRepository, &kind); err != nil { - cclog.Warn("Error while unmarshaling raw json MetricDataRepository") - return err - } - - var mdr MetricDataRepository - switch kind.Kind { - case "cc-metric-store": - mdr = &CCMetricStore{} - case "cc-metric-store-internal": - mdr = &CCMetricStoreInternal{} - memorystore.InternalCCMSFlag = true - case "prometheus": - mdr = &PrometheusDataRepository{} - case "test": - mdr = &TestMetricDataRepository{} - default: - return fmt.Errorf("METRICDATA/METRICDATA > Unknown MetricDataRepository %v for cluster %v", kind.Kind, cluster.Name) - } - - if err := mdr.Init(cluster.MetricDataRepository); err != nil { - cclog.Errorf("Error initializing MetricDataRepository %v for cluster %v", kind.Kind, cluster.Name) - return err - } - metricDataRepos[cluster.Name] = mdr - } - } - return nil -} - -func GetMetricDataRepo(cluster string) (MetricDataRepository, error) { - var err error - repo, ok := metricDataRepos[cluster] - - if !ok { - err = fmt.Errorf("METRICDATA/METRICDATA > no metric data repository configured for '%s'", cluster) - } - - return repo, err -} diff --git a/internal/metricdata/prometheus.go b/internal/metricdata/prometheus.go deleted file mode 100644 index 3fb94d51..00000000 --- a/internal/metricdata/prometheus.go +++ /dev/null @@ -1,587 +0,0 @@ -// Copyright (C) 2022 DKRZ -// All rights reserved. This file is part of cc-backend. -// Use of this source code is governed by a MIT-style -// license that can be found in the LICENSE file. 
-package metricdata - -import ( - "bytes" - "context" - "encoding/json" - "errors" - "fmt" - "math" - "net/http" - "os" - "regexp" - "sort" - "strings" - "sync" - "text/template" - "time" - - "github.com/ClusterCockpit/cc-backend/pkg/archive" - cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger" - "github.com/ClusterCockpit/cc-lib/v2/schema" - promapi "github.com/prometheus/client_golang/api" - promv1 "github.com/prometheus/client_golang/api/prometheus/v1" - promcfg "github.com/prometheus/common/config" - promm "github.com/prometheus/common/model" -) - -type PrometheusDataRepositoryConfig struct { - Url string `json:"url"` - Username string `json:"username,omitempty"` - Suffix string `json:"suffix,omitempty"` - Templates map[string]string `json:"query-templates"` -} - -type PrometheusDataRepository struct { - client promapi.Client - queryClient promv1.API - suffix string - templates map[string]*template.Template -} - -type PromQLArgs struct { - Nodes string -} - -type Trie map[rune]Trie - -var logOnce sync.Once - -func contains(s []schema.MetricScope, str schema.MetricScope) bool { - for _, v := range s { - if v == str { - return true - } - } - return false -} - -func MinMaxMean(data []schema.Float) (float64, float64, float64) { - if len(data) == 0 { - return 0.0, 0.0, 0.0 - } - min := math.MaxFloat64 - max := -math.MaxFloat64 - var sum float64 - var n float64 - for _, val := range data { - if val.IsNaN() { - continue - } - sum += float64(val) - n += 1 - if float64(val) > max { - max = float64(val) - } - if float64(val) < min { - min = float64(val) - } - } - return min, max, sum / n -} - -// Rewritten from -// https://github.com/ermanh/trieregex/blob/master/trieregex/trieregex.py -func nodeRegex(nodes []string) string { - root := Trie{} - // add runes of each compute node to trie - for _, node := range nodes { - _trie := root - for _, c := range node { - if _, ok := _trie[c]; !ok { - _trie[c] = Trie{} - } - _trie = _trie[c] - } - _trie['*'] = Trie{} - } - // recursively build regex from rune trie - var trieRegex func(trie Trie, reset bool) string - trieRegex = func(trie Trie, reset bool) string { - if reset == true { - trie = root - } - if len(trie) == 0 { - return "" - } - if len(trie) == 1 { - for key, _trie := range trie { - if key == '*' { - return "" - } - return regexp.QuoteMeta(string(key)) + trieRegex(_trie, false) - } - } else { - sequences := []string{} - for key, _trie := range trie { - if key != '*' { - sequences = append(sequences, regexp.QuoteMeta(string(key))+trieRegex(_trie, false)) - } - } - sort.Slice(sequences, func(i, j int) bool { - return (-len(sequences[i]) < -len(sequences[j])) || (sequences[i] < sequences[j]) - }) - var result string - // single edge from this tree node - if len(sequences) == 1 { - result = sequences[0] - if len(result) > 1 { - result = "(?:" + result + ")" - } - // multiple edges, each length 1 - } else if s := strings.Join(sequences, ""); len(s) == len(sequences) { - // char or numeric range - if len(s)-1 == int(s[len(s)-1])-int(s[0]) { - result = fmt.Sprintf("[%c-%c]", s[0], s[len(s)-1]) - // char or numeric set - } else { - result = "[" + s + "]" - } - // multiple edges of different lengths - } else { - result = "(?:" + strings.Join(sequences, "|") + ")" - } - if _, ok := trie['*']; ok { - result += "?" 
- } - return result - } - return "" - } - return trieRegex(root, true) -} - -func (pdb *PrometheusDataRepository) Init(rawConfig json.RawMessage) error { - var config PrometheusDataRepositoryConfig - // parse config - if err := json.Unmarshal(rawConfig, &config); err != nil { - cclog.Warn("Error while unmarshaling raw json config") - return err - } - // support basic authentication - var rt http.RoundTripper = nil - if prom_pw := os.Getenv("PROMETHEUS_PASSWORD"); prom_pw != "" && config.Username != "" { - prom_pw := promcfg.Secret(prom_pw) - rt = promcfg.NewBasicAuthRoundTripper(promcfg.NewInlineSecret(config.Username), promcfg.NewInlineSecret(string(prom_pw)), promapi.DefaultRoundTripper) - } else { - if config.Username != "" { - return errors.New("METRICDATA/PROMETHEUS > Prometheus username provided, but PROMETHEUS_PASSWORD not set") - } - } - // init client - client, err := promapi.NewClient(promapi.Config{ - Address: config.Url, - RoundTripper: rt, - }) - if err != nil { - cclog.Error("Error while initializing new prometheus client") - return err - } - // init query client - pdb.client = client - pdb.queryClient = promv1.NewAPI(pdb.client) - // site config - pdb.suffix = config.Suffix - // init query templates - pdb.templates = make(map[string]*template.Template) - for metric, templ := range config.Templates { - pdb.templates[metric], err = template.New(metric).Parse(templ) - if err == nil { - cclog.Debugf("Added PromQL template for %s: %s", metric, templ) - } else { - cclog.Warnf("Failed to parse PromQL template %s for metric %s", templ, metric) - } - } - return nil -} - -// TODO: respect scope argument -func (pdb *PrometheusDataRepository) FormatQuery( - metric string, - scope schema.MetricScope, - nodes []string, - cluster string, -) (string, error) { - args := PromQLArgs{} - if len(nodes) > 0 { - args.Nodes = fmt.Sprintf("(%s)%s", nodeRegex(nodes), pdb.suffix) - } else { - args.Nodes = fmt.Sprintf(".*%s", pdb.suffix) - } - - buf := &bytes.Buffer{} - if templ, ok := pdb.templates[metric]; ok { - err := templ.Execute(buf, args) - if err != nil { - return "", errors.New(fmt.Sprintf("METRICDATA/PROMETHEUS > Error compiling template %v", templ)) - } else { - query := buf.String() - cclog.Debugf("PromQL: %s", query) - return query, nil - } - } else { - return "", errors.New(fmt.Sprintf("METRICDATA/PROMETHEUS > No PromQL for metric %s configured.", metric)) - } -} - -// Convert PromAPI row to CC schema.Series -func (pdb *PrometheusDataRepository) RowToSeries( - from time.Time, - step int64, - steps int64, - row *promm.SampleStream, -) schema.Series { - ts := from.Unix() - hostname := strings.TrimSuffix(string(row.Metric["exported_instance"]), pdb.suffix) - // init array of expected length with NaN - values := make([]schema.Float, steps+1) - for i := range values { - values[i] = schema.NaN - } - // copy recorded values from prom sample pair - for _, v := range row.Values { - idx := (v.Timestamp.Unix() - ts) / step - values[idx] = schema.Float(v.Value) - } - min, max, mean := MinMaxMean(values) - // output struct - return schema.Series{ - Hostname: hostname, - Data: values, - Statistics: schema.MetricStatistics{ - Avg: mean, - Min: min, - Max: max, - }, - } -} - -func (pdb *PrometheusDataRepository) LoadData( - job *schema.Job, - metrics []string, - scopes []schema.MetricScope, - ctx context.Context, - resolution int, -) (schema.JobData, error) { - // TODO respect requested scope - if len(scopes) == 0 || !contains(scopes, schema.MetricScopeNode) { - scopes = append(scopes, 
schema.MetricScopeNode) - } - - jobData := make(schema.JobData) - // parse job specs - nodes := make([]string, len(job.Resources)) - for i, resource := range job.Resources { - nodes[i] = resource.Hostname - } - from := time.Unix(job.StartTime, 0) - to := time.Unix(job.StartTime+int64(job.Duration), 0) - - for _, scope := range scopes { - if scope != schema.MetricScopeNode { - logOnce.Do(func() { - cclog.Infof("Scope '%s' requested, but not yet supported: Will return 'node' scope only.", scope) - }) - continue - } - - for _, metric := range metrics { - metricConfig := archive.GetMetricConfig(job.Cluster, metric) - if metricConfig == nil { - cclog.Warnf("Error in LoadData: Metric %s for cluster %s not configured", metric, job.Cluster) - return nil, errors.New("Prometheus config error") - } - query, err := pdb.FormatQuery(metric, scope, nodes, job.Cluster) - if err != nil { - cclog.Warn("Error while formatting prometheus query") - return nil, err - } - - // ranged query over all job nodes - r := promv1.Range{ - Start: from, - End: to, - Step: time.Duration(metricConfig.Timestep * 1e9), - } - result, warnings, err := pdb.queryClient.QueryRange(ctx, query, r) - if err != nil { - cclog.Errorf("Prometheus query error in LoadData: %v\nQuery: %s", err, query) - return nil, errors.New("Prometheus query error") - } - if len(warnings) > 0 { - cclog.Warnf("Warnings: %v\n", warnings) - } - - // init data structures - if _, ok := jobData[metric]; !ok { - jobData[metric] = make(map[schema.MetricScope]*schema.JobMetric) - } - jobMetric, ok := jobData[metric][scope] - if !ok { - jobMetric = &schema.JobMetric{ - Unit: metricConfig.Unit, - Timestep: metricConfig.Timestep, - Series: make([]schema.Series, 0), - } - } - step := int64(metricConfig.Timestep) - steps := int64(to.Sub(from).Seconds()) / step - // iter rows of host, metric, values - for _, row := range result.(promm.Matrix) { - jobMetric.Series = append(jobMetric.Series, - pdb.RowToSeries(from, step, steps, row)) - } - // only add metric if at least one host returned data - if !ok && len(jobMetric.Series) > 0 { - jobData[metric][scope] = jobMetric - } - // sort by hostname to get uniform coloring - sort.Slice(jobMetric.Series, func(i, j int) bool { - return (jobMetric.Series[i].Hostname < jobMetric.Series[j].Hostname) - }) - } - } - return jobData, nil -} - -// TODO change implementation to precomputed/cached stats -func (pdb *PrometheusDataRepository) LoadStats( - job *schema.Job, - metrics []string, - ctx context.Context, -) (map[string]map[string]schema.MetricStatistics, error) { - // map of metrics of nodes of stats - stats := map[string]map[string]schema.MetricStatistics{} - - data, err := pdb.LoadData(job, metrics, []schema.MetricScope{schema.MetricScopeNode}, ctx, 0 /*resolution here*/) - if err != nil { - cclog.Warn("Error while loading job for stats") - return nil, err - } - for metric, metricData := range data { - stats[metric] = make(map[string]schema.MetricStatistics) - for _, series := range metricData[schema.MetricScopeNode].Series { - stats[metric][series.Hostname] = series.Statistics - } - } - - return stats, nil -} - -func (pdb *PrometheusDataRepository) LoadNodeData( - cluster string, - metrics, nodes []string, - scopes []schema.MetricScope, - from, to time.Time, - ctx context.Context, -) (map[string]map[string][]*schema.JobMetric, error) { - t0 := time.Now() - // Map of hosts of metrics of value slices - data := make(map[string]map[string][]*schema.JobMetric) - // query db for each metric - // TODO: scopes seems to be always empty - 
if len(scopes) == 0 || !contains(scopes, schema.MetricScopeNode) { - scopes = append(scopes, schema.MetricScopeNode) - } - for _, scope := range scopes { - if scope != schema.MetricScopeNode { - logOnce.Do(func() { - cclog.Infof("Note: Scope '%s' requested, but not yet supported: Will return 'node' scope only.", scope) - }) - continue - } - for _, metric := range metrics { - metricConfig := archive.GetMetricConfig(cluster, metric) - if metricConfig == nil { - cclog.Warnf("Error in LoadNodeData: Metric %s for cluster %s not configured", metric, cluster) - return nil, errors.New("Prometheus config error") - } - query, err := pdb.FormatQuery(metric, scope, nodes, cluster) - if err != nil { - cclog.Warn("Error while formatting prometheus query") - return nil, err - } - - // ranged query over all nodes - r := promv1.Range{ - Start: from, - End: to, - Step: time.Duration(metricConfig.Timestep * 1e9), - } - result, warnings, err := pdb.queryClient.QueryRange(ctx, query, r) - if err != nil { - cclog.Errorf("Prometheus query error in LoadNodeData: %v\n", err) - return nil, errors.New("Prometheus query error") - } - if len(warnings) > 0 { - cclog.Warnf("Warnings: %v\n", warnings) - } - - step := int64(metricConfig.Timestep) - steps := int64(to.Sub(from).Seconds()) / step - - // iter rows of host, metric, values - for _, row := range result.(promm.Matrix) { - hostname := strings.TrimSuffix(string(row.Metric["exported_instance"]), pdb.suffix) - hostdata, ok := data[hostname] - if !ok { - hostdata = make(map[string][]*schema.JobMetric) - data[hostname] = hostdata - } - // output per host and metric - hostdata[metric] = append(hostdata[metric], &schema.JobMetric{ - Unit: metricConfig.Unit, - Timestep: metricConfig.Timestep, - Series: []schema.Series{pdb.RowToSeries(from, step, steps, row)}, - }, - ) - } - } - } - t1 := time.Since(t0) - cclog.Debugf("LoadNodeData of %v nodes took %s", len(data), t1) - return data, nil -} - -// Implemented by NHR@FAU; Used in Job-View StatsTable -func (pdb *PrometheusDataRepository) LoadScopedStats( - job *schema.Job, - metrics []string, - scopes []schema.MetricScope, - ctx context.Context, -) (schema.ScopedJobStats, error) { - // Assumption: pdb.loadData() only returns series node-scope - use node scope for statsTable - scopedJobStats := make(schema.ScopedJobStats) - data, err := pdb.LoadData(job, metrics, []schema.MetricScope{schema.MetricScopeNode}, ctx, 0 /*resolution here*/) - if err != nil { - cclog.Warn("Error while loading job for scopedJobStats") - return nil, err - } - - for metric, metricData := range data { - for _, scope := range scopes { - if scope != schema.MetricScopeNode { - logOnce.Do(func() { - cclog.Infof("Note: Scope '%s' requested, but not yet supported: Will return 'node' scope only.", scope) - }) - continue - } - - if _, ok := scopedJobStats[metric]; !ok { - scopedJobStats[metric] = make(map[schema.MetricScope][]*schema.ScopedStats) - } - - if _, ok := scopedJobStats[metric][scope]; !ok { - scopedJobStats[metric][scope] = make([]*schema.ScopedStats, 0) - } - - for _, series := range metricData[scope].Series { - scopedJobStats[metric][scope] = append(scopedJobStats[metric][scope], &schema.ScopedStats{ - Hostname: series.Hostname, - Data: &series.Statistics, - }) - } - } - } - - return scopedJobStats, nil -} - -// Implemented by NHR@FAU; Used in NodeList-View -func (pdb *PrometheusDataRepository) LoadNodeListData( - cluster, subCluster string, - nodes []string, - metrics []string, - scopes []schema.MetricScope, - resolution int, - from, to time.Time, 
- ctx context.Context, -) (map[string]schema.JobData, error) { - // Assumption: pdb.loadData() only returns series node-scope - use node scope for NodeList - - // Fetch Data, based on pdb.LoadNodeData() - t0 := time.Now() - // Map of hosts of jobData - data := make(map[string]schema.JobData) - - // query db for each metric - // TODO: scopes seems to be always empty - if len(scopes) == 0 || !contains(scopes, schema.MetricScopeNode) { - scopes = append(scopes, schema.MetricScopeNode) - } - - for _, scope := range scopes { - if scope != schema.MetricScopeNode { - logOnce.Do(func() { - cclog.Infof("Note: Scope '%s' requested, but not yet supported: Will return 'node' scope only.", scope) - }) - continue - } - - for _, metric := range metrics { - metricConfig := archive.GetMetricConfig(cluster, metric) - if metricConfig == nil { - cclog.Warnf("Error in LoadNodeListData: Metric %s for cluster %s not configured", metric, cluster) - return nil, errors.New("Prometheus config error") - } - query, err := pdb.FormatQuery(metric, scope, nodes, cluster) - if err != nil { - cclog.Warn("Error while formatting prometheus query") - return nil, err - } - - // ranged query over all nodes - r := promv1.Range{ - Start: from, - End: to, - Step: time.Duration(metricConfig.Timestep * 1e9), - } - result, warnings, err := pdb.queryClient.QueryRange(ctx, query, r) - if err != nil { - cclog.Errorf("Prometheus query error in LoadNodeData: %v\n", err) - return nil, errors.New("Prometheus query error") - } - if len(warnings) > 0 { - cclog.Warnf("Warnings: %v\n", warnings) - } - - step := int64(metricConfig.Timestep) - steps := int64(to.Sub(from).Seconds()) / step - - // iter rows of host, metric, values - for _, row := range result.(promm.Matrix) { - hostname := strings.TrimSuffix(string(row.Metric["exported_instance"]), pdb.suffix) - - hostdata, ok := data[hostname] - if !ok { - hostdata = make(schema.JobData) - data[hostname] = hostdata - } - - metricdata, ok := hostdata[metric] - if !ok { - metricdata = make(map[schema.MetricScope]*schema.JobMetric) - data[hostname][metric] = metricdata - } - - // output per host, metric and scope - scopeData, ok := metricdata[scope] - if !ok { - scopeData = &schema.JobMetric{ - Unit: metricConfig.Unit, - Timestep: metricConfig.Timestep, - Series: []schema.Series{pdb.RowToSeries(from, step, steps, row)}, - } - data[hostname][metric][scope] = scopeData - } - } - } - } - t1 := time.Since(t0) - cclog.Debugf("LoadNodeListData of %v nodes took %s", len(data), t1) - return data, nil -} diff --git a/internal/metricdata/utils.go b/internal/metricdata/utils.go deleted file mode 100644 index 21dfbcac..00000000 --- a/internal/metricdata/utils.go +++ /dev/null @@ -1,118 +0,0 @@ -// Copyright (C) NHR@FAU, University Erlangen-Nuremberg. -// All rights reserved. This file is part of cc-backend. -// Use of this source code is governed by a MIT-style -// license that can be found in the LICENSE file. - -package metricdata - -import ( - "context" - "encoding/json" - "time" - - "github.com/ClusterCockpit/cc-lib/v2/schema" -) - -var TestLoadDataCallback func(job *schema.Job, metrics []string, scopes []schema.MetricScope, ctx context.Context, resolution int) (schema.JobData, error) = func(job *schema.Job, metrics []string, scopes []schema.MetricScope, ctx context.Context, resolution int) (schema.JobData, error) { - panic("TODO") -} - -// TestMetricDataRepository is only a mock for unit-testing. 
-type TestMetricDataRepository struct{} - -func (tmdr *TestMetricDataRepository) Init(_ json.RawMessage) error { - return nil -} - -func (tmdr *TestMetricDataRepository) LoadData( - job *schema.Job, - metrics []string, - scopes []schema.MetricScope, - ctx context.Context, - resolution int, -) (schema.JobData, error) { - return TestLoadDataCallback(job, metrics, scopes, ctx, resolution) -} - -func (tmdr *TestMetricDataRepository) LoadStats( - job *schema.Job, - metrics []string, - ctx context.Context, -) (map[string]map[string]schema.MetricStatistics, error) { - panic("TODO") -} - -func (tmdr *TestMetricDataRepository) LoadScopedStats( - job *schema.Job, - metrics []string, - scopes []schema.MetricScope, - ctx context.Context, -) (schema.ScopedJobStats, error) { - panic("TODO") -} - -func (tmdr *TestMetricDataRepository) LoadNodeData( - cluster string, - metrics, nodes []string, - scopes []schema.MetricScope, - from, to time.Time, - ctx context.Context, -) (map[string]map[string][]*schema.JobMetric, error) { - panic("TODO") -} - -func (tmdr *TestMetricDataRepository) LoadNodeListData( - cluster, subCluster string, - nodes []string, - metrics []string, - scopes []schema.MetricScope, - resolution int, - from, to time.Time, - ctx context.Context, -) (map[string]schema.JobData, error) { - panic("TODO") -} - -func DeepCopy(jdTemp schema.JobData) schema.JobData { - jd := make(schema.JobData, len(jdTemp)) - for k, v := range jdTemp { - jd[k] = make(map[schema.MetricScope]*schema.JobMetric, len(jdTemp[k])) - for k_, v_ := range v { - jd[k][k_] = new(schema.JobMetric) - jd[k][k_].Series = make([]schema.Series, len(v_.Series)) - for i := 0; i < len(v_.Series); i += 1 { - jd[k][k_].Series[i].Data = make([]schema.Float, len(v_.Series[i].Data)) - copy(jd[k][k_].Series[i].Data, v_.Series[i].Data) - jd[k][k_].Series[i].Hostname = v_.Series[i].Hostname - jd[k][k_].Series[i].Id = v_.Series[i].Id - jd[k][k_].Series[i].Statistics.Avg = v_.Series[i].Statistics.Avg - jd[k][k_].Series[i].Statistics.Min = v_.Series[i].Statistics.Min - jd[k][k_].Series[i].Statistics.Max = v_.Series[i].Statistics.Max - } - jd[k][k_].Timestep = v_.Timestep - jd[k][k_].Unit.Base = v_.Unit.Base - jd[k][k_].Unit.Prefix = v_.Unit.Prefix - if v_.StatisticsSeries != nil { - // Init Slices - jd[k][k_].StatisticsSeries = new(schema.StatsSeries) - jd[k][k_].StatisticsSeries.Max = make([]schema.Float, len(v_.StatisticsSeries.Max)) - jd[k][k_].StatisticsSeries.Min = make([]schema.Float, len(v_.StatisticsSeries.Min)) - jd[k][k_].StatisticsSeries.Median = make([]schema.Float, len(v_.StatisticsSeries.Median)) - jd[k][k_].StatisticsSeries.Mean = make([]schema.Float, len(v_.StatisticsSeries.Mean)) - // Copy Data - copy(jd[k][k_].StatisticsSeries.Max, v_.StatisticsSeries.Max) - copy(jd[k][k_].StatisticsSeries.Min, v_.StatisticsSeries.Min) - copy(jd[k][k_].StatisticsSeries.Median, v_.StatisticsSeries.Median) - copy(jd[k][k_].StatisticsSeries.Mean, v_.StatisticsSeries.Mean) - // Handle Percentiles - for k__, v__ := range v_.StatisticsSeries.Percentiles { - jd[k][k_].StatisticsSeries.Percentiles[k__] = make([]schema.Float, len(v__)) - copy(jd[k][k_].StatisticsSeries.Percentiles[k__], v__) - } - } else { - jd[k][k_].StatisticsSeries = v_.StatisticsSeries - } - } - } - return jd -} diff --git a/internal/metricdispatch/dataLoader.go b/internal/metricdispatch/dataLoader.go new file mode 100644 index 00000000..8bfebbd6 --- /dev/null +++ b/internal/metricdispatch/dataLoader.go @@ -0,0 +1,490 @@ +// Copyright (C) NHR@FAU, University Erlangen-Nuremberg. 
+// All rights reserved. This file is part of cc-backend. +// Use of this source code is governed by a MIT-style +// license that can be found in the LICENSE file. + +// Package metricdispatch provides a unified interface for loading and caching job metric data. +// +// This package serves as a central dispatcher that routes metric data requests to the appropriate +// backend based on job state. For running jobs, data is fetched from the metric store (e.g., cc-metric-store). +// For completed jobs, data is retrieved from the file-based job archive. +// +// # Key Features +// +// - Automatic backend selection based on job state (running vs. archived) +// - LRU cache for performance optimization (128 MB default cache size) +// - Data resampling using Largest Triangle Three Bucket algorithm for archived data +// - Automatic statistics series generation for jobs with many nodes +// - Support for scoped metrics (node, socket, accelerator, core) +// +// # Cache Behavior +// +// Cached data has different TTL (time-to-live) values depending on job state: +// - Running jobs: 2 minutes (data changes frequently) +// - Completed jobs: 5 hours (data is static) +// +// The cache key is based on job ID, state, requested metrics, scopes, and resolution. +// +// # Usage +// +// The primary entry point is LoadData, which automatically handles both running and archived jobs: +// +// jobData, err := metricdispatch.LoadData(job, metrics, scopes, ctx, resolution) +// if err != nil { +// // Handle error +// } +// +// For statistics only, use LoadJobStats, LoadScopedJobStats, or LoadAverages depending on the required format. +package metricdispatch + +import ( + "context" + "fmt" + "math" + "time" + + "github.com/ClusterCockpit/cc-backend/internal/config" + "github.com/ClusterCockpit/cc-backend/internal/metricstore" + "github.com/ClusterCockpit/cc-backend/pkg/archive" + cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger" + "github.com/ClusterCockpit/cc-lib/v2/lrucache" + "github.com/ClusterCockpit/cc-lib/v2/resampler" + "github.com/ClusterCockpit/cc-lib/v2/schema" +) + +// cache is an LRU cache with 128 MB capacity for storing loaded job metric data. +// The cache reduces load on both the metric store and archive backends. +var cache *lrucache.Cache = lrucache.New(128 * 1024 * 1024) + +// cacheKey generates a unique cache key for a job's metric data based on job ID, state, +// requested metrics, scopes, and resolution. Duration and StartTime are intentionally excluded +// because job.ID is more unique and the cache TTL ensures entries don't persist indefinitely. +func cacheKey( + job *schema.Job, + metrics []string, + scopes []schema.MetricScope, + resolution int, +) string { + return fmt.Sprintf("%d(%s):[%v],[%v]-%d", + job.ID, job.State, metrics, scopes, resolution) +} + +// LoadData retrieves metric data for a job from the appropriate backend (memory store for running jobs, +// archive for completed jobs) and applies caching, resampling, and statistics generation as needed. +// +// For running jobs or when archive is disabled, data is fetched from the metric store. +// For completed archived jobs, data is loaded from the job archive and resampled if needed. 
+// +// Parameters: +// - job: The job for which to load metric data +// - metrics: List of metric names to load (nil loads all metrics for the cluster) +// - scopes: Metric scopes to include (nil defaults to node scope) +// - ctx: Context for cancellation and timeouts +// - resolution: Target number of data points for resampling (only applies to archived data) +// +// Returns the loaded job data and any error encountered. For partial errors (some metrics failed), +// the function returns the successfully loaded data with a warning logged. +func LoadData(job *schema.Job, + metrics []string, + scopes []schema.MetricScope, + ctx context.Context, + resolution int, +) (schema.JobData, error) { + data := cache.Get(cacheKey(job, metrics, scopes, resolution), func() (_ any, ttl time.Duration, size int) { + var jd schema.JobData + var err error + + if job.State == schema.JobStateRunning || + job.MonitoringStatus == schema.MonitoringStatusRunningOrArchiving || + config.Keys.DisableArchive { + + if scopes == nil { + scopes = append(scopes, schema.MetricScopeNode) + } + + if metrics == nil { + cluster := archive.GetCluster(job.Cluster) + for _, mc := range cluster.MetricConfig { + metrics = append(metrics, mc.Name) + } + } + + jd, err = metricstore.LoadData(job, metrics, scopes, ctx, resolution) + if err != nil { + if len(jd) != 0 { + cclog.Warnf("partial error loading metrics from store for job %d (user: %s, project: %s): %s", + job.JobID, job.User, job.Project, err.Error()) + } else { + cclog.Errorf("failed to load job data from metric store for job %d (user: %s, project: %s): %s", + job.JobID, job.User, job.Project, err.Error()) + return err, 0, 0 + } + } + size = jd.Size() + } else { + var jdTemp schema.JobData + jdTemp, err = archive.GetHandle().LoadJobData(job) + if err != nil { + cclog.Errorf("failed to load job data from archive for job %d (user: %s, project: %s): %s", + job.JobID, job.User, job.Project, err.Error()) + return err, 0, 0 + } + + jd = deepCopy(jdTemp) + + // Resample archived data using Largest Triangle Three Bucket algorithm to reduce data points + // to the requested resolution, improving transfer performance and client-side rendering. + for _, v := range jd { + for _, v_ := range v { + timestep := int64(0) + for i := 0; i < len(v_.Series); i += 1 { + v_.Series[i].Data, timestep, err = resampler.LargestTriangleThreeBucket(v_.Series[i].Data, int64(v_.Timestep), int64(resolution)) + if err != nil { + return err, 0, 0 + } + } + v_.Timestep = int(timestep) + } + } + + // Filter job data to only include requested metrics and scopes, avoiding unnecessary data transfer. + if metrics != nil || scopes != nil { + if metrics == nil { + metrics = make([]string, 0, len(jd)) + for k := range jd { + metrics = append(metrics, k) + } + } + + res := schema.JobData{} + for _, metric := range metrics { + if perscope, ok := jd[metric]; ok { + if len(perscope) > 1 { + subset := make(map[schema.MetricScope]*schema.JobMetric) + for _, scope := range scopes { + if jm, ok := perscope[scope]; ok { + subset[scope] = jm + } + } + + if len(subset) > 0 { + perscope = subset + } + } + + res[metric] = perscope + } + } + jd = res + } + size = jd.Size() + } + + ttl = 5 * time.Hour + if job.State == schema.JobStateRunning { + ttl = 2 * time.Minute + } + + // Generate statistics series for jobs with many nodes to enable min/median/max graphs + // instead of overwhelming the UI with individual node lines. Note that newly calculated + // statistics use min/median/max, while archived statistics may use min/mean/max. 
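+		// For example, with the threshold of 15 below, a 64-node job at node scope
+		// gains an additional min/median/max band, while a 12-node job keeps only
+		// its individual per-node lines.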
+	const maxSeriesSize int = 15
+	for _, scopes := range jd {
+		for _, jm := range scopes {
+			if jm.StatisticsSeries != nil || len(jm.Series) <= maxSeriesSize {
+				continue
+			}
+
+			jm.AddStatisticsSeries()
+		}
+	}
+
+	nodeScopeRequested := false
+	for _, scope := range scopes {
+		if scope == schema.MetricScopeNode {
+			nodeScopeRequested = true
+		}
+	}
+
+	if nodeScopeRequested {
+		jd.AddNodeScope("flops_any")
+		jd.AddNodeScope("mem_bw")
+	}
+
+	// Round Resulting Stat Values
+	jd.RoundMetricStats()
+
+	return jd, ttl, size
+	})
+
+	if err, ok := data.(error); ok {
+		cclog.Errorf("error in cached dataset for job %d: %s", job.JobID, err.Error())
+		return nil, err
+	}
+
+	return data.(schema.JobData), nil
+}
+
+// LoadAverages appends one aggregate value per requested metric to the corresponding data slice.
+// For completed jobs, it uses the pre-calculated averages from the job archive. For running jobs,
+// it sums the per-node averages loaded from the metric store (see the #166 comments).
+func LoadAverages(
+	job *schema.Job,
+	metrics []string,
+	data [][]schema.Float,
+	ctx context.Context,
+) error {
+	if job.State != schema.JobStateRunning && !config.Keys.DisableArchive {
+		return archive.LoadAveragesFromArchive(job, metrics, data) // #166 change also here?
+	}
+
+	stats, err := metricstore.LoadStats(job, metrics, ctx)
+	if err != nil {
+		cclog.Errorf("failed to load statistics from metric store for job %d (user: %s, project: %s): %s",
+			job.JobID, job.User, job.Project, err.Error())
+		return err
+	}
+
+	for i, m := range metrics {
+		nodes, ok := stats[m]
+		if !ok {
+			data[i] = append(data[i], schema.NaN)
+			continue
+		}
+
+		sum := 0.0
+		for _, node := range nodes {
+			sum += node.Avg
+		}
+		data[i] = append(data[i], schema.Float(sum))
+	}
+
+	return nil
+}
+
+// LoadScopedJobStats retrieves job statistics organized by metric scope (node, socket, core, accelerator).
+// For running jobs, statistics are computed from the metric store. For completed jobs, pre-calculated
+// statistics are loaded from the job archive.
+func LoadScopedJobStats(
+	job *schema.Job,
+	metrics []string,
+	scopes []schema.MetricScope,
+	ctx context.Context,
+) (schema.ScopedJobStats, error) {
+	if job.State != schema.JobStateRunning && !config.Keys.DisableArchive {
+		return archive.LoadScopedStatsFromArchive(job, metrics, scopes)
+	}
+
+	scopedStats, err := metricstore.LoadScopedStats(job, metrics, scopes, ctx)
+	if err != nil {
+		cclog.Errorf("failed to load scoped statistics from metric store for job %d (user: %s, project: %s): %s",
+			job.JobID, job.User, job.Project, err.Error())
+		return nil, err
+	}
+
+	return scopedStats, nil
+}
+
+// LoadJobStats retrieves aggregated statistics (min/avg/max) for each requested metric across all job nodes.
+// For running jobs, statistics are computed from the metric store. For completed jobs, pre-calculated
+// statistics are loaded from the job archive.
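+//
+// A minimal usage sketch; the metric names are illustrative:
+//
+//	stats, err := LoadJobStats(job, []string{"cpu_load", "mem_used"}, ctx)
+//	if err != nil {
+//		// handle error
+//	}
+//	cpuStats := stats["cpu_load"] // schema.MetricStatistics with Min/Avg/Max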
+func LoadJobStats(
+	job *schema.Job,
+	metrics []string,
+	ctx context.Context,
+) (map[string]schema.MetricStatistics, error) {
+	if job.State != schema.JobStateRunning && !config.Keys.DisableArchive {
+		return archive.LoadStatsFromArchive(job, metrics)
+	}
+
+	data := make(map[string]schema.MetricStatistics, len(metrics))
+
+	stats, err := metricstore.LoadStats(job, metrics, ctx)
+	if err != nil {
+		cclog.Errorf("failed to load statistics from metric store for job %d (user: %s, project: %s): %s",
+			job.JobID, job.User, job.Project, err.Error())
+		return data, err
+	}
+
+	for _, m := range metrics {
+		nodes, ok := stats[m]
+		if !ok {
+			data[m] = schema.MetricStatistics{Min: 0.0, Avg: 0.0, Max: 0.0}
+			continue
+		}
+
+		// Start min/max at +/-Inf so the first node's values are taken as-is;
+		// initializing min to 0.0 would clamp all-positive metrics to zero.
+		sum, min, max := 0.0, math.Inf(1), math.Inf(-1)
+		for _, node := range nodes {
+			sum += node.Avg
+			min = math.Min(min, node.Min)
+			max = math.Max(max, node.Max)
+		}
+
+		data[m] = schema.MetricStatistics{
+			Avg: (math.Round((sum/float64(job.NumNodes))*100) / 100),
+			Min: (math.Round(min*100) / 100),
+			Max: (math.Round(max*100) / 100),
+		}
+	}
+
+	return data, nil
+}
+
+// LoadNodeData retrieves metric data for specific nodes in a cluster within a time range.
+// This is used for node monitoring views and system status pages. Data is always fetched from
+// the metric store (not the archive) since it's for current/recent node status monitoring.
+//
+// Returns a nested map structure: node -> metric -> scoped data.
+func LoadNodeData(
+	cluster string,
+	metrics, nodes []string,
+	scopes []schema.MetricScope,
+	from, to time.Time,
+	ctx context.Context,
+) (map[string]map[string][]*schema.JobMetric, error) {
+	if metrics == nil {
+		for _, m := range archive.GetCluster(cluster).MetricConfig {
+			metrics = append(metrics, m.Name)
+		}
+	}
+
+	data, err := metricstore.LoadNodeData(cluster, metrics, nodes, scopes, from, to, ctx)
+	if err != nil {
+		if len(data) != 0 {
+			cclog.Warnf("partial error loading node data from metric store for cluster %s: %s", cluster, err.Error())
+		} else {
+			cclog.Errorf("failed to load node data from metric store for cluster %s: %s", cluster, err.Error())
+			return nil, err
+		}
+	}
+
+	if data == nil {
+		return nil, fmt.Errorf("metric store for cluster '%s' does not support node data queries", cluster)
+	}
+
+	return data, nil
+}
+
+// LoadNodeListData retrieves time-series metric data for multiple nodes within a time range,
+// with optional resampling and automatic statistics generation for large datasets.
+// This is used for comparing multiple nodes or displaying node status over time.
+//
+// Returns a map of node names to their job-like metric data structures.
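+//
+// A minimal usage sketch; cluster, subcluster, node, and metric names are illustrative:
+//
+//	data, err := LoadNodeListData("fritz", "main", []string{"f0101", "f0102"},
+//		[]string{"cpu_load"}, []schema.MetricScope{schema.MetricScopeNode},
+//		600, from, to, ctx)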
+func LoadNodeListData( + cluster, subCluster string, + nodes []string, + metrics []string, + scopes []schema.MetricScope, + resolution int, + from, to time.Time, + ctx context.Context, +) (map[string]schema.JobData, error) { + if metrics == nil { + for _, m := range archive.GetCluster(cluster).MetricConfig { + metrics = append(metrics, m.Name) + } + } + + data, err := metricstore.LoadNodeListData(cluster, subCluster, nodes, metrics, scopes, resolution, from, to, ctx) + if err != nil { + if len(data) != 0 { + cclog.Warnf("partial error loading node list data from metric store for cluster %s, subcluster %s: %s", + cluster, subCluster, err.Error()) + } else { + cclog.Errorf("failed to load node list data from metric store for cluster %s, subcluster %s: %s", + cluster, subCluster, err.Error()) + return nil, err + } + } + + // Generate statistics series for datasets with many series to improve visualization performance. + // Statistics are calculated as min/median/max. + const maxSeriesSize int = 8 + for _, jd := range data { + for _, scopes := range jd { + for _, jm := range scopes { + if jm.StatisticsSeries != nil || len(jm.Series) < maxSeriesSize { + continue + } + jm.AddStatisticsSeries() + } + } + } + + if data == nil { + return nil, fmt.Errorf("metric store for cluster '%s' does not support node list queries", cluster) + } + + return data, nil +} + +// deepCopy creates a deep copy of JobData to prevent cache corruption when modifying +// archived data (e.g., during resampling). This ensures the cached archive data remains +// immutable while allowing per-request transformations. +func deepCopy(source schema.JobData) schema.JobData { + result := make(schema.JobData, len(source)) + + for metricName, scopeMap := range source { + result[metricName] = make(map[schema.MetricScope]*schema.JobMetric, len(scopeMap)) + + for scope, jobMetric := range scopeMap { + result[metricName][scope] = copyJobMetric(jobMetric) + } + } + + return result +} + +func copyJobMetric(src *schema.JobMetric) *schema.JobMetric { + dst := &schema.JobMetric{ + Timestep: src.Timestep, + Unit: src.Unit, + Series: make([]schema.Series, len(src.Series)), + } + + for i := range src.Series { + dst.Series[i] = copySeries(&src.Series[i]) + } + + if src.StatisticsSeries != nil { + dst.StatisticsSeries = copyStatisticsSeries(src.StatisticsSeries) + } + + return dst +} + +func copySeries(src *schema.Series) schema.Series { + dst := schema.Series{ + Hostname: src.Hostname, + Id: src.Id, + Statistics: src.Statistics, + Data: make([]schema.Float, len(src.Data)), + } + + copy(dst.Data, src.Data) + return dst +} + +func copyStatisticsSeries(src *schema.StatsSeries) *schema.StatsSeries { + dst := &schema.StatsSeries{ + Min: make([]schema.Float, len(src.Min)), + Mean: make([]schema.Float, len(src.Mean)), + Median: make([]schema.Float, len(src.Median)), + Max: make([]schema.Float, len(src.Max)), + } + + copy(dst.Min, src.Min) + copy(dst.Mean, src.Mean) + copy(dst.Median, src.Median) + copy(dst.Max, src.Max) + + if len(src.Percentiles) > 0 { + dst.Percentiles = make(map[int][]schema.Float, len(src.Percentiles)) + for percentile, values := range src.Percentiles { + dst.Percentiles[percentile] = make([]schema.Float, len(values)) + copy(dst.Percentiles[percentile], values) + } + } + + return dst +} diff --git a/internal/metricdispatch/dataLoader_test.go b/internal/metricdispatch/dataLoader_test.go new file mode 100644 index 00000000..c4841f8d --- /dev/null +++ b/internal/metricdispatch/dataLoader_test.go @@ -0,0 +1,125 @@ +// Copyright (C) 
NHR@FAU, University Erlangen-Nuremberg. +// All rights reserved. This file is part of cc-backend. +// Use of this source code is governed by a MIT-style +// license that can be found in the LICENSE file. + +package metricdispatch + +import ( + "testing" + + "github.com/ClusterCockpit/cc-lib/v2/schema" +) + +func TestDeepCopy(t *testing.T) { + nodeId := "0" + original := schema.JobData{ + "cpu_load": { + schema.MetricScopeNode: &schema.JobMetric{ + Timestep: 60, + Unit: schema.Unit{Base: "load", Prefix: ""}, + Series: []schema.Series{ + { + Hostname: "node001", + Id: &nodeId, + Data: []schema.Float{1.0, 2.0, 3.0}, + Statistics: schema.MetricStatistics{ + Min: 1.0, + Avg: 2.0, + Max: 3.0, + }, + }, + }, + StatisticsSeries: &schema.StatsSeries{ + Min: []schema.Float{1.0, 1.5, 2.0}, + Mean: []schema.Float{2.0, 2.5, 3.0}, + Median: []schema.Float{2.0, 2.5, 3.0}, + Max: []schema.Float{3.0, 3.5, 4.0}, + Percentiles: map[int][]schema.Float{ + 25: {1.5, 2.0, 2.5}, + 75: {2.5, 3.0, 3.5}, + }, + }, + }, + }, + } + + copied := deepCopy(original) + + original["cpu_load"][schema.MetricScopeNode].Series[0].Data[0] = 999.0 + original["cpu_load"][schema.MetricScopeNode].StatisticsSeries.Min[0] = 888.0 + original["cpu_load"][schema.MetricScopeNode].StatisticsSeries.Percentiles[25][0] = 777.0 + + if copied["cpu_load"][schema.MetricScopeNode].Series[0].Data[0] != 1.0 { + t.Errorf("Series data was not deeply copied: got %v, want 1.0", + copied["cpu_load"][schema.MetricScopeNode].Series[0].Data[0]) + } + + if copied["cpu_load"][schema.MetricScopeNode].StatisticsSeries.Min[0] != 1.0 { + t.Errorf("StatisticsSeries was not deeply copied: got %v, want 1.0", + copied["cpu_load"][schema.MetricScopeNode].StatisticsSeries.Min[0]) + } + + if copied["cpu_load"][schema.MetricScopeNode].StatisticsSeries.Percentiles[25][0] != 1.5 { + t.Errorf("Percentiles was not deeply copied: got %v, want 1.5", + copied["cpu_load"][schema.MetricScopeNode].StatisticsSeries.Percentiles[25][0]) + } + + if copied["cpu_load"][schema.MetricScopeNode].Timestep != 60 { + t.Errorf("Timestep not copied correctly: got %v, want 60", + copied["cpu_load"][schema.MetricScopeNode].Timestep) + } + + if copied["cpu_load"][schema.MetricScopeNode].Series[0].Hostname != "node001" { + t.Errorf("Hostname not copied correctly: got %v, want node001", + copied["cpu_load"][schema.MetricScopeNode].Series[0].Hostname) + } +} + +func TestDeepCopyNilStatisticsSeries(t *testing.T) { + original := schema.JobData{ + "mem_used": { + schema.MetricScopeNode: &schema.JobMetric{ + Timestep: 60, + Series: []schema.Series{ + { + Hostname: "node001", + Data: []schema.Float{1.0, 2.0}, + }, + }, + StatisticsSeries: nil, + }, + }, + } + + copied := deepCopy(original) + + if copied["mem_used"][schema.MetricScopeNode].StatisticsSeries != nil { + t.Errorf("StatisticsSeries should be nil, got %v", + copied["mem_used"][schema.MetricScopeNode].StatisticsSeries) + } +} + +func TestDeepCopyEmptyPercentiles(t *testing.T) { + original := schema.JobData{ + "cpu_load": { + schema.MetricScopeNode: &schema.JobMetric{ + Timestep: 60, + Series: []schema.Series{}, + StatisticsSeries: &schema.StatsSeries{ + Min: []schema.Float{1.0}, + Mean: []schema.Float{2.0}, + Median: []schema.Float{2.0}, + Max: []schema.Float{3.0}, + Percentiles: nil, + }, + }, + }, + } + + copied := deepCopy(original) + + if copied["cpu_load"][schema.MetricScopeNode].StatisticsSeries.Percentiles != nil { + t.Errorf("Percentiles should be nil when source is nil/empty") + } +} diff --git a/internal/memorystore/api.go 
b/internal/metricstore/api.go similarity index 98% rename from internal/memorystore/api.go rename to internal/metricstore/api.go index 41c53a18..d8a2ea82 100644 --- a/internal/memorystore/api.go +++ b/internal/metricstore/api.go @@ -3,10 +3,11 @@ // Use of this source code is governed by a MIT-style // license that can be found in the LICENSE file. -package memorystore +package metricstore import ( "errors" + "fmt" "math" "github.com/ClusterCockpit/cc-lib/v2/schema" @@ -124,6 +125,9 @@ func FetchData(req APIQueryRequest) (*APIQueryResponse, error) { req.WithData = true ms := GetMemoryStore() + if ms == nil { + return nil, fmt.Errorf("memorystore not initialized") + } response := APIQueryResponse{ Results: make([][]APIMetricData, 0, len(req.Queries)), diff --git a/internal/memorystore/archive.go b/internal/metricstore/archive.go similarity index 99% rename from internal/memorystore/archive.go rename to internal/metricstore/archive.go index fc46dac6..972769fd 100644 --- a/internal/memorystore/archive.go +++ b/internal/metricstore/archive.go @@ -3,7 +3,7 @@ // Use of this source code is governed by a MIT-style // license that can be found in the LICENSE file. -package memorystore +package metricstore import ( "archive/zip" diff --git a/internal/memorystore/avroCheckpoint.go b/internal/metricstore/avroCheckpoint.go similarity index 99% rename from internal/memorystore/avroCheckpoint.go rename to internal/metricstore/avroCheckpoint.go index b0b0cf42..275a64bd 100644 --- a/internal/memorystore/avroCheckpoint.go +++ b/internal/metricstore/avroCheckpoint.go @@ -3,7 +3,7 @@ // Use of this source code is governed by a MIT-style // license that can be found in the LICENSE file. -package memorystore +package metricstore import ( "bufio" diff --git a/internal/memorystore/avroHelper.go b/internal/metricstore/avroHelper.go similarity index 99% rename from internal/memorystore/avroHelper.go rename to internal/metricstore/avroHelper.go index 93a293bd..5587a58d 100644 --- a/internal/memorystore/avroHelper.go +++ b/internal/metricstore/avroHelper.go @@ -3,7 +3,7 @@ // Use of this source code is governed by a MIT-style // license that can be found in the LICENSE file. -package memorystore +package metricstore import ( "context" diff --git a/internal/memorystore/avroStruct.go b/internal/metricstore/avroStruct.go similarity index 99% rename from internal/memorystore/avroStruct.go rename to internal/metricstore/avroStruct.go index 2643a9a7..78a8d137 100644 --- a/internal/memorystore/avroStruct.go +++ b/internal/metricstore/avroStruct.go @@ -3,7 +3,7 @@ // Use of this source code is governed by a MIT-style // license that can be found in the LICENSE file. -package memorystore +package metricstore import ( "sync" diff --git a/internal/memorystore/buffer.go b/internal/metricstore/buffer.go similarity index 99% rename from internal/memorystore/buffer.go rename to internal/metricstore/buffer.go index 15e29b3a..94d3ce76 100644 --- a/internal/memorystore/buffer.go +++ b/internal/metricstore/buffer.go @@ -3,7 +3,7 @@ // Use of this source code is governed by a MIT-style // license that can be found in the LICENSE file. 
-package memorystore +package metricstore import ( "errors" diff --git a/internal/memorystore/checkpoint.go b/internal/metricstore/checkpoint.go similarity index 99% rename from internal/memorystore/checkpoint.go rename to internal/metricstore/checkpoint.go index c48c2fd8..27d611c4 100644 --- a/internal/memorystore/checkpoint.go +++ b/internal/metricstore/checkpoint.go @@ -3,7 +3,7 @@ // Use of this source code is governed by a MIT-style // license that can be found in the LICENSE file. -package memorystore +package metricstore import ( "bufio" diff --git a/internal/memorystore/config.go b/internal/metricstore/config.go similarity index 98% rename from internal/memorystore/config.go rename to internal/metricstore/config.go index fbd62341..97f16c46 100644 --- a/internal/memorystore/config.go +++ b/internal/metricstore/config.go @@ -3,7 +3,7 @@ // Use of this source code is governed by a MIT-style // license that can be found in the LICENSE file. -package memorystore +package metricstore import ( "fmt" @@ -19,8 +19,6 @@ const ( DefaultAvroCheckpointInterval = time.Minute ) -var InternalCCMSFlag bool = false - type MetricStoreConfig struct { // Number of concurrent workers for checkpoint and archive operations. // If not set or 0, defaults to min(runtime.NumCPU()/2+1, 10) diff --git a/internal/memorystore/configSchema.go b/internal/metricstore/configSchema.go similarity index 99% rename from internal/memorystore/configSchema.go rename to internal/metricstore/configSchema.go index 2616edc6..f1a20a73 100644 --- a/internal/memorystore/configSchema.go +++ b/internal/metricstore/configSchema.go @@ -3,7 +3,7 @@ // Use of this source code is governed by a MIT-style // license that can be found in the LICENSE file. -package memorystore +package metricstore const configSchema = `{ "type": "object", diff --git a/internal/memorystore/debug.go b/internal/metricstore/debug.go similarity index 99% rename from internal/memorystore/debug.go rename to internal/metricstore/debug.go index b56cf254..50c91e08 100644 --- a/internal/memorystore/debug.go +++ b/internal/metricstore/debug.go @@ -3,7 +3,7 @@ // Use of this source code is governed by a MIT-style // license that can be found in the LICENSE file. -package memorystore +package metricstore import ( "bufio" diff --git a/internal/memorystore/healthcheck.go b/internal/metricstore/healthcheck.go similarity index 99% rename from internal/memorystore/healthcheck.go rename to internal/metricstore/healthcheck.go index b1052f3b..2a49c47a 100644 --- a/internal/memorystore/healthcheck.go +++ b/internal/metricstore/healthcheck.go @@ -3,7 +3,7 @@ // Use of this source code is governed by a MIT-style // license that can be found in the LICENSE file. -package memorystore +package metricstore import ( "bufio" diff --git a/internal/memorystore/level.go b/internal/metricstore/level.go similarity index 99% rename from internal/memorystore/level.go rename to internal/metricstore/level.go index bce2a7a6..d46f893a 100644 --- a/internal/memorystore/level.go +++ b/internal/metricstore/level.go @@ -3,7 +3,7 @@ // Use of this source code is governed by a MIT-style // license that can be found in the LICENSE file. 
-package memorystore +package metricstore import ( "sync" diff --git a/internal/memorystore/lineprotocol.go b/internal/metricstore/lineprotocol.go similarity index 99% rename from internal/memorystore/lineprotocol.go rename to internal/metricstore/lineprotocol.go index ca8cc811..cc59e213 100644 --- a/internal/memorystore/lineprotocol.go +++ b/internal/metricstore/lineprotocol.go @@ -3,7 +3,7 @@ // Use of this source code is governed by a MIT-style // license that can be found in the LICENSE file. -package memorystore +package metricstore import ( "context" diff --git a/internal/memorystore/memorystore.go b/internal/metricstore/memorystore.go similarity index 99% rename from internal/memorystore/memorystore.go rename to internal/metricstore/memorystore.go index 7c5ea0eb..14a02fcd 100644 --- a/internal/memorystore/memorystore.go +++ b/internal/metricstore/memorystore.go @@ -3,7 +3,7 @@ // Use of this source code is governed by a MIT-style // license that can be found in the LICENSE file. -// Package memorystore provides an efficient in-memory time-series metric storage system +// Package metricstore provides an efficient in-memory time-series metric storage system // with support for hierarchical data organization, checkpointing, and archiving. // // The package organizes metrics in a tree structure (cluster → host → component) and @@ -17,7 +17,7 @@ // - Concurrent checkpoint/archive workers // - Support for sum and average aggregation // - NATS integration for metric ingestion -package memorystore +package metricstore import ( "bytes" diff --git a/internal/memorystore/memorystore_test.go b/internal/metricstore/memorystore_test.go similarity index 99% rename from internal/memorystore/memorystore_test.go rename to internal/metricstore/memorystore_test.go index 57ea6938..29379d21 100644 --- a/internal/memorystore/memorystore_test.go +++ b/internal/metricstore/memorystore_test.go @@ -3,7 +3,7 @@ // Use of this source code is governed by a MIT-style // license that can be found in the LICENSE file. -package memorystore +package metricstore import ( "testing" diff --git a/internal/metricdata/cc-metric-store-internal.go b/internal/metricstore/query.go similarity index 87% rename from internal/metricdata/cc-metric-store-internal.go rename to internal/metricstore/query.go index 741ce358..78c78dd5 100644 --- a/internal/metricdata/cc-metric-store-internal.go +++ b/internal/metricstore/query.go @@ -3,56 +3,41 @@ // Use of this source code is governed by a MIT-style // license that can be found in the LICENSE file. -package metricdata +package metricstore import ( "context" - "encoding/json" "fmt" "strconv" "strings" "time" - "github.com/ClusterCockpit/cc-backend/internal/memorystore" "github.com/ClusterCockpit/cc-backend/pkg/archive" cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger" "github.com/ClusterCockpit/cc-lib/v2/schema" ) -// Bloat Code -type CCMetricStoreConfigInternal struct { - Kind string `json:"kind"` - Url string `json:"url"` - Token string `json:"token"` +// TestLoadDataCallback allows tests to override LoadData behavior +var TestLoadDataCallback func(job *schema.Job, metrics []string, scopes []schema.MetricScope, ctx context.Context, resolution int) (schema.JobData, error) - // If metrics are known to this MetricDataRepository under a different - // name than in the `metricConfig` section of the 'cluster.json', - // provide this optional mapping of local to remote name for this metric. 
- Renamings map[string]string `json:"metricRenamings"` -} - -// Bloat Code -type CCMetricStoreInternal struct{} - -// Bloat Code -func (ccms *CCMetricStoreInternal) Init(rawConfig json.RawMessage) error { - return nil -} - -func (ccms *CCMetricStoreInternal) LoadData( +func LoadData( job *schema.Job, metrics []string, scopes []schema.MetricScope, ctx context.Context, resolution int, ) (schema.JobData, error) { - queries, assignedScope, err := ccms.buildQueries(job, metrics, scopes, int64(resolution)) + if TestLoadDataCallback != nil { + return TestLoadDataCallback(job, metrics, scopes, ctx, resolution) + } + + queries, assignedScope, err := buildQueries(job, metrics, scopes, int64(resolution)) if err != nil { cclog.Errorf("Error while building queries for jobId %d, Metrics %v, Scopes %v: %s", job.JobID, metrics, scopes, err.Error()) return nil, err } - req := memorystore.APIQueryRequest{ + req := APIQueryRequest{ Cluster: job.Cluster, From: job.StartTime, To: job.StartTime + int64(job.Duration), @@ -61,7 +46,7 @@ func (ccms *CCMetricStoreInternal) LoadData( WithData: true, } - resBody, err := memorystore.FetchData(req) + resBody, err := FetchData(req) if err != nil { cclog.Errorf("Error while fetching data : %s", err.Error()) return nil, err @@ -149,13 +134,13 @@ var ( acceleratorString = string(schema.MetricScopeAccelerator) ) -func (ccms *CCMetricStoreInternal) buildQueries( +func buildQueries( job *schema.Job, metrics []string, scopes []schema.MetricScope, resolution int64, -) ([]memorystore.APIQuery, []schema.MetricScope, error) { - queries := make([]memorystore.APIQuery, 0, len(metrics)*len(scopes)*len(job.Resources)) +) ([]APIQuery, []schema.MetricScope, error) { + queries := make([]APIQuery, 0, len(metrics)*len(scopes)*len(job.Resources)) assignedScope := []schema.MetricScope{} subcluster, scerr := archive.GetSubCluster(job.Cluster, job.SubCluster) @@ -217,7 +202,7 @@ func (ccms *CCMetricStoreInternal) buildQueries( continue } - queries = append(queries, memorystore.APIQuery{ + queries = append(queries, APIQuery{ Metric: metric, Hostname: host.Hostname, Aggregate: false, @@ -235,7 +220,7 @@ func (ccms *CCMetricStoreInternal) buildQueries( continue } - queries = append(queries, memorystore.APIQuery{ + queries = append(queries, APIQuery{ Metric: metric, Hostname: host.Hostname, Aggregate: true, @@ -249,7 +234,7 @@ func (ccms *CCMetricStoreInternal) buildQueries( // HWThread -> HWThead if nativeScope == schema.MetricScopeHWThread && scope == schema.MetricScopeHWThread { - queries = append(queries, memorystore.APIQuery{ + queries = append(queries, APIQuery{ Metric: metric, Hostname: host.Hostname, Aggregate: false, @@ -265,7 +250,7 @@ func (ccms *CCMetricStoreInternal) buildQueries( if nativeScope == schema.MetricScopeHWThread && scope == schema.MetricScopeCore { cores, _ := topology.GetCoresFromHWThreads(hwthreads) for _, core := range cores { - queries = append(queries, memorystore.APIQuery{ + queries = append(queries, APIQuery{ Metric: metric, Hostname: host.Hostname, Aggregate: true, @@ -282,7 +267,7 @@ func (ccms *CCMetricStoreInternal) buildQueries( if nativeScope == schema.MetricScopeHWThread && scope == schema.MetricScopeSocket { sockets, _ := topology.GetSocketsFromHWThreads(hwthreads) for _, socket := range sockets { - queries = append(queries, memorystore.APIQuery{ + queries = append(queries, APIQuery{ Metric: metric, Hostname: host.Hostname, Aggregate: true, @@ -297,7 +282,7 @@ func (ccms *CCMetricStoreInternal) buildQueries( // HWThread -> Node if nativeScope == 
schema.MetricScopeHWThread && scope == schema.MetricScopeNode { - queries = append(queries, memorystore.APIQuery{ + queries = append(queries, APIQuery{ Metric: metric, Hostname: host.Hostname, Aggregate: true, @@ -312,7 +297,7 @@ func (ccms *CCMetricStoreInternal) buildQueries( // Core -> Core if nativeScope == schema.MetricScopeCore && scope == schema.MetricScopeCore { cores, _ := topology.GetCoresFromHWThreads(hwthreads) - queries = append(queries, memorystore.APIQuery{ + queries = append(queries, APIQuery{ Metric: metric, Hostname: host.Hostname, Aggregate: false, @@ -328,7 +313,7 @@ func (ccms *CCMetricStoreInternal) buildQueries( if nativeScope == schema.MetricScopeCore && scope == schema.MetricScopeSocket { sockets, _ := topology.GetSocketsFromCores(hwthreads) for _, socket := range sockets { - queries = append(queries, memorystore.APIQuery{ + queries = append(queries, APIQuery{ Metric: metric, Hostname: host.Hostname, Aggregate: true, @@ -344,7 +329,7 @@ func (ccms *CCMetricStoreInternal) buildQueries( // Core -> Node if nativeScope == schema.MetricScopeCore && scope == schema.MetricScopeNode { cores, _ := topology.GetCoresFromHWThreads(hwthreads) - queries = append(queries, memorystore.APIQuery{ + queries = append(queries, APIQuery{ Metric: metric, Hostname: host.Hostname, Aggregate: true, @@ -359,7 +344,7 @@ func (ccms *CCMetricStoreInternal) buildQueries( // MemoryDomain -> MemoryDomain if nativeScope == schema.MetricScopeMemoryDomain && scope == schema.MetricScopeMemoryDomain { sockets, _ := topology.GetMemoryDomainsFromHWThreads(hwthreads) - queries = append(queries, memorystore.APIQuery{ + queries = append(queries, APIQuery{ Metric: metric, Hostname: host.Hostname, Aggregate: false, @@ -374,7 +359,7 @@ func (ccms *CCMetricStoreInternal) buildQueries( // MemoryDoman -> Node if nativeScope == schema.MetricScopeMemoryDomain && scope == schema.MetricScopeNode { sockets, _ := topology.GetMemoryDomainsFromHWThreads(hwthreads) - queries = append(queries, memorystore.APIQuery{ + queries = append(queries, APIQuery{ Metric: metric, Hostname: host.Hostname, Aggregate: true, @@ -389,7 +374,7 @@ func (ccms *CCMetricStoreInternal) buildQueries( // Socket -> Socket if nativeScope == schema.MetricScopeSocket && scope == schema.MetricScopeSocket { sockets, _ := topology.GetSocketsFromHWThreads(hwthreads) - queries = append(queries, memorystore.APIQuery{ + queries = append(queries, APIQuery{ Metric: metric, Hostname: host.Hostname, Aggregate: false, @@ -404,7 +389,7 @@ func (ccms *CCMetricStoreInternal) buildQueries( // Socket -> Node if nativeScope == schema.MetricScopeSocket && scope == schema.MetricScopeNode { sockets, _ := topology.GetSocketsFromHWThreads(hwthreads) - queries = append(queries, memorystore.APIQuery{ + queries = append(queries, APIQuery{ Metric: metric, Hostname: host.Hostname, Aggregate: true, @@ -418,7 +403,7 @@ func (ccms *CCMetricStoreInternal) buildQueries( // Node -> Node if nativeScope == schema.MetricScopeNode && scope == schema.MetricScopeNode { - queries = append(queries, memorystore.APIQuery{ + queries = append(queries, APIQuery{ Metric: metric, Hostname: host.Hostname, Resolution: resolution, @@ -435,18 +420,18 @@ func (ccms *CCMetricStoreInternal) buildQueries( return queries, assignedScope, nil } -func (ccms *CCMetricStoreInternal) LoadStats( +func LoadStats( job *schema.Job, metrics []string, ctx context.Context, ) (map[string]map[string]schema.MetricStatistics, error) { - queries, _, err := ccms.buildQueries(job, metrics, 
[]schema.MetricScope{schema.MetricScopeNode}, 0) // #166 Add scope shere for analysis view accelerator normalization? + queries, _, err := buildQueries(job, metrics, []schema.MetricScope{schema.MetricScopeNode}, 0) // #166 Add scope here for analysis view accelerator normalization? if err != nil { cclog.Errorf("Error while building queries for jobId %d, Metrics %v: %s", job.JobID, metrics, err.Error()) return nil, err } - req := memorystore.APIQueryRequest{ + req := APIQueryRequest{ Cluster: job.Cluster, From: job.StartTime, To: job.StartTime + int64(job.Duration), @@ -455,7 +440,7 @@ func (ccms *CCMetricStoreInternal) LoadStats( WithData: false, } - resBody, err := memorystore.FetchData(req) + resBody, err := FetchData(req) if err != nil { cclog.Errorf("Error while fetching data : %s", err.Error()) return nil, err } @@ -492,20 +477,19 @@ func (ccms *CCMetricStoreInternal) LoadStats( return stats, nil } -// Used for Job-View Statistics Table -func (ccms *CCMetricStoreInternal) LoadScopedStats( +func LoadScopedStats( job *schema.Job, metrics []string, scopes []schema.MetricScope, ctx context.Context, ) (schema.ScopedJobStats, error) { - queries, assignedScope, err := ccms.buildQueries(job, metrics, scopes, 0) + queries, assignedScope, err := buildQueries(job, metrics, scopes, 0) if err != nil { cclog.Errorf("Error while building queries for jobId %d, Metrics %v, Scopes %v: %s", job.JobID, metrics, scopes, err.Error()) return nil, err } - req := memorystore.APIQueryRequest{ + req := APIQueryRequest{ Cluster: job.Cluster, From: job.StartTime, To: job.StartTime + int64(job.Duration), @@ -514,7 +498,7 @@ func (ccms *CCMetricStoreInternal) LoadScopedStats( WithData: false, } - resBody, err := memorystore.FetchData(req) + resBody, err := FetchData(req) if err != nil { cclog.Errorf("Error while fetching data : %s", err.Error()) return nil, err } @@ -583,15 +567,14 @@ func (ccms *CCMetricStoreInternal) LoadScopedStats( return scopedJobStats, nil } -// Used for Systems-View Node-Overview -func (ccms *CCMetricStoreInternal) LoadNodeData( +func LoadNodeData( cluster string, metrics, nodes []string, scopes []schema.MetricScope, from, to time.Time, ctx context.Context, ) (map[string]map[string][]*schema.JobMetric, error) { - req := memorystore.APIQueryRequest{ + req := APIQueryRequest{ Cluster: cluster, From: from.Unix(), To: to.Unix(), @@ -604,7 +587,7 @@ func (ccms *CCMetricStoreInternal) LoadNodeData( } else { for _, node := range nodes { for _, metric := range metrics { - req.Queries = append(req.Queries, memorystore.APIQuery{ + req.Queries = append(req.Queries, APIQuery{ Hostname: node, Metric: metric, Resolution: 0, // Default for Node Queries: Will return metric $Timestep Resolution }) } } - resBody, err := memorystore.FetchData(req) + resBody, err := FetchData(req) if err != nil { cclog.Errorf("Error while fetching data : %s", err.Error()) return nil, err } @@ -622,7 +605,7 @@ func (ccms *CCMetricStoreInternal) LoadNodeData( var errors []string data := make(map[string]map[string][]*schema.JobMetric) for i, res := range resBody.Results { - var query memorystore.APIQuery + var query APIQuery if resBody.Queries != nil { query = resBody.Queries[i] } else { @@ -673,8 +656,7 @@ func (ccms *CCMetricStoreInternal) LoadNodeData( return data, nil } -// Used for Systems-View Node-List -func (ccms *CCMetricStoreInternal) LoadNodeListData( +func LoadNodeListData( cluster, subCluster string, nodes []string, metrics []string, @@ -683,15 +665,14 @@
func (ccms *CCMetricStoreInternal) LoadNodeListData( from, to time.Time, ctx context.Context, ) (map[string]schema.JobData, error) { - // Note: Order of node data is not guaranteed after this point - queries, assignedScope, err := ccms.buildNodeQueries(cluster, subCluster, nodes, metrics, scopes, int64(resolution)) + queries, assignedScope, err := buildNodeQueries(cluster, subCluster, nodes, metrics, scopes, int64(resolution)) if err != nil { cclog.Errorf("Error while building node queries for Cluster %s, SubCLuster %s, Metrics %v, Scopes %v: %s", cluster, subCluster, metrics, scopes, err.Error()) return nil, err } - req := memorystore.APIQueryRequest{ + req := APIQueryRequest{ Cluster: cluster, Queries: queries, From: from.Unix(), @@ -700,7 +681,7 @@ func (ccms *CCMetricStoreInternal) LoadNodeListData( WithData: true, } - resBody, err := memorystore.FetchData(req) + resBody, err := FetchData(req) if err != nil { cclog.Errorf("Error while fetching data : %s", err.Error()) return nil, err @@ -709,7 +690,7 @@ func (ccms *CCMetricStoreInternal) LoadNodeListData( var errors []string data := make(map[string]schema.JobData) for i, row := range resBody.Results { - var query memorystore.APIQuery + var query APIQuery if resBody.Queries != nil { query = resBody.Queries[i] } else { @@ -789,15 +770,15 @@ func (ccms *CCMetricStoreInternal) LoadNodeListData( return data, nil } -func (ccms *CCMetricStoreInternal) buildNodeQueries( +func buildNodeQueries( cluster string, subCluster string, nodes []string, metrics []string, scopes []schema.MetricScope, resolution int64, -) ([]memorystore.APIQuery, []schema.MetricScope, error) { - queries := make([]memorystore.APIQuery, 0, len(metrics)*len(scopes)*len(nodes)) +) ([]APIQuery, []schema.MetricScope, error) { + queries := make([]APIQuery, 0, len(metrics)*len(scopes)*len(nodes)) assignedScope := []schema.MetricScope{} // Get Topol before loop if subCluster given @@ -812,7 +793,6 @@ func (ccms *CCMetricStoreInternal) buildNodeQueries( } for _, metric := range metrics { - metric := metric mc := archive.GetMetricConfig(cluster, metric) if mc == nil { // return nil, fmt.Errorf("METRICDATA/CCMS > metric '%s' is not specified for cluster '%s'", metric, cluster) @@ -880,7 +860,7 @@ func (ccms *CCMetricStoreInternal) buildNodeQueries( continue } - queries = append(queries, memorystore.APIQuery{ + queries = append(queries, APIQuery{ Metric: metric, Hostname: hostname, Aggregate: false, @@ -898,7 +878,7 @@ func (ccms *CCMetricStoreInternal) buildNodeQueries( continue } - queries = append(queries, memorystore.APIQuery{ + queries = append(queries, APIQuery{ Metric: metric, Hostname: hostname, Aggregate: true, @@ -912,7 +892,7 @@ func (ccms *CCMetricStoreInternal) buildNodeQueries( // HWThread -> HWThead if nativeScope == schema.MetricScopeHWThread && scope == schema.MetricScopeHWThread { - queries = append(queries, memorystore.APIQuery{ + queries = append(queries, APIQuery{ Metric: metric, Hostname: hostname, Aggregate: false, @@ -928,7 +908,7 @@ func (ccms *CCMetricStoreInternal) buildNodeQueries( if nativeScope == schema.MetricScopeHWThread && scope == schema.MetricScopeCore { cores, _ := topology.GetCoresFromHWThreads(topology.Node) for _, core := range cores { - queries = append(queries, memorystore.APIQuery{ + queries = append(queries, APIQuery{ Metric: metric, Hostname: hostname, Aggregate: true, @@ -945,7 +925,7 @@ func (ccms *CCMetricStoreInternal) buildNodeQueries( if nativeScope == schema.MetricScopeHWThread && scope == schema.MetricScopeSocket { sockets, _ := 
topology.GetSocketsFromHWThreads(topology.Node) for _, socket := range sockets { - queries = append(queries, memorystore.APIQuery{ + queries = append(queries, APIQuery{ Metric: metric, Hostname: hostname, Aggregate: true, @@ -960,7 +940,7 @@ func (ccms *CCMetricStoreInternal) buildNodeQueries( // HWThread -> Node if nativeScope == schema.MetricScopeHWThread && scope == schema.MetricScopeNode { - queries = append(queries, memorystore.APIQuery{ + queries = append(queries, APIQuery{ Metric: metric, Hostname: hostname, Aggregate: true, @@ -975,7 +955,7 @@ func (ccms *CCMetricStoreInternal) buildNodeQueries( // Core -> Core if nativeScope == schema.MetricScopeCore && scope == schema.MetricScopeCore { cores, _ := topology.GetCoresFromHWThreads(topology.Node) - queries = append(queries, memorystore.APIQuery{ + queries = append(queries, APIQuery{ Metric: metric, Hostname: hostname, Aggregate: false, @@ -991,7 +971,7 @@ func (ccms *CCMetricStoreInternal) buildNodeQueries( if nativeScope == schema.MetricScopeCore && scope == schema.MetricScopeSocket { sockets, _ := topology.GetSocketsFromCores(topology.Node) for _, socket := range sockets { - queries = append(queries, memorystore.APIQuery{ + queries = append(queries, APIQuery{ Metric: metric, Hostname: hostname, Aggregate: true, @@ -1007,7 +987,7 @@ func (ccms *CCMetricStoreInternal) buildNodeQueries( // Core -> Node if nativeScope == schema.MetricScopeCore && scope == schema.MetricScopeNode { cores, _ := topology.GetCoresFromHWThreads(topology.Node) - queries = append(queries, memorystore.APIQuery{ + queries = append(queries, APIQuery{ Metric: metric, Hostname: hostname, Aggregate: true, @@ -1022,7 +1002,7 @@ func (ccms *CCMetricStoreInternal) buildNodeQueries( // MemoryDomain -> MemoryDomain if nativeScope == schema.MetricScopeMemoryDomain && scope == schema.MetricScopeMemoryDomain { sockets, _ := topology.GetMemoryDomainsFromHWThreads(topology.Node) - queries = append(queries, memorystore.APIQuery{ + queries = append(queries, APIQuery{ Metric: metric, Hostname: hostname, Aggregate: false, @@ -1037,7 +1017,7 @@ func (ccms *CCMetricStoreInternal) buildNodeQueries( // MemoryDoman -> Node if nativeScope == schema.MetricScopeMemoryDomain && scope == schema.MetricScopeNode { sockets, _ := topology.GetMemoryDomainsFromHWThreads(topology.Node) - queries = append(queries, memorystore.APIQuery{ + queries = append(queries, APIQuery{ Metric: metric, Hostname: hostname, Aggregate: true, @@ -1052,7 +1032,7 @@ func (ccms *CCMetricStoreInternal) buildNodeQueries( // Socket -> Socket if nativeScope == schema.MetricScopeSocket && scope == schema.MetricScopeSocket { sockets, _ := topology.GetSocketsFromHWThreads(topology.Node) - queries = append(queries, memorystore.APIQuery{ + queries = append(queries, APIQuery{ Metric: metric, Hostname: hostname, Aggregate: false, @@ -1067,7 +1047,7 @@ func (ccms *CCMetricStoreInternal) buildNodeQueries( // Socket -> Node if nativeScope == schema.MetricScopeSocket && scope == schema.MetricScopeNode { sockets, _ := topology.GetSocketsFromHWThreads(topology.Node) - queries = append(queries, memorystore.APIQuery{ + queries = append(queries, APIQuery{ Metric: metric, Hostname: hostname, Aggregate: true, @@ -1081,7 +1061,7 @@ func (ccms *CCMetricStoreInternal) buildNodeQueries( // Node -> Node if nativeScope == schema.MetricScopeNode && scope == schema.MetricScopeNode { - queries = append(queries, memorystore.APIQuery{ + queries = append(queries, APIQuery{ Metric: metric, Hostname: hostname, Resolution: resolution, diff --git 
a/internal/memorystore/stats.go b/internal/metricstore/stats.go similarity index 99% rename from internal/memorystore/stats.go rename to internal/metricstore/stats.go index c931ab35..51ffafc1 100644 --- a/internal/memorystore/stats.go +++ b/internal/metricstore/stats.go @@ -3,7 +3,7 @@ // Use of this source code is governed by a MIT-style // license that can be found in the LICENSE file. -package memorystore +package metricstore import ( "errors" diff --git a/internal/metricsync/metricdata.go b/internal/metricsync/metricdata.go new file mode 100644 index 00000000..772f16da --- /dev/null +++ b/internal/metricsync/metricdata.go @@ -0,0 +1,60 @@ +// Copyright (C) NHR@FAU, University Erlangen-Nuremberg. +// All rights reserved. This file is part of cc-backend. +// Use of this source code is governed by a MIT-style +// license that can be found in the LICENSE file. + +package metricsync + +import ( + "context" + "encoding/json" + "fmt" + "time" + + "github.com/ClusterCockpit/cc-backend/internal/config" + cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger" + "github.com/ClusterCockpit/cc-lib/v2/schema" +) + +type MetricDataSource interface { + // Initialize this MetricDataRepository. One instance of + // this interface will only ever be responsible for one cluster. + Init(rawConfig json.RawMessage) error + + // Return a map of hosts to a map of metrics at the requested scopes (currently only node) for that node. + Pull(cluster string, metrics, nodes []string, scopes []schema.MetricScope, from, to time.Time, ctx context.Context) (map[string]map[string][]*schema.JobMetric, error) +} + +var metricDataSourceRepos map[string]MetricDataSource = map[string]MetricDataSource{} + +func Init() error { + for _, cluster := range config.Clusters { + if cluster.MetricDataRepository != nil { + var kind struct { + Kind string `json:"kind"` + } + if err := json.Unmarshal(cluster.MetricDataRepository, &kind); err != nil { + cclog.Warn("Error while unmarshaling raw json MetricDataRepository") + return err + } + + var mdr MetricDataSource + switch kind.Kind { + case "cc-metric-store": + case "prometheus": + // mdr = &PrometheusDataRepository{} + case "test": + // mdr = &TestMetricDataRepository{} + default: + return fmt.Errorf("METRICDATA/METRICDATA > Unknown MetricDataRepository %v for cluster %v", kind.Kind, cluster.Name) + } + + if err := mdr.Init(cluster.MetricDataRepository); err != nil { + cclog.Errorf("Error initializing MetricDataRepository %v for cluster %v", kind.Kind, cluster.Name) + return err + } + metricDataSourceRepos[cluster.Name] = mdr + } + } + return nil +} diff --git a/internal/repository/stats.go b/internal/repository/stats.go index d1e16eb8..989026d1 100644 --- a/internal/repository/stats.go +++ b/internal/repository/stats.go @@ -12,7 +12,7 @@ import ( "github.com/ClusterCockpit/cc-backend/internal/config" "github.com/ClusterCockpit/cc-backend/internal/graph/model" - "github.com/ClusterCockpit/cc-backend/internal/metricDataDispatcher" + "github.com/ClusterCockpit/cc-backend/internal/metricdispatch" "github.com/ClusterCockpit/cc-backend/pkg/archive" cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger" "github.com/ClusterCockpit/cc-lib/v2/schema" @@ -766,7 +766,7 @@ func (r *JobRepository) runningJobsMetricStatisticsHistogram( continue } - if err := metricDataDispatcher.LoadAverages(job, metrics, avgs, ctx); err != nil { + if err := metricdispatch.LoadAverages(job, metrics, avgs, ctx); err != nil { cclog.Errorf("Error while loading averages for histogram: %s", err) return nil } diff --git 
a/internal/taskmanager/updateFootprintService.go b/internal/taskmanager/updateFootprintService.go index 979a6137..c8f81e37 100644 --- a/internal/taskmanager/updateFootprintService.go +++ b/internal/taskmanager/updateFootprintService.go @@ -10,7 +10,7 @@ import ( "math" "time" - "github.com/ClusterCockpit/cc-backend/internal/metricdata" + "github.com/ClusterCockpit/cc-backend/internal/metricstore" "github.com/ClusterCockpit/cc-backend/pkg/archive" cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger" "github.com/ClusterCockpit/cc-lib/v2/schema" @@ -58,12 +58,6 @@ func RegisterFootprintWorker() { allMetrics = append(allMetrics, mc.Name) } - repo, err := metricdata.GetMetricDataRepo(cluster.Name) - if err != nil { - cclog.Errorf("no metric data repository configured for '%s'", cluster.Name) - continue - } - pendingStatements := []sq.UpdateBuilder{} for _, job := range jobs { @@ -72,7 +66,7 @@ func RegisterFootprintWorker() { sJob := time.Now() - jobStats, err := repo.LoadStats(job, allMetrics, context.Background()) + jobStats, err := metricstore.LoadStats(job, allMetrics, context.Background()) if err != nil { cclog.Errorf("error wile loading job data stats for footprint update: %v", err) ce++ From ecb5aef7355b498d2f84e1837e536ab5aa69a2d0 Mon Sep 17 00:00:00 2001 From: Jan Eitzinger Date: Thu, 25 Dec 2025 08:48:03 +0100 Subject: [PATCH 20/59] Fix build error in unit test --- internal/api/nats_test.go | 1 - 1 file changed, 1 deletion(-) diff --git a/internal/api/nats_test.go b/internal/api/nats_test.go index 9e1fa2b5..e92ce291 100644 --- a/internal/api/nats_test.go +++ b/internal/api/nats_test.go @@ -18,7 +18,6 @@ import ( "github.com/ClusterCockpit/cc-backend/internal/auth" "github.com/ClusterCockpit/cc-backend/internal/config" "github.com/ClusterCockpit/cc-backend/internal/graph" - "github.com/ClusterCockpit/cc-backend/internal/importer" "github.com/ClusterCockpit/cc-backend/internal/metricstore" "github.com/ClusterCockpit/cc-backend/internal/repository" "github.com/ClusterCockpit/cc-backend/pkg/archive" From 4d6326b8be9bef9d730ea010ae933d43e43ad9a1 Mon Sep 17 00:00:00 2001 From: Jan Eitzinger Date: Mon, 12 Jan 2026 08:55:31 +0100 Subject: [PATCH 21/59] Remove metricsync --- internal/metricsync/metricdata.go | 60 ------------------------------- 1 file changed, 60 deletions(-) delete mode 100644 internal/metricsync/metricdata.go diff --git a/internal/metricsync/metricdata.go b/internal/metricsync/metricdata.go deleted file mode 100644 index 772f16da..00000000 --- a/internal/metricsync/metricdata.go +++ /dev/null @@ -1,60 +0,0 @@ -// Copyright (C) NHR@FAU, University Erlangen-Nuremberg. -// All rights reserved. This file is part of cc-backend. -// Use of this source code is governed by a MIT-style -// license that can be found in the LICENSE file. - -package metricsync - -import ( - "context" - "encoding/json" - "fmt" - "time" - - "github.com/ClusterCockpit/cc-backend/internal/config" - cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger" - "github.com/ClusterCockpit/cc-lib/v2/schema" -) - -type MetricDataSource interface { - // Initialize this MetricDataRepository. One instance of - // this interface will only ever be responsible for one cluster. - Init(rawConfig json.RawMessage) error - - // Return a map of hosts to a map of metrics at the requested scopes (currently only node) for that node. 
- Pull(cluster string, metrics, nodes []string, scopes []schema.MetricScope, from, to time.Time, ctx context.Context) (map[string]map[string][]*schema.JobMetric, error) -} - -var metricDataSourceRepos map[string]MetricDataSource = map[string]MetricDataSource{} - -func Init() error { - for _, cluster := range config.Clusters { - if cluster.MetricDataRepository != nil { - var kind struct { - Kind string `json:"kind"` - } - if err := json.Unmarshal(cluster.MetricDataRepository, &kind); err != nil { - cclog.Warn("Error while unmarshaling raw json MetricDataRepository") - return err - } - - var mdr MetricDataSource - switch kind.Kind { - case "cc-metric-store": - case "prometheus": - // mdr = &PrometheusDataRepository{} - case "test": - // mdr = &TestMetricDataRepository{} - default: - return fmt.Errorf("METRICDATA/METRICDATA > Unknown MetricDataRepository %v for cluster %v", kind.Kind, cluster.Name) - } - - if err := mdr.Init(cluster.MetricDataRepository); err != nil { - cclog.Errorf("Error initializing MetricDataRepository %v for cluster %v", kind.Kind, cluster.Name) - return err - } - metricDataSourceRepos[cluster.Name] = mdr - } - } - return nil -} From 56399523d7e9582ff7525b4932586415bf48d7ea Mon Sep 17 00:00:00 2001 From: Jan Eitzinger Date: Mon, 12 Jan 2026 09:00:06 +0100 Subject: [PATCH 22/59] Update module deps --- go.mod | 17 ++++------------- go.sum | 26 ++++++++++++++------------ 2 files changed, 18 insertions(+), 25 deletions(-) diff --git a/go.mod b/go.mod index 36ce47b9..c8899162 100644 --- a/go.mod +++ b/go.mod @@ -33,8 +33,6 @@ require ( github.com/linkedin/goavro/v2 v2.14.1 github.com/mattn/go-sqlite3 v1.14.32 github.com/nats-io/nats.go v1.47.0 - github.com/prometheus/client_golang v1.23.2 - github.com/prometheus/common v0.67.4 github.com/qustavo/sqlhooks/v2 v2.1.0 github.com/santhosh-tekuri/jsonschema/v5 v5.3.1 github.com/stretchr/testify v1.11.1 @@ -65,8 +63,6 @@ require ( github.com/aws/aws-sdk-go-v2/service/ssooidc v1.35.7 // indirect github.com/aws/aws-sdk-go-v2/service/sts v1.40.2 // indirect github.com/aws/smithy-go v1.24.0 // indirect - github.com/beorn7/perks v1.0.1 // indirect - github.com/cespare/xxhash/v2 v2.3.0 // indirect github.com/cpuguy83/go-md2man/v2 v2.0.7 // indirect github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect github.com/felixge/httpsnoop v1.0.4 // indirect @@ -86,6 +82,7 @@ require ( github.com/go-viper/mapstructure/v2 v2.4.0 // indirect github.com/goccy/go-yaml v1.19.0 // indirect github.com/golang/snappy v0.0.4 // indirect + github.com/google/go-cmp v0.7.0 // indirect github.com/google/uuid v1.6.0 // indirect github.com/gorilla/securecookie v1.1.2 // indirect github.com/gorilla/websocket v1.5.3 // indirect @@ -93,24 +90,19 @@ require ( github.com/influxdata/influxdb-client-go/v2 v2.14.0 // indirect github.com/influxdata/line-protocol v0.0.0-20210922203350-b1ad95c89adf // indirect github.com/jonboulle/clockwork v0.5.0 // indirect - github.com/jpillora/backoff v1.0.0 // indirect - github.com/json-iterator/go v1.1.12 // indirect github.com/klauspost/compress v1.18.2 // indirect + github.com/kr/pretty v0.3.1 // indirect github.com/lann/builder v0.0.0-20180802200727-47ae307949d0 // indirect github.com/lann/ps v0.0.0-20150810152359-62de8c46ede0 // indirect - github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect - github.com/modern-go/reflect2 v1.0.2 // indirect - github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect - github.com/mwitkow/go-conntrack 
v0.0.0-20190716064945-2f068394615f // indirect github.com/nats-io/nkeys v0.4.12 // indirect github.com/nats-io/nuid v1.0.1 // indirect github.com/oapi-codegen/runtime v1.1.1 // indirect github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect - github.com/prometheus/client_model v0.6.2 // indirect - github.com/prometheus/procfs v0.16.1 // indirect + github.com/prometheus/common v0.67.4 // indirect github.com/robfig/cron/v3 v3.0.1 // indirect github.com/russross/blackfriday/v2 v2.1.0 // indirect github.com/sosodev/duration v1.3.1 // indirect + github.com/stmcginnis/gofish v0.20.0 // indirect github.com/stretchr/objx v0.5.2 // indirect github.com/swaggo/files v1.0.1 // indirect github.com/urfave/cli/v2 v2.27.7 // indirect @@ -125,7 +117,6 @@ require ( golang.org/x/sys v0.39.0 // indirect golang.org/x/text v0.32.0 // indirect golang.org/x/tools v0.39.0 // indirect - google.golang.org/protobuf v1.36.10 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect sigs.k8s.io/yaml v1.6.0 // indirect ) diff --git a/go.sum b/go.sum index 9038d960..99c2bdb0 100644 --- a/go.sum +++ b/go.sum @@ -23,6 +23,8 @@ github.com/andreyvit/diff v0.0.0-20170406064948-c7f18ee00883 h1:bvNMNQO63//z+xNg github.com/andreyvit/diff v0.0.0-20170406064948-c7f18ee00883/go.mod h1:rCTlJbsFo29Kk6CurOXKm700vrz8f0KW0JNfpkRJY/8= github.com/andybalholm/cascadia v1.3.3 h1:AG2YHrzJIm4BZ19iwJ/DAua6Btl3IwJX+VI4kktS1LM= github.com/andybalholm/cascadia v1.3.3/go.mod h1:xNd9bqTn98Ln4DwST8/nG+H0yuB8Hmgu1YHNnWw0GeA= +github.com/antithesishq/antithesis-sdk-go v0.5.0-default-no-op h1:Ucf+QxEKMbPogRO5guBNe5cgd9uZgfoJLOYs8WWhtjM= +github.com/antithesishq/antithesis-sdk-go v0.5.0-default-no-op/go.mod h1:IUpT2DPAKh6i/YhSbt6Gl3v2yvUZjmKncl7U91fup7E= github.com/apapsch/go-jsonmerge/v2 v2.0.0 h1:axGnT1gRIfimI7gJifB699GoE/oq+F2MU7Dml6nw9rQ= github.com/apapsch/go-jsonmerge/v2 v2.0.0/go.mod h1:lvDnEdqiQrp0O42VQGgmlKpxL1AP2+08jFMw88y4klk= github.com/arbovm/levenshtein v0.0.0-20160628152529-48b4e1c0c4d0 h1:jfIu9sQUG6Ig+0+Ap1h4unLjW6YQJpKZVmUzxsD4E/Q= @@ -142,7 +144,8 @@ github.com/google/go-cmp v0.5.2/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/ github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8= github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU= -github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= +github.com/google/go-tpm v0.9.7 h1:u89J4tUUeDTlH8xxC3CTW7OHZjbjKoHdQ9W7gCUhtxA= +github.com/google/go-tpm v0.9.7/go.mod h1:h9jEsEECg7gtLis0upRBQU+GhYVH6jMjrFxI8u6bVUY= github.com/google/gofuzz v1.2.0 h1:xRy4A+RhZaiKjJ1bPfwQ8sedCA+YS2YcCHW6ec7JMi0= github.com/google/gofuzz v1.2.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= github.com/google/gops v0.3.28 h1:2Xr57tqKAmQYRAfG12E+yLcoa2Y42UJo2lOrUFL9ark= @@ -192,10 +195,6 @@ github.com/joho/godotenv v1.5.1 h1:7eLL/+HRGLY0ldzfGMeQkb7vMd0as4CfYvUVzLqw0N0= github.com/joho/godotenv v1.5.1/go.mod h1:f4LDr5Voq0i2e/R5DDNOoa2zzDfwtkZa6DnEwAbqwq4= github.com/jonboulle/clockwork v0.5.0 h1:Hyh9A8u51kptdkR+cqRpT1EebBwTn1oK9YfGYbdFz6I= github.com/jonboulle/clockwork v0.5.0/go.mod h1:3mZlmanh0g2NDKO5TWZVJAfofYk64M7XN3SzBPjZF60= -github.com/jpillora/backoff v1.0.0 h1:uvFg412JmmHBHw7iwprIxkPMI+sGQ4kzOWsMeHnm2EA= -github.com/jpillora/backoff v1.0.0/go.mod h1:J/6gKK9jxlEcS3zixgDgUAsiuZ7yrSoa/FX5e0EB2j4= -github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM= -github.com/json-iterator/go 
v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo= github.com/juju/gnuflag v0.0.0-20171113085948-2ce1bb71843d/go.mod h1:2PavIy+JPciBPrBUjwbNvtwB6RQlve+hkpll6QSNmOE= github.com/klauspost/compress v1.18.2 h1:iiPHWW0YrcFgpBYhsA6D1+fqHssJscY/Tm/y2Uqnapk= github.com/klauspost/compress v1.18.2/go.mod h1:R0h/fSBs8DE4ENlcrlib3PsXS61voFxhIs2DeRhCvJ4= @@ -219,15 +218,14 @@ github.com/mattn/go-sqlite3 v1.10.0/go.mod h1:FPy6KqzDD04eiIsT53CuJW3U88zkxoIYsO github.com/mattn/go-sqlite3 v1.14.22/go.mod h1:Uh1q+B4BYcTPb+yiD3kU8Ct7aC0hY9fxUwlHK0RXw+Y= github.com/mattn/go-sqlite3 v1.14.32 h1:JD12Ag3oLy1zQA+BNn74xRgaBbdhbNIDYvQUEuuErjs= github.com/mattn/go-sqlite3 v1.14.32/go.mod h1:Uh1q+B4BYcTPb+yiD3kU8Ct7aC0hY9fxUwlHK0RXw+Y= -github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= -github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg= -github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= -github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9Gz0M= -github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk= +github.com/minio/highwayhash v1.0.4-0.20251030100505-070ab1a87a76 h1:KGuD/pM2JpL9FAYvBrnBBeENKZNh6eNtjqytV6TYjnk= +github.com/minio/highwayhash v1.0.4-0.20251030100505-070ab1a87a76/go.mod h1:GGYsuwP/fPD6Y9hMiXuapVvlIUEhFhMTh0rxU3ik1LQ= github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA= github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ= -github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f h1:KUppIJq7/+SVif2QVs3tOP0zanoHgBEVAwHxUSIzRqU= -github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f/go.mod h1:qRWi+5nqEBWmkhHvq77mSJWrCKwh8bxhgT7d/eI7P4U= +github.com/nats-io/jwt/v2 v2.8.0 h1:K7uzyz50+yGZDO5o772eRE7atlcSEENpL7P+b74JV1g= +github.com/nats-io/jwt/v2 v2.8.0/go.mod h1:me11pOkwObtcBNR8AiMrUbtVOUGkqYjMQZ6jnSdVUIA= +github.com/nats-io/nats-server/v2 v2.12.3 h1:KRv+1n7lddMVgkJPQer+pt36TcO0ENxjilBmeWdjcHs= +github.com/nats-io/nats-server/v2 v2.12.3/go.mod h1:MQXjG9WjyXKz9koWzUc3jYUMKD8x3CLmTNy91IQQz3Y= github.com/nats-io/nats.go v1.47.0 h1:YQdADw6J/UfGUd2Oy6tn4Hq6YHxCaJrVKayxxFqYrgM= github.com/nats-io/nats.go v1.47.0/go.mod h1:iRWIPokVIFbVijxuMQq4y9ttaBTMe0SFdlZfMDd+33g= github.com/nats-io/nkeys v0.4.12 h1:nssm7JKOG9/x4J8II47VWCL1Ds29avyiQDRn0ckMvDc= @@ -238,6 +236,7 @@ github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e/go.mod h1:zD1mROLA github.com/oapi-codegen/runtime v1.1.1 h1:EXLHh0DXIJnWhdRPN2w4MXAzFyE4CskzhNLUmtpMYro= github.com/oapi-codegen/runtime v1.1.1/go.mod h1:SK9X900oXmPWilYR5/WKPzt3Kqxn/uS/+lbpREv+eCg= github.com/opentracing/opentracing-go v1.1.0/go.mod h1:UkNAQd3GIcIGf0SeVgPpRdFStlNbqXla1AfSYxPUl2o= +github.com/pkg/diff v0.0.0-20210226163009-20ebb0f2a09e/go.mod h1:pJLUxLENpZxwdsKMEsNbx1VGcRFpLqf3715MtcvvzbA= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 h1:Jamvg5psRIccs7FGNTlIRMkT8wgtp5eCXdBlqhYGL6U= github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= @@ -253,6 +252,7 @@ github.com/qustavo/sqlhooks/v2 v2.1.0 h1:54yBemHnGHp/7xgT+pxwmIlMSDNYKx5JW5dfRAi github.com/qustavo/sqlhooks/v2 
v2.1.0/go.mod h1:aMREyKo7fOKTwiLuWPsaHRXEmtqG4yREztO0idF83AU= github.com/robfig/cron/v3 v3.0.1 h1:WdRxkvbJztn8LMz/QEvLN5sBU+xKpSqwwUO1Pjr4qDs= github.com/robfig/cron/v3 v3.0.1/go.mod h1:eQICP3HwyT7UooqI/z+Ov+PtYAWygg1TEWWzGIFLtro= +github.com/rogpeppe/go-internal v1.9.0/go.mod h1:WtVeX8xhTBvf0smdhujwtBcq4Qrzq/fJaraNFVN+nFs= github.com/rogpeppe/go-internal v1.14.1 h1:UQB4HGPB6osV0SQTLymcB4TgvyWu6ZyliaW0tI/otEQ= github.com/rogpeppe/go-internal v1.14.1/go.mod h1:MaRKkUm5W0goXpeCfT7UZI6fk/L7L7so1lCWt35ZSgc= github.com/russross/blackfriday/v2 v2.1.0 h1:JIOH55/0cWyOuilr9/qlrm0BSXldqnqwMsf35Ld67mk= @@ -264,6 +264,8 @@ github.com/sergi/go-diff v1.3.1/go.mod h1:aMJSSKb2lpPvRNec0+w3fl7LP9IOFzdc9Pa4NF github.com/sosodev/duration v1.3.1 h1:qtHBDMQ6lvMQsL15g4aopM4HEfOaYuhWBw3NPTtlqq4= github.com/sosodev/duration v1.3.1/go.mod h1:RQIBBX0+fMLc/D9+Jb/fwvVmo0eZvDDEERAikUR6SDg= github.com/spkg/bom v0.0.0-20160624110644-59b7046e48ad/go.mod h1:qLr4V1qq6nMqFKkMo8ZTx3f+BZEkzsRUY10Xsm2mwU0= +github.com/stmcginnis/gofish v0.20.0 h1:hH2V2Qe898F2wWT1loApnkDUrXXiLKqbSlMaH3Y1n08= +github.com/stmcginnis/gofish v0.20.0/go.mod h1:PzF5i8ecRG9A2ol8XT64npKUunyraJ+7t0kYMpQAtqU= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= github.com/stretchr/objx v0.5.2 h1:xuMeJ0Sdp5ZMRXx/aWO6RZxdr3beISkG5/G/aIRr3pY= From 4cec93334964c166435e79c99af6e354d661dde7 Mon Sep 17 00:00:00 2001 From: Jan Eitzinger Date: Tue, 13 Jan 2026 06:28:33 +0100 Subject: [PATCH 23/59] Remove obsolete cluster config section --- cmd/cc-backend/main.go | 7 +-- internal/api/api_test.go | 6 +- internal/api/nats_test.go | 6 +- internal/config/config.go | 21 +------ internal/config/config_test.go | 12 +--- internal/config/schema.go | 80 -------------------------- internal/importer/importer_test.go | 6 +- internal/repository/node_test.go | 6 +- internal/repository/userConfig_test.go | 6 +- tools/archive-manager/main.go | 6 +- web/web.go | 7 --- 11 files changed, 10 insertions(+), 153 deletions(-) diff --git a/cmd/cc-backend/main.go b/cmd/cc-backend/main.go index 331df4f6..8eb3c76f 100644 --- a/cmd/cc-backend/main.go +++ b/cmd/cc-backend/main.go @@ -102,12 +102,7 @@ func initConfiguration() error { return fmt.Errorf("main configuration must be present") } - clustercfg := ccconf.GetPackageConfig("clusters") - if clustercfg == nil { - return fmt.Errorf("cluster configuration must be present") - } - - config.Init(cfg, clustercfg) + config.Init(cfg) return nil } diff --git a/internal/api/api_test.go b/internal/api/api_test.go index a2283013..7aa935ff 100644 --- a/internal/api/api_test.go +++ b/internal/api/api_test.go @@ -157,11 +157,7 @@ func setup(t *testing.T) *api.RestAPI { // Load and check main configuration if cfg := ccconf.GetPackageConfig("main"); cfg != nil { - if clustercfg := ccconf.GetPackageConfig("clusters"); clustercfg != nil { - config.Init(cfg, clustercfg) - } else { - cclog.Abort("Cluster configuration must be present") - } + config.Init(cfg) } else { cclog.Abort("Main configuration must be present") } diff --git a/internal/api/nats_test.go b/internal/api/nats_test.go index e92ce291..319668bb 100644 --- a/internal/api/nats_test.go +++ b/internal/api/nats_test.go @@ -151,11 +151,7 @@ func setupNatsTest(t *testing.T) *NatsAPI { // Load and check main configuration if cfg := ccconf.GetPackageConfig("main"); cfg != nil { - if clustercfg := ccconf.GetPackageConfig("clusters"); clustercfg != nil { - config.Init(cfg, clustercfg) - } 
else { - cclog.Abort("Cluster configuration must be present") - } + config.Init(cfg) } else { cclog.Abort("Main configuration must be present") } diff --git a/internal/config/config.go b/internal/config/config.go index af8ec944..b8eea2ca 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -111,14 +111,6 @@ type FilterRanges struct { StartTime *TimeRange `json:"startTime"` } -type ClusterConfig struct { - Name string `json:"name"` - FilterRanges *FilterRanges `json:"filterRanges"` - MetricDataRepository json.RawMessage `json:"metricDataRepository"` -} - -var Clusters []*ClusterConfig - var Keys ProgramConfig = ProgramConfig{ Addr: "localhost:8080", DisableAuthentication: false, @@ -132,7 +124,7 @@ var Keys ProgramConfig = ProgramConfig{ ShortRunningJobsDuration: 5 * 60, } -func Init(mainConfig json.RawMessage, clusterConfig json.RawMessage) { +func Init(mainConfig json.RawMessage) { Validate(configSchema, mainConfig) dec := json.NewDecoder(bytes.NewReader(mainConfig)) dec.DisallowUnknownFields() @@ -140,17 +132,6 @@ func Init(mainConfig json.RawMessage, clusterConfig json.RawMessage) { cclog.Abortf("Config Init: Could not decode config file '%s'.\nError: %s\n", mainConfig, err.Error()) } - Validate(clustersSchema, clusterConfig) - dec = json.NewDecoder(bytes.NewReader(clusterConfig)) - dec.DisallowUnknownFields() - if err := dec.Decode(&Clusters); err != nil { - cclog.Abortf("Config Init: Could not decode config file '%s'.\nError: %s\n", mainConfig, err.Error()) - } - - if len(Clusters) < 1 { - cclog.Abort("Config Init: At least one cluster required in config. Exited with error.") - } - if Keys.EnableResampling != nil && Keys.EnableResampling.MinimumPoints > 0 { resampler.SetMinimumRequiredPoints(Keys.EnableResampling.MinimumPoints) } diff --git a/internal/config/config_test.go b/internal/config/config_test.go index 396a80a1..e4a700ff 100644 --- a/internal/config/config_test.go +++ b/internal/config/config_test.go @@ -16,11 +16,7 @@ func TestInit(t *testing.T) { fp := "../../configs/config.json" ccconf.Init(fp) if cfg := ccconf.GetPackageConfig("main"); cfg != nil { - if clustercfg := ccconf.GetPackageConfig("clusters"); clustercfg != nil { - Init(cfg, clustercfg) - } else { - cclog.Abort("Cluster configuration must be present") - } + Init(cfg) } else { cclog.Abort("Main configuration must be present") } @@ -34,11 +30,7 @@ func TestInitMinimal(t *testing.T) { fp := "../../configs/config-demo.json" ccconf.Init(fp) if cfg := ccconf.GetPackageConfig("main"); cfg != nil { - if clustercfg := ccconf.GetPackageConfig("clusters"); clustercfg != nil { - Init(cfg, clustercfg) - } else { - cclog.Abort("Cluster configuration must be present") - } + Init(cfg) } else { cclog.Abort("Main configuration must be present") } diff --git a/internal/config/schema.go b/internal/config/schema.go index ff8d0c92..2d068140 100644 --- a/internal/config/schema.go +++ b/internal/config/schema.go @@ -138,83 +138,3 @@ var configSchema = ` }, "required": ["apiAllowedIPs"] }` - -var clustersSchema = ` - { - "type": "array", - "items": { - "type": "object", - "properties": { - "name": { - "description": "The name of the cluster.", - "type": "string" - }, - "metricDataRepository": { - "description": "Type of the metric data repository for this cluster", - "type": "object", - "properties": { - "kind": { - "type": "string", - "enum": ["influxdb", "prometheus", "cc-metric-store", "cc-metric-store-internal", "test"] - }, - "url": { - "type": "string" - }, - "token": { - "type": "string" - } - }, - 
"required": ["kind"] - }, - "filterRanges": { - "description": "This option controls the slider ranges for the UI controls of numNodes, duration, and startTime.", - "type": "object", - "properties": { - "numNodes": { - "description": "UI slider range for number of nodes", - "type": "object", - "properties": { - "from": { - "type": "integer" - }, - "to": { - "type": "integer" - } - }, - "required": ["from", "to"] - }, - "duration": { - "description": "UI slider range for duration", - "type": "object", - "properties": { - "from": { - "type": "integer" - }, - "to": { - "type": "integer" - } - }, - "required": ["from", "to"] - }, - "startTime": { - "description": "UI slider range for start time", - "type": "object", - "properties": { - "from": { - "type": "string", - "format": "date-time" - }, - "to": { - "type": "null" - } - }, - "required": ["from", "to"] - } - }, - "required": ["numNodes", "duration", "startTime"] - } - }, - "required": ["name", "metricDataRepository", "filterRanges"], - "minItems": 1 - } - }` diff --git a/internal/importer/importer_test.go b/internal/importer/importer_test.go index bffb8bf6..2d00fc84 100644 --- a/internal/importer/importer_test.go +++ b/internal/importer/importer_test.go @@ -121,11 +121,7 @@ func setup(t *testing.T) *repository.JobRepository { // Load and check main configuration if cfg := ccconf.GetPackageConfig("main"); cfg != nil { - if clustercfg := ccconf.GetPackageConfig("clusters"); clustercfg != nil { - config.Init(cfg, clustercfg) - } else { - t.Fatal("Cluster configuration must be present") - } + config.Init(cfg) } else { t.Fatal("Main configuration must be present") } diff --git a/internal/repository/node_test.go b/internal/repository/node_test.go index e1d6ca93..fd935b53 100644 --- a/internal/repository/node_test.go +++ b/internal/repository/node_test.go @@ -144,11 +144,7 @@ func nodeTestSetup(t *testing.T) { // Load and check main configuration if cfg := ccconf.GetPackageConfig("main"); cfg != nil { - if clustercfg := ccconf.GetPackageConfig("clusters"); clustercfg != nil { - config.Init(cfg, clustercfg) - } else { - cclog.Abort("Cluster configuration must be present") - } + config.Init(cfg) } else { cclog.Abort("Main configuration must be present") } diff --git a/internal/repository/userConfig_test.go b/internal/repository/userConfig_test.go index 02c70d0f..ae3adaf2 100644 --- a/internal/repository/userConfig_test.go +++ b/internal/repository/userConfig_test.go @@ -58,11 +58,7 @@ func setupUserTest(t *testing.T) *UserCfgRepo { // Load and check main configuration if cfg := ccconf.GetPackageConfig("main"); cfg != nil { - if clustercfg := ccconf.GetPackageConfig("clusters"); clustercfg != nil { - config.Init(cfg, clustercfg) - } else { - t.Fatal("Cluster configuration must be present") - } + config.Init(cfg) } else { t.Fatal("Main configuration must be present") } diff --git a/tools/archive-manager/main.go b/tools/archive-manager/main.go index f5f8b836..ffcba793 100644 --- a/tools/archive-manager/main.go +++ b/tools/archive-manager/main.go @@ -434,11 +434,7 @@ func main() { // Load and check main configuration if cfg := ccconf.GetPackageConfig("main"); cfg != nil { - if clustercfg := ccconf.GetPackageConfig("clusters"); clustercfg != nil { - config.Init(cfg, clustercfg) - } else { - cclog.Abort("Cluster configuration must be present") - } + config.Init(cfg) } else { cclog.Abort("Main configuration must be present") } diff --git a/web/web.go b/web/web.go index d2ae8700..37f1c2b2 100644 --- a/web/web.go +++ b/web/web.go @@ -245,7 +245,6 @@ type 
Page struct { User schema.User // Information about the currently logged in user (Full User Info) Roles map[string]schema.Role // Available roles for frontend render checks Build Build // Latest information about the application - Clusters []config.ClusterConfig // List of all clusters for use in the Header SubClusters map[string][]string // Map per cluster of all subClusters for use in the Header FilterPresets map[string]any // For pages with the Filter component, this can be used to set initial filters. Infos map[string]any // For generic use (e.g. username for /monitoring/user/, job id for /monitoring/job/) @@ -260,12 +259,6 @@ func RenderTemplate(rw http.ResponseWriter, file string, page *Page) { cclog.Errorf("WEB/WEB > template '%s' not found", file) } - if page.Clusters == nil { - for _, c := range config.Clusters { - page.Clusters = append(page.Clusters, config.ClusterConfig{Name: c.Name, FilterRanges: c.FilterRanges, MetricDataRepository: nil}) - } - } - if page.SubClusters == nil { page.SubClusters = make(map[string][]string) for _, cluster := range archive.Clusters { From 42809e3f75256d282e3f7a5b8bdfd9980c222882 Mon Sep 17 00:00:00 2001 From: Jan Eitzinger Date: Tue, 13 Jan 2026 07:20:26 +0100 Subject: [PATCH 24/59] Remove embedded tagger rules --- configs/tagger/README.md | 0 {internal => configs}/tagger/apps/alf.txt | 0 {internal => configs}/tagger/apps/caracal.txt | 0 {internal => configs}/tagger/apps/chroma.txt | 0 {internal => configs}/tagger/apps/cp2k.txt | 0 {internal => configs}/tagger/apps/cpmd.txt | 0 {internal => configs}/tagger/apps/flame.txt | 0 {internal => configs}/tagger/apps/gromacs.txt | 0 {internal => configs}/tagger/apps/julia.txt | 0 {internal => configs}/tagger/apps/lammps.txt | 0 {internal => configs}/tagger/apps/matlab.txt | 0 .../tagger/apps/openfoam.txt | 0 {internal => configs}/tagger/apps/orca.txt | 0 {internal => configs}/tagger/apps/python.txt | 0 {internal => configs}/tagger/apps/starccm.txt | 0 .../tagger/apps/turbomole.txt | 0 {internal => configs}/tagger/apps/vasp.txt | 0 .../tagger/jobclasses/highload.json | 0 .../tagger/jobclasses/lowUtilization.json | 0 .../tagger/jobclasses/lowload.json | 0 .../tagger/jobclasses/parameters.json | 0 internal/tagger/classifyJob.go | 105 ++++++++++-------- internal/tagger/classifyJob_test.go | 8 +- internal/tagger/detectApp.go | 61 +++++----- internal/tagger/detectApp_test.go | 70 +++++++++++- 25 files changed, 166 insertions(+), 78 deletions(-) create mode 100644 configs/tagger/README.md rename {internal => configs}/tagger/apps/alf.txt (100%) rename {internal => configs}/tagger/apps/caracal.txt (100%) rename {internal => configs}/tagger/apps/chroma.txt (100%) rename {internal => configs}/tagger/apps/cp2k.txt (100%) rename {internal => configs}/tagger/apps/cpmd.txt (100%) rename {internal => configs}/tagger/apps/flame.txt (100%) rename {internal => configs}/tagger/apps/gromacs.txt (100%) rename {internal => configs}/tagger/apps/julia.txt (100%) rename {internal => configs}/tagger/apps/lammps.txt (100%) rename {internal => configs}/tagger/apps/matlab.txt (100%) rename {internal => configs}/tagger/apps/openfoam.txt (100%) rename {internal => configs}/tagger/apps/orca.txt (100%) rename {internal => configs}/tagger/apps/python.txt (100%) rename {internal => configs}/tagger/apps/starccm.txt (100%) rename {internal => configs}/tagger/apps/turbomole.txt (100%) rename {internal => configs}/tagger/apps/vasp.txt (100%) rename {internal => configs}/tagger/jobclasses/highload.json (100%) rename {internal => 
configs}/tagger/jobclasses/lowUtilization.json (100%) rename {internal => configs}/tagger/jobclasses/lowload.json (100%) rename {internal => configs}/tagger/jobclasses/parameters.json (100%) diff --git a/configs/tagger/README.md b/configs/tagger/README.md new file mode 100644 index 00000000..e69de29b diff --git a/internal/tagger/apps/alf.txt b/configs/tagger/apps/alf.txt similarity index 100% rename from internal/tagger/apps/alf.txt rename to configs/tagger/apps/alf.txt diff --git a/internal/tagger/apps/caracal.txt b/configs/tagger/apps/caracal.txt similarity index 100% rename from internal/tagger/apps/caracal.txt rename to configs/tagger/apps/caracal.txt diff --git a/internal/tagger/apps/chroma.txt b/configs/tagger/apps/chroma.txt similarity index 100% rename from internal/tagger/apps/chroma.txt rename to configs/tagger/apps/chroma.txt diff --git a/internal/tagger/apps/cp2k.txt b/configs/tagger/apps/cp2k.txt similarity index 100% rename from internal/tagger/apps/cp2k.txt rename to configs/tagger/apps/cp2k.txt diff --git a/internal/tagger/apps/cpmd.txt b/configs/tagger/apps/cpmd.txt similarity index 100% rename from internal/tagger/apps/cpmd.txt rename to configs/tagger/apps/cpmd.txt diff --git a/internal/tagger/apps/flame.txt b/configs/tagger/apps/flame.txt similarity index 100% rename from internal/tagger/apps/flame.txt rename to configs/tagger/apps/flame.txt diff --git a/internal/tagger/apps/gromacs.txt b/configs/tagger/apps/gromacs.txt similarity index 100% rename from internal/tagger/apps/gromacs.txt rename to configs/tagger/apps/gromacs.txt diff --git a/internal/tagger/apps/julia.txt b/configs/tagger/apps/julia.txt similarity index 100% rename from internal/tagger/apps/julia.txt rename to configs/tagger/apps/julia.txt diff --git a/internal/tagger/apps/lammps.txt b/configs/tagger/apps/lammps.txt similarity index 100% rename from internal/tagger/apps/lammps.txt rename to configs/tagger/apps/lammps.txt diff --git a/internal/tagger/apps/matlab.txt b/configs/tagger/apps/matlab.txt similarity index 100% rename from internal/tagger/apps/matlab.txt rename to configs/tagger/apps/matlab.txt diff --git a/internal/tagger/apps/openfoam.txt b/configs/tagger/apps/openfoam.txt similarity index 100% rename from internal/tagger/apps/openfoam.txt rename to configs/tagger/apps/openfoam.txt diff --git a/internal/tagger/apps/orca.txt b/configs/tagger/apps/orca.txt similarity index 100% rename from internal/tagger/apps/orca.txt rename to configs/tagger/apps/orca.txt diff --git a/internal/tagger/apps/python.txt b/configs/tagger/apps/python.txt similarity index 100% rename from internal/tagger/apps/python.txt rename to configs/tagger/apps/python.txt diff --git a/internal/tagger/apps/starccm.txt b/configs/tagger/apps/starccm.txt similarity index 100% rename from internal/tagger/apps/starccm.txt rename to configs/tagger/apps/starccm.txt diff --git a/internal/tagger/apps/turbomole.txt b/configs/tagger/apps/turbomole.txt similarity index 100% rename from internal/tagger/apps/turbomole.txt rename to configs/tagger/apps/turbomole.txt diff --git a/internal/tagger/apps/vasp.txt b/configs/tagger/apps/vasp.txt similarity index 100% rename from internal/tagger/apps/vasp.txt rename to configs/tagger/apps/vasp.txt diff --git a/internal/tagger/jobclasses/highload.json b/configs/tagger/jobclasses/highload.json similarity index 100% rename from internal/tagger/jobclasses/highload.json rename to configs/tagger/jobclasses/highload.json diff --git a/internal/tagger/jobclasses/lowUtilization.json 
b/configs/tagger/jobclasses/lowUtilization.json similarity index 100% rename from internal/tagger/jobclasses/lowUtilization.json rename to configs/tagger/jobclasses/lowUtilization.json diff --git a/internal/tagger/jobclasses/lowload.json b/configs/tagger/jobclasses/lowload.json similarity index 100% rename from internal/tagger/jobclasses/lowload.json rename to configs/tagger/jobclasses/lowload.json diff --git a/internal/tagger/jobclasses/parameters.json b/configs/tagger/jobclasses/parameters.json similarity index 100% rename from internal/tagger/jobclasses/parameters.json rename to configs/tagger/jobclasses/parameters.json diff --git a/internal/tagger/classifyJob.go b/internal/tagger/classifyJob.go index 70399218..b5f30949 100644 --- a/internal/tagger/classifyJob.go +++ b/internal/tagger/classifyJob.go @@ -2,15 +2,16 @@ // All rights reserved. This file is part of cc-backend. // Use of this source code is governed by a MIT-style // license that can be found in the LICENSE file. + package tagger import ( "bytes" - "embed" "encoding/json" "fmt" "maps" "os" + "path/filepath" "strings" "text/template" @@ -23,8 +24,16 @@ import ( "github.com/expr-lang/expr/vm" ) -//go:embed jobclasses/* -var jobClassFiles embed.FS +const ( + // defaultJobClassConfigPath is the default path for job classification configuration + defaultJobClassConfigPath = "./var/tagger/jobclasses" + // tagTypeJobClass is the tag type identifier for job classification tags + tagTypeJobClass = "jobClass" + // jobClassConfigDirMatch is the directory name used for matching filesystem events + jobClassConfigDirMatch = "jobclasses" + // parametersFileName is the name of the parameters configuration file + parametersFileName = "parameters.json" +) // Variable defines a named expression that can be computed and reused in rules. // Variables are evaluated before the main rule and their results are added to the environment. @@ -45,21 +54,21 @@ type ruleVariable struct { // and the final rule expression that determines if the job matches the classification. type RuleFormat struct { // Name is a human-readable description of the rule - Name string `json:"name"` + Name string `json:"name"` // Tag is the classification tag to apply if the rule matches - Tag string `json:"tag"` + Tag string `json:"tag"` // Parameters are shared values referenced in the rule (e.g., thresholds) - Parameters []string `json:"parameters"` + Parameters []string `json:"parameters"` // Metrics are the job metrics required for this rule (e.g., "cpu_load", "mem_used") - Metrics []string `json:"metrics"` + Metrics []string `json:"metrics"` // Requirements are boolean expressions that must be true for the rule to apply - Requirements []string `json:"requirements"` + Requirements []string `json:"requirements"` // Variables are computed values used in the rule expression - Variables []Variable `json:"variables"` + Variables []Variable `json:"variables"` // Rule is the boolean expression that determines if the job matches - Rule string `json:"rule"` + Rule string `json:"rule"` // Hint is a template string that generates a message when the rule matches - Hint string `json:"hint"` + Hint string `json:"hint"` } type ruleInfo struct { @@ -75,29 +84,29 @@ type ruleInfo struct { // This interface allows for easier testing and decoupling from the concrete repository implementation. 
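// A test double needs to satisfy only these methods; the MockJobRepository
// in classifyJob_test.go, built on testify's mock.Mock, is one such
// stand-in for exercising the tagger without a real database.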
type JobRepository interface { // HasTag checks if a job already has a specific tag - HasTag(jobId int64, tagType string, tagName string) bool + HasTag(jobID int64, tagType string, tagName string) bool // AddTagOrCreateDirect adds a tag to a job or creates it if it doesn't exist - AddTagOrCreateDirect(jobId int64, tagType string, tagName string) (tagId int64, err error) + AddTagOrCreateDirect(jobID int64, tagType string, tagName string) (tagID int64, err error) // UpdateMetadata updates job metadata with a key-value pair UpdateMetadata(job *schema.Job, key, val string) (err error) } // JobClassTagger classifies jobs based on configurable rules that evaluate job metrics and properties. -// Rules are loaded from embedded JSON files and can be dynamically reloaded from a watched directory. +// Rules are loaded from an external configuration directory and can be dynamically reloaded when files change. // When a job matches a rule, it is tagged with the corresponding classification and an optional hint message. type JobClassTagger struct { // rules maps classification tags to their compiled rule information - rules map[string]ruleInfo + rules map[string]ruleInfo // parameters are shared values (e.g., thresholds) used across multiple rules - parameters map[string]any + parameters map[string]any // tagType is the type of tag ("jobClass") - tagType string + tagType string // cfgPath is the path to watch for configuration changes - cfgPath string + cfgPath string // repo provides access to job database operations - repo JobRepository + repo JobRepository // getStatistics retrieves job statistics for analysis - getStatistics func(job *schema.Job) (map[string]schema.JobStatistics, error) + getStatistics func(job *schema.Job) (map[string]schema.JobStatistics, error) // getMetricConfig retrieves metric configuration (limits) for a cluster getMetricConfig func(cluster, subCluster string) map[string]*schema.Metric } @@ -169,7 +178,7 @@ func (t *JobClassTagger) prepareRule(b []byte, fns string) { // EventMatch checks if a filesystem event should trigger configuration reload. // It returns true if the event path contains "jobclasses". func (t *JobClassTagger) EventMatch(s string) bool { - return strings.Contains(s, "jobclasses") + return strings.Contains(s, jobClassConfigDirMatch) } // EventCallback is triggered when the configuration directory changes. 
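For context, each JSON file in the external jobclasses directory deserializes into the RuleFormat struct above. The following is a minimal, self-contained sketch of that mapping; the rule name, tag, parameter, metric, and expressions are illustrative stand-ins, not copied from the shipped jobclasses/*.json rules:

package main

import (
	"encoding/json"
	"fmt"
)

// RuleFormat mirrors the struct defined in classifyJob.go, reduced to the
// fields exercised by this sketch.
type RuleFormat struct {
	Name         string   `json:"name"`
	Tag          string   `json:"tag"`
	Parameters   []string `json:"parameters"`
	Metrics      []string `json:"metrics"`
	Requirements []string `json:"requirements"`
	Rule         string   `json:"rule"`
	Hint         string   `json:"hint"`
}

func main() {
	// A hypothetical rule file: tag a job "lowload" when its average CPU
	// load stays below a shared threshold parameter.
	raw := []byte(`{
		"name": "Low CPU load",
		"tag": "lowload",
		"parameters": ["lowcpuload_threshold_factor"],
		"metrics": ["cpu_load"],
		"requirements": ["job.numNodes >= 1"],
		"rule": "cpu_load_avg < lowcpuload_threshold_factor",
		"hint": "Average CPU load was below the configured threshold."
	}`)

	var rule RuleFormat
	if err := json.Unmarshal(raw, &rule); err != nil {
		panic(err)
	}
	fmt.Printf("rule %q applies tag %q when: %s\n", rule.Name, rule.Tag, rule.Rule)
}

Parameters listed in parameters.json are merged into the shared parameters map, which is why a rule expression can reference a name like lowcpuload_threshold_factor without defining it locally.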
@@ -181,9 +190,10 @@ func (t *JobClassTagger) EventCallback() { cclog.Fatal(err) } - if util.CheckFileExists(t.cfgPath + "/parameters.json") { + parametersFile := filepath.Join(t.cfgPath, parametersFileName) + if util.CheckFileExists(parametersFile) { cclog.Info("Merge parameters") - b, err := os.ReadFile(t.cfgPath + "/parameters.json") + b, err := os.ReadFile(parametersFile) if err != nil { cclog.Warnf("prepareRule() > open file error: %v", err) } @@ -198,13 +208,13 @@ for _, fn := range files { fns := fn.Name() - if fns != "parameters.json" { + if fns != parametersFileName { cclog.Debugf("Process: %s", fns) - filename := fmt.Sprintf("%s/%s", t.cfgPath, fns) + filename := filepath.Join(t.cfgPath, fns) b, err := os.ReadFile(filename) if err != nil { cclog.Warnf("prepareRule() > open file error: %v", err) - return + continue } t.prepareRule(b, fns) } @@ -213,7 +223,8 @@ func (t *JobClassTagger) initParameters() error { cclog.Info("Initialize parameters") - b, err := jobClassFiles.ReadFile("jobclasses/parameters.json") + parametersFile := filepath.Join(t.cfgPath, parametersFileName) + b, err := os.ReadFile(parametersFile) if err != nil { cclog.Warnf("prepareRule() > open file error: %v", err) return err @@ -227,13 +238,20 @@ return nil } -// Register initializes the JobClassTagger by loading parameters and classification rules. -// It loads embedded configuration files and sets up a file watch on ./var/tagger/jobclasses -// if it exists, allowing for dynamic configuration updates without restarting the application. -// Returns an error if the embedded configuration files cannot be read or parsed. +// Register initializes the JobClassTagger by loading parameters and classification rules from an external folder. +// It sets up a file watch on ./var/tagger/jobclasses if it exists, allowing for +// dynamic configuration updates without restarting the application. +// Returns an error if the configuration path does not exist or cannot be read.
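// (Caller-side sketch, assuming the zero value is used: constructing
//     t := &JobClassTagger{}
//     if err := t.Register(); err != nil { ... }
// at startup is enough, because Register falls back to
// defaultJobClassConfigPath when cfgPath is unset, as the function body
// below shows.)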
func (t *JobClassTagger) Register() error { - t.cfgPath = "./var/tagger/jobclasses" - t.tagType = "jobClass" + if t.cfgPath == "" { + t.cfgPath = defaultJobClassConfigPath + } + t.tagType = tagTypeJobClass + t.rules = make(map[string]ruleInfo) + + if !util.CheckFileExists(t.cfgPath) { + return fmt.Errorf("configuration path does not exist: %s", t.cfgPath) + } err := t.initParameters() if err != nil { @@ -241,31 +259,28 @@ func (t *JobClassTagger) Register() error { return err } - files, err := jobClassFiles.ReadDir("jobclasses") + files, err := os.ReadDir(t.cfgPath) if err != nil { - return fmt.Errorf("error reading app folder: %#v", err) + return fmt.Errorf("error reading jobclasses folder: %#v", err) } - t.rules = make(map[string]ruleInfo) + for _, fn := range files { fns := fn.Name() - if fns != "parameters.json" { - filename := fmt.Sprintf("jobclasses/%s", fns) + if fns != parametersFileName { cclog.Infof("Process: %s", fns) + filename := filepath.Join(t.cfgPath, fns) - b, err := jobClassFiles.ReadFile(filename) + b, err := os.ReadFile(filename) if err != nil { cclog.Warnf("prepareRule() > open file error: %v", err) - return err + continue } t.prepareRule(b, fns) } } - if util.CheckFileExists(t.cfgPath) { - t.EventCallback() - cclog.Infof("Setup file watch for %s", t.cfgPath) - util.AddListener(t.cfgPath, t) - } + cclog.Infof("Setup file watch for %s", t.cfgPath) + util.AddListener(t.cfgPath, t) t.repo = repository.GetJobRepository() t.getStatistics = archive.GetStatistics diff --git a/internal/tagger/classifyJob_test.go b/internal/tagger/classifyJob_test.go index bed7a8f0..f82cf807 100644 --- a/internal/tagger/classifyJob_test.go +++ b/internal/tagger/classifyJob_test.go @@ -13,13 +13,13 @@ type MockJobRepository struct { mock.Mock } -func (m *MockJobRepository) HasTag(jobId int64, tagType string, tagName string) bool { - args := m.Called(jobId, tagType, tagName) +func (m *MockJobRepository) HasTag(jobID int64, tagType string, tagName string) bool { + args := m.Called(jobID, tagType, tagName) return args.Bool(0) } -func (m *MockJobRepository) AddTagOrCreateDirect(jobId int64, tagType string, tagName string) (tagId int64, err error) { - args := m.Called(jobId, tagType, tagName) +func (m *MockJobRepository) AddTagOrCreateDirect(jobID int64, tagType string, tagName string) (tagID int64, err error) { + args := m.Called(jobID, tagType, tagName) return args.Get(0).(int64), args.Error(1) } diff --git a/internal/tagger/detectApp.go b/internal/tagger/detectApp.go index 0b8e3e7e..2a89ea21 100644 --- a/internal/tagger/detectApp.go +++ b/internal/tagger/detectApp.go @@ -7,9 +7,7 @@ package tagger import ( "bufio" - "embed" "fmt" - "io/fs" "os" "path/filepath" "regexp" @@ -21,8 +19,14 @@ import ( "github.com/ClusterCockpit/cc-lib/v2/util" ) -//go:embed apps/* -var appFiles embed.FS +const ( + // defaultConfigPath is the default path for application tagging configuration + defaultConfigPath = "./var/tagger/apps" + // tagTypeApp is the tag type identifier for application tags + tagTypeApp = "app" + // configDirMatch is the directory name used for matching filesystem events + configDirMatch = "apps" +) type appInfo struct { tag string @@ -30,19 +34,19 @@ type appInfo struct { } // AppTagger detects applications by matching patterns in job scripts. -// It loads application patterns from embedded files and can dynamically reload -// configuration from a watched directory. 
When a job script matches a pattern,
+// It loads application patterns from an external configuration directory and can dynamically reload
+// configuration when files change. When a job script matches a pattern,
 // the corresponding application tag is automatically applied.
 type AppTagger struct {
 	// apps maps application tags to their matching patterns
-	apps map[string]appInfo
+	apps    map[string]appInfo
 	// tagType is the type of tag ("app")
 	tagType string
 	// cfgPath is the path to watch for configuration changes
 	cfgPath string
 }
 
-func (t *AppTagger) scanApp(f fs.File, fns string) {
+func (t *AppTagger) scanApp(f *os.File, fns string) {
 	scanner := bufio.NewScanner(f)
 	ai := appInfo{tag: strings.TrimSuffix(fns, filepath.Ext(fns)), strings: make([]string, 0)}
 
@@ -56,7 +60,7 @@
 // EventMatch checks if a filesystem event should trigger configuration reload.
 // It returns true if the event path contains "apps".
 func (t *AppTagger) EventMatch(s string) bool {
-	return strings.Contains(s, "apps")
+	return strings.Contains(s, configDirMatch)
 }
 
 // EventCallback is triggered when the configuration directory changes.
@@ -71,43 +75,50 @@ func (t *AppTagger) EventCallback() {
 	for _, fn := range files {
 		fns := fn.Name()
 		cclog.Debugf("Process: %s", fns)
-		f, err := os.Open(fmt.Sprintf("%s/%s", t.cfgPath, fns))
+		f, err := os.Open(filepath.Join(t.cfgPath, fns))
 		if err != nil {
 			cclog.Errorf("error opening app file %s: %#v", fns, err)
+			continue
 		}
 		t.scanApp(f, fns)
+		f.Close()
 	}
 }
 
-// Register initializes the AppTagger by loading application patterns from embedded files.
-// It also sets up a file watch on ./var/tagger/apps if it exists, allowing for
+// Register initializes the AppTagger by loading application patterns from an external configuration directory.
+// It sets up a file watch on ./var/tagger/apps, allowing for
 // dynamic configuration updates without restarting the application.
-// Returns an error if the embedded application files cannot be read.
+// Returns an error if the configuration path does not exist or cannot be read.
func (t *AppTagger) Register() error { - t.cfgPath = "./var/tagger/apps" - t.tagType = "app" + if t.cfgPath == "" { + t.cfgPath = defaultConfigPath + } + t.tagType = tagTypeApp + t.apps = make(map[string]appInfo, 0) - files, err := appFiles.ReadDir("apps") + if !util.CheckFileExists(t.cfgPath) { + return fmt.Errorf("configuration path does not exist: %s", t.cfgPath) + } + + files, err := os.ReadDir(t.cfgPath) if err != nil { return fmt.Errorf("error reading app folder: %#v", err) } - t.apps = make(map[string]appInfo, 0) + for _, fn := range files { fns := fn.Name() cclog.Debugf("Process: %s", fns) - f, err := appFiles.Open(fmt.Sprintf("apps/%s", fns)) + f, err := os.Open(filepath.Join(t.cfgPath, fns)) if err != nil { - return fmt.Errorf("error opening app file %s: %#v", fns, err) + cclog.Errorf("error opening app file %s: %#v", fns, err) + continue } - defer f.Close() t.scanApp(f, fns) + f.Close() } - if util.CheckFileExists(t.cfgPath) { - t.EventCallback() - cclog.Infof("Setup file watch for %s", t.cfgPath) - util.AddListener(t.cfgPath, t) - } + cclog.Infof("Setup file watch for %s", t.cfgPath) + util.AddListener(t.cfgPath, t) return nil } diff --git a/internal/tagger/detectApp_test.go b/internal/tagger/detectApp_test.go index 1c44f670..fe5e7a21 100644 --- a/internal/tagger/detectApp_test.go +++ b/internal/tagger/detectApp_test.go @@ -5,6 +5,8 @@ package tagger import ( + "os" + "path/filepath" "testing" "github.com/ClusterCockpit/cc-backend/internal/repository" @@ -29,28 +31,88 @@ func noErr(tb testing.TB, err error) { } } -func TestRegister(t *testing.T) { - var tagger AppTagger +func setupAppTaggerTestDir(t *testing.T) string { + t.Helper() - err := tagger.Register() + testDir := t.TempDir() + appsDir := filepath.Join(testDir, "apps") + err := os.MkdirAll(appsDir, 0o755) noErr(t, err) + srcDir := "../../configs/tagger/apps" + files, err := os.ReadDir(srcDir) + noErr(t, err) + + for _, file := range files { + if file.IsDir() { + continue + } + srcPath := filepath.Join(srcDir, file.Name()) + dstPath := filepath.Join(appsDir, file.Name()) + + data, err := os.ReadFile(srcPath) + noErr(t, err) + + err = os.WriteFile(dstPath, data, 0o644) + noErr(t, err) + } + + return appsDir +} + +func TestRegister(t *testing.T) { + appsDir := setupAppTaggerTestDir(t) + + var tagger AppTagger + tagger.cfgPath = appsDir + tagger.tagType = tagTypeApp + tagger.apps = make(map[string]appInfo, 0) + + files, err := os.ReadDir(appsDir) + noErr(t, err) + + for _, fn := range files { + if fn.IsDir() { + continue + } + fns := fn.Name() + f, err := os.Open(filepath.Join(appsDir, fns)) + noErr(t, err) + tagger.scanApp(f, fns) + f.Close() + } + if len(tagger.apps) != 16 { t.Errorf("wrong summary for diagnostic \ngot: %d \nwant: 16", len(tagger.apps)) } } func TestMatch(t *testing.T) { + appsDir := setupAppTaggerTestDir(t) r := setup(t) job, err := r.FindByIDDirect(317) noErr(t, err) var tagger AppTagger + tagger.cfgPath = appsDir + tagger.tagType = tagTypeApp + tagger.apps = make(map[string]appInfo, 0) - err = tagger.Register() + files, err := os.ReadDir(appsDir) noErr(t, err) + for _, fn := range files { + if fn.IsDir() { + continue + } + fns := fn.Name() + f, err := os.Open(filepath.Join(appsDir, fns)) + noErr(t, err) + tagger.scanApp(f, fns) + f.Close() + } + tagger.Match(job) if !r.HasTag(317, "app", "vasp") { From a9366d14c66aae29e3da4928558bd74b39662990 Mon Sep 17 00:00:00 2001 From: Jan Eitzinger Date: Tue, 13 Jan 2026 08:32:32 +0100 Subject: [PATCH 25/59] Add README for tagging. 
Enable tagging by flag without configuration option --- cmd/cc-backend/main.go | 2 + configs/tagger/README.md | 419 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 421 insertions(+) diff --git a/cmd/cc-backend/main.go b/cmd/cc-backend/main.go index 8eb3c76f..9f98ccbf 100644 --- a/cmd/cc-backend/main.go +++ b/cmd/cc-backend/main.go @@ -302,6 +302,8 @@ func initSubsystems() error { // Apply tags if requested if flagApplyTags { + tagger.Init() + if err := tagger.RunTaggers(); err != nil { return fmt.Errorf("running job taggers: %w", err) } diff --git a/configs/tagger/README.md b/configs/tagger/README.md index e69de29b..759cbe97 100644 --- a/configs/tagger/README.md +++ b/configs/tagger/README.md @@ -0,0 +1,419 @@ +# Job Tagging Configuration + +ClusterCockpit provides automatic job tagging functionality to classify and +categorize jobs based on configurable rules. The tagging system consists of two +main components: + +1. **Application Detection** - Identifies which application a job is running +2. **Job Classification** - Analyzes job performance characteristics and applies classification tags + +## Directory Structure + +``` +configs/tagger/ +├── apps/ # Application detection patterns +│ ├── vasp.txt +│ ├── gromacs.txt +│ └── ... +└── jobclasses/ # Job classification rules + ├── parameters.json + ├── lowUtilization.json + ├── highload.json + └── ... +``` + +## Activating Tagger Rules + +### Step 1: Copy Configuration Files + +To activate tagging, review, adapt, and copy the configuration files from +`configs/tagger/` to `var/tagger/`: + +```bash +# From the cc-backend root directory +mkdir -p var/tagger +cp -r configs/tagger/apps var/tagger/ +cp -r configs/tagger/jobclasses var/tagger/ +``` + +### Step 2: Enable Tagging in Configuration + +Add or set the following configuration key in the `main` section of your `config.json`: + +```json +{ + "enable-job-taggers": true +} +``` + +**Important**: Automatic tagging is disabled by default. You must explicitly +enable it by setting `enable-job-taggers: true` in the main configuration file. + +### Step 3: Restart cc-backend + +The tagger system automatically loads configuration from `./var/tagger/` at +startup. After copying the files and enabling the feature, restart cc-backend: + +```bash +./cc-backend -server +``` + +### Step 4: Verify Configuration Loaded + +Check the logs for messages indicating successful configuration loading: + +``` +[INFO] Setup file watch for ./var/tagger/apps +[INFO] Setup file watch for ./var/tagger/jobclasses +``` + +## How Tagging Works + +### Automatic Tagging + +When `enable-job-taggers` is set to `true` in the configuration, tags are +automatically applied when: + +- **Job Start**: Application detection runs immediately when a job starts +- **Job Stop**: Job classification runs when a job completes + +The system analyzes job metadata and metrics to determine appropriate tags. + +**Note**: Automatic tagging only works for jobs that start or stop after the +feature is enabled. Existing jobs are not automatically retagged. + +### Manual Tagging (Retroactive) + +To apply tags to existing jobs in the database, use the `-apply-tags` command +line option: + +```bash +./cc-backend -apply-tags +``` + +This processes all jobs in the database and applies current tagging rules. 
This +is useful when: + +- You have existing jobs that were created before tagging was enabled +- You've added new tagging rules and want to apply them to historical data +- You've modified existing rules and want to re-evaluate all jobs + +### Hot Reload + +The tagger system watches the configuration directories for changes. You can +modify or add rules without restarting `cc-backend`: + +- Changes to `var/tagger/apps/*` are detected automatically +- Changes to `var/tagger/jobclasses/*` are detected automatically + +## Application Detection + +Application detection identifies which software a job is running by matching +patterns in the job script. + +### Configuration Format + +Application patterns are stored in text files under `var/tagger/apps/`. Each +file contains one or more regular expression patterns (one per line) that match +against the job script. + +**Example: `apps/vasp.txt`** + +``` +vasp +VASP +``` + +### How It Works + +1. When a job starts, the system retrieves the job script from metadata +2. Each line in the app files is treated as a regex pattern +3. Patterns are matched case-insensitively against the lowercased job script +4. If a match is found, a tag of type `app` with the filename (without extension) is applied +5. Only the first matching application is tagged + +### Adding New Applications + +1. Create a new file in `var/tagger/apps/` (e.g., `tensorflow.txt`) +2. Add regex patterns, one per line: + + ``` + tensorflow + tf\.keras + import tensorflow + ``` + +3. The file is automatically detected and loaded + +**Note**: The tag name will be the filename without the `.txt` extension (e.g., `tensorflow`). + +## Job Classification + +Job classification analyzes completed jobs based on their metrics and properties +to identify performance issues or characteristics. + +### Configuration Format + +Job classification rules are defined in JSON files under +`var/tagger/jobclasses/`. 
Each rule file defines: + +- **Metrics required**: Which job metrics to analyze +- **Requirements**: Pre-conditions that must be met +- **Variables**: Computed values used in the rule +- **Rule expression**: Boolean expression that determines if the rule matches +- **Hint template**: Message displayed when the rule matches + +### Parameters File + +`jobclasses/parameters.json` defines shared threshold values used across multiple rules: + +```json +{ + "lowcpuload_threshold_factor": 0.9, + "highmemoryusage_threshold_factor": 0.9, + "job_min_duration_seconds": 600.0, + "sampling_interval_seconds": 30.0 +} +``` + +### Rule File Structure + +**Example: `jobclasses/lowUtilization.json`** + +```json +{ + "name": "Low resource utilization", + "tag": "lowutilization", + "parameters": ["job_min_duration_seconds"], + "metrics": ["flops_any", "mem_bw"], + "requirements": [ + "job.shared == \"none\"", + "job.duration > job_min_duration_seconds" + ], + "variables": [ + { + "name": "mem_bw_perc", + "expr": "1.0 - (mem_bw.avg / mem_bw.limits.peak)" + } + ], + "rule": "flops_any.avg < flops_any.limits.alert", + "hint": "Average flop rate {{.flops_any.avg}} falls below threshold {{.flops_any.limits.alert}}" +} +``` + +#### Field Descriptions + +| Field | Description | +| -------------- | ----------------------------------------------------------------------------- | +| `name` | Human-readable description of the rule | +| `tag` | Tag identifier applied when the rule matches | +| `parameters` | List of parameter names from `parameters.json` to include in rule environment | +| `metrics` | List of metrics required for evaluation (must be present in job data) | +| `requirements` | Boolean expressions that must all be true for the rule to be evaluated | +| `variables` | Named expressions computed before evaluating the main rule | +| `rule` | Boolean expression that determines if the job matches this classification | +| `hint` | Go template string for generating a user-visible message | + +### Expression Environment + +Expressions in `requirements`, `variables`, and `rule` have access to: + +**Job Properties:** + +- `job.shared` - Shared node allocation type +- `job.duration` - Job runtime in seconds +- `job.numCores` - Number of CPU cores +- `job.numNodes` - Number of nodes +- `job.jobState` - Job completion state +- `job.numAcc` - Number of accelerators +- `job.smt` - SMT setting + +**Metric Statistics (for each metric in `metrics`):** + +- `.min` - Minimum value +- `.max` - Maximum value +- `.avg` - Average value +- `.limits.peak` - Peak limit from cluster config +- `.limits.normal` - Normal threshold +- `.limits.caution` - Caution threshold +- `.limits.alert` - Alert threshold + +**Parameters:** + +- All parameters listed in the `parameters` field + +**Variables:** + +- All variables defined in the `variables` array + +### Expression Language + +Rules use the [expr](https://github.com/expr-lang/expr) language for expressions. Supported operations: + +- **Arithmetic**: `+`, `-`, `*`, `/`, `%`, `^` +- **Comparison**: `==`, `!=`, `<`, `<=`, `>`, `>=` +- **Logical**: `&&`, `||`, `!` +- **Functions**: Standard math functions (see expr documentation) + +### Hint Templates + +Hints use Go's `text/template` syntax. Variables from the evaluation environment are accessible: + +``` +{{.flops_any.avg}} # Access metric average +{{.job.duration}} # Access job property +{{.my_variable}} # Access computed variable +``` + +### Adding New Classification Rules + +1. 
Create a new JSON file in `var/tagger/jobclasses/` (e.g., `memoryLeak.json`)
2. Define the rule structure:

   ```json
   {
     "name": "Memory Leak Detection",
     "tag": "memory_leak",
     "parameters": ["memory_leak_slope_threshold"],
     "metrics": ["mem_used"],
     "requirements": ["job.duration > 3600"],
     "variables": [
       {
         "name": "mem_growth",
         "expr": "(mem_used.max - mem_used.min) / job.duration"
       }
     ],
     "rule": "mem_growth > memory_leak_slope_threshold",
     "hint": "Memory usage grew by {{.mem_growth}} per second"
   }
   ```

3. Add any new parameters to `parameters.json`
4. The file is automatically detected and loaded

## Configuration Paths

The tagger system reads from these paths (relative to the cc-backend working directory):

- **Application patterns**: `./var/tagger/apps/`
- **Job classification rules**: `./var/tagger/jobclasses/`

These paths are defined as constants in the source code and cannot be changed without recompiling.

## Troubleshooting

### Tags Not Applied

1. **Check tagging is enabled**: Verify `enable-job-taggers: true` is set in `config.json`

2. **Check configuration exists**:

   ```bash
   ls -la var/tagger/apps
   ls -la var/tagger/jobclasses
   ```

3. **Check logs for errors**:

   ```bash
   ./cc-backend -server -loglevel debug
   ```

4. **Verify file permissions**: Ensure cc-backend can read the configuration files

5. **For existing jobs**: Use `./cc-backend -apply-tags` to retroactively tag jobs

### Rules Not Matching

1. **Enable debug logging**: Set `loglevel: debug` to see detailed rule evaluation
2. **Check requirements**: Ensure all requirements in the rule are satisfied
3. **Verify metrics exist**: Classification rules require job metrics to be available
4. **Check metric names**: Ensure metric names match those in your cluster configuration

### File Watch Not Working

If changes to configuration files aren't detected:

1. Restart cc-backend to reload all configuration
2. Check that the filesystem supports file watching (network filesystems may not)
3. Check logs for file watch setup messages

## Best Practices

1. **Start Simple**: Begin with basic rules and refine based on results
2. **Use Requirements**: Filter out irrelevant jobs early with requirements
3. **Test Incrementally**: Add one rule at a time and verify behavior
4. **Document Rules**: Use descriptive names and clear hint messages
5. **Share Parameters**: Define common thresholds in `parameters.json` for consistency
6. **Version Control**: Keep your `var/tagger/` configuration in version control
7. **Backup Before Changes**: Test new rules on a copy before deploying to production

## Examples

### Simple Application Detection

**File: `var/tagger/apps/python.txt`**

```
python
python3
\.py
```

This detects jobs running Python scripts.

### Complex Classification Rule

**File: `var/tagger/jobclasses/cpuImbalance.json`**

```json
{
  "name": "CPU Load Imbalance",
  "tag": "cpu_imbalance",
  "parameters": ["core_load_imbalance_threshold_factor"],
  "metrics": ["cpu_load"],
  "requirements": ["job.numCores > 1", "job.duration > 600"],
  "variables": [
    {
      "name": "load_variance",
      "expr": "(cpu_load.max - cpu_load.min) / cpu_load.avg"
    }
  ],
  "rule": "load_variance > core_load_imbalance_threshold_factor",
  "hint": "CPU load varies by a factor of {{.load_variance}} across cores"
}
```

This detects jobs where CPU load is unevenly distributed across cores.
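### Evaluating a Rule by Hand

To make the variables → rule → hint pipeline concrete, the following standalone
Go sketch walks through the `cpuImbalance` example above. It uses the same
[expr](https://github.com/expr-lang/expr) library and Go's `text/template`, but
it is an editor's illustration of the documented evaluation order, not the
cc-backend implementation; the environment values are invented, and
requirements (omitted here) are simply additional boolean expressions evaluated
the same way as the rule.

```go
package main

import (
	"fmt"
	"os"
	"text/template"

	"github.com/expr-lang/expr"
)

func main() {
	// Environment as a rule sees it: job properties, metric statistics, parameters.
	env := map[string]any{
		"job":      map[string]any{"numCores": 16, "duration": 7200},
		"cpu_load": map[string]any{"min": 2.0, "max": 15.0, "avg": 8.0},
		"core_load_imbalance_threshold_factor": 1.2,
	}

	// Step 1: evaluate each variable and merge the result into the environment.
	v, err := expr.Eval("(cpu_load.max - cpu_load.min) / cpu_load.avg", env)
	if err != nil {
		fmt.Fprintln(os.Stderr, err)
		return
	}
	env["load_variance"] = v

	// Step 2: evaluate the boolean rule against the enriched environment.
	matched, err := expr.Eval("load_variance > core_load_imbalance_threshold_factor", env)
	if err != nil {
		fmt.Fprintln(os.Stderr, err)
		return
	}

	// Step 3: on a match, render the hint template from the same environment.
	if matched.(bool) {
		hint := template.Must(template.New("hint").Parse(
			"CPU load varies by a factor of {{.load_variance}} across cores"))
		hint.Execute(os.Stdout, env)
	}
}
```

Running this prints `CPU load varies by a factor of 1.625 across cores`.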
+
## Reference

### Configuration Options

**Main Configuration (`config.json`)**:

- `enable-job-taggers` (boolean, default: `false`) - Enables automatic job tagging system
  - Must be set to `true` to activate automatic tagging on job start/stop events
  - Does not affect the `-apply-tags` command line option

**Command Line Options**:

- `-apply-tags` - Apply all tagging rules to existing jobs in the database
  - Works independently of `enable-job-taggers` configuration
  - Useful for retroactively tagging jobs or re-evaluating with updated rules

### Default Configuration Location

The example configurations are provided in:

- `configs/tagger/apps/` - Example application patterns (16 applications)
- `configs/tagger/jobclasses/` - Example classification rules (3 rules)

Copy these to `var/tagger/` and customize for your environment.

### Tag Types

- `app` - Application tags (e.g., "vasp", "gromacs")
- `jobClass` - Classification tags (e.g., "lowutilization", "highload")

Tags can be queried and filtered in the ClusterCockpit UI and API.

From 2ebab1e2e2579cccebaec7615b3a1e07cf6bfe49 Mon Sep 17 00:00:00 2001
From: Jan Eitzinger
Date: Tue, 13 Jan 2026 09:50:57 +0100
Subject: [PATCH 26/59] Reformat with gofumpt

---
 internal/auth/auth.go             | 10 ++++----
 internal/auth/auth_test.go        | 40 +++++++++++++++----------------
 internal/auth/jwt.go              |  6 ++---
 internal/auth/jwtCookieSession.go |  4 ++--
 internal/auth/jwtHelpers.go       | 24 +++++++++----------
 internal/auth/jwtHelpers_test.go  | 37 ++++++++++++++--------------
 internal/auth/jwtSession.go       |  4 ++--
 internal/auth/oidc.go             |  4 ++--
 8 files changed, 64 insertions(+), 65 deletions(-)

diff --git a/internal/auth/auth.go b/internal/auth/auth.go
index 3be1768e..41691d00 100644
--- a/internal/auth/auth.go
+++ b/internal/auth/auth.go
@@ -40,7 +40,7 @@ type Authenticator interface {
 	// authenticator should attempt the login. This method should not perform
 	// expensive operations or actual authentication.
 	CanLogin(user *schema.User, username string, rw http.ResponseWriter, r *http.Request) (*schema.User, bool)
-
+
 	// Login performs the actual authentication for the user.
 	// It returns the authenticated user or an error if authentication fails.
 	// The user parameter may be nil if the user doesn't exist in the database yet.
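// Editor's sketch (not part of this patch): the intended call pattern for the
// per-(IP, username) limiter defined in the next hunk -- five attempts per
// 15 minutes, checked before any credential verification is attempted. The
// surrounding handler plumbing is an assumption for illustration.
//
//	limiter := getIPUserLimiter(ip, username)
//	if !limiter.Allow() {
//		http.Error(rw, "too many login attempts", http.StatusTooManyRequests)
//		return
//	}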
@@ -65,13 +65,13 @@ var ipUserLimiters sync.Map func getIPUserLimiter(ip, username string) *rate.Limiter { key := ip + ":" + username now := time.Now() - + if entry, ok := ipUserLimiters.Load(key); ok { rle := entry.(*rateLimiterEntry) rle.lastUsed = now return rle.limiter } - + // More aggressive rate limiting: 5 attempts per 15 minutes newLimiter := rate.NewLimiter(rate.Every(15*time.Minute/5), 5) ipUserLimiters.Store(key, &rateLimiterEntry{ @@ -176,7 +176,7 @@ func (auth *Authentication) AuthViaSession( func Init(authCfg *json.RawMessage) { initOnce.Do(func() { authInstance = &Authentication{} - + // Start background cleanup of rate limiters startRateLimiterCleanup() @@ -272,7 +272,7 @@ func handleUserSync(user *schema.User, syncUserOnLogin, updateUserOnLogin bool) cclog.Errorf("Error while loading user '%s': %v", user.Username, err) return } - + if err == sql.ErrNoRows && syncUserOnLogin { // Add new user if err := r.AddUser(user); err != nil { cclog.Errorf("Error while adding user '%s' to DB: %v", user.Username, err) diff --git a/internal/auth/auth_test.go b/internal/auth/auth_test.go index 15f153e6..68961354 100644 --- a/internal/auth/auth_test.go +++ b/internal/auth/auth_test.go @@ -15,25 +15,25 @@ import ( func TestGetIPUserLimiter(t *testing.T) { ip := "192.168.1.1" username := "testuser" - + // Get limiter for the first time limiter1 := getIPUserLimiter(ip, username) if limiter1 == nil { t.Fatal("Expected limiter to be created") } - + // Get the same limiter again limiter2 := getIPUserLimiter(ip, username) if limiter1 != limiter2 { t.Error("Expected to get the same limiter instance") } - + // Get a different limiter for different user limiter3 := getIPUserLimiter(ip, "otheruser") if limiter1 == limiter3 { t.Error("Expected different limiter for different user") } - + // Get a different limiter for different IP limiter4 := getIPUserLimiter("192.168.1.2", username) if limiter1 == limiter4 { @@ -45,16 +45,16 @@ func TestGetIPUserLimiter(t *testing.T) { func TestRateLimiterBehavior(t *testing.T) { ip := "10.0.0.1" username := "ratelimituser" - + limiter := getIPUserLimiter(ip, username) - + // Should allow first 5 attempts for i := 0; i < 5; i++ { if !limiter.Allow() { t.Errorf("Request %d should be allowed within rate limit", i+1) } } - + // 6th attempt should be blocked if limiter.Allow() { t.Error("Request 6 should be blocked by rate limiter") @@ -65,19 +65,19 @@ func TestRateLimiterBehavior(t *testing.T) { func TestCleanupOldRateLimiters(t *testing.T) { // Clear all existing limiters first to avoid interference from other tests cleanupOldRateLimiters(time.Now().Add(24 * time.Hour)) - + // Create some new rate limiters limiter1 := getIPUserLimiter("1.1.1.1", "user1") limiter2 := getIPUserLimiter("2.2.2.2", "user2") - + if limiter1 == nil || limiter2 == nil { t.Fatal("Failed to create test limiters") } - + // Cleanup limiters older than 1 second from now (should keep both) time.Sleep(10 * time.Millisecond) // Small delay to ensure timestamp difference cleanupOldRateLimiters(time.Now().Add(-1 * time.Second)) - + // Verify they still exist (should get same instance) if getIPUserLimiter("1.1.1.1", "user1") != limiter1 { t.Error("Limiter 1 was incorrectly cleaned up") @@ -85,10 +85,10 @@ func TestCleanupOldRateLimiters(t *testing.T) { if getIPUserLimiter("2.2.2.2", "user2") != limiter2 { t.Error("Limiter 2 was incorrectly cleaned up") } - + // Cleanup limiters older than 1 hour from now (should remove both) cleanupOldRateLimiters(time.Now().Add(2 * time.Hour)) - + // Getting them again 
should create new instances newLimiter1 := getIPUserLimiter("1.1.1.1", "user1") if newLimiter1 == limiter1 { @@ -107,14 +107,14 @@ func TestIPv4Extraction(t *testing.T) { {"IPv4 without port", "192.168.1.1", "192.168.1.1"}, {"Localhost with port", "127.0.0.1:3000", "127.0.0.1"}, } - + for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { result := tt.input if host, _, err := net.SplitHostPort(result); err == nil { result = host } - + if result != tt.expected { t.Errorf("Expected %s, got %s", tt.expected, result) } @@ -122,7 +122,7 @@ func TestIPv4Extraction(t *testing.T) { } } -// TestIPv6Extraction tests extracting IPv6 addresses +// TestIPv6Extraction tests extracting IPv6 addresses func TestIPv6Extraction(t *testing.T) { tests := []struct { name string @@ -134,14 +134,14 @@ func TestIPv6Extraction(t *testing.T) { {"IPv6 without port", "2001:db8::1", "2001:db8::1"}, {"IPv6 localhost", "::1", "::1"}, } - + for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { result := tt.input if host, _, err := net.SplitHostPort(result); err == nil { result = host } - + if result != tt.expected { t.Errorf("Expected %s, got %s", tt.expected, result) } @@ -160,14 +160,14 @@ func TestIPExtractionEdgeCases(t *testing.T) { {"Empty string", "", ""}, {"Just port", ":8080", ""}, } - + for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { result := tt.input if host, _, err := net.SplitHostPort(result); err == nil { result = host } - + if result != tt.expected { t.Errorf("Expected %s, got %s", tt.expected, result) } diff --git a/internal/auth/jwt.go b/internal/auth/jwt.go index be642219..c0f641b9 100644 --- a/internal/auth/jwt.go +++ b/internal/auth/jwt.go @@ -101,20 +101,20 @@ func (ja *JWTAuthenticator) AuthViaJWT( // Token is valid, extract payload claims := token.Claims.(jwt.MapClaims) - + // Use shared helper to get user from JWT claims var user *schema.User user, err = getUserFromJWT(claims, Keys.JwtConfig.ValidateUser, schema.AuthToken, -1) if err != nil { return nil, err } - + // If not validating user, we only get roles from JWT (no projects for this auth method) if !Keys.JwtConfig.ValidateUser { user.Roles = extractRolesFromClaims(claims, false) user.Projects = nil // Standard JWT auth doesn't include projects } - + return user, nil } diff --git a/internal/auth/jwtCookieSession.go b/internal/auth/jwtCookieSession.go index 42f7439e..4c4bbeb6 100644 --- a/internal/auth/jwtCookieSession.go +++ b/internal/auth/jwtCookieSession.go @@ -146,13 +146,13 @@ func (ja *JWTCookieSessionAuthenticator) Login( } claims := token.Claims.(jwt.MapClaims) - + // Use shared helper to get user from JWT claims user, err = getUserFromJWT(claims, jc.ValidateUser, schema.AuthSession, schema.AuthViaToken) if err != nil { return nil, err } - + // Sync or update user if configured if !jc.ValidateUser && (jc.SyncUserOnLogin || jc.UpdateUserOnLogin) { handleTokenUser(user) diff --git a/internal/auth/jwtHelpers.go b/internal/auth/jwtHelpers.go index 5bfc91ef..de59145e 100644 --- a/internal/auth/jwtHelpers.go +++ b/internal/auth/jwtHelpers.go @@ -28,7 +28,7 @@ func extractStringFromClaims(claims jwt.MapClaims, key string) string { // If validateRoles is true, only valid roles are returned func extractRolesFromClaims(claims jwt.MapClaims, validateRoles bool) []string { var roles []string - + if rawroles, ok := claims["roles"].([]any); ok { for _, rr := range rawroles { if r, ok := rr.(string); ok { @@ -42,14 +42,14 @@ func extractRolesFromClaims(claims jwt.MapClaims, validateRoles bool) []string { } } } - + 
return roles } // extractProjectsFromClaims extracts projects from JWT claims func extractProjectsFromClaims(claims jwt.MapClaims) []string { projects := make([]string, 0) - + if rawprojs, ok := claims["projects"].([]any); ok { for _, pp := range rawprojs { if p, ok := pp.(string); ok { @@ -61,7 +61,7 @@ func extractProjectsFromClaims(claims jwt.MapClaims) []string { projects = append(projects, projSlice...) } } - + return projects } @@ -72,14 +72,14 @@ func extractNameFromClaims(claims jwt.MapClaims) string { if name, ok := claims["name"].(string); ok { return name } - + // Try nested structure: {name: {values: [...]}} if wrap, ok := claims["name"].(map[string]any); ok { if vals, ok := wrap["values"].([]any); ok { if len(vals) == 0 { return "" } - + name := fmt.Sprintf("%v", vals[0]) for i := 1; i < len(vals); i++ { name += fmt.Sprintf(" %v", vals[i]) @@ -87,7 +87,7 @@ func extractNameFromClaims(claims jwt.MapClaims) string { return name } } - + return "" } @@ -100,7 +100,7 @@ func getUserFromJWT(claims jwt.MapClaims, validateUser bool, authType schema.Aut if sub == "" { return nil, errors.New("missing 'sub' claim in JWT") } - + if validateUser { // Validate user against database ur := repository.GetUserRepository() @@ -109,22 +109,22 @@ func getUserFromJWT(claims jwt.MapClaims, validateUser bool, authType schema.Aut cclog.Errorf("Error while loading user '%v': %v", sub, err) return nil, fmt.Errorf("database error: %w", err) } - + // Deny any logins for unknown usernames if user == nil || err == sql.ErrNoRows { cclog.Warn("Could not find user from JWT in internal database.") return nil, errors.New("unknown user") } - + // Return database user (with database roles) return user, nil } - + // Create user from JWT claims name := extractNameFromClaims(claims) roles := extractRolesFromClaims(claims, true) // Validate roles projects := extractProjectsFromClaims(claims) - + return &schema.User{ Username: sub, Name: name, diff --git a/internal/auth/jwtHelpers_test.go b/internal/auth/jwtHelpers_test.go index 84a1f2e0..4627f7e5 100644 --- a/internal/auth/jwtHelpers_test.go +++ b/internal/auth/jwtHelpers_test.go @@ -19,7 +19,7 @@ func TestExtractStringFromClaims(t *testing.T) { "email": "test@example.com", "age": 25, // not a string } - + tests := []struct { name string key string @@ -30,7 +30,7 @@ func TestExtractStringFromClaims(t *testing.T) { {"Non-existent key", "missing", ""}, {"Non-string value", "age", ""}, } - + for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { result := extractStringFromClaims(claims, tt.key) @@ -88,16 +88,16 @@ func TestExtractRolesFromClaims(t *testing.T) { expected: []string{}, }, } - + for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { result := extractRolesFromClaims(tt.claims, tt.validateRoles) - + if len(result) != len(tt.expected) { t.Errorf("Expected %d roles, got %d", len(tt.expected), len(result)) return } - + for i, role := range result { if i >= len(tt.expected) || role != tt.expected[i] { t.Errorf("Expected role %s at position %d, got %s", tt.expected[i], i, role) @@ -141,16 +141,16 @@ func TestExtractProjectsFromClaims(t *testing.T) { expected: []string{"project1", "project2"}, // Should skip non-strings }, } - + for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { result := extractProjectsFromClaims(tt.claims) - + if len(result) != len(tt.expected) { t.Errorf("Expected %d projects, got %d", len(tt.expected), len(result)) return } - + for i, project := range result { if i >= len(tt.expected) || project != tt.expected[i] { 
t.Errorf("Expected project %s at position %d, got %s", tt.expected[i], i, project) @@ -216,7 +216,7 @@ func TestExtractNameFromClaims(t *testing.T) { expected: "123 Smith", // Should convert to string }, } - + for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { result := extractNameFromClaims(tt.claims) @@ -235,29 +235,28 @@ func TestGetUserFromJWT_NoValidation(t *testing.T) { "roles": []any{"user", "admin"}, "projects": []any{"project1", "project2"}, } - + user, err := getUserFromJWT(claims, false, schema.AuthToken, -1) - if err != nil { t.Fatalf("Unexpected error: %v", err) } - + if user.Username != "testuser" { t.Errorf("Expected username 'testuser', got '%s'", user.Username) } - + if user.Name != "Test User" { t.Errorf("Expected name 'Test User', got '%s'", user.Name) } - + if len(user.Roles) != 2 { t.Errorf("Expected 2 roles, got %d", len(user.Roles)) } - + if len(user.Projects) != 2 { t.Errorf("Expected 2 projects, got %d", len(user.Projects)) } - + if user.AuthType != schema.AuthToken { t.Errorf("Expected AuthType %v, got %v", schema.AuthToken, user.AuthType) } @@ -268,13 +267,13 @@ func TestGetUserFromJWT_MissingSub(t *testing.T) { claims := jwt.MapClaims{ "name": "Test User", } - + _, err := getUserFromJWT(claims, false, schema.AuthToken, -1) - + if err == nil { t.Error("Expected error for missing sub claim") } - + if err.Error() != "missing 'sub' claim in JWT" { t.Errorf("Expected specific error message, got: %v", err) } diff --git a/internal/auth/jwtSession.go b/internal/auth/jwtSession.go index 107afcb8..de7e985b 100644 --- a/internal/auth/jwtSession.go +++ b/internal/auth/jwtSession.go @@ -75,13 +75,13 @@ func (ja *JWTSessionAuthenticator) Login( } claims := token.Claims.(jwt.MapClaims) - + // Use shared helper to get user from JWT claims user, err = getUserFromJWT(claims, Keys.JwtConfig.ValidateUser, schema.AuthSession, schema.AuthViaToken) if err != nil { return nil, err } - + // Sync or update user if configured if !Keys.JwtConfig.ValidateUser && (Keys.JwtConfig.SyncUserOnLogin || Keys.JwtConfig.UpdateUserOnLogin) { handleTokenUser(user) diff --git a/internal/auth/oidc.go b/internal/auth/oidc.go index a3fc09cc..b90aca4f 100644 --- a/internal/auth/oidc.go +++ b/internal/auth/oidc.go @@ -59,7 +59,7 @@ func NewOIDC(a *Authentication) *OIDC { // Use context with timeout for provider initialization ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second) defer cancel() - + provider, err := oidc.NewProvider(ctx, Keys.OpenIDConfig.Provider) if err != nil { cclog.Fatal(err) @@ -119,7 +119,7 @@ func (oa *OIDC) OAuth2Callback(rw http.ResponseWriter, r *http.Request) { // Exchange authorization code for token with timeout ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second) defer cancel() - + token, err := oa.client.Exchange(ctx, code, oauth2.VerifierOption(codeVerifier)) if err != nil { http.Error(rw, "Failed to exchange token: "+err.Error(), http.StatusInternalServerError) From 04a2e460ae8b1e884795bdf5200e1efb671ac958 Mon Sep 17 00:00:00 2001 From: Jan Eitzinger Date: Tue, 13 Jan 2026 09:52:00 +0100 Subject: [PATCH 27/59] Refactor metricstore. 
Initial stub for cluster/ subcluster specific retention times --- internal/metricstore/avroCheckpoint.go | 6 +- internal/metricstore/config.go | 70 ++++++++++- internal/metricstore/configSchema.go | 38 +++++- .../{memorystore.go => metricstore.go} | 114 ++++++++++++++---- ...emorystore_test.go => metricstore_test.go} | 2 +- 5 files changed, 203 insertions(+), 27 deletions(-) rename internal/metricstore/{memorystore.go => metricstore.go} (76%) rename internal/metricstore/{memorystore_test.go => metricstore_test.go} (99%) diff --git a/internal/metricstore/avroCheckpoint.go b/internal/metricstore/avroCheckpoint.go index 275a64bd..aa14ce5a 100644 --- a/internal/metricstore/avroCheckpoint.go +++ b/internal/metricstore/avroCheckpoint.go @@ -24,8 +24,10 @@ import ( "github.com/linkedin/goavro/v2" ) -var NumAvroWorkers int = DefaultAvroWorkers -var startUp bool = true +var ( + NumAvroWorkers int = DefaultAvroWorkers + startUp bool = true +) func (as *AvroStore) ToCheckpoint(dir string, dumpAll bool) (int, error) { levels := make([]*AvroLevel, 0) diff --git a/internal/metricstore/config.go b/internal/metricstore/config.go index 97f16c46..06ae774d 100644 --- a/internal/metricstore/config.go +++ b/internal/metricstore/config.go @@ -33,8 +33,19 @@ type MetricStoreConfig struct { DumpToFile string `json:"dump-to-file"` EnableGops bool `json:"gops"` } `json:"debug"` + // Global default retention duration RetentionInMemory string `json:"retention-in-memory"` - Archive struct { + // Per-cluster retention overrides + Clusters []struct { + Cluster string `json:"cluster"` + RetentionInMemory string `json:"retention-in-memory"` + // Per-subcluster retention overrides within this cluster + SubClusters []struct { + SubCluster string `json:"subcluster"` + RetentionInMemory string `json:"retention-in-memory"` + } `json:"subclusters,omitempty"` + } `json:"clusters,omitempty"` + Archive struct { Interval string `json:"interval"` RootDir string `json:"directory"` DeleteInstead bool `json:"delete-instead"` @@ -50,6 +61,14 @@ type MetricStoreConfig struct { var Keys MetricStoreConfig +type retentionConfig struct { + global time.Duration + clusterMap map[string]time.Duration + subClusterMap map[string]map[string]time.Duration +} + +var retentionLookup *retentionConfig + // AggregationStrategy for aggregation over multiple values at different cpus/sockets/..., not time! type AggregationStrategy int @@ -113,3 +132,52 @@ func AddMetric(name string, metric MetricConfig) error { return nil } + +func GetRetentionDuration(cluster, subCluster string) (time.Duration, error) { + if retentionLookup == nil { + return 0, fmt.Errorf("[METRICSTORE]> retention configuration not initialized") + } + + if subCluster != "" { + if subMap, ok := retentionLookup.subClusterMap[cluster]; ok { + if retention, ok := subMap[subCluster]; ok { + return retention, nil + } + } + } + + if retention, ok := retentionLookup.clusterMap[cluster]; ok { + return retention, nil + } + + return retentionLookup.global, nil +} + +// GetShortestRetentionDuration returns the shortest configured retention duration +// across all levels (global, cluster, and subcluster configurations). +// Returns 0 if retentionLookup is not initialized or global retention is not set. 
+func GetShortestRetentionDuration() time.Duration { + if retentionLookup == nil || retentionLookup.global <= 0 { + return 0 + } + + shortest := retentionLookup.global + + // Check all cluster-level retention durations + for _, clusterRetention := range retentionLookup.clusterMap { + if clusterRetention > 0 && clusterRetention < shortest { + shortest = clusterRetention + } + } + + // Check all subcluster-level retention durations + for _, subClusterMap := range retentionLookup.subClusterMap { + for _, scRetention := range subClusterMap { + if scRetention > 0 && scRetention < shortest { + shortest = scRetention + } + } + } + + return shortest +} diff --git a/internal/metricstore/configSchema.go b/internal/metricstore/configSchema.go index f1a20a73..868bacc5 100644 --- a/internal/metricstore/configSchema.go +++ b/internal/metricstore/configSchema.go @@ -46,9 +46,45 @@ const configSchema = `{ } }, "retention-in-memory": { - "description": "Keep the metrics within memory for given time interval. Retention for X hours, then the metrics would be freed.", + "description": "Global default: Keep the metrics within memory for given time interval. Retention for X hours, then the metrics would be freed.", "type": "string" }, + "clusters": { + "description": "Optional per-cluster retention overrides", + "type": "array", + "items": { + "type": "object", + "required": ["cluster"], + "properties": { + "cluster": { + "description": "Cluster name", + "type": "string" + }, + "retention-in-memory": { + "description": "Cluster-specific retention duration (overrides global default)", + "type": "string" + }, + "subclusters": { + "description": "Optional per-subcluster retention overrides", + "type": "array", + "items": { + "type": "object", + "required": ["subcluster"], + "properties": { + "subcluster": { + "description": "Subcluster name", + "type": "string" + }, + "retention-in-memory": { + "description": "Subcluster-specific retention duration (overrides cluster and global default)", + "type": "string" + } + } + } + } + } + } + }, "nats": { "description": "Configuration for accepting published data through NATS.", "type": "array", diff --git a/internal/metricstore/memorystore.go b/internal/metricstore/metricstore.go similarity index 76% rename from internal/metricstore/memorystore.go rename to internal/metricstore/metricstore.go index 14a02fcd..5a5c3bce 100644 --- a/internal/metricstore/memorystore.go +++ b/internal/metricstore/metricstore.go @@ -98,6 +98,49 @@ func Init(rawConfig json.RawMessage, wg *sync.WaitGroup) { } } + globalRetention, err := time.ParseDuration(Keys.RetentionInMemory) + if err != nil { + cclog.Fatal(err) + } + + retentionLookup = &retentionConfig{ + global: globalRetention, + clusterMap: make(map[string]time.Duration), + subClusterMap: make(map[string]map[string]time.Duration), + } + + for _, clusterCfg := range Keys.Clusters { + if clusterCfg.RetentionInMemory != "" { + clusterRetention, err := time.ParseDuration(clusterCfg.RetentionInMemory) + if err != nil { + cclog.Warnf("[METRICSTORE]> Invalid retention duration for cluster '%s': %s\n", clusterCfg.Cluster, err.Error()) + continue + } + retentionLookup.clusterMap[clusterCfg.Cluster] = clusterRetention + cclog.Debugf("[METRICSTORE]> Cluster '%s' retention: %s\n", clusterCfg.Cluster, clusterRetention) + } + + if len(clusterCfg.SubClusters) > 0 { + if retentionLookup.subClusterMap[clusterCfg.Cluster] == nil { + retentionLookup.subClusterMap[clusterCfg.Cluster] = make(map[string]time.Duration) + } + + for _, scCfg := range 
clusterCfg.SubClusters { + if scCfg.RetentionInMemory != "" { + scRetention, err := time.ParseDuration(scCfg.RetentionInMemory) + if err != nil { + cclog.Warnf("[METRICSTORE]> Invalid retention duration for subcluster '%s/%s': %s\n", + clusterCfg.Cluster, scCfg.SubCluster, err.Error()) + continue + } + retentionLookup.subClusterMap[clusterCfg.Cluster][scCfg.SubCluster] = scRetention + cclog.Debugf("[METRICSTORE]> SubCluster '%s/%s' retention: %s\n", + clusterCfg.Cluster, scCfg.SubCluster, scRetention) + } + } + } + } + // Pass the config.MetricStoreKeys InitMetrics(Metrics) @@ -208,32 +251,22 @@ func Shutdown() { cclog.Infof("[METRICSTORE]> Done! (%d files written)\n", files) } -func getName(m *MemoryStore, i int) string { - for key, val := range m.Metrics { - if val.offset == i { - return key - } - } - return "" -} - func Retention(wg *sync.WaitGroup, ctx context.Context) { ms := GetMemoryStore() go func() { defer wg.Done() - d, err := time.ParseDuration(Keys.RetentionInMemory) - if err != nil { - cclog.Fatal(err) - } - if d <= 0 { + shortestRetention := GetShortestRetentionDuration() + if shortestRetention <= 0 { return } - tickInterval := d / 2 + tickInterval := shortestRetention / 2 if tickInterval <= 0 { return } + cclog.Debugf("[METRICSTORE]> Retention ticker interval set to %s (half of shortest retention: %s)\n", + tickInterval, shortestRetention) ticker := time.NewTicker(tickInterval) defer ticker.Stop() @@ -242,13 +275,50 @@ func Retention(wg *sync.WaitGroup, ctx context.Context) { case <-ctx.Done(): return case <-ticker.C: - t := time.Now().Add(-d) - cclog.Infof("[METRICSTORE]> start freeing buffers (older than %s)...\n", t.Format(time.RFC3339)) - freed, err := ms.Free(nil, t.Unix()) - if err != nil { - cclog.Errorf("[METRICSTORE]> freeing up buffers failed: %s\n", err.Error()) - } else { - cclog.Infof("[METRICSTORE]> done: %d buffers freed\n", freed) + totalFreed := 0 + + clusters := ms.ListChildren(nil) + for _, cluster := range clusters { + retention, err := GetRetentionDuration(cluster, "") + if err != nil { + cclog.Warnf("[METRICSTORE]> Could not get retention for cluster '%s': %s\n", cluster, err.Error()) + continue + } + if retention <= 0 { + continue + } + + t := time.Now().Add(-retention) + cclog.Debugf("[METRICSTORE]> Freeing buffers for cluster '%s' (older than %s, retention: %s)...\n", + cluster, t.Format(time.RFC3339), retention) + + subClusters := ms.ListChildren([]string{cluster}) + for _, subCluster := range subClusters { + scRetention, err := GetRetentionDuration(cluster, subCluster) + if err != nil { + cclog.Warnf("[METRICSTORE]> Could not get retention for subcluster '%s/%s': %s\n", + cluster, subCluster, err.Error()) + continue + } + if scRetention <= 0 { + continue + } + + scTime := time.Now().Add(-scRetention) + freed, err := ms.Free([]string{cluster, subCluster}, scTime.Unix()) + if err != nil { + cclog.Errorf("[METRICSTORE]> freeing buffers for '%s/%s' failed: %s\n", + cluster, subCluster, err.Error()) + } else if freed > 0 { + cclog.Debugf("[METRICSTORE]> freed %d buffers for '%s/%s' (retention: %s)\n", + freed, cluster, subCluster, scRetention) + totalFreed += freed + } + } + } + + if totalFreed > 0 { + cclog.Infof("[METRICSTORE]> Total buffers freed: %d\n", totalFreed) } } } diff --git a/internal/metricstore/memorystore_test.go b/internal/metricstore/metricstore_test.go similarity index 99% rename from internal/metricstore/memorystore_test.go rename to internal/metricstore/metricstore_test.go index 29379d21..fd7c963f 100644 --- 
a/internal/metricstore/memorystore_test.go +++ b/internal/metricstore/metricstore_test.go @@ -131,7 +131,7 @@ func TestBufferWrite(t *testing.T) { func TestBufferRead(t *testing.T) { b := newBuffer(100, 10) - + // Write some test data b.write(100, schema.Float(1.0)) b.write(110, schema.Float(2.0)) From 754f7e16f67e0708a479831c8360064d74147633 Mon Sep 17 00:00:00 2001 From: Jan Eitzinger Date: Tue, 13 Jan 2026 09:52:31 +0100 Subject: [PATCH 28/59] Reformat with gofumpt --- internal/repository/node.go | 1 - pkg/archive/fsBackend.go | 4 ++-- pkg/archive/s3Backend_test.go | 42 ++++++++++++++++----------------- tools/archive-manager/main.go | 1 - tools/archive-migration/main.go | 3 +-- 5 files changed, 24 insertions(+), 27 deletions(-) diff --git a/internal/repository/node.go b/internal/repository/node.go index 752a36fa..2890cdbc 100644 --- a/internal/repository/node.go +++ b/internal/repository/node.go @@ -561,7 +561,6 @@ func (r *NodeRepository) GetNodesForList( nodeFilter string, page *model.PageRequest, ) ([]string, map[string]string, int, bool, error) { - // Init Return Vars nodes := make([]string, 0) stateMap := make(map[string]string) diff --git a/pkg/archive/fsBackend.go b/pkg/archive/fsBackend.go index 020f2aa4..61921d70 100644 --- a/pkg/archive/fsBackend.go +++ b/pkg/archive/fsBackend.go @@ -188,7 +188,7 @@ func (fsa *FsArchive) Init(rawConfig json.RawMessage) (uint64, error) { if isEmpty { cclog.Infof("fsBackend Init() > Bootstrapping new archive at %s", fsa.path) versionStr := fmt.Sprintf("%d\n", Version) - if err := os.WriteFile(filepath.Join(fsa.path, "version.txt"), []byte(versionStr), 0644); err != nil { + if err := os.WriteFile(filepath.Join(fsa.path, "version.txt"), []byte(versionStr), 0o644); err != nil { cclog.Errorf("fsBackend Init() > failed to create version.txt: %v", err) return 0, err } @@ -674,7 +674,7 @@ func (fsa *FsArchive) ImportJob( func (fsa *FsArchive) StoreClusterCfg(name string, config *schema.Cluster) error { dir := filepath.Join(fsa.path, name) - if err := os.MkdirAll(dir, 0777); err != nil { + if err := os.MkdirAll(dir, 0o777); err != nil { cclog.Errorf("StoreClusterCfg() > mkdir error: %v", err) return err } diff --git a/pkg/archive/s3Backend_test.go b/pkg/archive/s3Backend_test.go index 2b79db7f..0b4e17a2 100644 --- a/pkg/archive/s3Backend_test.go +++ b/pkg/archive/s3Backend_test.go @@ -41,7 +41,7 @@ func (m *MockS3Client) GetObject(ctx context.Context, params *s3.GetObjectInput, if !exists { return nil, fmt.Errorf("NoSuchKey: object not found") } - + contentLength := int64(len(data)) return &s3.GetObjectOutput{ Body: io.NopCloser(bytes.NewReader(data)), @@ -65,7 +65,7 @@ func (m *MockS3Client) HeadObject(ctx context.Context, params *s3.HeadObjectInpu if !exists { return nil, fmt.Errorf("NotFound") } - + contentLength := int64(len(data)) return &s3.HeadObjectOutput{ ContentLength: &contentLength, @@ -86,12 +86,12 @@ func (m *MockS3Client) CopyObject(ctx context.Context, params *s3.CopyObjectInpu return nil, fmt.Errorf("invalid CopySource") } sourceKey := parts[1] - + data, exists := m.objects[sourceKey] if !exists { return nil, fmt.Errorf("source not found") } - + destKey := aws.ToString(params.Key) m.objects[destKey] = data return &s3.CopyObjectOutput{}, nil @@ -100,15 +100,15 @@ func (m *MockS3Client) CopyObject(ctx context.Context, params *s3.CopyObjectInpu func (m *MockS3Client) ListObjectsV2(ctx context.Context, params *s3.ListObjectsV2Input, optFns ...func(*s3.Options)) (*s3.ListObjectsV2Output, error) { prefix := aws.ToString(params.Prefix) 
delimiter := aws.ToString(params.Delimiter) - + var contents []types.Object commonPrefixes := make(map[string]bool) - + for key, data := range m.objects { if !strings.HasPrefix(key, prefix) { continue } - + if delimiter != "" { // Check if there's a delimiter after the prefix remainder := strings.TrimPrefix(key, prefix) @@ -120,21 +120,21 @@ func (m *MockS3Client) ListObjectsV2(ctx context.Context, params *s3.ListObjects continue } } - + size := int64(len(data)) contents = append(contents, types.Object{ Key: aws.String(key), Size: &size, }) } - + var prefixList []types.CommonPrefix for p := range commonPrefixes { prefixList = append(prefixList, types.CommonPrefix{ Prefix: aws.String(p), }) } - + return &s3.ListObjectsV2Output{ Contents: contents, CommonPrefixes: prefixList, @@ -144,10 +144,10 @@ func (m *MockS3Client) ListObjectsV2(ctx context.Context, params *s3.ListObjects // Test helper to create a mock S3 archive with test data func setupMockS3Archive(t *testing.T) *MockS3Client { mock := NewMockS3Client() - + // Add version.txt mock.objects["version.txt"] = []byte("2\n") - + // Add a test cluster directory mock.objects["emmy/cluster.json"] = []byte(`{ "name": "emmy", @@ -165,7 +165,7 @@ func setupMockS3Archive(t *testing.T) *MockS3Client { } ] }`) - + // Add a test job mock.objects["emmy/1403/244/1608923076/meta.json"] = []byte(`{ "jobId": 1403244, @@ -174,7 +174,7 @@ func setupMockS3Archive(t *testing.T) *MockS3Client { "numNodes": 1, "resources": [{"hostname": "node001"}] }`) - + mock.objects["emmy/1403/244/1608923076/data.json"] = []byte(`{ "mem_used": { "node": { @@ -184,7 +184,7 @@ func setupMockS3Archive(t *testing.T) *MockS3Client { } } }`) - + return mock } @@ -213,7 +213,7 @@ func TestGetS3Key(t *testing.T) { Cluster: "emmy", StartTime: 1608923076, } - + key := getS3Key(job, "meta.json") expected := "emmy/1403/244/1608923076/meta.json" if key != expected { @@ -227,7 +227,7 @@ func TestGetS3Directory(t *testing.T) { Cluster: "emmy", StartTime: 1608923076, } - + dir := getS3Directory(job) expected := "emmy/1403/244/1608923076/" if dir != expected { @@ -247,13 +247,13 @@ func TestS3ArchiveConfigParsing(t *testing.T) { "region": "us-east-1", "usePathStyle": true }`) - + var cfg S3ArchiveConfig err := json.Unmarshal(rawConfig, &cfg) if err != nil { t.Fatalf("failed to parse config: %v", err) } - + if cfg.Bucket != "test-bucket" { t.Errorf("expected bucket 'test-bucket', got '%s'", cfg.Bucket) } @@ -277,14 +277,14 @@ func TestS3KeyGeneration(t *testing.T) { {1404397, "emmy", 1609300556, "data.json.gz", "emmy/1404/397/1609300556/data.json.gz"}, {42, "fritz", 1234567890, "meta.json", "fritz/0/042/1234567890/meta.json"}, } - + for _, tt := range tests { job := &schema.Job{ JobID: tt.jobID, Cluster: tt.cluster, StartTime: tt.startTime, } - + key := getS3Key(job, tt.file) if key != tt.expected { t.Errorf("for job %d: expected %s, got %s", tt.jobID, tt.expected, key) diff --git a/tools/archive-manager/main.go b/tools/archive-manager/main.go index ffcba793..fff81256 100644 --- a/tools/archive-manager/main.go +++ b/tools/archive-manager/main.go @@ -71,7 +71,6 @@ func countJobsNative(archivePath string) (int, error) { } return nil }) - if err != nil { return 0, fmt.Errorf("failed to walk directory: %w", err) } diff --git a/tools/archive-migration/main.go b/tools/archive-migration/main.go index 8375ee98..1384e065 100644 --- a/tools/archive-migration/main.go +++ b/tools/archive-migration/main.go @@ -70,7 +70,6 @@ func main() { // Run migration migrated, failed, err := 
migrateArchive(archivePath, dryRun, numWorkers) - if err != nil { cclog.Errorf("Migration completed with errors: %s", err.Error()) if failed > 0 { @@ -104,5 +103,5 @@ func checkVersion(archivePath string) error { func updateVersion(archivePath string) error { versionFile := filepath.Join(archivePath, "version.txt") - return os.WriteFile(versionFile, []byte("3\n"), 0644) + return os.WriteFile(versionFile, []byte("3\n"), 0o644) } From 25c8fca56136eb04cbfe14d5f18a67082512bc64 Mon Sep 17 00:00:00 2001 From: Jan Eitzinger Date: Tue, 13 Jan 2026 14:42:24 +0100 Subject: [PATCH 29/59] Revert retention config in metricstore --- internal/metricstore/config.go | 68 ------------------ internal/metricstore/configSchema.go | 38 +--------- internal/metricstore/metricstore.go | 103 ++++----------------------- 3 files changed, 13 insertions(+), 196 deletions(-) diff --git a/internal/metricstore/config.go b/internal/metricstore/config.go index 06ae774d..c789f11c 100644 --- a/internal/metricstore/config.go +++ b/internal/metricstore/config.go @@ -33,18 +33,7 @@ type MetricStoreConfig struct { DumpToFile string `json:"dump-to-file"` EnableGops bool `json:"gops"` } `json:"debug"` - // Global default retention duration - RetentionInMemory string `json:"retention-in-memory"` - // Per-cluster retention overrides - Clusters []struct { - Cluster string `json:"cluster"` RetentionInMemory string `json:"retention-in-memory"` - // Per-subcluster retention overrides within this cluster - SubClusters []struct { - SubCluster string `json:"subcluster"` - RetentionInMemory string `json:"retention-in-memory"` - } `json:"subclusters,omitempty"` - } `json:"clusters,omitempty"` Archive struct { Interval string `json:"interval"` RootDir string `json:"directory"` @@ -61,14 +50,6 @@ type MetricStoreConfig struct { var Keys MetricStoreConfig -type retentionConfig struct { - global time.Duration - clusterMap map[string]time.Duration - subClusterMap map[string]map[string]time.Duration -} - -var retentionLookup *retentionConfig - // AggregationStrategy for aggregation over multiple values at different cpus/sockets/..., not time! type AggregationStrategy int @@ -132,52 +113,3 @@ func AddMetric(name string, metric MetricConfig) error { return nil } - -func GetRetentionDuration(cluster, subCluster string) (time.Duration, error) { - if retentionLookup == nil { - return 0, fmt.Errorf("[METRICSTORE]> retention configuration not initialized") - } - - if subCluster != "" { - if subMap, ok := retentionLookup.subClusterMap[cluster]; ok { - if retention, ok := subMap[subCluster]; ok { - return retention, nil - } - } - } - - if retention, ok := retentionLookup.clusterMap[cluster]; ok { - return retention, nil - } - - return retentionLookup.global, nil -} - -// GetShortestRetentionDuration returns the shortest configured retention duration -// across all levels (global, cluster, and subcluster configurations). -// Returns 0 if retentionLookup is not initialized or global retention is not set. 
-func GetShortestRetentionDuration() time.Duration {
-	if retentionLookup == nil || retentionLookup.global <= 0 {
-		return 0
-	}
-
-	shortest := retentionLookup.global
-
-	// Check all cluster-level retention durations
-	for _, clusterRetention := range retentionLookup.clusterMap {
-		if clusterRetention > 0 && clusterRetention < shortest {
-			shortest = clusterRetention
-		}
-	}
-
-	// Check all subcluster-level retention durations
-	for _, subClusterMap := range retentionLookup.subClusterMap {
-		for _, scRetention := range subClusterMap {
-			if scRetention > 0 && scRetention < shortest {
-				shortest = scRetention
-			}
-		}
-	}
-
-	return shortest
-}
diff --git a/internal/metricstore/configSchema.go b/internal/metricstore/configSchema.go
index 868bacc5..f1a20a73 100644
--- a/internal/metricstore/configSchema.go
+++ b/internal/metricstore/configSchema.go
@@ -46,45 +46,9 @@ const configSchema = `{
 			}
 		},
 		"retention-in-memory": {
-			"description": "Global default: Keep the metrics within memory for given time interval. Retention for X hours, then the metrics would be freed.",
+			"description": "Keep the metrics within memory for given time interval. Retention for X hours, then the metrics would be freed.",
 			"type": "string"
 		},
-		"clusters": {
-			"description": "Optional per-cluster retention overrides",
-			"type": "array",
-			"items": {
-				"type": "object",
-				"required": ["cluster"],
-				"properties": {
-					"cluster": {
-						"description": "Cluster name",
-						"type": "string"
-					},
-					"retention-in-memory": {
-						"description": "Cluster-specific retention duration (overrides global default)",
-						"type": "string"
-					},
-					"subclusters": {
-						"description": "Optional per-subcluster retention overrides",
-						"type": "array",
-						"items": {
-							"type": "object",
-							"required": ["subcluster"],
-							"properties": {
-								"subcluster": {
-									"description": "Subcluster name",
-									"type": "string"
-								},
-								"retention-in-memory": {
-									"description": "Subcluster-specific retention duration (overrides cluster and global default)",
-									"type": "string"
-								}
-							}
-						}
-					}
-				}
-			}
-		},
 		"nats": {
 			"description": "Configuration for accepting published data through NATS.",
 			"type": "array",
diff --git a/internal/metricstore/metricstore.go b/internal/metricstore/metricstore.go
index 5a5c3bce..ac8948ae 100644
--- a/internal/metricstore/metricstore.go
+++ b/internal/metricstore/metricstore.go
@@ -98,49 +98,6 @@ func Init(rawConfig json.RawMessage, wg *sync.WaitGroup) {
 		}
 	}

-	globalRetention, err := time.ParseDuration(Keys.RetentionInMemory)
-	if err != nil {
-		cclog.Fatal(err)
-	}
-
-	retentionLookup = &retentionConfig{
-		global:        globalRetention,
-		clusterMap:    make(map[string]time.Duration),
-		subClusterMap: make(map[string]map[string]time.Duration),
-	}
-
-	for _, clusterCfg := range Keys.Clusters {
-		if clusterCfg.RetentionInMemory != "" {
-			clusterRetention, err := time.ParseDuration(clusterCfg.RetentionInMemory)
-			if err != nil {
-				cclog.Warnf("[METRICSTORE]> Invalid retention duration for cluster '%s': %s\n", clusterCfg.Cluster, err.Error())
-				continue
-			}
-			retentionLookup.clusterMap[clusterCfg.Cluster] = clusterRetention
-			cclog.Debugf("[METRICSTORE]> Cluster '%s' retention: %s\n", clusterCfg.Cluster, clusterRetention)
-		}
-
-		if len(clusterCfg.SubClusters) > 0 {
-			if retentionLookup.subClusterMap[clusterCfg.Cluster] == nil {
-				retentionLookup.subClusterMap[clusterCfg.Cluster] = make(map[string]time.Duration)
-			}
-
-			for _, scCfg := range clusterCfg.SubClusters {
-				if scCfg.RetentionInMemory != "" {
-					scRetention, err := time.ParseDuration(scCfg.RetentionInMemory)
-					if err != nil {
-						cclog.Warnf("[METRICSTORE]> Invalid retention duration for subcluster '%s/%s': %s\n",
-							clusterCfg.Cluster, scCfg.SubCluster, err.Error())
-						continue
-					}
-					retentionLookup.subClusterMap[clusterCfg.Cluster][scCfg.SubCluster] = scRetention
-					cclog.Debugf("[METRICSTORE]> SubCluster '%s/%s' retention: %s\n",
-						clusterCfg.Cluster, scCfg.SubCluster, scRetention)
-				}
-			}
-		}
-	}
-
 	// Pass the config.MetricStoreKeys
 	InitMetrics(Metrics)

@@ -256,17 +213,18 @@ func Retention(wg *sync.WaitGroup, ctx context.Context) {
 	go func() {
 		defer wg.Done()

-		shortestRetention := GetShortestRetentionDuration()
-		if shortestRetention <= 0 {
+		d, err := time.ParseDuration(Keys.RetentionInMemory)
+		if err != nil {
+			cclog.Fatal(err)
+		}
+		if d <= 0 {
 			return
 		}

-		tickInterval := shortestRetention / 2
+		tickInterval := d / 2
 		if tickInterval <= 0 {
 			return
 		}
-		cclog.Debugf("[METRICSTORE]> Retention ticker interval set to %s (half of shortest retention: %s)\n",
-			tickInterval, shortestRetention)

 		ticker := time.NewTicker(tickInterval)
 		defer ticker.Stop()
@@ -275,50 +233,13 @@ func Retention(wg *sync.WaitGroup, ctx context.Context) {
 			case <-ctx.Done():
 				return
 			case <-ticker.C:
-				totalFreed := 0
-
-				clusters := ms.ListChildren(nil)
-				for _, cluster := range clusters {
-					retention, err := GetRetentionDuration(cluster, "")
-					if err != nil {
-						cclog.Warnf("[METRICSTORE]> Could not get retention for cluster '%s': %s\n", cluster, err.Error())
-						continue
-					}
-					if retention <= 0 {
-						continue
-					}
-
-					t := time.Now().Add(-retention)
-					cclog.Debugf("[METRICSTORE]> Freeing buffers for cluster '%s' (older than %s, retention: %s)...\n",
-						cluster, t.Format(time.RFC3339), retention)
-
-					subClusters := ms.ListChildren([]string{cluster})
-					for _, subCluster := range subClusters {
-						scRetention, err := GetRetentionDuration(cluster, subCluster)
-						if err != nil {
-							cclog.Warnf("[METRICSTORE]> Could not get retention for subcluster '%s/%s': %s\n",
-								cluster, subCluster, err.Error())
-							continue
-						}
-						if scRetention <= 0 {
-							continue
-						}
-
-						scTime := time.Now().Add(-scRetention)
-						freed, err := ms.Free([]string{cluster, subCluster}, scTime.Unix())
-						if err != nil {
-							cclog.Errorf("[METRICSTORE]> freeing buffers for '%s/%s' failed: %s\n",
-								cluster, subCluster, err.Error())
-						} else if freed > 0 {
-							cclog.Debugf("[METRICSTORE]> freed %d buffers for '%s/%s' (retention: %s)\n",
-								freed, cluster, subCluster, scRetention)
-							totalFreed += freed
-						}
-					}
-				}
-
-				if totalFreed > 0 {
-					cclog.Infof("[METRICSTORE]> Total buffers freed: %d\n", totalFreed)
+				t := time.Now().Add(-d)
+				cclog.Infof("[METRICSTORE]> start freeing buffers (older than %s)...\n", t.Format(time.RFC3339))
+				freed, err := ms.Free(nil, t.Unix())
+				if err != nil {
+					cclog.Errorf("[METRICSTORE]> freeing up buffers failed: %s\n", err.Error())
+				} else {
+					cclog.Infof("[METRICSTORE]> done: %d buffers freed\n", freed)
 				}
 			}
 		}

From 518e9950eac893f54d1d441901860d7e12de71dc Mon Sep 17 00:00:00 2001
From: Christoph Kluge
Date: Tue, 13 Jan 2026 16:59:52 +0100
Subject: [PATCH 30/59] add job exclusivity filter, review db indices

---
 api/schema.graphqls                           |   1 +
 internal/graph/generated/generated.go         | 258 +++++++++---------
 internal/graph/model/models_gen.go            |   1 +
 internal/graph/schema.resolvers.go            |  16 +-
 internal/repository/jobQuery.go               |  88 +++---
 .../sqlite3/09_add-job-cache.up.sql           |  72 ++---
 .../migrations/sqlite3/10_node-table.up.sql   |   2 -
 internal/routerConfig/routes.go               |   6 +
 web/frontend/src/generic/Filters.svelte       |  94 ++++---
 .../src/generic/filters/JobStates.svelte      |  75 ++++-
 10 files changed,
365 insertions(+), 248 deletions(-) diff --git a/api/schema.graphqls b/api/schema.graphqls index 1c81e6b6..7be43f73 100644 --- a/api/schema.graphqls +++ b/api/schema.graphqls @@ -458,6 +458,7 @@ input JobFilter { state: [JobState!] metricStats: [MetricStatItem!] shared: String + schedule: String node: StringInput } diff --git a/internal/graph/generated/generated.go b/internal/graph/generated/generated.go index d96ccf1d..2d3aca04 100644 --- a/internal/graph/generated/generated.go +++ b/internal/graph/generated/generated.go @@ -2741,6 +2741,7 @@ input JobFilter { state: [JobState!] metricStats: [MetricStatItem!] shared: String + schedule: String node: StringInput } @@ -3002,7 +3003,7 @@ func (ec *executionContext) field_Query_jobMetrics_args(ctx context.Context, raw return nil, err } args["metrics"] = arg1 - arg2, err := graphql.ProcessArgField(ctx, rawArgs, "scopes", ec.unmarshalOMetricScope2ᚕgithubᚗcomᚋClusterCockpitᚋccᚑlibᚋschemaᚐMetricScopeᚄ) + arg2, err := graphql.ProcessArgField(ctx, rawArgs, "scopes", ec.unmarshalOMetricScope2ᚕgithubᚗcomᚋClusterCockpitᚋccᚑlibᚋv2ᚋschemaᚐMetricScopeᚄ) if err != nil { return nil, err } @@ -3159,7 +3160,7 @@ func (ec *executionContext) field_Query_nodeMetricsList_args(ctx context.Context return nil, err } args["nodeFilter"] = arg3 - arg4, err := graphql.ProcessArgField(ctx, rawArgs, "scopes", ec.unmarshalOMetricScope2ᚕgithubᚗcomᚋClusterCockpitᚋccᚑlibᚋschemaᚐMetricScopeᚄ) + arg4, err := graphql.ProcessArgField(ctx, rawArgs, "scopes", ec.unmarshalOMetricScope2ᚕgithubᚗcomᚋClusterCockpitᚋccᚑlibᚋv2ᚋschemaᚐMetricScopeᚄ) if err != nil { return nil, err } @@ -3205,7 +3206,7 @@ func (ec *executionContext) field_Query_nodeMetrics_args(ctx context.Context, ra return nil, err } args["nodes"] = arg1 - arg2, err := graphql.ProcessArgField(ctx, rawArgs, "scopes", ec.unmarshalOMetricScope2ᚕgithubᚗcomᚋClusterCockpitᚋccᚑlibᚋschemaᚐMetricScopeᚄ) + arg2, err := graphql.ProcessArgField(ctx, rawArgs, "scopes", ec.unmarshalOMetricScope2ᚕgithubᚗcomᚋClusterCockpitᚋccᚑlibᚋv2ᚋschemaᚐMetricScopeᚄ) if err != nil { return nil, err } @@ -3336,7 +3337,7 @@ func (ec *executionContext) field_Query_scopedJobStats_args(ctx context.Context, return nil, err } args["metrics"] = arg1 - arg2, err := graphql.ProcessArgField(ctx, rawArgs, "scopes", ec.unmarshalOMetricScope2ᚕgithubᚗcomᚋClusterCockpitᚋccᚑlibᚋschemaᚐMetricScopeᚄ) + arg2, err := graphql.ProcessArgField(ctx, rawArgs, "scopes", ec.unmarshalOMetricScope2ᚕgithubᚗcomᚋClusterCockpitᚋccᚑlibᚋv2ᚋschemaᚐMetricScopeᚄ) if err != nil { return nil, err } @@ -3562,7 +3563,7 @@ func (ec *executionContext) _Cluster_subClusters(ctx context.Context, field grap return obj.SubClusters, nil }, nil, - ec.marshalNSubCluster2ᚕᚖgithubᚗcomᚋClusterCockpitᚋccᚑlibᚋschemaᚐSubClusterᚄ, + ec.marshalNSubCluster2ᚕᚖgithubᚗcomᚋClusterCockpitᚋccᚑlibᚋv2ᚋschemaᚐSubClusterᚄ, true, true, ) @@ -3648,7 +3649,7 @@ func (ec *executionContext) _ClusterMetricWithName_unit(ctx context.Context, fie return obj.Unit, nil }, nil, - ec.marshalOUnit2ᚖgithubᚗcomᚋClusterCockpitᚋccᚑlibᚋschemaᚐUnit, + ec.marshalOUnit2ᚖgithubᚗcomᚋClusterCockpitᚋccᚑlibᚋv2ᚋschemaᚐUnit, true, false, ) @@ -3712,7 +3713,7 @@ func (ec *executionContext) _ClusterMetricWithName_data(ctx context.Context, fie return obj.Data, nil }, nil, - ec.marshalNNullableFloat2ᚕgithubᚗcomᚋClusterCockpitᚋccᚑlibᚋschemaᚐFloatᚄ, + ec.marshalNNullableFloat2ᚕgithubᚗcomᚋClusterCockpitᚋccᚑlibᚋv2ᚋschemaᚐFloatᚄ, true, true, ) @@ -4200,7 +4201,7 @@ func (ec *executionContext) _GlobalMetricListItem_unit(ctx context.Context, fiel return 
obj.Unit, nil }, nil, - ec.marshalNUnit2githubᚗcomᚋClusterCockpitᚋccᚑlibᚋschemaᚐUnit, + ec.marshalNUnit2githubᚗcomᚋClusterCockpitᚋccᚑlibᚋv2ᚋschemaᚐUnit, true, true, ) @@ -4235,7 +4236,7 @@ func (ec *executionContext) _GlobalMetricListItem_scope(ctx context.Context, fie return obj.Scope, nil }, nil, - ec.marshalNMetricScope2githubᚗcomᚋClusterCockpitᚋccᚑlibᚋschemaᚐMetricScope, + ec.marshalNMetricScope2githubᚗcomᚋClusterCockpitᚋccᚑlibᚋv2ᚋschemaᚐMetricScope, true, true, ) @@ -4293,7 +4294,7 @@ func (ec *executionContext) _GlobalMetricListItem_availability(ctx context.Conte return obj.Availability, nil }, nil, - ec.marshalNClusterSupport2ᚕgithubᚗcomᚋClusterCockpitᚋccᚑlibᚋschemaᚐClusterSupportᚄ, + ec.marshalNClusterSupport2ᚕgithubᚗcomᚋClusterCockpitᚋccᚑlibᚋv2ᚋschemaᚐClusterSupportᚄ, true, true, ) @@ -4966,7 +4967,7 @@ func (ec *executionContext) _Job_state(ctx context.Context, field graphql.Collec return obj.State, nil }, nil, - ec.marshalNJobState2githubᚗcomᚋClusterCockpitᚋccᚑlibᚋschemaᚐJobState, + ec.marshalNJobState2githubᚗcomᚋClusterCockpitᚋccᚑlibᚋv2ᚋschemaᚐJobState, true, true, ) @@ -4995,7 +4996,7 @@ func (ec *executionContext) _Job_tags(ctx context.Context, field graphql.Collect return ec.resolvers.Job().Tags(ctx, obj) }, nil, - ec.marshalNTag2ᚕᚖgithubᚗcomᚋClusterCockpitᚋccᚑlibᚋschemaᚐTagᚄ, + ec.marshalNTag2ᚕᚖgithubᚗcomᚋClusterCockpitᚋccᚑlibᚋv2ᚋschemaᚐTagᚄ, true, true, ) @@ -5034,7 +5035,7 @@ func (ec *executionContext) _Job_resources(ctx context.Context, field graphql.Co return obj.Resources, nil }, nil, - ec.marshalNResource2ᚕᚖgithubᚗcomᚋClusterCockpitᚋccᚑlibᚋschemaᚐResourceᚄ, + ec.marshalNResource2ᚕᚖgithubᚗcomᚋClusterCockpitᚋccᚑlibᚋv2ᚋschemaᚐResourceᚄ, true, true, ) @@ -5401,7 +5402,7 @@ func (ec *executionContext) _JobMetric_unit(ctx context.Context, field graphql.C return obj.Unit, nil }, nil, - ec.marshalOUnit2githubᚗcomᚋClusterCockpitᚋccᚑlibᚋschemaᚐUnit, + ec.marshalOUnit2githubᚗcomᚋClusterCockpitᚋccᚑlibᚋv2ᚋschemaᚐUnit, true, false, ) @@ -5465,7 +5466,7 @@ func (ec *executionContext) _JobMetric_series(ctx context.Context, field graphql return obj.Series, nil }, nil, - ec.marshalOSeries2ᚕgithubᚗcomᚋClusterCockpitᚋccᚑlibᚋschemaᚐSeriesᚄ, + ec.marshalOSeries2ᚕgithubᚗcomᚋClusterCockpitᚋccᚑlibᚋv2ᚋschemaᚐSeriesᚄ, true, false, ) @@ -5504,7 +5505,7 @@ func (ec *executionContext) _JobMetric_statisticsSeries(ctx context.Context, fie return obj.StatisticsSeries, nil }, nil, - ec.marshalOStatsSeries2ᚖgithubᚗcomᚋClusterCockpitᚋccᚑlibᚋschemaᚐStatsSeries, + ec.marshalOStatsSeries2ᚖgithubᚗcomᚋClusterCockpitᚋccᚑlibᚋv2ᚋschemaᚐStatsSeries, true, false, ) @@ -5572,7 +5573,7 @@ func (ec *executionContext) _JobMetricWithName_scope(ctx context.Context, field return obj.Scope, nil }, nil, - ec.marshalNMetricScope2githubᚗcomᚋClusterCockpitᚋccᚑlibᚋschemaᚐMetricScope, + ec.marshalNMetricScope2githubᚗcomᚋClusterCockpitᚋccᚑlibᚋv2ᚋschemaᚐMetricScope, true, true, ) @@ -5601,7 +5602,7 @@ func (ec *executionContext) _JobMetricWithName_metric(ctx context.Context, field return obj.Metric, nil }, nil, - ec.marshalNJobMetric2ᚖgithubᚗcomᚋClusterCockpitᚋccᚑlibᚋschemaᚐJobMetric, + ec.marshalNJobMetric2ᚖgithubᚗcomᚋClusterCockpitᚋccᚑlibᚋv2ᚋschemaᚐJobMetric, true, true, ) @@ -5640,7 +5641,7 @@ func (ec *executionContext) _JobResultList_items(ctx context.Context, field grap return obj.Items, nil }, nil, - ec.marshalNJob2ᚕᚖgithubᚗcomᚋClusterCockpitᚋccᚑlibᚋschemaᚐJobᚄ, + ec.marshalNJob2ᚕᚖgithubᚗcomᚋClusterCockpitᚋccᚑlibᚋv2ᚋschemaᚐJobᚄ, true, true, ) @@ -6720,7 +6721,7 @@ func (ec *executionContext) _MetricConfig_unit(ctx 
context.Context, field graphq return obj.Unit, nil }, nil, - ec.marshalNUnit2githubᚗcomᚋClusterCockpitᚋccᚑlibᚋschemaᚐUnit, + ec.marshalNUnit2githubᚗcomᚋClusterCockpitᚋccᚑlibᚋv2ᚋschemaᚐUnit, true, true, ) @@ -6755,7 +6756,7 @@ func (ec *executionContext) _MetricConfig_scope(ctx context.Context, field graph return obj.Scope, nil }, nil, - ec.marshalNMetricScope2githubᚗcomᚋClusterCockpitᚋccᚑlibᚋschemaᚐMetricScope, + ec.marshalNMetricScope2githubᚗcomᚋClusterCockpitᚋccᚑlibᚋv2ᚋschemaᚐMetricScope, true, true, ) @@ -6987,7 +6988,7 @@ func (ec *executionContext) _MetricConfig_subClusters(ctx context.Context, field return obj.SubClusters, nil }, nil, - ec.marshalNSubClusterConfig2ᚕᚖgithubᚗcomᚋClusterCockpitᚋccᚑlibᚋschemaᚐSubClusterConfigᚄ, + ec.marshalNSubClusterConfig2ᚕᚖgithubᚗcomᚋClusterCockpitᚋccᚑlibᚋv2ᚋschemaᚐSubClusterConfigᚄ, true, true, ) @@ -7059,7 +7060,7 @@ func (ec *executionContext) _MetricFootprints_data(ctx context.Context, field gr return obj.Data, nil }, nil, - ec.marshalNNullableFloat2ᚕgithubᚗcomᚋClusterCockpitᚋccᚑlibᚋschemaᚐFloatᚄ, + ec.marshalNNullableFloat2ᚕgithubᚗcomᚋClusterCockpitᚋccᚑlibᚋv2ᚋschemaᚐFloatᚄ, true, true, ) @@ -7446,7 +7447,7 @@ func (ec *executionContext) _MetricValue_unit(ctx context.Context, field graphql return obj.Unit, nil }, nil, - ec.marshalNUnit2githubᚗcomᚋClusterCockpitᚋccᚑlibᚋschemaᚐUnit, + ec.marshalNUnit2githubᚗcomᚋClusterCockpitᚋccᚑlibᚋv2ᚋschemaᚐUnit, true, true, ) @@ -7511,7 +7512,7 @@ func (ec *executionContext) _Mutation_createTag(ctx context.Context, field graph return ec.resolvers.Mutation().CreateTag(ctx, fc.Args["type"].(string), fc.Args["name"].(string), fc.Args["scope"].(string)) }, nil, - ec.marshalNTag2ᚖgithubᚗcomᚋClusterCockpitᚋccᚑlibᚋschemaᚐTag, + ec.marshalNTag2ᚖgithubᚗcomᚋClusterCockpitᚋccᚑlibᚋv2ᚋschemaᚐTag, true, true, ) @@ -7603,7 +7604,7 @@ func (ec *executionContext) _Mutation_addTagsToJob(ctx context.Context, field gr return ec.resolvers.Mutation().AddTagsToJob(ctx, fc.Args["job"].(string), fc.Args["tagIds"].([]string)) }, nil, - ec.marshalNTag2ᚕᚖgithubᚗcomᚋClusterCockpitᚋccᚑlibᚋschemaᚐTagᚄ, + ec.marshalNTag2ᚕᚖgithubᚗcomᚋClusterCockpitᚋccᚑlibᚋv2ᚋschemaᚐTagᚄ, true, true, ) @@ -7654,7 +7655,7 @@ func (ec *executionContext) _Mutation_removeTagsFromJob(ctx context.Context, fie return ec.resolvers.Mutation().RemoveTagsFromJob(ctx, fc.Args["job"].(string), fc.Args["tagIds"].([]string)) }, nil, - ec.marshalNTag2ᚕᚖgithubᚗcomᚋClusterCockpitᚋccᚑlibᚋschemaᚐTagᚄ, + ec.marshalNTag2ᚕᚖgithubᚗcomᚋClusterCockpitᚋccᚑlibᚋv2ᚋschemaᚐTagᚄ, true, true, ) @@ -7815,7 +7816,7 @@ func (ec *executionContext) _NamedStats_data(ctx context.Context, field graphql. 
return obj.Data, nil }, nil, - ec.marshalNMetricStatistics2ᚖgithubᚗcomᚋClusterCockpitᚋccᚑlibᚋschemaᚐMetricStatistics, + ec.marshalNMetricStatistics2ᚖgithubᚗcomᚋClusterCockpitᚋccᚑlibᚋv2ᚋschemaᚐMetricStatistics, true, true, ) @@ -7881,7 +7882,7 @@ func (ec *executionContext) _NamedStatsWithScope_scope(ctx context.Context, fiel return obj.Scope, nil }, nil, - ec.marshalNMetricScope2githubᚗcomᚋClusterCockpitᚋccᚑlibᚋschemaᚐMetricScope, + ec.marshalNMetricScope2githubᚗcomᚋClusterCockpitᚋccᚑlibᚋv2ᚋschemaᚐMetricScope, true, true, ) @@ -8179,7 +8180,7 @@ func (ec *executionContext) _Node_schedulerState(ctx context.Context, field grap return ec.resolvers.Node().SchedulerState(ctx, obj) }, nil, - ec.marshalNSchedulerState2githubᚗcomᚋClusterCockpitᚋccᚑlibᚋschemaᚐSchedulerState, + ec.marshalNSchedulerState2githubᚗcomᚋClusterCockpitᚋccᚑlibᚋv2ᚋschemaᚐSchedulerState, true, true, ) @@ -8390,7 +8391,7 @@ func (ec *executionContext) _NodeStateResultList_items(ctx context.Context, fiel return obj.Items, nil }, nil, - ec.marshalNNode2ᚕᚖgithubᚗcomᚋClusterCockpitᚋccᚑlibᚋschemaᚐNodeᚄ, + ec.marshalNNode2ᚕᚖgithubᚗcomᚋClusterCockpitᚋccᚑlibᚋv2ᚋschemaᚐNodeᚄ, true, true, ) @@ -8801,7 +8802,7 @@ func (ec *executionContext) _Query_clusters(ctx context.Context, field graphql.C return ec.resolvers.Query().Clusters(ctx) }, nil, - ec.marshalNCluster2ᚕᚖgithubᚗcomᚋClusterCockpitᚋccᚑlibᚋschemaᚐClusterᚄ, + ec.marshalNCluster2ᚕᚖgithubᚗcomᚋClusterCockpitᚋccᚑlibᚋv2ᚋschemaᚐClusterᚄ, true, true, ) @@ -8838,7 +8839,7 @@ func (ec *executionContext) _Query_tags(ctx context.Context, field graphql.Colle return ec.resolvers.Query().Tags(ctx) }, nil, - ec.marshalNTag2ᚕᚖgithubᚗcomᚋClusterCockpitᚋccᚑlibᚋschemaᚐTagᚄ, + ec.marshalNTag2ᚕᚖgithubᚗcomᚋClusterCockpitᚋccᚑlibᚋv2ᚋschemaᚐTagᚄ, true, true, ) @@ -8877,7 +8878,7 @@ func (ec *executionContext) _Query_globalMetrics(ctx context.Context, field grap return ec.resolvers.Query().GlobalMetrics(ctx) }, nil, - ec.marshalNGlobalMetricListItem2ᚕᚖgithubᚗcomᚋClusterCockpitᚋccᚑlibᚋschemaᚐGlobalMetricListItemᚄ, + ec.marshalNGlobalMetricListItem2ᚕᚖgithubᚗcomᚋClusterCockpitᚋccᚑlibᚋv2ᚋschemaᚐGlobalMetricListItemᚄ, true, true, ) @@ -9015,7 +9016,7 @@ func (ec *executionContext) _Query_node(ctx context.Context, field graphql.Colle return ec.resolvers.Query().Node(ctx, fc.Args["id"].(string)) }, nil, - ec.marshalONode2ᚖgithubᚗcomᚋClusterCockpitᚋccᚑlibᚋschemaᚐNode, + ec.marshalONode2ᚖgithubᚗcomᚋClusterCockpitᚋccᚑlibᚋv2ᚋschemaᚐNode, true, false, ) @@ -9223,7 +9224,7 @@ func (ec *executionContext) _Query_job(ctx context.Context, field graphql.Collec return ec.resolvers.Query().Job(ctx, fc.Args["id"].(string)) }, nil, - ec.marshalOJob2ᚖgithubᚗcomᚋClusterCockpitᚋccᚑlibᚋschemaᚐJob, + ec.marshalOJob2ᚖgithubᚗcomᚋClusterCockpitᚋccᚑlibᚋv2ᚋschemaᚐJob, true, false, ) @@ -10180,7 +10181,7 @@ func (ec *executionContext) _ScopedStats_data(ctx context.Context, field graphql return obj.Data, nil }, nil, - ec.marshalNMetricStatistics2ᚖgithubᚗcomᚋClusterCockpitᚋccᚑlibᚋschemaᚐMetricStatistics, + ec.marshalNMetricStatistics2ᚖgithubᚗcomᚋClusterCockpitᚋccᚑlibᚋv2ᚋschemaᚐMetricStatistics, true, true, ) @@ -10275,7 +10276,7 @@ func (ec *executionContext) _Series_statistics(ctx context.Context, field graphq return obj.Statistics, nil }, nil, - ec.marshalOMetricStatistics2githubᚗcomᚋClusterCockpitᚋccᚑlibᚋschemaᚐMetricStatistics, + ec.marshalOMetricStatistics2githubᚗcomᚋClusterCockpitᚋccᚑlibᚋv2ᚋschemaᚐMetricStatistics, true, false, ) @@ -10312,7 +10313,7 @@ func (ec *executionContext) _Series_data(ctx context.Context, field graphql.Coll 
return obj.Data, nil }, nil, - ec.marshalNNullableFloat2ᚕgithubᚗcomᚋClusterCockpitᚋccᚑlibᚋschemaᚐFloatᚄ, + ec.marshalNNullableFloat2ᚕgithubᚗcomᚋClusterCockpitᚋccᚑlibᚋv2ᚋschemaᚐFloatᚄ, true, true, ) @@ -10341,7 +10342,7 @@ func (ec *executionContext) _StatsSeries_mean(ctx context.Context, field graphql return obj.Mean, nil }, nil, - ec.marshalNNullableFloat2ᚕgithubᚗcomᚋClusterCockpitᚋccᚑlibᚋschemaᚐFloatᚄ, + ec.marshalNNullableFloat2ᚕgithubᚗcomᚋClusterCockpitᚋccᚑlibᚋv2ᚋschemaᚐFloatᚄ, true, true, ) @@ -10370,7 +10371,7 @@ func (ec *executionContext) _StatsSeries_median(ctx context.Context, field graph return obj.Median, nil }, nil, - ec.marshalNNullableFloat2ᚕgithubᚗcomᚋClusterCockpitᚋccᚑlibᚋschemaᚐFloatᚄ, + ec.marshalNNullableFloat2ᚕgithubᚗcomᚋClusterCockpitᚋccᚑlibᚋv2ᚋschemaᚐFloatᚄ, true, true, ) @@ -10399,7 +10400,7 @@ func (ec *executionContext) _StatsSeries_min(ctx context.Context, field graphql. return obj.Min, nil }, nil, - ec.marshalNNullableFloat2ᚕgithubᚗcomᚋClusterCockpitᚋccᚑlibᚋschemaᚐFloatᚄ, + ec.marshalNNullableFloat2ᚕgithubᚗcomᚋClusterCockpitᚋccᚑlibᚋv2ᚋschemaᚐFloatᚄ, true, true, ) @@ -10428,7 +10429,7 @@ func (ec *executionContext) _StatsSeries_max(ctx context.Context, field graphql. return obj.Max, nil }, nil, - ec.marshalNNullableFloat2ᚕgithubᚗcomᚋClusterCockpitᚋccᚑlibᚋschemaᚐFloatᚄ, + ec.marshalNNullableFloat2ᚕgithubᚗcomᚋClusterCockpitᚋccᚑlibᚋv2ᚋschemaᚐFloatᚄ, true, true, ) @@ -10660,7 +10661,7 @@ func (ec *executionContext) _SubCluster_flopRateScalar(ctx context.Context, fiel return obj.FlopRateScalar, nil }, nil, - ec.marshalNMetricValue2githubᚗcomᚋClusterCockpitᚋccᚑlibᚋschemaᚐMetricValue, + ec.marshalNMetricValue2githubᚗcomᚋClusterCockpitᚋccᚑlibᚋv2ᚋschemaᚐMetricValue, true, true, ) @@ -10697,7 +10698,7 @@ func (ec *executionContext) _SubCluster_flopRateSimd(ctx context.Context, field return obj.FlopRateSimd, nil }, nil, - ec.marshalNMetricValue2githubᚗcomᚋClusterCockpitᚋccᚑlibᚋschemaᚐMetricValue, + ec.marshalNMetricValue2githubᚗcomᚋClusterCockpitᚋccᚑlibᚋv2ᚋschemaᚐMetricValue, true, true, ) @@ -10734,7 +10735,7 @@ func (ec *executionContext) _SubCluster_memoryBandwidth(ctx context.Context, fie return obj.MemoryBandwidth, nil }, nil, - ec.marshalNMetricValue2githubᚗcomᚋClusterCockpitᚋccᚑlibᚋschemaᚐMetricValue, + ec.marshalNMetricValue2githubᚗcomᚋClusterCockpitᚋccᚑlibᚋv2ᚋschemaᚐMetricValue, true, true, ) @@ -10771,7 +10772,7 @@ func (ec *executionContext) _SubCluster_topology(ctx context.Context, field grap return obj.Topology, nil }, nil, - ec.marshalNTopology2githubᚗcomᚋClusterCockpitᚋccᚑlibᚋschemaᚐTopology, + ec.marshalNTopology2githubᚗcomᚋClusterCockpitᚋccᚑlibᚋv2ᚋschemaᚐTopology, true, true, ) @@ -10814,7 +10815,7 @@ func (ec *executionContext) _SubCluster_metricConfig(ctx context.Context, field return obj.MetricConfig, nil }, nil, - ec.marshalNMetricConfig2ᚕgithubᚗcomᚋClusterCockpitᚋccᚑlibᚋschemaᚐMetricConfigᚄ, + ec.marshalNMetricConfig2ᚕgithubᚗcomᚋClusterCockpitᚋccᚑlibᚋv2ᚋschemaᚐMetricConfigᚄ, true, true, ) @@ -11273,7 +11274,7 @@ func (ec *executionContext) _TimeWeights_nodeHours(ctx context.Context, field gr return obj.NodeHours, nil }, nil, - ec.marshalNNullableFloat2ᚕgithubᚗcomᚋClusterCockpitᚋccᚑlibᚋschemaᚐFloatᚄ, + ec.marshalNNullableFloat2ᚕgithubᚗcomᚋClusterCockpitᚋccᚑlibᚋv2ᚋschemaᚐFloatᚄ, true, true, ) @@ -11302,7 +11303,7 @@ func (ec *executionContext) _TimeWeights_accHours(ctx context.Context, field gra return obj.AccHours, nil }, nil, - ec.marshalNNullableFloat2ᚕgithubᚗcomᚋClusterCockpitᚋccᚑlibᚋschemaᚐFloatᚄ, + 
ec.marshalNNullableFloat2ᚕgithubᚗcomᚋClusterCockpitᚋccᚑlibᚋv2ᚋschemaᚐFloatᚄ, true, true, ) @@ -11331,7 +11332,7 @@ func (ec *executionContext) _TimeWeights_coreHours(ctx context.Context, field gr return obj.CoreHours, nil }, nil, - ec.marshalNNullableFloat2ᚕgithubᚗcomᚋClusterCockpitᚋccᚑlibᚋschemaᚐFloatᚄ, + ec.marshalNNullableFloat2ᚕgithubᚗcomᚋClusterCockpitᚋccᚑlibᚋv2ᚋschemaᚐFloatᚄ, true, true, ) @@ -11505,7 +11506,7 @@ func (ec *executionContext) _Topology_accelerators(ctx context.Context, field gr return obj.Accelerators, nil }, nil, - ec.marshalOAccelerator2ᚕᚖgithubᚗcomᚋClusterCockpitᚋccᚑlibᚋschemaᚐAcceleratorᚄ, + ec.marshalOAccelerator2ᚕᚖgithubᚗcomᚋClusterCockpitᚋccᚑlibᚋv2ᚋschemaᚐAcceleratorᚄ, true, false, ) @@ -13198,7 +13199,7 @@ func (ec *executionContext) unmarshalInputJobFilter(ctx context.Context, obj any asMap[k] = v } - fieldsInOrder := [...]string{"tags", "dbId", "jobId", "arrayJobId", "user", "project", "jobName", "cluster", "partition", "duration", "energy", "minRunningFor", "numNodes", "numAccelerators", "numHWThreads", "startTime", "state", "metricStats", "shared", "node"} + fieldsInOrder := [...]string{"tags", "dbId", "jobId", "arrayJobId", "user", "project", "jobName", "cluster", "partition", "duration", "energy", "minRunningFor", "numNodes", "numAccelerators", "numHWThreads", "startTime", "state", "metricStats", "shared", "schedule", "node"} for _, k := range fieldsInOrder { v, ok := asMap[k] if !ok { @@ -13319,7 +13320,7 @@ func (ec *executionContext) unmarshalInputJobFilter(ctx context.Context, obj any it.StartTime = data case "state": ctx := graphql.WithPathContext(ctx, graphql.NewPathWithField("state")) - data, err := ec.unmarshalOJobState2ᚕgithubᚗcomᚋClusterCockpitᚋccᚑlibᚋschemaᚐJobStateᚄ(ctx, v) + data, err := ec.unmarshalOJobState2ᚕgithubᚗcomᚋClusterCockpitᚋccᚑlibᚋv2ᚋschemaᚐJobStateᚄ(ctx, v) if err != nil { return it, err } @@ -13338,6 +13339,13 @@ func (ec *executionContext) unmarshalInputJobFilter(ctx context.Context, obj any return it, err } it.Shared = data + case "schedule": + ctx := graphql.WithPathContext(ctx, graphql.NewPathWithField("schedule")) + data, err := ec.unmarshalOString2ᚖstring(ctx, v) + if err != nil { + return it, err + } + it.Schedule = data case "node": ctx := graphql.WithPathContext(ctx, graphql.NewPathWithField("node")) data, err := ec.unmarshalOStringInput2ᚖgithubᚗcomᚋClusterCockpitᚋccᚑbackendᚋinternalᚋgraphᚋmodelᚐStringInput(ctx, v) @@ -13422,7 +13430,7 @@ func (ec *executionContext) unmarshalInputNodeFilter(ctx context.Context, obj an it.Subcluster = data case "schedulerState": ctx := graphql.WithPathContext(ctx, graphql.NewPathWithField("schedulerState")) - data, err := ec.unmarshalOSchedulerState2ᚖgithubᚗcomᚋClusterCockpitᚋccᚑlibᚋschemaᚐSchedulerState(ctx, v) + data, err := ec.unmarshalOSchedulerState2ᚖgithubᚗcomᚋClusterCockpitᚋccᚑlibᚋv2ᚋschemaᚐSchedulerState(ctx, v) if err != nil { return it, err } @@ -17481,7 +17489,7 @@ func (ec *executionContext) ___Type(ctx context.Context, sel ast.SelectionSet, o // region ***************************** type.gotpl ***************************** -func (ec *executionContext) marshalNAccelerator2ᚖgithubᚗcomᚋClusterCockpitᚋccᚑlibᚋschemaᚐAccelerator(ctx context.Context, sel ast.SelectionSet, v *schema.Accelerator) graphql.Marshaler { +func (ec *executionContext) marshalNAccelerator2ᚖgithubᚗcomᚋClusterCockpitᚋccᚑlibᚋv2ᚋschemaᚐAccelerator(ctx context.Context, sel ast.SelectionSet, v *schema.Accelerator) graphql.Marshaler { if v == nil { if !graphql.HasFieldError(ctx, graphql.GetFieldContext(ctx)) { 
graphql.AddErrorf(ctx, "the requested element is null which the schema does not allow") @@ -17507,7 +17515,7 @@ func (ec *executionContext) marshalNBoolean2bool(ctx context.Context, sel ast.Se return res } -func (ec *executionContext) marshalNCluster2ᚕᚖgithubᚗcomᚋClusterCockpitᚋccᚑlibᚋschemaᚐClusterᚄ(ctx context.Context, sel ast.SelectionSet, v []*schema.Cluster) graphql.Marshaler { +func (ec *executionContext) marshalNCluster2ᚕᚖgithubᚗcomᚋClusterCockpitᚋccᚑlibᚋv2ᚋschemaᚐClusterᚄ(ctx context.Context, sel ast.SelectionSet, v []*schema.Cluster) graphql.Marshaler { ret := make(graphql.Array, len(v)) var wg sync.WaitGroup isLen1 := len(v) == 1 @@ -17531,7 +17539,7 @@ func (ec *executionContext) marshalNCluster2ᚕᚖgithubᚗcomᚋClusterCockpit if !isLen1 { defer wg.Done() } - ret[i] = ec.marshalNCluster2ᚖgithubᚗcomᚋClusterCockpitᚋccᚑlibᚋschemaᚐCluster(ctx, sel, v[i]) + ret[i] = ec.marshalNCluster2ᚖgithubᚗcomᚋClusterCockpitᚋccᚑlibᚋv2ᚋschemaᚐCluster(ctx, sel, v[i]) } if isLen1 { f(i) @@ -17551,7 +17559,7 @@ func (ec *executionContext) marshalNCluster2ᚕᚖgithubᚗcomᚋClusterCockpit return ret } -func (ec *executionContext) marshalNCluster2ᚖgithubᚗcomᚋClusterCockpitᚋccᚑlibᚋschemaᚐCluster(ctx context.Context, sel ast.SelectionSet, v *schema.Cluster) graphql.Marshaler { +func (ec *executionContext) marshalNCluster2ᚖgithubᚗcomᚋClusterCockpitᚋccᚑlibᚋv2ᚋschemaᚐCluster(ctx context.Context, sel ast.SelectionSet, v *schema.Cluster) graphql.Marshaler { if v == nil { if !graphql.HasFieldError(ctx, graphql.GetFieldContext(ctx)) { graphql.AddErrorf(ctx, "the requested element is null which the schema does not allow") @@ -17629,11 +17637,11 @@ func (ec *executionContext) marshalNClusterMetrics2ᚖgithubᚗcomᚋClusterCock return ec._ClusterMetrics(ctx, sel, v) } -func (ec *executionContext) marshalNClusterSupport2githubᚗcomᚋClusterCockpitᚋccᚑlibᚋschemaᚐClusterSupport(ctx context.Context, sel ast.SelectionSet, v schema.ClusterSupport) graphql.Marshaler { +func (ec *executionContext) marshalNClusterSupport2githubᚗcomᚋClusterCockpitᚋccᚑlibᚋv2ᚋschemaᚐClusterSupport(ctx context.Context, sel ast.SelectionSet, v schema.ClusterSupport) graphql.Marshaler { return ec._ClusterSupport(ctx, sel, &v) } -func (ec *executionContext) marshalNClusterSupport2ᚕgithubᚗcomᚋClusterCockpitᚋccᚑlibᚋschemaᚐClusterSupportᚄ(ctx context.Context, sel ast.SelectionSet, v []schema.ClusterSupport) graphql.Marshaler { +func (ec *executionContext) marshalNClusterSupport2ᚕgithubᚗcomᚋClusterCockpitᚋccᚑlibᚋv2ᚋschemaᚐClusterSupportᚄ(ctx context.Context, sel ast.SelectionSet, v []schema.ClusterSupport) graphql.Marshaler { ret := make(graphql.Array, len(v)) var wg sync.WaitGroup isLen1 := len(v) == 1 @@ -17657,7 +17665,7 @@ func (ec *executionContext) marshalNClusterSupport2ᚕgithubᚗcomᚋClusterCock if !isLen1 { defer wg.Done() } - ret[i] = ec.marshalNClusterSupport2githubᚗcomᚋClusterCockpitᚋccᚑlibᚋschemaᚐClusterSupport(ctx, sel, v[i]) + ret[i] = ec.marshalNClusterSupport2githubᚗcomᚋClusterCockpitᚋccᚑlibᚋv2ᚋschemaᚐClusterSupport(ctx, sel, v[i]) } if isLen1 { f(i) @@ -17812,7 +17820,7 @@ func (ec *executionContext) unmarshalNFloatRange2ᚖgithubᚗcomᚋClusterCockpi return &res, graphql.ErrorOnPath(ctx, err) } -func (ec *executionContext) marshalNGlobalMetricListItem2ᚕᚖgithubᚗcomᚋClusterCockpitᚋccᚑlibᚋschemaᚐGlobalMetricListItemᚄ(ctx context.Context, sel ast.SelectionSet, v []*schema.GlobalMetricListItem) graphql.Marshaler { +func (ec *executionContext) marshalNGlobalMetricListItem2ᚕᚖgithubᚗcomᚋClusterCockpitᚋccᚑlibᚋv2ᚋschemaᚐGlobalMetricListItemᚄ(ctx context.Context, sel 
ast.SelectionSet, v []*schema.GlobalMetricListItem) graphql.Marshaler { ret := make(graphql.Array, len(v)) var wg sync.WaitGroup isLen1 := len(v) == 1 @@ -17836,7 +17844,7 @@ func (ec *executionContext) marshalNGlobalMetricListItem2ᚕᚖgithubᚗcomᚋCl if !isLen1 { defer wg.Done() } - ret[i] = ec.marshalNGlobalMetricListItem2ᚖgithubᚗcomᚋClusterCockpitᚋccᚑlibᚋschemaᚐGlobalMetricListItem(ctx, sel, v[i]) + ret[i] = ec.marshalNGlobalMetricListItem2ᚖgithubᚗcomᚋClusterCockpitᚋccᚑlibᚋv2ᚋschemaᚐGlobalMetricListItem(ctx, sel, v[i]) } if isLen1 { f(i) @@ -17856,7 +17864,7 @@ func (ec *executionContext) marshalNGlobalMetricListItem2ᚕᚖgithubᚗcomᚋCl return ret } -func (ec *executionContext) marshalNGlobalMetricListItem2ᚖgithubᚗcomᚋClusterCockpitᚋccᚑlibᚋschemaᚐGlobalMetricListItem(ctx context.Context, sel ast.SelectionSet, v *schema.GlobalMetricListItem) graphql.Marshaler { +func (ec *executionContext) marshalNGlobalMetricListItem2ᚖgithubᚗcomᚋClusterCockpitᚋccᚑlibᚋv2ᚋschemaᚐGlobalMetricListItem(ctx context.Context, sel ast.SelectionSet, v *schema.GlobalMetricListItem) graphql.Marshaler { if v == nil { if !graphql.HasFieldError(ctx, graphql.GetFieldContext(ctx)) { graphql.AddErrorf(ctx, "the requested element is null which the schema does not allow") @@ -18134,7 +18142,7 @@ func (ec *executionContext) marshalNInt2ᚖint(ctx context.Context, sel ast.Sele return res } -func (ec *executionContext) marshalNJob2ᚕᚖgithubᚗcomᚋClusterCockpitᚋccᚑlibᚋschemaᚐJobᚄ(ctx context.Context, sel ast.SelectionSet, v []*schema.Job) graphql.Marshaler { +func (ec *executionContext) marshalNJob2ᚕᚖgithubᚗcomᚋClusterCockpitᚋccᚑlibᚋv2ᚋschemaᚐJobᚄ(ctx context.Context, sel ast.SelectionSet, v []*schema.Job) graphql.Marshaler { ret := make(graphql.Array, len(v)) var wg sync.WaitGroup isLen1 := len(v) == 1 @@ -18158,7 +18166,7 @@ func (ec *executionContext) marshalNJob2ᚕᚖgithubᚗcomᚋClusterCockpitᚋcc if !isLen1 { defer wg.Done() } - ret[i] = ec.marshalNJob2ᚖgithubᚗcomᚋClusterCockpitᚋccᚑlibᚋschemaᚐJob(ctx, sel, v[i]) + ret[i] = ec.marshalNJob2ᚖgithubᚗcomᚋClusterCockpitᚋccᚑlibᚋv2ᚋschemaᚐJob(ctx, sel, v[i]) } if isLen1 { f(i) @@ -18178,7 +18186,7 @@ func (ec *executionContext) marshalNJob2ᚕᚖgithubᚗcomᚋClusterCockpitᚋcc return ret } -func (ec *executionContext) marshalNJob2ᚖgithubᚗcomᚋClusterCockpitᚋccᚑlibᚋschemaᚐJob(ctx context.Context, sel ast.SelectionSet, v *schema.Job) graphql.Marshaler { +func (ec *executionContext) marshalNJob2ᚖgithubᚗcomᚋClusterCockpitᚋccᚑlibᚋv2ᚋschemaᚐJob(ctx context.Context, sel ast.SelectionSet, v *schema.Job) graphql.Marshaler { if v == nil { if !graphql.HasFieldError(ctx, graphql.GetFieldContext(ctx)) { graphql.AddErrorf(ctx, "the requested element is null which the schema does not allow") @@ -18262,7 +18270,7 @@ func (ec *executionContext) marshalNJobLink2ᚖgithubᚗcomᚋClusterCockpitᚋc return ec._JobLink(ctx, sel, v) } -func (ec *executionContext) marshalNJobMetric2ᚖgithubᚗcomᚋClusterCockpitᚋccᚑlibᚋschemaᚐJobMetric(ctx context.Context, sel ast.SelectionSet, v *schema.JobMetric) graphql.Marshaler { +func (ec *executionContext) marshalNJobMetric2ᚖgithubᚗcomᚋClusterCockpitᚋccᚑlibᚋv2ᚋschemaᚐJobMetric(ctx context.Context, sel ast.SelectionSet, v *schema.JobMetric) graphql.Marshaler { if v == nil { if !graphql.HasFieldError(ctx, graphql.GetFieldContext(ctx)) { graphql.AddErrorf(ctx, "the requested element is null which the schema does not allow") @@ -18340,13 +18348,13 @@ func (ec *executionContext) marshalNJobResultList2ᚖgithubᚗcomᚋClusterCockp return ec._JobResultList(ctx, sel, v) } -func (ec *executionContext) 
unmarshalNJobState2githubᚗcomᚋClusterCockpitᚋccᚑlibᚋschemaᚐJobState(ctx context.Context, v any) (schema.JobState, error) { +func (ec *executionContext) unmarshalNJobState2githubᚗcomᚋClusterCockpitᚋccᚑlibᚋv2ᚋschemaᚐJobState(ctx context.Context, v any) (schema.JobState, error) { var res schema.JobState err := res.UnmarshalGQL(v) return res, graphql.ErrorOnPath(ctx, err) } -func (ec *executionContext) marshalNJobState2githubᚗcomᚋClusterCockpitᚋccᚑlibᚋschemaᚐJobState(ctx context.Context, sel ast.SelectionSet, v schema.JobState) graphql.Marshaler { +func (ec *executionContext) marshalNJobState2githubᚗcomᚋClusterCockpitᚋccᚑlibᚋv2ᚋschemaᚐJobState(ctx context.Context, sel ast.SelectionSet, v schema.JobState) graphql.Marshaler { return v } @@ -18458,11 +18466,11 @@ func (ec *executionContext) marshalNJobsStatistics2ᚖgithubᚗcomᚋClusterCock return ec._JobsStatistics(ctx, sel, v) } -func (ec *executionContext) marshalNMetricConfig2githubᚗcomᚋClusterCockpitᚋccᚑlibᚋschemaᚐMetricConfig(ctx context.Context, sel ast.SelectionSet, v schema.MetricConfig) graphql.Marshaler { +func (ec *executionContext) marshalNMetricConfig2githubᚗcomᚋClusterCockpitᚋccᚑlibᚋv2ᚋschemaᚐMetricConfig(ctx context.Context, sel ast.SelectionSet, v schema.MetricConfig) graphql.Marshaler { return ec._MetricConfig(ctx, sel, &v) } -func (ec *executionContext) marshalNMetricConfig2ᚕgithubᚗcomᚋClusterCockpitᚋccᚑlibᚋschemaᚐMetricConfigᚄ(ctx context.Context, sel ast.SelectionSet, v []schema.MetricConfig) graphql.Marshaler { +func (ec *executionContext) marshalNMetricConfig2ᚕgithubᚗcomᚋClusterCockpitᚋccᚑlibᚋv2ᚋschemaᚐMetricConfigᚄ(ctx context.Context, sel ast.SelectionSet, v []schema.MetricConfig) graphql.Marshaler { ret := make(graphql.Array, len(v)) var wg sync.WaitGroup isLen1 := len(v) == 1 @@ -18486,7 +18494,7 @@ func (ec *executionContext) marshalNMetricConfig2ᚕgithubᚗcomᚋClusterCockpi if !isLen1 { defer wg.Done() } - ret[i] = ec.marshalNMetricConfig2githubᚗcomᚋClusterCockpitᚋccᚑlibᚋschemaᚐMetricConfig(ctx, sel, v[i]) + ret[i] = ec.marshalNMetricConfig2githubᚗcomᚋClusterCockpitᚋccᚑlibᚋv2ᚋschemaᚐMetricConfig(ctx, sel, v[i]) } if isLen1 { f(i) @@ -18624,13 +18632,13 @@ func (ec *executionContext) marshalNMetricHistoPoints2ᚖgithubᚗcomᚋClusterC return ec._MetricHistoPoints(ctx, sel, v) } -func (ec *executionContext) unmarshalNMetricScope2githubᚗcomᚋClusterCockpitᚋccᚑlibᚋschemaᚐMetricScope(ctx context.Context, v any) (schema.MetricScope, error) { +func (ec *executionContext) unmarshalNMetricScope2githubᚗcomᚋClusterCockpitᚋccᚑlibᚋv2ᚋschemaᚐMetricScope(ctx context.Context, v any) (schema.MetricScope, error) { var res schema.MetricScope err := res.UnmarshalGQL(v) return res, graphql.ErrorOnPath(ctx, err) } -func (ec *executionContext) marshalNMetricScope2githubᚗcomᚋClusterCockpitᚋccᚑlibᚋschemaᚐMetricScope(ctx context.Context, sel ast.SelectionSet, v schema.MetricScope) graphql.Marshaler { +func (ec *executionContext) marshalNMetricScope2githubᚗcomᚋClusterCockpitᚋccᚑlibᚋv2ᚋschemaᚐMetricScope(ctx context.Context, sel ast.SelectionSet, v schema.MetricScope) graphql.Marshaler { return v } @@ -18639,7 +18647,7 @@ func (ec *executionContext) unmarshalNMetricStatItem2ᚖgithubᚗcomᚋClusterCo return &res, graphql.ErrorOnPath(ctx, err) } -func (ec *executionContext) marshalNMetricStatistics2ᚖgithubᚗcomᚋClusterCockpitᚋccᚑlibᚋschemaᚐMetricStatistics(ctx context.Context, sel ast.SelectionSet, v *schema.MetricStatistics) graphql.Marshaler { +func (ec *executionContext) marshalNMetricStatistics2ᚖgithubᚗcomᚋClusterCockpitᚋccᚑlibᚋv2ᚋschemaᚐMetricStatistics(ctx 
context.Context, sel ast.SelectionSet, v *schema.MetricStatistics) graphql.Marshaler { if v == nil { if !graphql.HasFieldError(ctx, graphql.GetFieldContext(ctx)) { graphql.AddErrorf(ctx, "the requested element is null which the schema does not allow") @@ -18649,7 +18657,7 @@ func (ec *executionContext) marshalNMetricStatistics2ᚖgithubᚗcomᚋClusterCo return ec._MetricStatistics(ctx, sel, v) } -func (ec *executionContext) marshalNMetricValue2githubᚗcomᚋClusterCockpitᚋccᚑlibᚋschemaᚐMetricValue(ctx context.Context, sel ast.SelectionSet, v schema.MetricValue) graphql.Marshaler { +func (ec *executionContext) marshalNMetricValue2githubᚗcomᚋClusterCockpitᚋccᚑlibᚋv2ᚋschemaᚐMetricValue(ctx context.Context, sel ast.SelectionSet, v schema.MetricValue) graphql.Marshaler { return ec._MetricValue(ctx, sel, &v) } @@ -18777,7 +18785,7 @@ func (ec *executionContext) marshalNNamedStatsWithScope2ᚖgithubᚗcomᚋCluste return ec._NamedStatsWithScope(ctx, sel, v) } -func (ec *executionContext) marshalNNode2ᚕᚖgithubᚗcomᚋClusterCockpitᚋccᚑlibᚋschemaᚐNodeᚄ(ctx context.Context, sel ast.SelectionSet, v []*schema.Node) graphql.Marshaler { +func (ec *executionContext) marshalNNode2ᚕᚖgithubᚗcomᚋClusterCockpitᚋccᚑlibᚋv2ᚋschemaᚐNodeᚄ(ctx context.Context, sel ast.SelectionSet, v []*schema.Node) graphql.Marshaler { ret := make(graphql.Array, len(v)) var wg sync.WaitGroup isLen1 := len(v) == 1 @@ -18801,7 +18809,7 @@ func (ec *executionContext) marshalNNode2ᚕᚖgithubᚗcomᚋClusterCockpitᚋc if !isLen1 { defer wg.Done() } - ret[i] = ec.marshalNNode2ᚖgithubᚗcomᚋClusterCockpitᚋccᚑlibᚋschemaᚐNode(ctx, sel, v[i]) + ret[i] = ec.marshalNNode2ᚖgithubᚗcomᚋClusterCockpitᚋccᚑlibᚋv2ᚋschemaᚐNode(ctx, sel, v[i]) } if isLen1 { f(i) @@ -18821,7 +18829,7 @@ func (ec *executionContext) marshalNNode2ᚕᚖgithubᚗcomᚋClusterCockpitᚋc return ret } -func (ec *executionContext) marshalNNode2ᚖgithubᚗcomᚋClusterCockpitᚋccᚑlibᚋschemaᚐNode(ctx context.Context, sel ast.SelectionSet, v *schema.Node) graphql.Marshaler { +func (ec *executionContext) marshalNNode2ᚖgithubᚗcomᚋClusterCockpitᚋccᚑlibᚋv2ᚋschemaᚐNode(ctx context.Context, sel ast.SelectionSet, v *schema.Node) graphql.Marshaler { if v == nil { if !graphql.HasFieldError(ctx, graphql.GetFieldContext(ctx)) { graphql.AddErrorf(ctx, "the requested element is null which the schema does not allow") @@ -19026,24 +19034,24 @@ func (ec *executionContext) marshalNNodesResultList2ᚖgithubᚗcomᚋClusterCoc return ec._NodesResultList(ctx, sel, v) } -func (ec *executionContext) unmarshalNNullableFloat2githubᚗcomᚋClusterCockpitᚋccᚑlibᚋschemaᚐFloat(ctx context.Context, v any) (schema.Float, error) { +func (ec *executionContext) unmarshalNNullableFloat2githubᚗcomᚋClusterCockpitᚋccᚑlibᚋv2ᚋschemaᚐFloat(ctx context.Context, v any) (schema.Float, error) { var res schema.Float err := res.UnmarshalGQL(v) return res, graphql.ErrorOnPath(ctx, err) } -func (ec *executionContext) marshalNNullableFloat2githubᚗcomᚋClusterCockpitᚋccᚑlibᚋschemaᚐFloat(ctx context.Context, sel ast.SelectionSet, v schema.Float) graphql.Marshaler { +func (ec *executionContext) marshalNNullableFloat2githubᚗcomᚋClusterCockpitᚋccᚑlibᚋv2ᚋschemaᚐFloat(ctx context.Context, sel ast.SelectionSet, v schema.Float) graphql.Marshaler { return v } -func (ec *executionContext) unmarshalNNullableFloat2ᚕgithubᚗcomᚋClusterCockpitᚋccᚑlibᚋschemaᚐFloatᚄ(ctx context.Context, v any) ([]schema.Float, error) { +func (ec *executionContext) unmarshalNNullableFloat2ᚕgithubᚗcomᚋClusterCockpitᚋccᚑlibᚋv2ᚋschemaᚐFloatᚄ(ctx context.Context, v any) ([]schema.Float, error) { var vSlice []any 
vSlice = graphql.CoerceList(v) var err error res := make([]schema.Float, len(vSlice)) for i := range vSlice { ctx := graphql.WithPathContext(ctx, graphql.NewPathWithIndex(i)) - res[i], err = ec.unmarshalNNullableFloat2githubᚗcomᚋClusterCockpitᚋccᚑlibᚋschemaᚐFloat(ctx, vSlice[i]) + res[i], err = ec.unmarshalNNullableFloat2githubᚗcomᚋClusterCockpitᚋccᚑlibᚋv2ᚋschemaᚐFloat(ctx, vSlice[i]) if err != nil { return nil, err } @@ -19051,10 +19059,10 @@ func (ec *executionContext) unmarshalNNullableFloat2ᚕgithubᚗcomᚋClusterCoc return res, nil } -func (ec *executionContext) marshalNNullableFloat2ᚕgithubᚗcomᚋClusterCockpitᚋccᚑlibᚋschemaᚐFloatᚄ(ctx context.Context, sel ast.SelectionSet, v []schema.Float) graphql.Marshaler { +func (ec *executionContext) marshalNNullableFloat2ᚕgithubᚗcomᚋClusterCockpitᚋccᚑlibᚋv2ᚋschemaᚐFloatᚄ(ctx context.Context, sel ast.SelectionSet, v []schema.Float) graphql.Marshaler { ret := make(graphql.Array, len(v)) for i := range v { - ret[i] = ec.marshalNNullableFloat2githubᚗcomᚋClusterCockpitᚋccᚑlibᚋschemaᚐFloat(ctx, sel, v[i]) + ret[i] = ec.marshalNNullableFloat2githubᚗcomᚋClusterCockpitᚋccᚑlibᚋv2ᚋschemaᚐFloat(ctx, sel, v[i]) } for _, e := range ret { @@ -19066,7 +19074,7 @@ func (ec *executionContext) marshalNNullableFloat2ᚕgithubᚗcomᚋClusterCockp return ret } -func (ec *executionContext) marshalNResource2ᚕᚖgithubᚗcomᚋClusterCockpitᚋccᚑlibᚋschemaᚐResourceᚄ(ctx context.Context, sel ast.SelectionSet, v []*schema.Resource) graphql.Marshaler { +func (ec *executionContext) marshalNResource2ᚕᚖgithubᚗcomᚋClusterCockpitᚋccᚑlibᚋv2ᚋschemaᚐResourceᚄ(ctx context.Context, sel ast.SelectionSet, v []*schema.Resource) graphql.Marshaler { ret := make(graphql.Array, len(v)) var wg sync.WaitGroup isLen1 := len(v) == 1 @@ -19090,7 +19098,7 @@ func (ec *executionContext) marshalNResource2ᚕᚖgithubᚗcomᚋClusterCockpit if !isLen1 { defer wg.Done() } - ret[i] = ec.marshalNResource2ᚖgithubᚗcomᚋClusterCockpitᚋccᚑlibᚋschemaᚐResource(ctx, sel, v[i]) + ret[i] = ec.marshalNResource2ᚖgithubᚗcomᚋClusterCockpitᚋccᚑlibᚋv2ᚋschemaᚐResource(ctx, sel, v[i]) } if isLen1 { f(i) @@ -19110,7 +19118,7 @@ func (ec *executionContext) marshalNResource2ᚕᚖgithubᚗcomᚋClusterCockpit return ret } -func (ec *executionContext) marshalNResource2ᚖgithubᚗcomᚋClusterCockpitᚋccᚑlibᚋschemaᚐResource(ctx context.Context, sel ast.SelectionSet, v *schema.Resource) graphql.Marshaler { +func (ec *executionContext) marshalNResource2ᚖgithubᚗcomᚋClusterCockpitᚋccᚑlibᚋv2ᚋschemaᚐResource(ctx context.Context, sel ast.SelectionSet, v *schema.Resource) graphql.Marshaler { if v == nil { if !graphql.HasFieldError(ctx, graphql.GetFieldContext(ctx)) { graphql.AddErrorf(ctx, "the requested element is null which the schema does not allow") @@ -19120,13 +19128,13 @@ func (ec *executionContext) marshalNResource2ᚖgithubᚗcomᚋClusterCockpitᚋ return ec._Resource(ctx, sel, v) } -func (ec *executionContext) unmarshalNSchedulerState2githubᚗcomᚋClusterCockpitᚋccᚑlibᚋschemaᚐSchedulerState(ctx context.Context, v any) (schema.SchedulerState, error) { +func (ec *executionContext) unmarshalNSchedulerState2githubᚗcomᚋClusterCockpitᚋccᚑlibᚋv2ᚋschemaᚐSchedulerState(ctx context.Context, v any) (schema.SchedulerState, error) { tmp, err := graphql.UnmarshalString(v) res := schema.SchedulerState(tmp) return res, graphql.ErrorOnPath(ctx, err) } -func (ec *executionContext) marshalNSchedulerState2githubᚗcomᚋClusterCockpitᚋccᚑlibᚋschemaᚐSchedulerState(ctx context.Context, sel ast.SelectionSet, v schema.SchedulerState) graphql.Marshaler { +func (ec *executionContext) 
marshalNSchedulerState2githubᚗcomᚋClusterCockpitᚋccᚑlibᚋv2ᚋschemaᚐSchedulerState(ctx context.Context, sel ast.SelectionSet, v schema.SchedulerState) graphql.Marshaler { _ = sel res := graphql.MarshalString(string(v)) if res == graphql.Null { @@ -19191,7 +19199,7 @@ func (ec *executionContext) marshalNScopedStats2ᚖgithubᚗcomᚋClusterCockpit return ec._ScopedStats(ctx, sel, v) } -func (ec *executionContext) marshalNSeries2githubᚗcomᚋClusterCockpitᚋccᚑlibᚋschemaᚐSeries(ctx context.Context, sel ast.SelectionSet, v schema.Series) graphql.Marshaler { +func (ec *executionContext) marshalNSeries2githubᚗcomᚋClusterCockpitᚋccᚑlibᚋv2ᚋschemaᚐSeries(ctx context.Context, sel ast.SelectionSet, v schema.Series) graphql.Marshaler { return ec._Series(ctx, sel, &v) } @@ -19251,7 +19259,7 @@ func (ec *executionContext) marshalNString2ᚕstringᚄ(ctx context.Context, sel return ret } -func (ec *executionContext) marshalNSubCluster2ᚕᚖgithubᚗcomᚋClusterCockpitᚋccᚑlibᚋschemaᚐSubClusterᚄ(ctx context.Context, sel ast.SelectionSet, v []*schema.SubCluster) graphql.Marshaler { +func (ec *executionContext) marshalNSubCluster2ᚕᚖgithubᚗcomᚋClusterCockpitᚋccᚑlibᚋv2ᚋschemaᚐSubClusterᚄ(ctx context.Context, sel ast.SelectionSet, v []*schema.SubCluster) graphql.Marshaler { ret := make(graphql.Array, len(v)) var wg sync.WaitGroup isLen1 := len(v) == 1 @@ -19275,7 +19283,7 @@ func (ec *executionContext) marshalNSubCluster2ᚕᚖgithubᚗcomᚋClusterCockp if !isLen1 { defer wg.Done() } - ret[i] = ec.marshalNSubCluster2ᚖgithubᚗcomᚋClusterCockpitᚋccᚑlibᚋschemaᚐSubCluster(ctx, sel, v[i]) + ret[i] = ec.marshalNSubCluster2ᚖgithubᚗcomᚋClusterCockpitᚋccᚑlibᚋv2ᚋschemaᚐSubCluster(ctx, sel, v[i]) } if isLen1 { f(i) @@ -19295,7 +19303,7 @@ func (ec *executionContext) marshalNSubCluster2ᚕᚖgithubᚗcomᚋClusterCockp return ret } -func (ec *executionContext) marshalNSubCluster2ᚖgithubᚗcomᚋClusterCockpitᚋccᚑlibᚋschemaᚐSubCluster(ctx context.Context, sel ast.SelectionSet, v *schema.SubCluster) graphql.Marshaler { +func (ec *executionContext) marshalNSubCluster2ᚖgithubᚗcomᚋClusterCockpitᚋccᚑlibᚋv2ᚋschemaᚐSubCluster(ctx context.Context, sel ast.SelectionSet, v *schema.SubCluster) graphql.Marshaler { if v == nil { if !graphql.HasFieldError(ctx, graphql.GetFieldContext(ctx)) { graphql.AddErrorf(ctx, "the requested element is null which the schema does not allow") @@ -19305,7 +19313,7 @@ func (ec *executionContext) marshalNSubCluster2ᚖgithubᚗcomᚋClusterCockpit return ec._SubCluster(ctx, sel, v) } -func (ec *executionContext) marshalNSubClusterConfig2ᚕᚖgithubᚗcomᚋClusterCockpitᚋccᚑlibᚋschemaᚐSubClusterConfigᚄ(ctx context.Context, sel ast.SelectionSet, v []*schema.SubClusterConfig) graphql.Marshaler { +func (ec *executionContext) marshalNSubClusterConfig2ᚕᚖgithubᚗcomᚋClusterCockpitᚋccᚑlibᚋv2ᚋschemaᚐSubClusterConfigᚄ(ctx context.Context, sel ast.SelectionSet, v []*schema.SubClusterConfig) graphql.Marshaler { ret := make(graphql.Array, len(v)) var wg sync.WaitGroup isLen1 := len(v) == 1 @@ -19329,7 +19337,7 @@ func (ec *executionContext) marshalNSubClusterConfig2ᚕᚖgithubᚗcomᚋCluste if !isLen1 { defer wg.Done() } - ret[i] = ec.marshalNSubClusterConfig2ᚖgithubᚗcomᚋClusterCockpitᚋccᚑlibᚋschemaᚐSubClusterConfig(ctx, sel, v[i]) + ret[i] = ec.marshalNSubClusterConfig2ᚖgithubᚗcomᚋClusterCockpitᚋccᚑlibᚋv2ᚋschemaᚐSubClusterConfig(ctx, sel, v[i]) } if isLen1 { f(i) @@ -19349,7 +19357,7 @@ func (ec *executionContext) marshalNSubClusterConfig2ᚕᚖgithubᚗcomᚋCluste return ret } -func (ec *executionContext) 
marshalNSubClusterConfig2ᚖgithubᚗcomᚋClusterCockpitᚋccᚑlibᚋschemaᚐSubClusterConfig(ctx context.Context, sel ast.SelectionSet, v *schema.SubClusterConfig) graphql.Marshaler { +func (ec *executionContext) marshalNSubClusterConfig2ᚖgithubᚗcomᚋClusterCockpitᚋccᚑlibᚋv2ᚋschemaᚐSubClusterConfig(ctx context.Context, sel ast.SelectionSet, v *schema.SubClusterConfig) graphql.Marshaler { if v == nil { if !graphql.HasFieldError(ctx, graphql.GetFieldContext(ctx)) { graphql.AddErrorf(ctx, "the requested element is null which the schema does not allow") @@ -19359,11 +19367,11 @@ func (ec *executionContext) marshalNSubClusterConfig2ᚖgithubᚗcomᚋClusterCo return ec._SubClusterConfig(ctx, sel, v) } -func (ec *executionContext) marshalNTag2githubᚗcomᚋClusterCockpitᚋccᚑlibᚋschemaᚐTag(ctx context.Context, sel ast.SelectionSet, v schema.Tag) graphql.Marshaler { +func (ec *executionContext) marshalNTag2githubᚗcomᚋClusterCockpitᚋccᚑlibᚋv2ᚋschemaᚐTag(ctx context.Context, sel ast.SelectionSet, v schema.Tag) graphql.Marshaler { return ec._Tag(ctx, sel, &v) } -func (ec *executionContext) marshalNTag2ᚕᚖgithubᚗcomᚋClusterCockpitᚋccᚑlibᚋschemaᚐTagᚄ(ctx context.Context, sel ast.SelectionSet, v []*schema.Tag) graphql.Marshaler { +func (ec *executionContext) marshalNTag2ᚕᚖgithubᚗcomᚋClusterCockpitᚋccᚑlibᚋv2ᚋschemaᚐTagᚄ(ctx context.Context, sel ast.SelectionSet, v []*schema.Tag) graphql.Marshaler { ret := make(graphql.Array, len(v)) var wg sync.WaitGroup isLen1 := len(v) == 1 @@ -19387,7 +19395,7 @@ func (ec *executionContext) marshalNTag2ᚕᚖgithubᚗcomᚋClusterCockpitᚋcc if !isLen1 { defer wg.Done() } - ret[i] = ec.marshalNTag2ᚖgithubᚗcomᚋClusterCockpitᚋccᚑlibᚋschemaᚐTag(ctx, sel, v[i]) + ret[i] = ec.marshalNTag2ᚖgithubᚗcomᚋClusterCockpitᚋccᚑlibᚋv2ᚋschemaᚐTag(ctx, sel, v[i]) } if isLen1 { f(i) @@ -19407,7 +19415,7 @@ func (ec *executionContext) marshalNTag2ᚕᚖgithubᚗcomᚋClusterCockpitᚋcc return ret } -func (ec *executionContext) marshalNTag2ᚖgithubᚗcomᚋClusterCockpitᚋccᚑlibᚋschemaᚐTag(ctx context.Context, sel ast.SelectionSet, v *schema.Tag) graphql.Marshaler { +func (ec *executionContext) marshalNTag2ᚖgithubᚗcomᚋClusterCockpitᚋccᚑlibᚋv2ᚋschemaᚐTag(ctx context.Context, sel ast.SelectionSet, v *schema.Tag) graphql.Marshaler { if v == nil { if !graphql.HasFieldError(ctx, graphql.GetFieldContext(ctx)) { graphql.AddErrorf(ctx, "the requested element is null which the schema does not allow") @@ -19465,11 +19473,11 @@ func (ec *executionContext) marshalNTimeWeights2ᚖgithubᚗcomᚋClusterCockpit return ec._TimeWeights(ctx, sel, v) } -func (ec *executionContext) marshalNTopology2githubᚗcomᚋClusterCockpitᚋccᚑlibᚋschemaᚐTopology(ctx context.Context, sel ast.SelectionSet, v schema.Topology) graphql.Marshaler { +func (ec *executionContext) marshalNTopology2githubᚗcomᚋClusterCockpitᚋccᚑlibᚋv2ᚋschemaᚐTopology(ctx context.Context, sel ast.SelectionSet, v schema.Topology) graphql.Marshaler { return ec._Topology(ctx, sel, &v) } -func (ec *executionContext) marshalNUnit2githubᚗcomᚋClusterCockpitᚋccᚑlibᚋschemaᚐUnit(ctx context.Context, sel ast.SelectionSet, v schema.Unit) graphql.Marshaler { +func (ec *executionContext) marshalNUnit2githubᚗcomᚋClusterCockpitᚋccᚑlibᚋv2ᚋschemaᚐUnit(ctx context.Context, sel ast.SelectionSet, v schema.Unit) graphql.Marshaler { return ec._Unit(ctx, sel, &v) } @@ -19726,7 +19734,7 @@ func (ec *executionContext) marshalN__TypeKind2string(ctx context.Context, sel a return res } -func (ec *executionContext) marshalOAccelerator2ᚕᚖgithubᚗcomᚋClusterCockpitᚋccᚑlibᚋschemaᚐAcceleratorᚄ(ctx context.Context, sel ast.SelectionSet, v 
[]*schema.Accelerator) graphql.Marshaler { +func (ec *executionContext) marshalOAccelerator2ᚕᚖgithubᚗcomᚋClusterCockpitᚋccᚑlibᚋv2ᚋschemaᚐAcceleratorᚄ(ctx context.Context, sel ast.SelectionSet, v []*schema.Accelerator) graphql.Marshaler { if v == nil { return graphql.Null } @@ -19753,7 +19761,7 @@ func (ec *executionContext) marshalOAccelerator2ᚕᚖgithubᚗcomᚋClusterCock if !isLen1 { defer wg.Done() } - ret[i] = ec.marshalNAccelerator2ᚖgithubᚗcomᚋClusterCockpitᚋccᚑlibᚋschemaᚐAccelerator(ctx, sel, v[i]) + ret[i] = ec.marshalNAccelerator2ᚖgithubᚗcomᚋClusterCockpitᚋccᚑlibᚋv2ᚋschemaᚐAccelerator(ctx, sel, v[i]) } if isLen1 { f(i) @@ -20141,7 +20149,7 @@ func (ec *executionContext) unmarshalOIntRange2ᚖgithubᚗcomᚋClusterCockpit return &res, graphql.ErrorOnPath(ctx, err) } -func (ec *executionContext) marshalOJob2ᚖgithubᚗcomᚋClusterCockpitᚋccᚑlibᚋschemaᚐJob(ctx context.Context, sel ast.SelectionSet, v *schema.Job) graphql.Marshaler { +func (ec *executionContext) marshalOJob2ᚖgithubᚗcomᚋClusterCockpitᚋccᚑlibᚋv2ᚋschemaᚐJob(ctx context.Context, sel ast.SelectionSet, v *schema.Job) graphql.Marshaler { if v == nil { return graphql.Null } @@ -20173,7 +20181,7 @@ func (ec *executionContext) marshalOJobLinkResultList2ᚖgithubᚗcomᚋClusterC return ec._JobLinkResultList(ctx, sel, v) } -func (ec *executionContext) unmarshalOJobState2ᚕgithubᚗcomᚋClusterCockpitᚋccᚑlibᚋschemaᚐJobStateᚄ(ctx context.Context, v any) ([]schema.JobState, error) { +func (ec *executionContext) unmarshalOJobState2ᚕgithubᚗcomᚋClusterCockpitᚋccᚑlibᚋv2ᚋschemaᚐJobStateᚄ(ctx context.Context, v any) ([]schema.JobState, error) { if v == nil { return nil, nil } @@ -20183,7 +20191,7 @@ func (ec *executionContext) unmarshalOJobState2ᚕgithubᚗcomᚋClusterCockpit res := make([]schema.JobState, len(vSlice)) for i := range vSlice { ctx := graphql.WithPathContext(ctx, graphql.NewPathWithIndex(i)) - res[i], err = ec.unmarshalNJobState2githubᚗcomᚋClusterCockpitᚋccᚑlibᚋschemaᚐJobState(ctx, vSlice[i]) + res[i], err = ec.unmarshalNJobState2githubᚗcomᚋClusterCockpitᚋccᚑlibᚋv2ᚋschemaᚐJobState(ctx, vSlice[i]) if err != nil { return nil, err } @@ -20191,13 +20199,13 @@ func (ec *executionContext) unmarshalOJobState2ᚕgithubᚗcomᚋClusterCockpit return res, nil } -func (ec *executionContext) marshalOJobState2ᚕgithubᚗcomᚋClusterCockpitᚋccᚑlibᚋschemaᚐJobStateᚄ(ctx context.Context, sel ast.SelectionSet, v []schema.JobState) graphql.Marshaler { +func (ec *executionContext) marshalOJobState2ᚕgithubᚗcomᚋClusterCockpitᚋccᚑlibᚋv2ᚋschemaᚐJobStateᚄ(ctx context.Context, sel ast.SelectionSet, v []schema.JobState) graphql.Marshaler { if v == nil { return graphql.Null } ret := make(graphql.Array, len(v)) for i := range v { - ret[i] = ec.marshalNJobState2githubᚗcomᚋClusterCockpitᚋccᚑlibᚋschemaᚐJobState(ctx, sel, v[i]) + ret[i] = ec.marshalNJobState2githubᚗcomᚋClusterCockpitᚋccᚑlibᚋv2ᚋschemaᚐJobState(ctx, sel, v[i]) } for _, e := range ret { @@ -20256,7 +20264,7 @@ func (ec *executionContext) marshalOMetricHistoPoint2ᚕᚖgithubᚗcomᚋCluste return ret } -func (ec *executionContext) unmarshalOMetricScope2ᚕgithubᚗcomᚋClusterCockpitᚋccᚑlibᚋschemaᚐMetricScopeᚄ(ctx context.Context, v any) ([]schema.MetricScope, error) { +func (ec *executionContext) unmarshalOMetricScope2ᚕgithubᚗcomᚋClusterCockpitᚋccᚑlibᚋv2ᚋschemaᚐMetricScopeᚄ(ctx context.Context, v any) ([]schema.MetricScope, error) { if v == nil { return nil, nil } @@ -20266,7 +20274,7 @@ func (ec *executionContext) unmarshalOMetricScope2ᚕgithubᚗcomᚋClusterCockp res := make([]schema.MetricScope, len(vSlice)) for i := range vSlice { ctx := 
graphql.WithPathContext(ctx, graphql.NewPathWithIndex(i)) - res[i], err = ec.unmarshalNMetricScope2githubᚗcomᚋClusterCockpitᚋccᚑlibᚋschemaᚐMetricScope(ctx, vSlice[i]) + res[i], err = ec.unmarshalNMetricScope2githubᚗcomᚋClusterCockpitᚋccᚑlibᚋv2ᚋschemaᚐMetricScope(ctx, vSlice[i]) if err != nil { return nil, err } @@ -20274,13 +20282,13 @@ func (ec *executionContext) unmarshalOMetricScope2ᚕgithubᚗcomᚋClusterCockp return res, nil } -func (ec *executionContext) marshalOMetricScope2ᚕgithubᚗcomᚋClusterCockpitᚋccᚑlibᚋschemaᚐMetricScopeᚄ(ctx context.Context, sel ast.SelectionSet, v []schema.MetricScope) graphql.Marshaler { +func (ec *executionContext) marshalOMetricScope2ᚕgithubᚗcomᚋClusterCockpitᚋccᚑlibᚋv2ᚋschemaᚐMetricScopeᚄ(ctx context.Context, sel ast.SelectionSet, v []schema.MetricScope) graphql.Marshaler { if v == nil { return graphql.Null } ret := make(graphql.Array, len(v)) for i := range v { - ret[i] = ec.marshalNMetricScope2githubᚗcomᚋClusterCockpitᚋccᚑlibᚋschemaᚐMetricScope(ctx, sel, v[i]) + ret[i] = ec.marshalNMetricScope2githubᚗcomᚋClusterCockpitᚋccᚑlibᚋv2ᚋschemaᚐMetricScope(ctx, sel, v[i]) } for _, e := range ret { @@ -20310,7 +20318,7 @@ func (ec *executionContext) unmarshalOMetricStatItem2ᚕᚖgithubᚗcomᚋCluste return res, nil } -func (ec *executionContext) marshalOMetricStatistics2githubᚗcomᚋClusterCockpitᚋccᚑlibᚋschemaᚐMetricStatistics(ctx context.Context, sel ast.SelectionSet, v schema.MetricStatistics) graphql.Marshaler { +func (ec *executionContext) marshalOMetricStatistics2githubᚗcomᚋClusterCockpitᚋccᚑlibᚋv2ᚋschemaᚐMetricStatistics(ctx context.Context, sel ast.SelectionSet, v schema.MetricStatistics) graphql.Marshaler { return ec._MetricStatistics(ctx, sel, &v) } @@ -20332,7 +20340,7 @@ func (ec *executionContext) marshalOMonitoringState2ᚖstring(ctx context.Contex return res } -func (ec *executionContext) marshalONode2ᚖgithubᚗcomᚋClusterCockpitᚋccᚑlibᚋschemaᚐNode(ctx context.Context, sel ast.SelectionSet, v *schema.Node) graphql.Marshaler { +func (ec *executionContext) marshalONode2ᚖgithubᚗcomᚋClusterCockpitᚋccᚑlibᚋv2ᚋschemaᚐNode(ctx context.Context, sel ast.SelectionSet, v *schema.Node) graphql.Marshaler { if v == nil { return graphql.Null } @@ -20373,7 +20381,7 @@ func (ec *executionContext) unmarshalOPageRequest2ᚖgithubᚗcomᚋClusterCockp return &res, graphql.ErrorOnPath(ctx, err) } -func (ec *executionContext) unmarshalOSchedulerState2ᚖgithubᚗcomᚋClusterCockpitᚋccᚑlibᚋschemaᚐSchedulerState(ctx context.Context, v any) (*schema.SchedulerState, error) { +func (ec *executionContext) unmarshalOSchedulerState2ᚖgithubᚗcomᚋClusterCockpitᚋccᚑlibᚋv2ᚋschemaᚐSchedulerState(ctx context.Context, v any) (*schema.SchedulerState, error) { if v == nil { return nil, nil } @@ -20382,7 +20390,7 @@ func (ec *executionContext) unmarshalOSchedulerState2ᚖgithubᚗcomᚋClusterCo return &res, graphql.ErrorOnPath(ctx, err) } -func (ec *executionContext) marshalOSchedulerState2ᚖgithubᚗcomᚋClusterCockpitᚋccᚑlibᚋschemaᚐSchedulerState(ctx context.Context, sel ast.SelectionSet, v *schema.SchedulerState) graphql.Marshaler { +func (ec *executionContext) marshalOSchedulerState2ᚖgithubᚗcomᚋClusterCockpitᚋccᚑlibᚋv2ᚋschemaᚐSchedulerState(ctx context.Context, sel ast.SelectionSet, v *schema.SchedulerState) graphql.Marshaler { if v == nil { return graphql.Null } @@ -20392,7 +20400,7 @@ func (ec *executionContext) marshalOSchedulerState2ᚖgithubᚗcomᚋClusterCock return res } -func (ec *executionContext) marshalOSeries2ᚕgithubᚗcomᚋClusterCockpitᚋccᚑlibᚋschemaᚐSeriesᚄ(ctx context.Context, sel ast.SelectionSet, v 
[]schema.Series) graphql.Marshaler { +func (ec *executionContext) marshalOSeries2ᚕgithubᚗcomᚋClusterCockpitᚋccᚑlibᚋv2ᚋschemaᚐSeriesᚄ(ctx context.Context, sel ast.SelectionSet, v []schema.Series) graphql.Marshaler { if v == nil { return graphql.Null } @@ -20419,7 +20427,7 @@ func (ec *executionContext) marshalOSeries2ᚕgithubᚗcomᚋClusterCockpitᚋcc if !isLen1 { defer wg.Done() } - ret[i] = ec.marshalNSeries2githubᚗcomᚋClusterCockpitᚋccᚑlibᚋschemaᚐSeries(ctx, sel, v[i]) + ret[i] = ec.marshalNSeries2githubᚗcomᚋClusterCockpitᚋccᚑlibᚋv2ᚋschemaᚐSeries(ctx, sel, v[i]) } if isLen1 { f(i) @@ -20455,7 +20463,7 @@ func (ec *executionContext) marshalOSortByAggregate2ᚖgithubᚗcomᚋClusterCoc return v } -func (ec *executionContext) marshalOStatsSeries2ᚖgithubᚗcomᚋClusterCockpitᚋccᚑlibᚋschemaᚐStatsSeries(ctx context.Context, sel ast.SelectionSet, v *schema.StatsSeries) graphql.Marshaler { +func (ec *executionContext) marshalOStatsSeries2ᚖgithubᚗcomᚋClusterCockpitᚋccᚑlibᚋv2ᚋschemaᚐStatsSeries(ctx context.Context, sel ast.SelectionSet, v *schema.StatsSeries) graphql.Marshaler { if v == nil { return graphql.Null } @@ -20562,11 +20570,11 @@ func (ec *executionContext) unmarshalOTimeRange2ᚖgithubᚗcomᚋClusterCockpit return &res, graphql.ErrorOnPath(ctx, err) } -func (ec *executionContext) marshalOUnit2githubᚗcomᚋClusterCockpitᚋccᚑlibᚋschemaᚐUnit(ctx context.Context, sel ast.SelectionSet, v schema.Unit) graphql.Marshaler { +func (ec *executionContext) marshalOUnit2githubᚗcomᚋClusterCockpitᚋccᚑlibᚋv2ᚋschemaᚐUnit(ctx context.Context, sel ast.SelectionSet, v schema.Unit) graphql.Marshaler { return ec._Unit(ctx, sel, &v) } -func (ec *executionContext) marshalOUnit2ᚖgithubᚗcomᚋClusterCockpitᚋccᚑlibᚋschemaᚐUnit(ctx context.Context, sel ast.SelectionSet, v *schema.Unit) graphql.Marshaler { +func (ec *executionContext) marshalOUnit2ᚖgithubᚗcomᚋClusterCockpitᚋccᚑlibᚋv2ᚋschemaᚐUnit(ctx context.Context, sel ast.SelectionSet, v *schema.Unit) graphql.Marshaler { if v == nil { return graphql.Null } diff --git a/internal/graph/model/models_gen.go b/internal/graph/model/models_gen.go index 31ba03ab..06f0ffcf 100644 --- a/internal/graph/model/models_gen.go +++ b/internal/graph/model/models_gen.go @@ -82,6 +82,7 @@ type JobFilter struct { State []schema.JobState `json:"state,omitempty"` MetricStats []*MetricStatItem `json:"metricStats,omitempty"` Shared *string `json:"shared,omitempty"` + Schedule *string `json:"schedule,omitempty"` Node *StringInput `json:"node,omitempty"` } diff --git a/internal/graph/schema.resolvers.go b/internal/graph/schema.resolvers.go index 32499b8c..11168e80 100644 --- a/internal/graph/schema.resolvers.go +++ b/internal/graph/schema.resolvers.go @@ -972,12 +972,10 @@ func (r *Resolver) Query() generated.QueryResolver { return &queryResolver{r} } // SubCluster returns generated.SubClusterResolver implementation. 
func (r *Resolver) SubCluster() generated.SubClusterResolver { return &subClusterResolver{r} } -type ( - clusterResolver struct{ *Resolver } - jobResolver struct{ *Resolver } - metricValueResolver struct{ *Resolver } - mutationResolver struct{ *Resolver } - nodeResolver struct{ *Resolver } - queryResolver struct{ *Resolver } - subClusterResolver struct{ *Resolver } -) +type clusterResolver struct{ *Resolver } +type jobResolver struct{ *Resolver } +type metricValueResolver struct{ *Resolver } +type mutationResolver struct{ *Resolver } +type nodeResolver struct{ *Resolver } +type queryResolver struct{ *Resolver } +type subClusterResolver struct{ *Resolver } diff --git a/internal/repository/jobQuery.go b/internal/repository/jobQuery.go index 8c341afb..4655614f 100644 --- a/internal/repository/jobQuery.go +++ b/internal/repository/jobQuery.go @@ -143,57 +143,35 @@ func SecurityCheck(ctx context.Context, query sq.SelectBuilder) (sq.SelectBuilde // Build a sq.SelectBuilder out of a schema.JobFilter. func BuildWhereClause(filter *model.JobFilter, query sq.SelectBuilder) sq.SelectBuilder { - if filter.Tags != nil { - // This is an OR-Logic query: Returns all distinct jobs with at least one of the requested tags; TODO: AND-Logic query? - query = query.Join("jobtag ON jobtag.job_id = job.id").Where(sq.Eq{"jobtag.tag_id": filter.Tags}).Distinct() - } + // Primary Key if filter.DbID != nil { dbIDs := make([]string, len(filter.DbID)) copy(dbIDs, filter.DbID) query = query.Where(sq.Eq{"job.id": dbIDs}) } - if filter.JobID != nil { - query = buildStringCondition("job.job_id", filter.JobID, query) - } - if filter.ArrayJobID != nil { - query = query.Where("job.array_job_id = ?", *filter.ArrayJobID) - } - if filter.User != nil { - query = buildStringCondition("job.hpc_user", filter.User, query) - } - if filter.Project != nil { - query = buildStringCondition("job.project", filter.Project, query) - } - if filter.JobName != nil { - query = buildMetaJsonCondition("jobName", filter.JobName, query) - } + // Explicit indices if filter.Cluster != nil { query = buildStringCondition("job.cluster", filter.Cluster, query) } if filter.Partition != nil { query = buildStringCondition("job.cluster_partition", filter.Partition, query) } - if filter.StartTime != nil { - query = buildTimeCondition("job.start_time", filter.StartTime, query) - } - if filter.Duration != nil { - query = buildIntCondition("job.duration", filter.Duration, query) - } - if filter.MinRunningFor != nil { - now := time.Now().Unix() // There does not seam to be a portable way to get the current unix timestamp accross different DBs. - query = query.Where("(job.job_state != 'running' OR (? 
- job.start_time) > ?)", now, *filter.MinRunningFor) - } - if filter.Shared != nil { - query = query.Where("job.shared = ?", *filter.Shared) - } if filter.State != nil { states := make([]string, len(filter.State)) for i, val := range filter.State { states[i] = string(val) } - query = query.Where(sq.Eq{"job.job_state": states}) } + if filter.Shared != nil { + query = query.Where("job.shared = ?", *filter.Shared) + } + if filter.Project != nil { + query = buildStringCondition("job.project", filter.Project, query) + } + if filter.User != nil { + query = buildStringCondition("job.hpc_user", filter.User, query) + } if filter.NumNodes != nil { query = buildIntCondition("job.num_nodes", filter.NumNodes, query) } @@ -203,17 +181,57 @@ func BuildWhereClause(filter *model.JobFilter, query sq.SelectBuilder) sq.Select if filter.NumHWThreads != nil { query = buildIntCondition("job.num_hwthreads", filter.NumHWThreads, query) } - if filter.Node != nil { - query = buildResourceJsonCondition("hostname", filter.Node, query) + if filter.ArrayJobID != nil { + query = query.Where("job.array_job_id = ?", *filter.ArrayJobID) + } + if filter.StartTime != nil { + query = buildTimeCondition("job.start_time", filter.StartTime, query) + } + if filter.Duration != nil { + query = buildIntCondition("job.duration", filter.Duration, query) } if filter.Energy != nil { query = buildFloatCondition("job.energy", filter.Energy, query) } + // Indices on Tag Table + if filter.Tags != nil { + // This is an OR-Logic query: Returns all distinct jobs with at least one of the requested tags; TODO: AND-Logic query? + query = query.Join("jobtag ON jobtag.job_id = job.id").Where(sq.Eq{"jobtag.tag_id": filter.Tags}).Distinct() + } + // No explicit Indices + if filter.JobID != nil { + query = buildStringCondition("job.job_id", filter.JobID, query) + } + // Queries Within JSONs if filter.MetricStats != nil { for _, ms := range filter.MetricStats { query = buildFloatJsonCondition(ms.MetricName, ms.Range, query) } } + if filter.Node != nil { + query = buildResourceJsonCondition("hostname", filter.Node, query) + } + if filter.JobName != nil { + query = buildMetaJsonCondition("jobName", filter.JobName, query) + } + if filter.Schedule != nil { + interactiveJobname := "interactive" + if *filter.Schedule == "interactive" { + iFilter := model.StringInput{Eq: &interactiveJobname} + query = buildMetaJsonCondition("jobName", &iFilter, query) + } else if *filter.Schedule == "batch" { + sFilter := model.StringInput{Neq: &interactiveJobname} + query = buildMetaJsonCondition("jobName", &sFilter, query) + } + } + + // Configurable Filter to exclude recently started jobs, see config.go: ShortRunningJobsDuration + if filter.MinRunningFor != nil { + now := time.Now().Unix() + // Only jobs whose start timestamp is more than MinRunningFor seconds in the past + // If a job completed within the configured timeframe, it will still show up after the start_time matches the condition! 
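+ // Example (illustrative values): with MinRunningFor = 300 and now = 1700000000,
+ // the condition below compiles to job.start_time < 1699999700, i.e. only jobs
+ // that started more than five minutes ago are returned.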
+ query = query.Where(sq.Lt{"job.start_time": (now - int64(*filter.MinRunningFor))}) + } return query } diff --git a/internal/repository/migrations/sqlite3/09_add-job-cache.up.sql b/internal/repository/migrations/sqlite3/09_add-job-cache.up.sql index 863b50ea..bd465bcb 100644 --- a/internal/repository/migrations/sqlite3/09_add-job-cache.up.sql +++ b/internal/repository/migrations/sqlite3/09_add-job-cache.up.sql @@ -118,15 +118,13 @@ DROP TABLE lookup_exclusive; DROP TABLE job; -- Deletes All Existing 'job' Indices; Recreate after Renaming ALTER TABLE job_new RENAME TO job; --- Recreate Indices from 08_add-footprint, include new submit_time indices +-- Recreate Indices from 08_add-footprint; include new 'shared' column -- Cluster Filter -CREATE INDEX IF NOT EXISTS jobs_cluster ON job (cluster); CREATE INDEX IF NOT EXISTS jobs_cluster_user ON job (cluster, hpc_user); CREATE INDEX IF NOT EXISTS jobs_cluster_project ON job (cluster, project); CREATE INDEX IF NOT EXISTS jobs_cluster_subcluster ON job (cluster, subcluster); -- Cluster Filter Sorting CREATE INDEX IF NOT EXISTS jobs_cluster_starttime ON job (cluster, start_time); -CREATE INDEX IF NOT EXISTS jobs_cluster_submittime ON job (cluster, submit_time); CREATE INDEX IF NOT EXISTS jobs_cluster_duration ON job (cluster, duration); CREATE INDEX IF NOT EXISTS jobs_cluster_numnodes ON job (cluster, num_nodes); CREATE INDEX IF NOT EXISTS jobs_cluster_numhwthreads ON job (cluster, num_hwthreads); @@ -134,44 +132,42 @@ CREATE INDEX IF NOT EXISTS jobs_cluster_numacc ON job (cluster, num_acc); CREATE INDEX IF NOT EXISTS jobs_cluster_energy ON job (cluster, energy); -- Cluster+Partition Filter -CREATE INDEX IF NOT EXISTS jobs_cluster_partition ON job (cluster, cluster_partition); +CREATE INDEX IF NOT EXISTS jobs_cluster_partition_user ON job (cluster, cluster_partition, hpc_user); +CREATE INDEX IF NOT EXISTS jobs_cluster_partition_project ON job (cluster, cluster_partition, project); +CREATE INDEX IF NOT EXISTS jobs_cluster_partition_jobstate ON job (cluster, cluster_partition, job_state); +CREATE INDEX IF NOT EXISTS jobs_cluster_partition_shared ON job (cluster, cluster_partition, shared); + -- Cluster+Partition Filter Sorting CREATE INDEX IF NOT EXISTS jobs_cluster_partition_starttime ON job (cluster, cluster_partition, start_time); -CREATE INDEX IF NOT EXISTS jobs_cluster_partition_submittime ON job (cluster, cluster_partition, submit_time); CREATE INDEX IF NOT EXISTS jobs_cluster_partition_duration ON job (cluster, cluster_partition, duration); CREATE INDEX IF NOT EXISTS jobs_cluster_partition_numnodes ON job (cluster, cluster_partition, num_nodes); CREATE INDEX IF NOT EXISTS jobs_cluster_partition_numhwthreads ON job (cluster, cluster_partition, num_hwthreads); CREATE INDEX IF NOT EXISTS jobs_cluster_partition_numacc ON job (cluster, cluster_partition, num_acc); CREATE INDEX IF NOT EXISTS jobs_cluster_partition_energy ON job (cluster, cluster_partition, energy); --- Cluster+Partition+Jobstate Filter -CREATE INDEX IF NOT EXISTS jobs_cluster_partition_jobstate ON job (cluster, cluster_partition, job_state); -CREATE INDEX IF NOT EXISTS jobs_cluster_partition_jobstate_user ON job (cluster, cluster_partition, job_state, hpc_user); -CREATE INDEX IF NOT EXISTS jobs_cluster_partition_jobstate_project ON job (cluster, cluster_partition, job_state, project); --- Cluster+Partition+Jobstate Filter Sorting -CREATE INDEX IF NOT EXISTS jobs_cluster_partition_jobstate_starttime ON job (cluster, cluster_partition, job_state, start_time); -CREATE INDEX IF NOT 
EXISTS jobs_cluster_partition_jobstate_submittime ON job (cluster, cluster_partition, job_state, submit_time); -CREATE INDEX IF NOT EXISTS jobs_cluster_partition_jobstate_duration ON job (cluster, cluster_partition, job_state, duration); -CREATE INDEX IF NOT EXISTS jobs_cluster_partition_jobstate_numnodes ON job (cluster, cluster_partition, job_state, num_nodes); -CREATE INDEX IF NOT EXISTS jobs_cluster_partition_jobstate_numhwthreads ON job (cluster, cluster_partition, job_state, num_hwthreads); -CREATE INDEX IF NOT EXISTS jobs_cluster_partition_jobstate_numacc ON job (cluster, cluster_partition, job_state, num_acc); -CREATE INDEX IF NOT EXISTS jobs_cluster_partition_jobstate_energy ON job (cluster, cluster_partition, job_state, energy); - -- Cluster+JobState Filter -CREATE INDEX IF NOT EXISTS jobs_cluster_jobstate ON job (cluster, job_state); CREATE INDEX IF NOT EXISTS jobs_cluster_jobstate_user ON job (cluster, job_state, hpc_user); CREATE INDEX IF NOT EXISTS jobs_cluster_jobstate_project ON job (cluster, job_state, project); -- Cluster+JobState Filter Sorting CREATE INDEX IF NOT EXISTS jobs_cluster_jobstate_starttime ON job (cluster, job_state, start_time); -CREATE INDEX IF NOT EXISTS jobs_cluster_jobstate_submittime ON job (cluster, job_state, submit_time); CREATE INDEX IF NOT EXISTS jobs_cluster_jobstate_duration ON job (cluster, job_state, duration); CREATE INDEX IF NOT EXISTS jobs_cluster_jobstate_numnodes ON job (cluster, job_state, num_nodes); CREATE INDEX IF NOT EXISTS jobs_cluster_jobstate_numhwthreads ON job (cluster, job_state, num_hwthreads); CREATE INDEX IF NOT EXISTS jobs_cluster_jobstate_numacc ON job (cluster, job_state, num_acc); CREATE INDEX IF NOT EXISTS jobs_cluster_jobstate_energy ON job (cluster, job_state, energy); +-- Cluster+Shared Filter +CREATE INDEX IF NOT EXISTS jobs_cluster_shared_user ON job (cluster, shared, hpc_user); +CREATE INDEX IF NOT EXISTS jobs_cluster_shared_project ON job (cluster, shared, project); +-- Cluster+Shared Filter Sorting +CREATE INDEX IF NOT EXISTS jobs_cluster_shared_starttime ON job (cluster, shared, start_time); +CREATE INDEX IF NOT EXISTS jobs_cluster_shared_duration ON job (cluster, shared, duration); +CREATE INDEX IF NOT EXISTS jobs_cluster_shared_numnodes ON job (cluster, shared, num_nodes); +CREATE INDEX IF NOT EXISTS jobs_cluster_shared_numhwthreads ON job (cluster, shared, num_hwthreads); +CREATE INDEX IF NOT EXISTS jobs_cluster_shared_numacc ON job (cluster, shared, num_acc); +CREATE INDEX IF NOT EXISTS jobs_cluster_shared_energy ON job (cluster, shared, energy); + -- User Filter -CREATE INDEX IF NOT EXISTS jobs_user ON job (hpc_user); -- User Filter Sorting CREATE INDEX IF NOT EXISTS jobs_user_starttime ON job (hpc_user, start_time); CREATE INDEX IF NOT EXISTS jobs_user_duration ON job (hpc_user, duration); @@ -181,7 +177,6 @@ CREATE INDEX IF NOT EXISTS jobs_user_numacc ON job (hpc_user, num_acc); CREATE INDEX IF NOT EXISTS jobs_user_energy ON job (hpc_user, energy); -- Project Filter -CREATE INDEX IF NOT EXISTS jobs_project ON job (project); CREATE INDEX IF NOT EXISTS jobs_project_user ON job (project, hpc_user); -- Project Filter Sorting CREATE INDEX IF NOT EXISTS jobs_project_starttime ON job (project, start_time); @@ -192,10 +187,8 @@ CREATE INDEX IF NOT EXISTS jobs_project_numacc ON job (project, num_acc); CREATE INDEX IF NOT EXISTS jobs_project_energy ON job (project, energy); -- JobState Filter -CREATE INDEX IF NOT EXISTS jobs_jobstate ON job (job_state); CREATE INDEX IF NOT EXISTS jobs_jobstate_user ON job 
(job_state, hpc_user); CREATE INDEX IF NOT EXISTS jobs_jobstate_project ON job (job_state, project); -CREATE INDEX IF NOT EXISTS jobs_jobstate_cluster ON job (job_state, cluster); -- JobState Filter Sorting CREATE INDEX IF NOT EXISTS jobs_jobstate_starttime ON job (job_state, start_time); CREATE INDEX IF NOT EXISTS jobs_jobstate_duration ON job (job_state, duration); @@ -204,18 +197,21 @@ CREATE INDEX IF NOT EXISTS jobs_jobstate_numhwthreads ON job (job_state, num_hwt CREATE INDEX IF NOT EXISTS jobs_jobstate_numacc ON job (job_state, num_acc); CREATE INDEX IF NOT EXISTS jobs_jobstate_energy ON job (job_state, energy); +-- Shared Filter +CREATE INDEX IF NOT EXISTS jobs_shared_user ON job (shared, hpc_user); +CREATE INDEX IF NOT EXISTS jobs_shared_project ON job (shared, project); +-- Shared Filter Sorting +CREATE INDEX IF NOT EXISTS jobs_shared_starttime ON job (shared, start_time); +CREATE INDEX IF NOT EXISTS jobs_shared_duration ON job (shared, duration); +CREATE INDEX IF NOT EXISTS jobs_shared_numnodes ON job (shared, num_nodes); +CREATE INDEX IF NOT EXISTS jobs_shared_numhwthreads ON job (shared, num_hwthreads); +CREATE INDEX IF NOT EXISTS jobs_shared_numacc ON job (shared, num_acc); +CREATE INDEX IF NOT EXISTS jobs_shared_energy ON job (shared, energy); + -- ArrayJob Filter CREATE INDEX IF NOT EXISTS jobs_arrayjobid_starttime ON job (array_job_id, start_time); CREATE INDEX IF NOT EXISTS jobs_cluster_arrayjobid_starttime ON job (cluster, array_job_id, start_time); --- Sorting without active filters -CREATE INDEX IF NOT EXISTS jobs_starttime ON job (start_time); -CREATE INDEX IF NOT EXISTS jobs_duration ON job (duration); -CREATE INDEX IF NOT EXISTS jobs_numnodes ON job (num_nodes); -CREATE INDEX IF NOT EXISTS jobs_numhwthreads ON job (num_hwthreads); -CREATE INDEX IF NOT EXISTS jobs_numacc ON job (num_acc); -CREATE INDEX IF NOT EXISTS jobs_energy ON job (energy); - -- Single filters with default starttime sorting CREATE INDEX IF NOT EXISTS jobs_duration_starttime ON job (duration, start_time); CREATE INDEX IF NOT EXISTS jobs_numnodes_starttime ON job (num_nodes, start_time); @@ -223,6 +219,18 @@ CREATE INDEX IF NOT EXISTS jobs_numhwthreads_starttime ON job (num_hwthreads, st CREATE INDEX IF NOT EXISTS jobs_numacc_starttime ON job (num_acc, start_time); CREATE INDEX IF NOT EXISTS jobs_energy_starttime ON job (energy, start_time); +-- Single filters with duration sorting +CREATE INDEX IF NOT EXISTS jobs_starttime_duration ON job (start_time, duration); +CREATE INDEX IF NOT EXISTS jobs_numnodes_duration ON job (num_nodes, duration); +CREATE INDEX IF NOT EXISTS jobs_numhwthreads_duration ON job (num_hwthreads, duration); +CREATE INDEX IF NOT EXISTS jobs_numacc_duration ON job (num_acc, duration); +CREATE INDEX IF NOT EXISTS jobs_energy_duration ON job (energy, duration); + +-- Notes: +-- Cluster+Partition+Jobstate Filter: Tested -> Full Array Of Combinations non-required +-- Cluster+JobState+Shared Filter: Tested -> No further timing improvement +-- JobState+Shared Filter: Tested -> No further timing improvement + -- Optimize DB index usage PRAGMA optimize; diff --git a/internal/repository/migrations/sqlite3/10_node-table.up.sql b/internal/repository/migrations/sqlite3/10_node-table.up.sql index 247bceab..7b5b5ac7 100644 --- a/internal/repository/migrations/sqlite3/10_node-table.up.sql +++ b/internal/repository/migrations/sqlite3/10_node-table.up.sql @@ -33,8 +33,6 @@ CREATE INDEX IF NOT EXISTS nodes_cluster_subcluster ON node (cluster, subcluster -- Add NEW Indices For New Node_State Table 
Fields CREATE INDEX IF NOT EXISTS nodestates_timestamp ON node_state (time_stamp); -CREATE INDEX IF NOT EXISTS nodestates_state ON node_state (node_state); -CREATE INDEX IF NOT EXISTS nodestates_health ON node_state (health_state); CREATE INDEX IF NOT EXISTS nodestates_state_timestamp ON node_state (node_state, time_stamp); CREATE INDEX IF NOT EXISTS nodestates_health_timestamp ON node_state (health_state, time_stamp); CREATE INDEX IF NOT EXISTS nodestates_nodeid_state ON node_state (node_id, node_state); diff --git a/internal/routerConfig/routes.go b/internal/routerConfig/routes.go index 436031ef..e6a79095 100644 --- a/internal/routerConfig/routes.go +++ b/internal/routerConfig/routes.go @@ -257,6 +257,12 @@ func buildFilterPresets(query url.Values) map[string]interface{} { if len(query["state"]) != 0 { filterPresets["state"] = query["state"] } + if query.Get("shared") != "" { + filterPresets["shared"] = query.Get("shared") + } + if query.Get("schedule") != "" { + filterPresets["schedule"] = query.Get("schedule") + } if rawtags, ok := query["tag"]; ok { tags := make([]int, len(rawtags)) for i, tid := range rawtags { diff --git a/web/frontend/src/generic/Filters.svelte b/web/frontend/src/generic/Filters.svelte index 7bc877f0..74417015 100644 --- a/web/frontend/src/generic/Filters.svelte +++ b/web/frontend/src/generic/Filters.svelte @@ -28,7 +28,7 @@ } from "@sveltestrap/sveltestrap"; import Info from "./filters/InfoBox.svelte"; import Cluster from "./filters/Cluster.svelte"; - import JobStates, { allJobStates } from "./filters/JobStates.svelte"; + import JobStates, { allJobStates, mapSharedStates } from "./filters/JobStates.svelte"; import StartTime, { startTimeSelectOptions } from "./filters/StartTime.svelte"; import Duration from "./filters/Duration.svelte"; import Tags from "./filters/Tags.svelte"; @@ -69,6 +69,8 @@ cluster: null, partition: null, states: allJobStates, + shared: "", + schedule: "", startTime: { from: null, to: null, range: ""}, duration: { lessThan: null, @@ -103,6 +105,8 @@ filterPresets.states || filterPresets.state ? 
[filterPresets.state].flat() : allJobStates, + shared: filterPresets.shared || "", + schedule: filterPresets.schedule || "", startTime: filterPresets.startTime || { from: null, to: null, range: ""}, duration: filterPresets.duration || { lessThan: null, @@ -146,19 +150,39 @@ let items = []; if (filters.dbId.length != 0) items.push({ dbId: filters.dbId }); - if (filters.jobId) - items.push({ jobId: { [filters.jobIdMatch]: filters.jobId } }); - if (filters.arrayJobId != null) - items.push({ arrayJobId: filters.arrayJobId }); - if (filters.jobName) items.push({ jobName: { contains: filters.jobName } }); - if (filters.project) - items.push({ project: { [filters.projectMatch]: filters.project } }); - if (filters.user) - items.push({ user: { [filters.userMatch]: filters.user } }); if (filters.cluster) items.push({ cluster: { eq: filters.cluster } }); if (filters.partition) items.push({ partition: { eq: filters.partition } }); if (filters.states.length != allJobStates?.length) items.push({ state: filters.states }); + if (filters.shared) items.push({ shared: filters.shared }); + if (filters.project) + items.push({ project: { [filters.projectMatch]: filters.project } }); + if (filters.user) + items.push({ user: { [filters.userMatch]: filters.user } }); + if (filters.numNodes.from != null || filters.numNodes.to != null) { + items.push({ + numNodes: { from: filters.numNodes.from, to: filters.numNodes.to }, + }); + } + if (filters.numAccelerators.from != null || filters.numAccelerators.to != null) { + items.push({ + numAccelerators: { + from: filters.numAccelerators.from, + to: filters.numAccelerators.to, + }, + }); + } + if (filters.numHWThreads.from != null || filters.numHWThreads.to != null) { + items.push({ + numHWThreads: { + from: filters.numHWThreads.from, + to: filters.numHWThreads.to, + }, + }); + } + if (filters.arrayJobId != null) + items.push({ arrayJobId: filters.arrayJobId }); + if (filters.tags.length != 0) items.push({ tags: filters.tags }); if (filters.startTime.from || filters.startTime.to) items.push({ startTime: { from: filters.startTime.from, to: filters.startTime.to }, @@ -175,36 +199,17 @@ items.push({ duration: { from: 0, to: filters.duration.lessThan } }); if (filters.duration.moreThan) items.push({ duration: { from: filters.duration.moreThan, to: 604800 } }); // 7 days to include special jobs with long runtimes - if (filters.tags.length != 0) items.push({ tags: filters.tags }); - if (filters.numNodes.from != null || filters.numNodes.to != null) { - items.push({ - numNodes: { from: filters.numNodes.from, to: filters.numNodes.to }, - }); - } - if (filters.numHWThreads.from != null || filters.numHWThreads.to != null) { - items.push({ - numHWThreads: { - from: filters.numHWThreads.from, - to: filters.numHWThreads.to, - }, - }); - } - if (filters.numAccelerators.from != null || filters.numAccelerators.to != null) { - items.push({ - numAccelerators: { - from: filters.numAccelerators.from, - to: filters.numAccelerators.to, - }, - }); - } - if (filters.node) items.push({ node: { [filters.nodeMatch]: filters.node } }); if (filters.energy.from || filters.energy.to) items.push({ energy: { from: filters.energy.from, to: filters.energy.to }, }); + if (filters.jobId) + items.push({ jobId: { [filters.jobIdMatch]: filters.jobId } }); if (filters.stats.length != 0) items.push({ metricStats: filters.stats.map((st) => { return { metricName: st.field, range: { from: st.from, to: st.to }} }) }); - + if (filters.node) items.push({ node: { [filters.nodeMatch]: filters.node } }); + if 
(filters.jobName) items.push({ jobName: { contains: filters.jobName } }); + if (filters.schedule) items.push({ schedule: filters.schedule }); applyFilters({ filters: items }); changeURL(); return items; @@ -248,6 +253,8 @@ if (filters.partition) opts.push(`partition=${filters.partition}`); if (filters.states.length != allJobStates?.length) for (let state of filters.states) opts.push(`state=${state}`); + if (filters.shared) opts.push(`shared=${filters.shared}`); + if (filters.schedule) opts.push(`schedule=${filters.schedule}`); if (filters.startTime.from && filters.startTime.to) opts.push( `startTime=${dateToUnixEpoch(filters.startTime.from)}-${dateToUnixEpoch(filters.startTime.to)}`, @@ -366,6 +373,23 @@ {#if filters.states.length != allJobStates?.length} (isJobStatesOpen = true)}> {filters.states.join(", ")} + {#if filters.shared && !filters.schedule} + ({mapSharedStates[filters.shared]}) + {:else if filters.schedule && !filters.shared} + ({filters.schedule.charAt(0).toUpperCase() + filters.schedule?.slice(1)}) + {:else if (filters.shared && filters.schedule)} + ({[mapSharedStates[filters.shared], (filters.schedule.charAt(0).toUpperCase() + filters.schedule.slice(1))].join(", ")}) + {/if} + + {:else if (filters.shared || filters.schedule)} + (isJobStatesOpen = true)}> + {#if filters.shared && !filters.schedule} + {mapSharedStates[filters.shared]} + {:else if filters.schedule && !filters.shared} + {filters.schedule.charAt(0).toUpperCase() + filters.schedule?.slice(1)} + {:else if (filters.shared && filters.schedule)} + {[mapSharedStates[filters.shared], (filters.schedule.charAt(0).toUpperCase() + filters.schedule.slice(1))].join(", ")} + {/if} {/if} @@ -468,6 +492,8 @@ updateFilters(filter)} /> diff --git a/web/frontend/src/generic/filters/JobStates.svelte b/web/frontend/src/generic/filters/JobStates.svelte index ba4168f7..dc622a20 100644 --- a/web/frontend/src/generic/filters/JobStates.svelte +++ b/web/frontend/src/generic/filters/JobStates.svelte @@ -4,23 +4,35 @@ Properties: - `isOpen Bool?`: Is this filter component opened [Bindable, Default: false] - `presetStates [String]?`: The latest selected filter state [Default: [...allJobStates]] + - `presetShared String?`: The latest selected filter shared [Default: ""] + - `presetSchedule String?`: The latest selected filter schedule [Default: ""] - `setFilter Func`: The callback function to apply current filter selection Exported: - `const allJobStates [String]`: List of all available job states used in cc-backend + - `const mapSharedStates {String:String}`: Object of all available shared states used in cc-backend with label --> @@ -60,10 +86,26 @@ name="flavours" value={state} /> - {state} + {state.charAt(0).toUpperCase() + state.slice(1)} {/each} +
+ [elided markup: a new "Resource Sharing" selector iterating allSharedStates, a new "Processing Type" selector (batch/interactive), and footer buttons conditional on pendingStates.length != 0]

From fb8db3c3aed9527e01c526f3943b5d5abdafed01 Mon Sep 17 00:00:00 2001 From: Jan Eitzinger Date: Wed, 14 Jan 2026 07:37:31 +0100 Subject: [PATCH 31/59] Add query which node metric data needs to be retained --- go.mod | 1 + go.sum | 2 ++ internal/repository/job.go | 61 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 64 insertions(+) diff --git a/go.mod b/go.mod index 9cb82fbc..808b2e7a 100644 --- a/go.mod +++ b/go.mod @@ -109,6 +109,7 @@ require ( github.com/urfave/cli/v2 v2.27.7 // indirect github.com/urfave/cli/v3 v3.6.1 // indirect github.com/xrash/smetrics v0.0.0-20250705151800-55b8f293f342 // indirect + github.com/xtgo/set v1.0.0 // indirect go.yaml.in/yaml/v2 v2.4.3 // indirect go.yaml.in/yaml/v3 v3.0.4 // indirect golang.org/x/exp v0.0.0-20250620022241-b7579e27df2b // indirect diff --git a/go.sum b/go.sum index 8d3904ae..39571309 100644 --- a/go.sum +++ b/go.sum @@ -318,6 +318,8 @@ github.com/vektah/gqlparser/v2 v2.5.31 h1:YhWGA1mfTjID7qJhd1+Vxhpk5HTgydrGU9IgkW github.com/vektah/gqlparser/v2 v2.5.31/go.mod h1:c1I28gSOVNzlfc4WuDlqU7voQnsqI6OG2amkBAFmgts= github.com/xrash/smetrics v0.0.0-20250705151800-55b8f293f342 h1:FnBeRrxr7OU4VvAzt5X7s6266i6cSVkkFPS0TuXWbIg= github.com/xrash/smetrics v0.0.0-20250705151800-55b8f293f342/go.mod h1:Ohn+xnUBiLI6FVj/9LpzZWtj1/D6lUovWYBkxHVV3aM= +github.com/xtgo/set v1.0.0 h1:6BCNBRv3ORNDQ7fyoJXRv+tstJz3m1JVFQErfeZz2pY= +github.com/xtgo/set v1.0.0/go.mod h1:d3NHzGzSa0NmB2NhFyECA+QdRp29oEn2xbT+TpeFoM8= github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY= go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto= go.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE= diff --git a/internal/repository/job.go b/internal/repository/job.go index 99970ce1..1ff8047e 100644 --- a/internal/repository/job.go +++ b/internal/repository/job.go @@ -76,6 +76,7 @@ import ( "github.com/ClusterCockpit/cc-lib/v2/schema" sq "github.com/Masterminds/squirrel" "github.com/jmoiron/sqlx" + "github.com/xtgo/set" ) var ( @@ -772,3 +773,63 @@ func (r *JobRepository) UpdateFootprint( return stmt.Set("footprint", string(rawFootprint)), nil } + +func (r *JobRepository) GetUsedNodes(ts uint64) map[string][]string { + q := sq.Select("job.cluster", "job.resources").From("job"). + Where("job.start_time < ?", ts). 
+ Where(sq.Eq{"job.job_state": "running"}) + + rows, err := q.RunWith(r.stmtCache).Query() + if err != nil { + queryString, queryVars, _ := q.ToSql() + cclog.Errorf("Error while running query '%s' %v: %v", queryString, queryVars, err) + return nil + } + defer rows.Close() + + // Use a map of sets for efficient deduplication + nodeSet := make(map[string]map[string]struct{}) + + var ( + cluster string + rawResources []byte + resources []*schema.Resource + ) + + for rows.Next() { + if err := rows.Scan(&cluster, &rawResources); err != nil { + cclog.Warnf("Error scanning job row in GetUsedNodes: %v", err) + continue + } + + if err := json.Unmarshal(rawResources, &resources); err != nil { + cclog.Warnf("Error unmarshaling resources for cluster %s: %v", cluster, err) + continue + } + + if _, ok := nodeSet[cluster]; !ok { + nodeSet[cluster] = make(map[string]struct{}) + } + + for _, res := range resources { + nodeSet[cluster][res.Hostname] = struct{}{} + } + } + + if err := rows.Err(); err != nil { + cclog.Errorf("Error iterating rows in GetUsedNodes: %v", err) + } + + nodeList := make(map[string][]string) + for cluster, nodes := range nodeSet { + // Convert map keys to slice + list := make([]string, 0, len(nodes)) + for node := range nodes { + list = append(list, node) + } + // set.Strings sorts the slice and ensures uniqueness + nodeList[cluster] = set.Strings(list) + } + + return nodeList +} From 71b75eea0e53b99b27b18ed78f5600ce4e199b6f Mon Sep 17 00:00:00 2001 From: Jan Eitzinger Date: Wed, 14 Jan 2026 08:49:55 +0100 Subject: [PATCH 32/59] Improve GetUsedNodes function --- internal/repository/job.go | 41 ++++++++++++++++++++++++++++---------- 1 file changed, 31 insertions(+), 10 deletions(-) diff --git a/internal/repository/job.go b/internal/repository/job.go index 1ff8047e..78e1f3fe 100644 --- a/internal/repository/job.go +++ b/internal/repository/job.go @@ -66,6 +66,7 @@ import ( "fmt" "maps" "math" + "sort" "strconv" "sync" "time" @@ -76,7 +77,6 @@ import ( "github.com/ClusterCockpit/cc-lib/v2/schema" sq "github.com/Masterminds/squirrel" "github.com/jmoiron/sqlx" - "github.com/xtgo/set" ) var ( @@ -774,7 +774,16 @@ func (r *JobRepository) UpdateFootprint( return stmt.Set("footprint", string(rawFootprint)), nil } -func (r *JobRepository) GetUsedNodes(ts uint64) map[string][]string { +// GetUsedNodes returns a map of cluster names to sorted lists of unique hostnames +// that are currently in use by jobs that started before the given timestamp and +// are still in running state. +// +// The timestamp parameter (ts) is compared against job.start_time to find +// relevant jobs. Returns an error if the database query fails or row iteration +// encounters errors. Individual row parsing errors are logged but don't fail +// the entire operation. +func (r *JobRepository) GetUsedNodes(ts uint64) (map[string][]string, error) { + // Note: Query expects index on (job_state, start_time) for optimal performance q := sq.Select("job.cluster", "job.resources").From("job"). Where("job.start_time < ?", ts). 
Where(sq.Eq{"job.job_state": "running"}) @@ -782,8 +791,7 @@ func (r *JobRepository) GetUsedNodes(ts uint64) map[string][]string { rows, err := q.RunWith(r.stmtCache).Query() if err != nil { queryString, queryVars, _ := q.ToSql() - cclog.Errorf("Error while running query '%s' %v: %v", queryString, queryVars, err) - return nil + return nil, fmt.Errorf("query failed [%s] %v: %w", queryString, queryVars, err) } defer rows.Close() @@ -794,16 +802,25 @@ func (r *JobRepository) GetUsedNodes(ts uint64) map[string][]string { cluster string rawResources []byte resources []*schema.Resource + skippedRows int ) for rows.Next() { if err := rows.Scan(&cluster, &rawResources); err != nil { cclog.Warnf("Error scanning job row in GetUsedNodes: %v", err) + skippedRows++ continue } + resources = resources[:0] // Clear slice, keep capacity if err := json.Unmarshal(rawResources, &resources); err != nil { cclog.Warnf("Error unmarshaling resources for cluster %s: %v", cluster, err) + skippedRows++ + continue + } + + if len(resources) == 0 { + cclog.Debugf("Job in cluster %s has no resources", cluster) continue } @@ -817,19 +834,23 @@ func (r *JobRepository) GetUsedNodes(ts uint64) map[string][]string { } if err := rows.Err(); err != nil { - cclog.Errorf("Error iterating rows in GetUsedNodes: %v", err) + return nil, fmt.Errorf("error iterating rows: %w", err) } - nodeList := make(map[string][]string) + if skippedRows > 0 { + cclog.Warnf("GetUsedNodes: Skipped %d rows due to parsing errors", skippedRows) + } + + // Convert sets to sorted slices + nodeList := make(map[string][]string, len(nodeSet)) for cluster, nodes := range nodeSet { - // Convert map keys to slice list := make([]string, 0, len(nodes)) for node := range nodes { list = append(list, node) } - // set.Strings sorts the slice and ensures uniqueness - nodeList[cluster] = set.Strings(list) + sort.Strings(list) + nodeList[cluster] = list } - return nodeList + return nodeList, nil } From 6cf59043a38ce70963e5a8b24046cd0c9820708c Mon Sep 17 00:00:00 2001 From: Jan Eitzinger Date: Wed, 14 Jan 2026 08:59:27 +0100 Subject: [PATCH 33/59] Review and improve, add documentation --- internal/repository/jobQuery.go | 128 +++++++++++++++++++++----------- 1 file changed, 86 insertions(+), 42 deletions(-) diff --git a/internal/repository/jobQuery.go b/internal/repository/jobQuery.go index 4655614f..745fa32d 100644 --- a/internal/repository/jobQuery.go +++ b/internal/repository/jobQuery.go @@ -2,6 +2,10 @@ // All rights reserved. This file is part of cc-backend. // Use of this source code is governed by a MIT-style // license that can be found in the LICENSE file. + +// Package repository provides job query functionality with filtering, pagination, +// and security controls. This file contains the main query builders and security +// checks for job retrieval operations. package repository import ( @@ -19,6 +23,22 @@ import ( sq "github.com/Masterminds/squirrel" ) +const ( + // Default initial capacity for job result slices + defaultJobsCapacity = 50 +) + +// QueryJobs retrieves jobs from the database with optional filtering, pagination, +// and sorting. Security controls are automatically applied based on the user context. +// +// Parameters: +// - ctx: Context containing user authentication information +// - filters: Optional job filters (cluster, state, user, time ranges, etc.) 
+// - page: Optional pagination parameters (page number and items per page) +// - order: Optional sorting specification (column or footprint field) +// +// Returns a slice of jobs matching the criteria, or an error if the query fails. +// The function enforces role-based access control through SecurityCheck. func (r *JobRepository) QueryJobs( ctx context.Context, filters []*model.JobFilter, @@ -33,18 +53,16 @@ func (r *JobRepository) QueryJobs( if order != nil { field := toSnakeCase(order.Field) if order.Type == "col" { - // "col": Fixed column name query switch order.Order { case model.SortDirectionEnumAsc: query = query.OrderBy(fmt.Sprintf("job.%s ASC", field)) case model.SortDirectionEnumDesc: query = query.OrderBy(fmt.Sprintf("job.%s DESC", field)) default: - return nil, errors.New("REPOSITORY/QUERY > invalid sorting order for column") + return nil, errors.New("invalid sorting order for column") } } else { - // "foot": Order by footprint JSON field values - // Verify and Search Only in Valid Jsons + // Order by footprint JSON field values query = query.Where("JSON_VALID(meta_data)") switch order.Order { case model.SortDirectionEnumAsc: @@ -52,7 +70,7 @@ func (r *JobRepository) QueryJobs( case model.SortDirectionEnumDesc: query = query.OrderBy(fmt.Sprintf("JSON_EXTRACT(footprint, \"$.%s\") DESC", field)) default: - return nil, errors.New("REPOSITORY/QUERY > invalid sorting order for footprint") + return nil, errors.New("invalid sorting order for footprint") } } } @@ -69,29 +87,35 @@ func (r *JobRepository) QueryJobs( rows, err := query.RunWith(r.stmtCache).Query() if err != nil { queryString, queryVars, _ := query.ToSql() - cclog.Errorf("Error while running query '%s' %v: %v", queryString, queryVars, err) - return nil, err + return nil, fmt.Errorf("query failed [%s] %v: %w", queryString, queryVars, err) } + defer rows.Close() - jobs := make([]*schema.Job, 0, 50) + jobs := make([]*schema.Job, 0, defaultJobsCapacity) for rows.Next() { job, err := scanJob(rows) if err != nil { - rows.Close() - cclog.Warn("Error while scanning rows (Jobs)") - return nil, err + cclog.Warnf("Error scanning job row: %v", err) + return nil, fmt.Errorf("failed to scan job row: %w", err) } jobs = append(jobs, job) } + if err := rows.Err(); err != nil { + return nil, fmt.Errorf("error iterating job rows: %w", err) + } + return jobs, nil } +// CountJobs returns the total number of jobs matching the given filters. +// Security controls are automatically applied based on the user context. +// Uses DISTINCT count to handle tag filters correctly (jobs may appear multiple +// times when joined with the tag table). func (r *JobRepository) CountJobs( ctx context.Context, filters []*model.JobFilter, ) (int, error) { - // DISTICT count for tags filters, does not affect other queries query, qerr := SecurityCheck(ctx, sq.Select("count(DISTINCT job.id)").From("job")) if qerr != nil { return 0, qerr @@ -103,12 +127,22 @@ func (r *JobRepository) CountJobs( var count int if err := query.RunWith(r.DB).Scan(&count); err != nil { - return 0, err + return 0, fmt.Errorf("failed to count jobs: %w", err) } return count, nil } +// SecurityCheckWithUser applies role-based access control filters to a job query +// based on the provided user's roles and permissions. 
+// +// Access rules by role: +// - API role (exclusive): Full access to all jobs +// - Admin/Support roles: Full access to all jobs +// - Manager role: Access to jobs in managed projects plus own jobs +// - User role: Access only to own jobs +// +// Returns an error if the user is nil or has no recognized roles. func SecurityCheckWithUser(user *schema.User, query sq.SelectBuilder) (sq.SelectBuilder, error) { if user == nil { var qnil sq.SelectBuilder @@ -116,32 +150,35 @@ func SecurityCheckWithUser(user *schema.User, query sq.SelectBuilder) (sq.Select } switch { - case len(user.Roles) == 1 && user.HasRole(schema.RoleApi): // API-User : All jobs + case len(user.Roles) == 1 && user.HasRole(schema.RoleApi): return query, nil - case user.HasAnyRole([]schema.Role{schema.RoleAdmin, schema.RoleSupport}): // Admin & Support : All jobs + case user.HasAnyRole([]schema.Role{schema.RoleAdmin, schema.RoleSupport}): return query, nil - case user.HasRole(schema.RoleManager): // Manager : Add filter for managed projects' jobs only + personal jobs + case user.HasRole(schema.RoleManager): if len(user.Projects) != 0 { return query.Where(sq.Or{sq.Eq{"job.project": user.Projects}, sq.Eq{"job.hpc_user": user.Username}}), nil - } else { - cclog.Debugf("Manager-User '%s' has no defined projects to lookup! Query only personal jobs ...", user.Username) - return query.Where("job.hpc_user = ?", user.Username), nil } - case user.HasRole(schema.RoleUser): // User : Only personal jobs + cclog.Debugf("Manager '%s' has no assigned projects, restricting to personal jobs", user.Username) return query.Where("job.hpc_user = ?", user.Username), nil - default: // No known Role, return error + case user.HasRole(schema.RoleUser): + return query.Where("job.hpc_user = ?", user.Username), nil + default: var qnil sq.SelectBuilder return qnil, fmt.Errorf("user has no or unknown roles") } } +// SecurityCheck extracts the user from the context and applies role-based access +// control filters to the query. This is a convenience wrapper around SecurityCheckWithUser. func SecurityCheck(ctx context.Context, query sq.SelectBuilder) (sq.SelectBuilder, error) { user := GetUserFromContext(ctx) - return SecurityCheckWithUser(user, query) } -// Build a sq.SelectBuilder out of a schema.JobFilter. +// BuildWhereClause constructs SQL WHERE conditions from a JobFilter and applies +// them to the query. Supports filtering by job properties (cluster, state, user), +// time ranges, resource usage, tags, and JSON field searches in meta_data, +// footprint, and resources columns. 
func BuildWhereClause(filter *model.JobFilter, query sq.SelectBuilder) sq.SelectBuilder { // Primary Key if filter.DbID != nil { @@ -205,23 +242,24 @@ func BuildWhereClause(filter *model.JobFilter, query sq.SelectBuilder) sq.Select // Queries Within JSONs if filter.MetricStats != nil { for _, ms := range filter.MetricStats { - query = buildFloatJsonCondition(ms.MetricName, ms.Range, query) + query = buildFloatJSONCondition(ms.MetricName, ms.Range, query) } } if filter.Node != nil { - query = buildResourceJsonCondition("hostname", filter.Node, query) + query = buildResourceJSONCondition("hostname", filter.Node, query) } if filter.JobName != nil { - query = buildMetaJsonCondition("jobName", filter.JobName, query) + query = buildMetaJSONCondition("jobName", filter.JobName, query) } if filter.Schedule != nil { interactiveJobname := "interactive" - if *filter.Schedule == "interactive" { + switch *filter.Schedule { + case "interactive": iFilter := model.StringInput{Eq: &interactiveJobname} - query = buildMetaJsonCondition("jobName", &iFilter, query) - } else if *filter.Schedule == "batch" { + query = buildMetaJSONCondition("jobName", &iFilter, query) + case "batch": sFilter := model.StringInput{Neq: &interactiveJobname} - query = buildMetaJsonCondition("jobName", &sFilter, query) + query = buildMetaJSONCondition("jobName", &sFilter, query) } } @@ -235,14 +273,18 @@ func BuildWhereClause(filter *model.JobFilter, query sq.SelectBuilder) sq.Select return query } +// buildIntCondition creates a BETWEEN clause for integer range filters. func buildIntCondition(field string, cond *config.IntRange, query sq.SelectBuilder) sq.SelectBuilder { return query.Where(field+" BETWEEN ? AND ?", cond.From, cond.To) } +// buildFloatCondition creates a BETWEEN clause for float range filters. func buildFloatCondition(field string, cond *model.FloatRange, query sq.SelectBuilder) sq.SelectBuilder { return query.Where(field+" BETWEEN ? AND ?", cond.From, cond.To) } +// buildTimeCondition creates time range filters supporting absolute timestamps, +// relative time ranges (last6h, last24h, last7d, last30d), or open-ended ranges. func buildTimeCondition(field string, cond *config.TimeRange, query sq.SelectBuilder) sq.SelectBuilder { if cond.From != nil && cond.To != nil { return query.Where(field+" BETWEEN ? AND ?", cond.From.Unix(), cond.To.Unix()) @@ -272,12 +314,14 @@ func buildTimeCondition(field string, cond *config.TimeRange, query sq.SelectBui } } -func buildFloatJsonCondition(condName string, condRange *model.FloatRange, query sq.SelectBuilder) sq.SelectBuilder { - // Verify and Search Only in Valid Jsons +// buildFloatJSONCondition creates a filter on a numeric field within the footprint JSON column. +func buildFloatJSONCondition(condName string, condRange *model.FloatRange, query sq.SelectBuilder) sq.SelectBuilder { query = query.Where("JSON_VALID(footprint)") return query.Where("JSON_EXTRACT(footprint, \"$."+condName+"\") BETWEEN ? AND ?", condRange.From, condRange.To) } +// buildStringCondition creates filters for string fields supporting equality, +// inequality, prefix, suffix, substring, and IN list matching. 
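+//
+// Example (illustrative): cond.Eq = "testuser" produces field = ? with
+// "testuser" bound; a prefix match is typically compiled to a LIKE pattern
+// of the form field LIKE 'testuser%'.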
func buildStringCondition(field string, cond *model.StringInput, query sq.SelectBuilder) sq.SelectBuilder { if cond.Eq != nil { return query.Where(field+" = ?", *cond.Eq) @@ -302,10 +346,9 @@ func buildStringCondition(field string, cond *model.StringInput, query sq.Select return query } -func buildMetaJsonCondition(jsonField string, cond *model.StringInput, query sq.SelectBuilder) sq.SelectBuilder { - // Verify and Search Only in Valid Jsons +// buildMetaJSONCondition creates filters on fields within the meta_data JSON column. +func buildMetaJSONCondition(jsonField string, cond *model.StringInput, query sq.SelectBuilder) sq.SelectBuilder { query = query.Where("JSON_VALID(meta_data)") - // add "AND" Sql query Block for field match if cond.Eq != nil { return query.Where("JSON_EXTRACT(meta_data, \"$."+jsonField+"\") = ?", *cond.Eq) } @@ -324,10 +367,10 @@ func buildMetaJsonCondition(jsonField string, cond *model.StringInput, query sq. return query } -func buildResourceJsonCondition(jsonField string, cond *model.StringInput, query sq.SelectBuilder) sq.SelectBuilder { - // Verify and Search Only in Valid Jsons +// buildResourceJSONCondition creates filters on fields within the resources JSON array column. +// Uses json_each to search within array elements. +func buildResourceJSONCondition(jsonField string, cond *model.StringInput, query sq.SelectBuilder) sq.SelectBuilder { query = query.Where("JSON_VALID(resources)") - // add "AND" Sql query Block for field match if cond.Eq != nil { return query.Where("EXISTS (SELECT 1 FROM json_each(job.resources) WHERE json_extract(value, \"$."+jsonField+"\") = ?)", *cond.Eq) } @@ -351,15 +394,16 @@ var ( matchAllCap = regexp.MustCompile("([a-z0-9])([A-Z])") ) +// toSnakeCase converts camelCase strings to snake_case for SQL column names. +// Includes security checks to prevent SQL injection attempts. +// Panics if potentially dangerous characters are detected. func toSnakeCase(str string) string { for _, c := range str { - if c == '\'' || c == '\\' { - cclog.Panic("toSnakeCase() attack vector!") + if c == '\'' || c == '\\' || c == '"' || c == ';' || c == '-' || c == ' ' { + cclog.Panicf("toSnakeCase: potentially dangerous character detected in input: %q", str) } } - str = strings.ReplaceAll(str, "'", "") - str = strings.ReplaceAll(str, "\\", "") snake := matchFirstCap.ReplaceAllString(str, "${1}_${2}") snake = matchAllCap.ReplaceAllString(snake, "${1}_${2}") return strings.ToLower(snake) From 9e542dc2006c2dbe3fc4c7206254d2590affdfd8 Mon Sep 17 00:00:00 2001 From: Jan Eitzinger Date: Wed, 14 Jan 2026 09:26:03 +0100 Subject: [PATCH 34/59] Review and improve, add documentation --- internal/repository/jobFind.go | 83 +++++++++++++++++++++++----------- 1 file changed, 57 insertions(+), 26 deletions(-) diff --git a/internal/repository/jobFind.go b/internal/repository/jobFind.go index ff2c27aa..8f6daeb4 100644 --- a/internal/repository/jobFind.go +++ b/internal/repository/jobFind.go @@ -27,6 +27,10 @@ func (r *JobRepository) Find( cluster *string, startTime *int64, ) (*schema.Job, error) { + if jobID == nil { + return nil, fmt.Errorf("jobID cannot be nil") + } + start := time.Now() q := sq.Select(jobColumns...).From("job"). 
Where("job.job_id = ?", *jobID) @@ -38,17 +42,27 @@ func (r *JobRepository) Find( q = q.Where("job.start_time = ?", *startTime) } - q = q.OrderBy("job.id DESC") // always use newest matching job by db id if more than one match + q = q.OrderBy("job.id DESC").Limit(1) // always use newest matching job by db id if more than one match cclog.Debugf("Timer Find %s", time.Since(start)) return scanJob(q.RunWith(r.stmtCache).QueryRow()) } +// FindCached executes a SQL query to find a specific batch job from the job_cache table. +// The job is queried using the batch job id, and optionally filtered by cluster name +// and start time (UNIX epoch time seconds). This method uses cached job data which +// may be stale but provides faster access than Find(). +// It returns a pointer to a schema.Job data structure and an error variable. +// To check if no job was found test err == sql.ErrNoRows func (r *JobRepository) FindCached( jobID *int64, cluster *string, startTime *int64, ) (*schema.Job, error) { + if jobID == nil { + return nil, fmt.Errorf("jobID cannot be nil") + } + q := sq.Select(jobCacheColumns...).From("job_cache"). Where("job_cache.job_id = ?", *jobID) @@ -59,7 +73,7 @@ func (r *JobRepository) FindCached( q = q.Where("job_cache.start_time = ?", *startTime) } - q = q.OrderBy("job_cache.id DESC") // always use newest matching job by db id if more than one match + q = q.OrderBy("job_cache.id DESC").Limit(1) // always use newest matching job by db id if more than one match return scanJob(q.RunWith(r.stmtCache).QueryRow()) } @@ -74,6 +88,10 @@ func (r *JobRepository) FindAll( cluster *string, startTime *int64, ) ([]*schema.Job, error) { + if jobID == nil { + return nil, fmt.Errorf("jobID cannot be nil") + } + start := time.Now() q := sq.Select(jobColumns...).From("job"). Where("job.job_id = ?", *jobID) @@ -87,8 +105,8 @@ func (r *JobRepository) FindAll( rows, err := q.RunWith(r.stmtCache).Query() if err != nil { - cclog.Error("Error while running query") - return nil, err + cclog.Errorf("Error while running FindAll query for jobID=%d: %v", *jobID, err) + return nil, fmt.Errorf("failed to execute FindAll query: %w", err) } defer rows.Close() @@ -96,8 +114,8 @@ func (r *JobRepository) FindAll( for rows.Next() { job, err := scanJob(rows) if err != nil { - cclog.Warn("Error while scanning rows") - return nil, err + cclog.Warnf("Error while scanning rows in FindAll: %v", err) + return nil, fmt.Errorf("failed to scan job row: %w", err) } jobs = append(jobs, job) } @@ -120,8 +138,8 @@ func (r *JobRepository) GetJobList(limit int, offset int) ([]int64, error) { rows, err := query.RunWith(r.stmtCache).Query() if err != nil { - cclog.Error("Error while running query") - return nil, err + cclog.Errorf("Error while running GetJobList query (limit=%d, offset=%d): %v", limit, offset, err) + return nil, fmt.Errorf("failed to execute GetJobList query: %w", err) } defer rows.Close() @@ -130,8 +148,8 @@ func (r *JobRepository) GetJobList(limit int, offset int) ([]int64, error) { var id int64 err := rows.Scan(&id) if err != nil { - cclog.Warn("Error while scanning rows") - return nil, err + cclog.Warnf("Error while scanning rows in GetJobList: %v", err) + return nil, fmt.Errorf("failed to scan job ID: %w", err) } jl = append(jl, id) } @@ -202,10 +220,10 @@ func (r *JobRepository) FindByJobID(ctx context.Context, jobID int64, startTime return scanJob(q.RunWith(r.stmtCache).QueryRow()) } -// IsJobOwner executes a SQL query to find a specific batch job. 
-// The job is queried using the slurm id,a username and the cluster. -// It returns a bool. -// If job was found, user is owner: test err != sql.ErrNoRows +// IsJobOwner checks if the specified user owns the batch job identified by jobID, +// startTime, and cluster. Returns true if the user is the owner, false otherwise. +// This method does not return errors; it returns false for both non-existent jobs +// and jobs owned by other users. func (r *JobRepository) IsJobOwner(jobID int64, startTime int64, user string, cluster string) bool { q := sq.Select("id"). From("job"). @@ -215,6 +233,9 @@ func (r *JobRepository) IsJobOwner(jobID int64, startTime int64, user string, cl Where("job.start_time = ?", startTime) _, err := scanJob(q.RunWith(r.stmtCache).QueryRow()) + if err != nil && err != sql.ErrNoRows { + cclog.Warnf("IsJobOwner: unexpected error for jobID=%d, user=%s, cluster=%s: %v", jobID, user, cluster, err) + } return err != sql.ErrNoRows } @@ -232,6 +253,11 @@ func (r *JobRepository) FindConcurrentJobs( } query = query.Where("cluster = ?", job.Cluster) + + if len(job.Resources) == 0 { + return nil, fmt.Errorf("job has no resources defined") + } + var startTime int64 var stopTime int64 @@ -244,10 +270,15 @@ func (r *JobRepository) FindConcurrentJobs( stopTime = startTime + int64(job.Duration) } - // Add 200s overlap for jobs start time at the end - startTimeTail := startTime + 10 - stopTimeTail := stopTime - 200 - startTimeFront := startTime + 200 + // Time buffer constants for finding overlapping jobs + // overlapBufferStart: 10s grace period at job start to catch jobs starting just after + // overlapBufferEnd: 200s buffer at job end to account for scheduling/cleanup overlap + const overlapBufferStart = 10 + const overlapBufferEnd = 200 + + startTimeTail := startTime + overlapBufferStart + stopTimeTail := stopTime - overlapBufferEnd + startTimeFront := startTime + overlapBufferEnd queryRunning := query.Where("job.job_state = ?").Where("(job.start_time BETWEEN ? AND ? 
OR job.start_time < ?)", "running", startTimeTail, stopTimeTail, startTime) @@ -261,8 +292,8 @@ func (r *JobRepository) FindConcurrentJobs( rows, err := query.RunWith(r.stmtCache).Query() if err != nil { - cclog.Errorf("Error while running query: %v", err) - return nil, err + cclog.Errorf("Error while running concurrent jobs query: %v", err) + return nil, fmt.Errorf("failed to execute concurrent jobs query: %w", err) } defer rows.Close() @@ -273,8 +304,8 @@ func (r *JobRepository) FindConcurrentJobs( var id, jobID, startTime sql.NullInt64 if err = rows.Scan(&id, &jobID, &startTime); err != nil { - cclog.Warn("Error while scanning rows") - return nil, err + cclog.Warnf("Error while scanning concurrent job rows: %v", err) + return nil, fmt.Errorf("failed to scan concurrent job row: %w", err) } if id.Valid { @@ -289,8 +320,8 @@ func (r *JobRepository) FindConcurrentJobs( rows, err = queryRunning.RunWith(r.stmtCache).Query() if err != nil { - cclog.Errorf("Error while running query: %v", err) - return nil, err + cclog.Errorf("Error while running concurrent running jobs query: %v", err) + return nil, fmt.Errorf("failed to execute concurrent running jobs query: %w", err) } defer rows.Close() @@ -298,8 +329,8 @@ func (r *JobRepository) FindConcurrentJobs( var id, jobID, startTime sql.NullInt64 if err := rows.Scan(&id, &jobID, &startTime); err != nil { - cclog.Warn("Error while scanning rows") - return nil, err + cclog.Warnf("Error while scanning running concurrent job rows: %v", err) + return nil, fmt.Errorf("failed to scan running concurrent job row: %w", err) } if id.Valid { From b2f870e3c04a41b4fcca658afa59677d36a23146 Mon Sep 17 00:00:00 2001 From: Jan Eitzinger Date: Wed, 14 Jan 2026 10:08:06 +0100 Subject: [PATCH 35/59] Convert nodestate nats API to influx line protocol payload. Review and add doc comments. Improve and extend tests --- internal/api/nats.go | 143 +++++++++++++++++++++++++++----- internal/api/nats_test.go | 170 +++++++++++++++++++++++++++----------- 2 files changed, 243 insertions(+), 70 deletions(-) diff --git a/internal/api/nats.go b/internal/api/nats.go index efd04406..48c6449b 100644 --- a/internal/api/nats.go +++ b/internal/api/nats.go @@ -6,7 +6,6 @@ package api import ( - "bytes" "database/sql" "encoding/json" "strings" @@ -26,7 +25,40 @@ import ( ) // NatsAPI provides NATS subscription-based handlers for Job and Node operations. -// It mirrors the functionality of the REST API but uses NATS messaging. +// It mirrors the functionality of the REST API but uses NATS messaging with +// InfluxDB line protocol as the message format. +// +// # Message Format +// +// All NATS messages use InfluxDB line protocol format (https://docs.influxdata.com/influxdb/v2.0/reference/syntax/line-protocol/) +// with the following structure: +// +// measurement,tag1=value1,tag2=value2 field1=value1,field2=value2 timestamp +// +// # Job Events +// +// Job start/stop events use the "job" measurement with a "function" tag to distinguish operations: +// +// job,function=start_job event="{...JSON payload...}" +// job,function=stop_job event="{...JSON payload...}" +// +// The JSON payload in the "event" field follows the schema.Job or StopJobAPIRequest structure. 
+// +// Example job start message: +// +// job,function=start_job event="{\"jobId\":1001,\"user\":\"testuser\",\"cluster\":\"testcluster\",...}" 1234567890000000000 +// +// # Node State Events +// +// Node state updates use the "nodestate" measurement with cluster information: +// +// nodestate event="{...JSON payload...}" +// +// The JSON payload follows the UpdateNodeStatesRequest structure. +// +// Example node state message: +// +// nodestate event="{\"cluster\":\"testcluster\",\"nodes\":[{\"hostname\":\"node01\",\"states\":[\"idle\"]}]}" 1234567890000000000 type NatsAPI struct { JobRepository *repository.JobRepository // RepositoryMutex protects job creation operations from race conditions @@ -67,10 +99,12 @@ func (api *NatsAPI) StartSubscriptions() error { return nil } +// processJobEvent routes job event messages to the appropriate handler based on the "function" tag. +// Validates that required tags and fields are present before processing. func (api *NatsAPI) processJobEvent(msg lp.CCMessage) { function, ok := msg.GetTag("function") if !ok { - cclog.Errorf("Job event is missing tag 'function': %+v", msg) + cclog.Errorf("Job event is missing required tag 'function': measurement=%s", msg.Name()) return } @@ -78,43 +112,66 @@ func (api *NatsAPI) processJobEvent(msg lp.CCMessage) { case "start_job": v, ok := msg.GetEventValue() if !ok { - cclog.Errorf("Job event is missing event value: %+v", msg) + cclog.Errorf("Job start event is missing event field with JSON payload") + return } api.handleStartJob(v) case "stop_job": v, ok := msg.GetEventValue() if !ok { - cclog.Errorf("Job event is missing event value: %+v", msg) + cclog.Errorf("Job stop event is missing event field with JSON payload") + return } api.handleStopJob(v) + default: - cclog.Warnf("Unimplemented job event: %+v", msg) + cclog.Warnf("Unknown job event function '%s', expected 'start_job' or 'stop_job'", function) } } +// handleJobEvent processes job-related messages received via NATS using InfluxDB line protocol. +// The message must be in line protocol format with measurement="job" and include: +// - tag "function" with value "start_job" or "stop_job" +// - field "event" containing JSON payload (schema.Job or StopJobAPIRequest) +// +// Example: job,function=start_job event="{\"jobId\":1001,...}" 1234567890000000000 func (api *NatsAPI) handleJobEvent(subject string, data []byte) { + if len(data) == 0 { + cclog.Warnf("NATS %s: received empty message", subject) + return + } + d := influx.NewDecoderWithBytes(data) for d.Next() { m, err := receivers.DecodeInfluxMessage(d) if err != nil { - cclog.Errorf("NATS %s: Failed to decode message: %v", subject, err) + cclog.Errorf("NATS %s: failed to decode InfluxDB line protocol message: %v", subject, err) return } - if m.IsEvent() { - if m.Name() == "job" { - api.processJobEvent(m) - } + if !m.IsEvent() { + cclog.Warnf("NATS %s: received non-event message, skipping", subject) + continue } + if m.Name() == "job" { + api.processJobEvent(m) + } else { + cclog.Warnf("NATS %s: unexpected measurement name '%s', expected 'job'", subject, m.Name()) + } } } // handleStartJob processes job start messages received via NATS. -// Expected JSON payload follows the schema.Job structure. +// The payload parameter contains JSON following the schema.Job structure. +// Jobs are validated, checked for duplicates, and inserted into the database. 
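
One behavior worth noting before the handler bodies below: the stop handler decodes its JSON payload with DisallowUnknownFields, so unknown keys are rejected instead of being silently dropped. A self-contained sketch of that failure mode; the struct is a hypothetical stand-in, the real request type is StopJobAPIRequest.

    package main

    import (
        "encoding/json"
        "fmt"
        "strings"
    )

    // stopReq is a hypothetical stand-in for StopJobAPIRequest;
    // the field set here is illustrative only.
    type stopReq struct {
        JobID    int64  `json:"jobId"`
        Cluster  string `json:"cluster"`
        StopTime int64  `json:"stopTime"`
    }

    func main() {
        // "stop_time" does not match any known field (the tag is "stopTime"),
        // so strict decoding fails instead of ignoring the value.
        payload := `{"jobId":1001,"cluster":"testcluster","stop_time":1234568000}`

        dec := json.NewDecoder(strings.NewReader(payload))
        dec.DisallowUnknownFields()

        var req stopReq
        if err := dec.Decode(&req); err != nil {
            fmt.Println("decode failed:", err)
            // Prints: decode failed: json: unknown field "stop_time"
        }
    }
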
func (api *NatsAPI) handleStartJob(payload string) { + if payload == "" { + cclog.Error("NATS start job: payload is empty") + return + } req := schema.Job{ Shared: "none", MonitoringStatus: schema.MonitoringStatusRunningOrArchiving, @@ -173,8 +230,13 @@ func (api *NatsAPI) handleStartJob(payload string) { } // handleStopJob processes job stop messages received via NATS. -// Expected JSON payload follows the StopJobAPIRequest structure. +// The payload parameter contains JSON following the StopJobAPIRequest structure. +// The job is marked as stopped in the database and archiving is triggered if monitoring is enabled. func (api *NatsAPI) handleStopJob(payload string) { + if payload == "" { + cclog.Error("NATS stop job: payload is empty") + return + } var req StopJobAPIRequest dec := json.NewDecoder(strings.NewReader(payload)) @@ -243,15 +305,21 @@ func (api *NatsAPI) handleStopJob(payload string) { archiver.TriggerArchiving(job) } -// handleNodeState processes node state update messages received via NATS. -// Expected JSON payload follows the UpdateNodeStatesRequest structure. -func (api *NatsAPI) handleNodeState(subject string, data []byte) { +// processNodestateEvent extracts and processes node state data from the InfluxDB message. +// Updates node states in the repository for all nodes in the payload. +func (api *NatsAPI) processNodestateEvent(msg lp.CCMessage) { + v, ok := msg.GetEventValue() + if !ok { + cclog.Errorf("Nodestate event is missing event field with JSON payload") + return + } + var req UpdateNodeStatesRequest - dec := json.NewDecoder(bytes.NewReader(data)) + dec := json.NewDecoder(strings.NewReader(v)) dec.DisallowUnknownFields() if err := dec.Decode(&req); err != nil { - cclog.Errorf("NATS %s: parsing request failed: %v", subject, err) + cclog.Errorf("NATS nodestate: parsing request failed: %v", err) return } @@ -270,10 +338,43 @@ func (api *NatsAPI) handleNodeState(subject string, data []byte) { } if err := repo.UpdateNodeState(node.Hostname, req.Cluster, &nodeState); err != nil { - cclog.Errorf("NATS %s: updating node state for %s on %s failed: %v", - subject, node.Hostname, req.Cluster, err) + cclog.Errorf("NATS nodestate: updating node state for %s on %s failed: %v", + node.Hostname, req.Cluster, err) } } - cclog.Debugf("NATS %s: updated %d node states for cluster %s", subject, len(req.Nodes), req.Cluster) + cclog.Debugf("NATS nodestate: updated %d node states for cluster %s", len(req.Nodes), req.Cluster) +} + +// handleNodeState processes node state update messages received via NATS using InfluxDB line protocol. 
+// The message must be in line protocol format with measurement="nodestate" and include: +// - field "event" containing JSON payload (UpdateNodeStatesRequest) +// +// Example: nodestate event="{\"cluster\":\"testcluster\",\"nodes\":[...]}" 1234567890000000000 +func (api *NatsAPI) handleNodeState(subject string, data []byte) { + if len(data) == 0 { + cclog.Warnf("NATS %s: received empty message", subject) + return + } + + d := influx.NewDecoderWithBytes(data) + + for d.Next() { + m, err := receivers.DecodeInfluxMessage(d) + if err != nil { + cclog.Errorf("NATS %s: failed to decode InfluxDB line protocol message: %v", subject, err) + return + } + + if !m.IsEvent() { + cclog.Warnf("NATS %s: received non-event message, skipping", subject) + continue + } + + if m.Name() == "nodestate" { + api.processNodestateEvent(m) + } else { + cclog.Warnf("NATS %s: unexpected measurement name '%s', expected 'nodestate'", subject, m.Name()) + } + } } diff --git a/internal/api/nats_test.go b/internal/api/nats_test.go index 319668bb..4b1431cb 100644 --- a/internal/api/nats_test.go +++ b/internal/api/nats_test.go @@ -603,25 +603,13 @@ func TestNatsHandleNodeState(t *testing.T) { tests := []struct { name string - payload string + data []byte expectError bool validateFn func(t *testing.T) }{ { - name: "valid node state update", - payload: `{ - "cluster": "testcluster", - "nodes": [ - { - "hostname": "host123", - "states": ["allocated"], - "cpusAllocated": 8, - "memoryAllocated": 16384, - "gpusAllocated": 0, - "jobsRunning": 1 - } - ] - }`, + name: "valid node state update", + data: []byte(`nodestate event="{\"cluster\":\"testcluster\",\"nodes\":[{\"hostname\":\"host123\",\"states\":[\"allocated\"],\"cpusAllocated\":8,\"memoryAllocated\":16384,\"gpusAllocated\":0,\"jobsRunning\":1}]}" 1234567890000000000`), expectError: false, validateFn: func(t *testing.T) { // In a full test, we would verify the node state was updated in the database @@ -629,51 +617,35 @@ func TestNatsHandleNodeState(t *testing.T) { }, }, { - name: "multiple nodes", - payload: `{ - "cluster": "testcluster", - "nodes": [ - { - "hostname": "host123", - "states": ["idle"], - "cpusAllocated": 0, - "memoryAllocated": 0, - "gpusAllocated": 0, - "jobsRunning": 0 - }, - { - "hostname": "host124", - "states": ["allocated"], - "cpusAllocated": 4, - "memoryAllocated": 8192, - "gpusAllocated": 1, - "jobsRunning": 1 - } - ] - }`, + name: "multiple nodes", + data: []byte(`nodestate event="{\"cluster\":\"testcluster\",\"nodes\":[{\"hostname\":\"host123\",\"states\":[\"idle\"],\"cpusAllocated\":0,\"memoryAllocated\":0,\"gpusAllocated\":0,\"jobsRunning\":0},{\"hostname\":\"host124\",\"states\":[\"allocated\"],\"cpusAllocated\":4,\"memoryAllocated\":8192,\"gpusAllocated\":1,\"jobsRunning\":1}]}" 1234567890000000000`), expectError: false, }, { - name: "invalid JSON", - payload: `{ - "cluster": "testcluster", - "nodes": "not an array" - }`, + name: "invalid JSON in event field", + data: []byte(`nodestate event="{\"cluster\":\"testcluster\",\"nodes\":\"not an array\"}" 1234567890000000000`), expectError: true, }, { - name: "empty nodes array", - payload: `{ - "cluster": "testcluster", - "nodes": [] - }`, + name: "empty nodes array", + data: []byte(`nodestate event="{\"cluster\":\"testcluster\",\"nodes\":[]}" 1234567890000000000`), expectError: false, // Empty array should not cause error }, + { + name: "invalid line protocol format", + data: []byte(`invalid line protocol format`), + expectError: true, + }, + { + name: "empty data", + data: []byte(``), + expectError: 
false, // Should be handled gracefully with warning + }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - natsAPI.handleNodeState("test.subject", []byte(tt.payload)) + natsAPI.handleNodeState("test.subject", tt.data) // Allow some time for async operations time.Sleep(50 * time.Millisecond) @@ -789,7 +761,7 @@ func TestNatsHandleJobEvent(t *testing.T) { }{ { name: "valid influx line protocol", - data: []byte(`job,function=start_job event="{\"jobId\":4001,\"user\":\"testuser\",\"project\":\"testproj\",\"cluster\":\"testcluster\",\"partition\":\"main\",\"walltime\":3600,\"numNodes\":1,\"numHwthreads\":8,\"numAcc\":0,\"shared\":\"none\",\"monitoringStatus\":1,\"smt\":1,\"resources\":[{\"hostname\":\"host123\",\"hwthreads\":[0,1,2,3]}],\"startTime\":1234567890}"`), + data: []byte(`job,function=start_job event="{\"jobId\":4001,\"user\":\"testuser\",\"project\":\"testproj\",\"cluster\":\"testcluster\",\"partition\":\"main\",\"walltime\":3600,\"numNodes\":1,\"numHwthreads\":8,\"numAcc\":0,\"shared\":\"none\",\"monitoringStatus\":1,\"smt\":1,\"resources\":[{\"hostname\":\"host123\",\"hwthreads\":[0,1,2,3]}],\"startTime\":1234567890}" 1234567890000000000`), expectError: false, }, { @@ -814,6 +786,106 @@ func TestNatsHandleJobEvent(t *testing.T) { } } +func TestNatsHandleJobEventEdgeCases(t *testing.T) { + natsAPI := setupNatsTest(t) + t.Cleanup(cleanupNatsTest) + + tests := []struct { + name string + data []byte + expectError bool + description string + }{ + { + name: "non-event message (metric data)", + data: []byte(`job,function=start_job value=123.45 1234567890000000000`), + expectError: false, + description: "Should skip non-event messages gracefully", + }, + { + name: "wrong measurement name", + data: []byte(`wrongmeasurement,function=start_job event="{}" 1234567890000000000`), + expectError: false, + description: "Should warn about unexpected measurement but not fail", + }, + { + name: "missing event field", + data: []byte(`job,function=start_job other_field="value" 1234567890000000000`), + expectError: true, + description: "Should error when event field is missing", + }, + { + name: "multiple measurements in one message", + data: []byte("job,function=start_job event=\"{}\" 1234567890000000000\njob,function=stop_job event=\"{}\" 1234567890000000000"), + expectError: false, + description: "Should process multiple lines", + }, + { + name: "escaped quotes in JSON payload", + data: []byte(`job,function=start_job event="{\"jobId\":6001,\"user\":\"test\\\"user\",\"cluster\":\"test\"}" 1234567890000000000`), + expectError: true, + description: "Should handle escaped quotes (though JSON parsing may fail)", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + natsAPI.handleJobEvent("test.subject", tt.data) + time.Sleep(50 * time.Millisecond) + }) + } +} + +func TestNatsHandleNodeStateEdgeCases(t *testing.T) { + natsAPI := setupNatsTest(t) + t.Cleanup(cleanupNatsTest) + + tests := []struct { + name string + data []byte + expectError bool + description string + }{ + { + name: "missing cluster field in JSON", + data: []byte(`nodestate event="{\"nodes\":[]}" 1234567890000000000`), + expectError: true, + description: "Should fail when cluster is missing", + }, + { + name: "malformed JSON with unescaped quotes", + data: []byte(`nodestate event="{\"cluster\":\"test"cluster\",\"nodes\":[]}" 1234567890000000000`), + expectError: true, + description: "Should fail on malformed JSON", + }, + { + name: "unicode characters in hostname", + data: []byte(`nodestate 
event="{\"cluster\":\"testcluster\",\"nodes\":[{\"hostname\":\"host-ñ123\",\"states\":[\"idle\"],\"cpusAllocated\":0,\"memoryAllocated\":0,\"gpusAllocated\":0,\"jobsRunning\":0}]}" 1234567890000000000`), + expectError: false, + description: "Should handle unicode characters", + }, + { + name: "very large node count", + data: []byte(`nodestate event="{\"cluster\":\"testcluster\",\"nodes\":[{\"hostname\":\"node1\",\"states\":[\"idle\"],\"cpusAllocated\":0,\"memoryAllocated\":0,\"gpusAllocated\":0,\"jobsRunning\":0},{\"hostname\":\"node2\",\"states\":[\"idle\"],\"cpusAllocated\":0,\"memoryAllocated\":0,\"gpusAllocated\":0,\"jobsRunning\":0},{\"hostname\":\"node3\",\"states\":[\"idle\"],\"cpusAllocated\":0,\"memoryAllocated\":0,\"gpusAllocated\":0,\"jobsRunning\":0}]}" 1234567890000000000`), + expectError: false, + description: "Should handle multiple nodes efficiently", + }, + { + name: "timestamp in past", + data: []byte(`nodestate event="{\"cluster\":\"testcluster\",\"nodes\":[]}" 1000000000000000000`), + expectError: false, + description: "Should accept any valid timestamp", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + natsAPI.handleNodeState("test.subject", tt.data) + time.Sleep(50 * time.Millisecond) + }) + } +} + func TestNatsHandleStartJobDuplicatePrevention(t *testing.T) { natsAPI := setupNatsTest(t) t.Cleanup(cleanupNatsTest) From 19402d30af2c984e335099933206ecb7c8b6e8bc Mon Sep 17 00:00:00 2001 From: Jan Eitzinger Date: Wed, 14 Jan 2026 10:09:19 +0100 Subject: [PATCH 36/59] Review and improve error messages and doc comments --- internal/repository/job.go | 149 +++++++++++++++++++++++-------------- 1 file changed, 94 insertions(+), 55 deletions(-) diff --git a/internal/repository/job.go b/internal/repository/job.go index 78e1f3fe..293c28d4 100644 --- a/internal/repository/job.go +++ b/internal/repository/job.go @@ -156,27 +156,41 @@ func scanJob(row interface{ Scan(...any) error }) (*schema.Job, error) { return job, nil } +// Optimize performs database optimization by running VACUUM command. +// This reclaims unused space and defragments the database file. +// Should be run periodically during maintenance windows. func (r *JobRepository) Optimize() error { if _, err := r.DB.Exec(`VACUUM`); err != nil { - return err + cclog.Errorf("Error while executing VACUUM: %v", err) + return fmt.Errorf("failed to optimize database: %w", err) } return nil } +// Flush removes all data from job-related tables (jobtag, tag, job). +// WARNING: This is a destructive operation that deletes all job data. +// Use with extreme caution, typically only for testing or complete resets. 
func (r *JobRepository) Flush() error { if _, err := r.DB.Exec(`DELETE FROM jobtag`); err != nil { - return err + cclog.Errorf("Error while deleting from jobtag table: %v", err) + return fmt.Errorf("failed to flush jobtag table: %w", err) } if _, err := r.DB.Exec(`DELETE FROM tag`); err != nil { - return err + cclog.Errorf("Error while deleting from tag table: %v", err) + return fmt.Errorf("failed to flush tag table: %w", err) } if _, err := r.DB.Exec(`DELETE FROM job`); err != nil { - return err + cclog.Errorf("Error while deleting from job table: %v", err) + return fmt.Errorf("failed to flush job table: %w", err) } return nil } func (r *JobRepository) FetchMetadata(job *schema.Job) (map[string]string, error) { + if job == nil { + return nil, fmt.Errorf("job cannot be nil") + } + start := time.Now() cachekey := fmt.Sprintf("metadata:%d", job.ID) if cached := r.cache.Get(cachekey, nil); cached != nil { @@ -186,8 +200,8 @@ func (r *JobRepository) FetchMetadata(job *schema.Job) (map[string]string, error if err := sq.Select("job.meta_data").From("job").Where("job.id = ?", job.ID). RunWith(r.stmtCache).QueryRow().Scan(&job.RawMetaData); err != nil { - cclog.Warn("Error while scanning for job metadata") - return nil, err + cclog.Warnf("Error while scanning for job metadata (ID=%d): %v", job.ID, err) + return nil, fmt.Errorf("failed to fetch metadata for job %d: %w", job.ID, err) } if len(job.RawMetaData) == 0 { @@ -195,8 +209,8 @@ func (r *JobRepository) FetchMetadata(job *schema.Job) (map[string]string, error } if err := json.Unmarshal(job.RawMetaData, &job.MetaData); err != nil { - cclog.Warn("Error while unmarshaling raw metadata json") - return nil, err + cclog.Warnf("Error while unmarshaling raw metadata json (ID=%d): %v", job.ID, err) + return nil, fmt.Errorf("failed to unmarshal metadata for job %d: %w", job.ID, err) } r.cache.Put(cachekey, job.MetaData, len(job.RawMetaData), 24*time.Hour) @@ -205,6 +219,10 @@ func (r *JobRepository) FetchMetadata(job *schema.Job) (map[string]string, error } func (r *JobRepository) UpdateMetadata(job *schema.Job, key, val string) (err error) { + if job == nil { + return fmt.Errorf("job cannot be nil") + } + cachekey := fmt.Sprintf("metadata:%d", job.ID) r.cache.Del(cachekey) if job.MetaData == nil { @@ -241,12 +259,16 @@ func (r *JobRepository) UpdateMetadata(job *schema.Job, key, val string) (err er } func (r *JobRepository) FetchFootprint(job *schema.Job) (map[string]float64, error) { + if job == nil { + return nil, fmt.Errorf("job cannot be nil") + } + start := time.Now() if err := sq.Select("job.footprint").From("job").Where("job.id = ?", job.ID). 
RunWith(r.stmtCache).QueryRow().Scan(&job.RawFootprint); err != nil { - cclog.Warn("Error while scanning for job footprint") - return nil, err + cclog.Warnf("Error while scanning for job footprint (ID=%d): %v", job.ID, err) + return nil, fmt.Errorf("failed to fetch footprint for job %d: %w", job.ID, err) } if len(job.RawFootprint) == 0 { @@ -254,8 +276,8 @@ func (r *JobRepository) FetchFootprint(job *schema.Job) (map[string]float64, err } if err := json.Unmarshal(job.RawFootprint, &job.Footprint); err != nil { - cclog.Warn("Error while unmarshaling raw footprint json") - return nil, err + cclog.Warnf("Error while unmarshaling raw footprint json (ID=%d): %v", job.ID, err) + return nil, fmt.Errorf("failed to unmarshal footprint for job %d: %w", job.ID, err) } cclog.Debugf("Timer FetchFootprint %s", time.Since(start)) @@ -263,6 +285,10 @@ func (r *JobRepository) FetchFootprint(job *schema.Job) (map[string]float64, err } func (r *JobRepository) FetchEnergyFootprint(job *schema.Job) (map[string]float64, error) { + if job == nil { + return nil, fmt.Errorf("job cannot be nil") + } + start := time.Now() cachekey := fmt.Sprintf("energyFootprint:%d", job.ID) if cached := r.cache.Get(cachekey, nil); cached != nil { @@ -272,8 +298,8 @@ func (r *JobRepository) FetchEnergyFootprint(job *schema.Job) (map[string]float6 if err := sq.Select("job.energy_footprint").From("job").Where("job.id = ?", job.ID). RunWith(r.stmtCache).QueryRow().Scan(&job.RawEnergyFootprint); err != nil { - cclog.Warn("Error while scanning for job energy_footprint") - return nil, err + cclog.Warnf("Error while scanning for job energy_footprint (ID=%d): %v", job.ID, err) + return nil, fmt.Errorf("failed to fetch energy footprint for job %d: %w", job.ID, err) } if len(job.RawEnergyFootprint) == 0 { @@ -281,8 +307,8 @@ func (r *JobRepository) FetchEnergyFootprint(job *schema.Job) (map[string]float6 } if err := json.Unmarshal(job.RawEnergyFootprint, &job.EnergyFootprint); err != nil { - cclog.Warn("Error while unmarshaling raw energy footprint json") - return nil, err + cclog.Warnf("Error while unmarshaling raw energy footprint json (ID=%d): %v", job.ID, err) + return nil, fmt.Errorf("failed to unmarshal energy footprint for job %d: %w", job.ID, err) } r.cache.Put(cachekey, job.EnergyFootprint, len(job.EnergyFootprint), 24*time.Hour) @@ -363,6 +389,10 @@ func (r *JobRepository) DeleteJobByID(id int64) error { } func (r *JobRepository) FindUserOrProjectOrJobname(user *schema.User, searchterm string) (jobid string, username string, project string, jobname string) { + if searchterm == "" { + return "", "", "", "" + } + if _, err := strconv.Atoi(searchterm); err == nil { // Return empty on successful conversion: parent method will redirect for integer jobId return searchterm, "", "", "" } else { // Has to have letters and logged-in user for other guesses @@ -394,6 +424,10 @@ var ( ) func (r *JobRepository) FindColumnValue(user *schema.User, searchterm string, table string, selectColumn string, whereColumn string, isLike bool) (result string, err error) { + if user == nil { + return "", fmt.Errorf("user cannot be nil") + } + compareStr := " = ?" query := searchterm if isLike { @@ -404,17 +438,11 @@ func (r *JobRepository) FindColumnValue(user *schema.User, searchterm string, ta theQuery := sq.Select(table+"."+selectColumn).Distinct().From(table). 
Where(table+"."+whereColumn+compareStr, query) - // theSql, args, theErr := theQuery.ToSql() - // if theErr != nil { - // cclog.Warn("Error while converting query to sql") - // return "", err - // } - // cclog.Debugf("SQL query (FindColumnValue): `%s`, args: %#v", theSql, args) - err := theQuery.RunWith(r.stmtCache).QueryRow().Scan(&result) if err != nil && err != sql.ErrNoRows { - return "", err + cclog.Warnf("Error while querying FindColumnValue (table=%s, column=%s): %v", table, selectColumn, err) + return "", fmt.Errorf("failed to find column value: %w", err) } else if err == nil { return result, nil } @@ -426,21 +454,26 @@ func (r *JobRepository) FindColumnValue(user *schema.User, searchterm string, ta } func (r *JobRepository) FindColumnValues(user *schema.User, query string, table string, selectColumn string, whereColumn string) (results []string, err error) { + if user == nil { + return nil, fmt.Errorf("user cannot be nil") + } + emptyResult := make([]string, 0) if user.HasAnyRole([]schema.Role{schema.RoleAdmin, schema.RoleSupport, schema.RoleManager}) { rows, err := sq.Select(table+"."+selectColumn).Distinct().From(table). Where(table+"."+whereColumn+" LIKE ?", fmt.Sprint("%", query, "%")). RunWith(r.stmtCache).Query() if err != nil && err != sql.ErrNoRows { - return emptyResult, err + cclog.Errorf("Error while querying FindColumnValues (table=%s, column=%s): %v", table, selectColumn, err) + return emptyResult, fmt.Errorf("failed to find column values: %w", err) } else if err == nil { + defer rows.Close() for rows.Next() { var result string err := rows.Scan(&result) if err != nil { - rows.Close() - cclog.Warnf("Error while scanning rows: %v", err) - return emptyResult, err + cclog.Warnf("Error while scanning rows in FindColumnValues: %v", err) + return emptyResult, fmt.Errorf("failed to scan column value: %w", err) } results = append(results, result) } @@ -482,8 +515,8 @@ func (r *JobRepository) AllocatedNodes(cluster string) (map[string]map[string]in Where("job.cluster = ?", cluster). RunWith(r.stmtCache).Query() if err != nil { - cclog.Error("Error while running query") - return nil, err + cclog.Errorf("Error while running AllocatedNodes query for cluster=%s: %v", cluster, err) + return nil, fmt.Errorf("failed to query allocated nodes for cluster %s: %w", cluster, err) } var raw []byte @@ -493,12 +526,12 @@ func (r *JobRepository) AllocatedNodes(cluster string) (map[string]map[string]in var resources []*schema.Resource var subcluster string if err := rows.Scan(&raw, &subcluster); err != nil { - cclog.Warn("Error while scanning rows") - return nil, err + cclog.Warnf("Error while scanning rows in AllocatedNodes: %v", err) + return nil, fmt.Errorf("failed to scan allocated nodes row: %w", err) } if err := json.Unmarshal(raw, &resources); err != nil { - cclog.Warn("Error while unmarshaling raw resources json") - return nil, err + cclog.Warnf("Error while unmarshaling raw resources json in AllocatedNodes: %v", err) + return nil, fmt.Errorf("failed to unmarshal resources in AllocatedNodes: %w", err) } hosts, ok := subclusters[subcluster] @@ -529,14 +562,14 @@ func (r *JobRepository) StopJobsExceedingWalltimeBy(seconds int) error { Where("(? - job.start_time) > (job.walltime + ?)", currentTime, seconds). 
RunWith(r.DB).Exec() if err != nil { - cclog.Warn("Error while stopping jobs exceeding walltime") - return err + cclog.Warnf("Error while stopping jobs exceeding walltime: %v", err) + return fmt.Errorf("failed to stop jobs exceeding walltime: %w", err) } rowsAffected, err := res.RowsAffected() if err != nil { - cclog.Warn("Error while fetching affected rows after stopping due to exceeded walltime") - return err + cclog.Warnf("Error while fetching affected rows after stopping due to exceeded walltime: %v", err) + return fmt.Errorf("failed to get rows affected count: %w", err) } if rowsAffected > 0 { @@ -552,18 +585,19 @@ func (r *JobRepository) FindJobIdsByTag(tagID int64) ([]int64, error) { Where(sq.Eq{"jobtag.tag_id": tagID}).Distinct() rows, err := query.RunWith(r.stmtCache).Query() if err != nil { - cclog.Error("Error while running query") - return nil, err + cclog.Errorf("Error while running FindJobIdsByTag query for tagID=%d: %v", tagID, err) + return nil, fmt.Errorf("failed to find job IDs by tag %d: %w", tagID, err) } + defer rows.Close() + jobIds := make([]int64, 0, 100) for rows.Next() { var jobID int64 if err := rows.Scan(&jobID); err != nil { - rows.Close() - cclog.Warn("Error while scanning rows") - return nil, err + cclog.Warnf("Error while scanning rows in FindJobIdsByTag: %v", err) + return nil, fmt.Errorf("failed to scan job ID in FindJobIdsByTag: %w", err) } jobIds = append(jobIds, jobID) @@ -581,8 +615,8 @@ func (r *JobRepository) FindRunningJobs(cluster string) ([]*schema.Job, error) { rows, err := query.RunWith(r.stmtCache).Query() if err != nil { - cclog.Error("Error while running query") - return nil, err + cclog.Errorf("Error while running FindRunningJobs query for cluster=%s: %v", cluster, err) + return nil, fmt.Errorf("failed to find running jobs for cluster %s: %w", cluster, err) } defer rows.Close() @@ -590,8 +624,8 @@ func (r *JobRepository) FindRunningJobs(cluster string) ([]*schema.Job, error) { for rows.Next() { job, err := scanJob(rows) if err != nil { - cclog.Warn("Error while scanning rows") - return nil, err + cclog.Warnf("Error while scanning rows in FindRunningJobs: %v", err) + return nil, fmt.Errorf("failed to scan job in FindRunningJobs: %w", err) } jobs = append(jobs, job) } @@ -607,7 +641,8 @@ func (r *JobRepository) UpdateDuration() error { _, err := stmnt.RunWith(r.stmtCache).Exec() if err != nil { - return err + cclog.Errorf("Error while updating duration for running jobs: %v", err) + return fmt.Errorf("failed to update duration for running jobs: %w", err) } return nil @@ -634,8 +669,8 @@ func (r *JobRepository) FindJobsBetween(startTimeBegin int64, startTimeEnd int64 rows, err := query.RunWith(r.stmtCache).Query() if err != nil { - cclog.Error("Error while running query") - return nil, err + cclog.Errorf("Error while running FindJobsBetween query: %v", err) + return nil, fmt.Errorf("failed to find jobs between %d and %d: %w", startTimeBegin, startTimeEnd, err) } defer rows.Close() @@ -643,8 +678,8 @@ func (r *JobRepository) FindJobsBetween(startTimeBegin int64, startTimeEnd int64 for rows.Next() { job, err := scanJob(rows) if err != nil { - cclog.Warn("Error while scanning rows") - return nil, err + cclog.Warnf("Error while scanning rows in FindJobsBetween: %v", err) + return nil, fmt.Errorf("failed to scan job in FindJobsBetween: %w", err) } jobs = append(jobs, job) } @@ -662,13 +697,17 @@ func (r *JobRepository) UpdateMonitoringStatus(job int64, monitoringStatus int32 Set("monitoring_status", monitoringStatus). 
Where("job.id = ?", job) - _, err = stmt.RunWith(r.stmtCache).Exec() - return err + if _, err = stmt.RunWith(r.stmtCache).Exec(); err != nil { + cclog.Errorf("Error while updating monitoring status for job %d: %v", job, err) + return fmt.Errorf("failed to update monitoring status for job %d: %w", job, err) + } + return nil } func (r *JobRepository) Execute(stmt sq.UpdateBuilder) error { if _, err := stmt.RunWith(r.stmtCache).Exec(); err != nil { - return err + cclog.Errorf("Error while executing statement: %v", err) + return fmt.Errorf("failed to execute update statement: %w", err) } return nil From 0ea0270fe17ceca3156a2f563df7987d6ab50063 Mon Sep 17 00:00:00 2001 From: Jan Eitzinger Date: Wed, 14 Jan 2026 10:37:07 +0100 Subject: [PATCH 37/59] Reintroduce Clusters as string list of cluster names --- web/web.go | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/web/web.go b/web/web.go index 37f1c2b2..6ff01403 100644 --- a/web/web.go +++ b/web/web.go @@ -245,6 +245,7 @@ type Page struct { User schema.User // Information about the currently logged in user (Full User Info) Roles map[string]schema.Role // Available roles for frontend render checks Build Build // Latest information about the application + Clusters []string // List of all cluster names SubClusters map[string][]string // Map per cluster of all subClusters for use in the Header FilterPresets map[string]any // For pages with the Filter component, this can be used to set initial filters. Infos map[string]any // For generic use (e.g. username for /monitoring/user/, job id for /monitoring/job/) @@ -259,9 +260,15 @@ func RenderTemplate(rw http.ResponseWriter, file string, page *Page) { cclog.Errorf("WEB/WEB > template '%s' not found", file) } + if page.Clusters == nil { + page.Clusters = make([]string, 2) + } + if page.SubClusters == nil { page.SubClusters = make(map[string][]string) for _, cluster := range archive.Clusters { + page.Clusters = append(page.Clusters, cluster.Name) + for _, sc := range cluster.SubClusters { page.SubClusters[cluster.Name] = append(page.SubClusters[cluster.Name], sc.Name) } From c8627a13f40011cd07d43772760c09a73078c0c4 Mon Sep 17 00:00:00 2001 From: Jan Eitzinger Date: Wed, 14 Jan 2026 11:17:49 +0100 Subject: [PATCH 38/59] Remove obsolete slusters config section --- cmd/cc-backend/init.go | 28 ++------------- configs/config-demo.json | 47 -------------------------- configs/config.json | 34 ++----------------- internal/api/api_test.go | 35 +++++++------------ internal/api/nats_test.go | 11 ------ internal/importer/importer_test.go | 32 ++---------------- internal/repository/node_test.go | 11 ------ internal/repository/userConfig_test.go | 10 ------ 8 files changed, 19 insertions(+), 189 deletions(-) diff --git a/cmd/cc-backend/init.go b/cmd/cc-backend/init.go index 025396be..e30ae2e1 100644 --- a/cmd/cc-backend/init.go +++ b/cmd/cc-backend/init.go @@ -48,7 +48,7 @@ const configString = ` "emission-constant": 317 }, "cron": { - "commit-job-worker": "2m", + "commit-job-worker": "1m", "duration-worker": "5m", "footprint-worker": "10m" }, @@ -60,31 +60,7 @@ const configString = ` "jwts": { "max-age": "2000h" } - }, - "clusters": [ - { - "name": "name", - "metricDataRepository": { - "kind": "cc-metric-store", - "url": "http://localhost:8082", - "token": "" - }, - "filterRanges": { - "numNodes": { - "from": 1, - "to": 64 - }, - "duration": { - "from": 0, - "to": 86400 - }, - "startTime": { - "from": "2023-01-01T00:00:00Z", - "to": null - } - } - } - ] + } } ` diff --git a/configs/config-demo.json 
b/configs/config-demo.json index aa388316..bd492e31 100644 --- a/configs/config-demo.json +++ b/configs/config-demo.json @@ -29,52 +29,6 @@ "username": "root", "password": "root" }, - "clusters": [ - { - "name": "fritz", - "metricDataRepository": { - "kind": "cc-metric-store-internal", - "url": "http://localhost:8082", - "token": "eyJ0eXAiOiJKV1QiLCJhbGciOiJFZERTQSJ9.eyJ1c2VyIjoiYWRtaW4iLCJyb2xlcyI6WyJST0xFX0FETUlOIiwiUk9MRV9BTkFMWVNUIiwiUk9MRV9VU0VSIl19.d-3_3FZTsadPjDEdsWrrQ7nS0edMAR4zjl-eK7rJU3HziNBfI9PDHDIpJVHTNN5E5SlLGLFXctWyKAkwhXL-Dw" - }, - "filterRanges": { - "numNodes": { - "from": 1, - "to": 64 - }, - "duration": { - "from": 0, - "to": 86400 - }, - "startTime": { - "from": "2022-01-01T00:00:00Z", - "to": null - } - } - }, - { - "name": "alex", - "metricDataRepository": { - "kind": "cc-metric-store-internal", - "url": "http://localhost:8082", - "token": "eyJ0eXAiOiJKV1QiLCJhbGciOiJFZERTQSJ9.eyJ1c2VyIjoiYWRtaW4iLCJyb2xlcyI6WyJST0xFX0FETUlOIiwiUk9MRV9BTkFMWVNUIiwiUk9MRV9VU0VSIl19.d-3_3FZTsadPjDEdsWrrQ7nS0edMAR4zjl-eK7rJU3HziNBfI9PDHDIpJVHTNN5E5SlLGLFXctWyKAkwhXL-Dw" - }, - "filterRanges": { - "numNodes": { - "from": 1, - "to": 64 - }, - "duration": { - "from": 0, - "to": 86400 - }, - "startTime": { - "from": "2022-01-01T00:00:00Z", - "to": null - } - } - } - ], "metric-store": { "checkpoints": { "file-format": "avro", @@ -99,4 +53,3 @@ ] } } - diff --git a/configs/config.json b/configs/config.json index 41d8ecac..44961c85 100644 --- a/configs/config.json +++ b/configs/config.json @@ -11,10 +11,7 @@ "resampling": { "minimumPoints": 600, "trigger": 180, - "resolutions": [ - 240, - 60 - ] + "resolutions": [240, 60] }, "apiSubjects": { "subjectJobEvent": "cc.job.event", @@ -22,37 +19,12 @@ } }, "cron": { - "commit-job-worker": "2m", + "commit-job-worker": "1m", "duration-worker": "5m", "footprint-worker": "10m" }, "archive": { "kind": "file", "path": "./var/job-archive" - }, - "clusters": [ - { - "name": "test", - "metricDataRepository": { - "kind": "cc-metric-store", - "url": "http://localhost:8082", - "token": "eyJhbGciOiJF-E-pQBQ" - }, - "filterRanges": { - "numNodes": { - "from": 1, - "to": 64 - }, - "duration": { - "from": 0, - "to": 86400 - }, - "startTime": { - "from": "2022-01-01T00:00:00Z", - "to": null - } - } - } - ] + } } - diff --git a/internal/api/api_test.go b/internal/api/api_test.go index 7aa935ff..025983c1 100644 --- a/internal/api/api_test.go +++ b/internal/api/api_test.go @@ -39,33 +39,22 @@ func setup(t *testing.T) *api.RestAPI { repository.ResetConnection() const testconfig = `{ - "main": { - "addr": "0.0.0.0:8080", - "validate": false, - "apiAllowedIPs": [ - "*" - ] - }, + "main": { + "addr": "0.0.0.0:8080", + "validate": false, + "apiAllowedIPs": [ + "*" + ] + }, "archive": { - "kind": "file", - "path": "./var/job-archive" + "kind": "file", + "path": "./var/job-archive" }, "auth": { - "jwts": { - "max-age": "2m" + "jwts": { + "max-age": "2m" + } } - }, - "clusters": [ - { - "name": "testcluster", - "metricDataRepository": {"kind": "test", "url": "bla:8081"}, - "filterRanges": { - "numNodes": { "from": 1, "to": 64 }, - "duration": { "from": 0, "to": 86400 }, - "startTime": { "from": "2022-01-01T00:00:00Z", "to": null } - } - } - ] }` const testclusterJSON = `{ "name": "testcluster", diff --git a/internal/api/nats_test.go b/internal/api/nats_test.go index 4b1431cb..c6a9bcd9 100644 --- a/internal/api/nats_test.go +++ b/internal/api/nats_test.go @@ -48,18 +48,7 @@ func setupNatsTest(t *testing.T) *NatsAPI { "jwts": { "max-age": "2m" } - }, - "clusters": [ - { - 
"name": "testcluster", - "metricDataRepository": {"kind": "test", "url": "bla:8081"}, - "filterRanges": { - "numNodes": { "from": 1, "to": 64 }, - "duration": { "from": 0, "to": 86400 }, - "startTime": { "from": "2022-01-01T00:00:00Z", "to": null } - } } - ] }` const testclusterJSON = `{ "name": "testcluster", diff --git a/internal/importer/importer_test.go b/internal/importer/importer_test.go index 2d00fc84..ebc500b7 100644 --- a/internal/importer/importer_test.go +++ b/internal/importer/importer_test.go @@ -56,36 +56,8 @@ func setup(t *testing.T) *repository.JobRepository { "archive": { "kind": "file", "path": "./var/job-archive" - }, - "clusters": [ - { - "name": "testcluster", - "metricDataRepository": {"kind": "test", "url": "bla:8081"}, - "filterRanges": { - "numNodes": { "from": 1, "to": 64 }, - "duration": { "from": 0, "to": 86400 }, - "startTime": { "from": "2022-01-01T00:00:00Z", "to": null } - } - }, - { - "name": "fritz", - "metricDataRepository": {"kind": "test", "url": "bla:8081"}, - "filterRanges": { - "numNodes": { "from": 1, "to": 944 }, - "duration": { "from": 0, "to": 86400 }, - "startTime": { "from": "2022-01-01T00:00:00Z", "to": null } - } - }, - { - "name": "taurus", - "metricDataRepository": {"kind": "test", "url": "bla:8081"}, - "filterRanges": { - "numNodes": { "from": 1, "to": 4000 }, - "duration": { "from": 0, "to": 604800 }, - "startTime": { "from": "2010-01-01T00:00:00Z", "to": null } - } - } - ]}` + } + }` cclog.Init("info", true) tmpdir := t.TempDir() diff --git a/internal/repository/node_test.go b/internal/repository/node_test.go index fd935b53..990de924 100644 --- a/internal/repository/node_test.go +++ b/internal/repository/node_test.go @@ -38,18 +38,7 @@ func nodeTestSetup(t *testing.T) { "jwts": { "max-age": "2m" } - }, - "clusters": [ - { - "name": "testcluster", - "metricDataRepository": {"kind": "test", "url": "bla:8081"}, - "filterRanges": { - "numNodes": { "from": 1, "to": 64 }, - "duration": { "from": 0, "to": 86400 }, - "startTime": { "from": "2022-01-01T00:00:00Z", "to": null } - } } - ] }` const testclusterJSON = `{ "name": "testcluster", diff --git a/internal/repository/userConfig_test.go b/internal/repository/userConfig_test.go index ae3adaf2..f66fccfb 100644 --- a/internal/repository/userConfig_test.go +++ b/internal/repository/userConfig_test.go @@ -27,17 +27,7 @@ func setupUserTest(t *testing.T) *UserCfgRepo { "archive": { "kind": "file", "path": "./var/job-archive" - }, - "clusters": [ - { - "name": "testcluster", - "metricDataRepository": {"kind": "test", "url": "bla:8081"}, - "filterRanges": { - "numNodes": { "from": 1, "to": 64 }, - "duration": { "from": 0, "to": 86400 }, - "startTime": { "from": "2022-01-01T00:00:00Z", "to": null } } - }] }` cclog.Init("info", true) From 4cb8d648cbb5bd40e40462d6f0d2d4d8fa78178c Mon Sep 17 00:00:00 2001 From: Christoph Kluge Date: Wed, 14 Jan 2026 11:25:40 +0100 Subject: [PATCH 39/59] adapt frontend to backend config changes, clarify variable names --- web/frontend/src/Config.root.svelte | 6 +-- web/frontend/src/Header.svelte | 36 +++++++-------- web/frontend/src/config.entrypoint.js | 2 +- web/frontend/src/config/AdminSettings.svelte | 6 +-- web/frontend/src/config/admin/Options.svelte | 12 ++--- .../src/generic/filters/Resources.svelte | 20 +-------- web/frontend/src/header.entrypoint.js | 4 +- web/frontend/src/header/NavbarLinks.svelte | 44 +++++++++---------- web/templates/base.tmpl | 4 +- 9 files changed, 58 insertions(+), 76 deletions(-) diff --git a/web/frontend/src/Config.root.svelte 
b/web/frontend/src/Config.root.svelte
index 171b2a08..0e1daec3 100644
--- a/web/frontend/src/Config.root.svelte
+++ b/web/frontend/src/Config.root.svelte
@@ -7,7 +7,7 @@
   - `isApi Bool!`: Is currently logged in user api authority
   - `username String!`: Empty string if auth. is disabled, otherwise the username as string
   - `ncontent String!`: The currently displayed message on the homescreen
-  - `clusters [String]`: The available clusternames
+  - `clusterNames [String]`: The available cluster names
 -->
@@ -32,7 +32,7 @@
   Admin Options
 
-  <AdminSettings {clusters} />
+  <AdminSettings {clusterNames} />
  {/if}

diff --git a/web/frontend/src/Header.svelte b/web/frontend/src/Header.svelte
index f7ceac2e..ea818c62 100644
--- a/web/frontend/src/Header.svelte
+++ b/web/frontend/src/Header.svelte
@@ -4,8 +4,8 @@
   Properties:
   - `username String`: Empty string if auth. is disabled, otherwise the username as string
   - `authlevel Number`: The current users authentication level
-  - `clusters [String]`: List of cluster names
-  - `subClusters [String]`: List of subCluster names
+  - `clusterNames [String]`: List of cluster names
+  - `subclusterMap Object`: Lists of subCluster names keyed by cluster name
   - `roles [Number]`: Enum containing available roles
 -->

@@ -28,8 +28,8 @@
   let {
     username,
     authlevel,
-    clusters,
-    subClusters,
+    clusterNames,
+    subclusterMap,
     roles
   } = $props();

@@ -152,15 +152,15 @@