Mirror of https://github.com/ClusterCockpit/cc-backend (synced 2026-01-15 17:21:46 +01:00)

Compare commits: upstream-t...master (87 commits)
Commits in this comparison (newest first; only the SHA1 column survived extraction):

02a8cf05d1, 7db2bbe6b0, b6f0faa97f, a3fffa8e8b, 72248defbf, 155e05495e,
9c92a7796b, 7c78407c49, cb219b3c74, d59aa2e855, cd3d133f0d, 3b7fc44ce9,
e1efc68476, 8f0bb907ff, e5c620ca20, d0bcfb90e6, 9deee54e41, 94b86ef11a,
d8cd752dcb, 5d376e6865, 9c3beddf54, c6465ad9e5, d415381d4a, 211d4fae54,
3276ed7785, 77b7548ef3, 59851f410e, 4cb8d648cb, c8627a13f4, 0ea0270fe1,
19402d30af, b2f870e3c0, 9e542dc200, 6cf59043a3, 71b75eea0e, e900a686db,
fb8db3c3ae, 170a9ace8a, 518e9950ea, 25c8fca561, 754f7e16f6, 04a2e460ae,
2ebab1e2e2, a9366d14c6, 42809e3f75, 4cec933349, d3f3c532b1, ad1e87d0b8,
affa85c086, aa053d78f7, fae6d9d835, 78f1db7ad1, f1367f84f8, 4c81696f4d,
a91f8f72e3, 87f7ed329c, 8641d9053d, 4a5ab8a279, d179412ab6, 968c7d179d,
56399523d7, 4d6326b8be, a2414791bf, faf3a19f0c, 4e6038d6c1, ddc2ecf829,
ecb5aef735, 11ec2267da, 8576ae458d, c66445acb5, 29a20f7b0b, 874c019fb6,
54825626de, 9bf5c5dc1a, 64fef9774c, 999667ec0c, c1135531ba, 287256e5f1,
0bc26aa194, 502d7e9084, 89875db4a9, 5a8b929448, fe78f2f433, e37591ce6d,
998f800632, 10a0b0add8, f9aa47ea1c
.github/dependabot.yml (vendored): 15 lines removed

@@ -1,15 +0,0 @@
-# To get started with Dependabot version updates, you'll need to specify which
-# package ecosystems to update and where the package manifests are located.
-# Please see the documentation for all configuration options:
-# https://docs.github.com/code-security/dependabot/dependabot-version-updates/configuration-options-for-the-dependabot.yml-file
-
-version: 2
-updates:
-  - package-ecosystem: "gomod"
-    directory: "/"
-    schedule:
-      interval: "weekly"
-  - package-ecosystem: "npm"
-    directory: "/web/frontend"
-    schedule:
-      interval: "weekly"
CLAUDE.md: 104 lines changed
@@ -96,15 +96,19 @@ The backend follows a layered architecture with clear separation of concerns:
 - **internal/auth**: Authentication layer
   - Supports local accounts, LDAP, OIDC, and JWT tokens
   - Implements rate limiting for login attempts
-- **internal/metricdata**: Metric data repository abstraction
-  - Pluggable backends: cc-metric-store, Prometheus, InfluxDB
-  - Each cluster can have a different metric data backend
+- **internal/metricstore**: Metric store with data loading API
+  - In-memory metric storage with checkpointing
+  - Query API for loading job metric data
 - **internal/archiver**: Job archiving to file-based archive
+- **internal/api/nats.go**: NATS-based API for job and node operations
+  - Subscribes to NATS subjects for job events (start/stop)
+  - Handles node state updates via NATS
+  - Uses InfluxDB line protocol message format
 - **pkg/archive**: Job archive backend implementations
   - File system backend (default)
   - S3 backend
   - SQLite backend (experimental)
-- **pkg/nats**: NATS integration for metric ingestion
+- **pkg/nats**: NATS client and message decoding utilities

 ### Frontend Structure

@@ -146,6 +150,14 @@ applied automatically on startup. Version tracking in `version` table.
 ## Configuration

 - **config.json**: Main configuration (clusters, metric repositories, archive settings)
+  - `main.apiSubjects`: NATS subject configuration (optional)
+    - `subjectJobEvent`: Subject for job start/stop events (e.g., "cc.job.event")
+    - `subjectNodeState`: Subject for node state updates (e.g., "cc.node.state")
+  - `nats`: NATS client connection configuration (optional)
+    - `address`: NATS server address (e.g., "nats://localhost:4222")
+    - `username`: Authentication username (optional)
+    - `password`: Authentication password (optional)
+    - `creds-file-path`: Path to NATS credentials file (optional)
 - **.env**: Environment variables (secrets like JWT keys)
   - Copy from `configs/env-template.txt`
   - NEVER commit this file
@@ -197,8 +209,8 @@ applied automatically on startup. Version tracking in `version` table.

 ### Adding a new metric data backend

-1. Implement `MetricDataRepository` interface in `internal/metricdata/`
-2. Register in `metricdata.Init()` switch statement
+1. Implement metric loading functions in `internal/metricstore/query.go`
+2. Add cluster configuration to metric store initialization
 3. Update config.json schema documentation

 ### Modifying database schema
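The signatures in `internal/metricstore/query.go` are not shown in this diff, so here is only a compilable sketch of what step 1 could look like. Every type name and the `queryStore` helper below are assumptions for illustration, not the actual cc-backend API.

```go
package metricstore

import (
	"context"
	"fmt"
)

// MetricSeries is an assumed container for one metric's time series.
type MetricSeries struct {
	Metric   string
	Timestep int64 // seconds between consecutive samples
	Values   []float64
}

// queryStore stands in for the real in-memory store lookup; it is a stub
// so the sketch compiles on its own.
func queryStore(_ context.Context, cluster, metric string, from, to int64) (MetricSeries, error) {
	if to <= from {
		return MetricSeries{}, fmt.Errorf("empty time range for %s on %s", metric, cluster)
	}
	return MetricSeries{Metric: metric, Timestep: 60}, nil
}

// LoadJobData sketches how a loader in internal/metricstore/query.go might
// gather all requested metrics of one job on one cluster.
func LoadJobData(ctx context.Context, cluster string, metrics []string, from, to int64) (map[string]MetricSeries, error) {
	result := make(map[string]MetricSeries, len(metrics))
	for _, m := range metrics {
		series, err := queryStore(ctx, cluster, m, from, to)
		if err != nil {
			return nil, fmt.Errorf("loading metric %s: %w", m, err)
		}
		result[m] = series
	}
	return result, nil
}
```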
@@ -207,9 +219,87 @@ applied automatically on startup. Version tracking in `version` table.
 2. Increment `repository.Version`
 3. Test with fresh database and existing database

+## NATS API
+
+The backend supports a NATS-based API as an alternative to the REST API for job and node operations.
+
+### Setup
+
+1. Configure NATS client connection in `config.json`:
+```json
+{
+  "nats": {
+    "address": "nats://localhost:4222",
+    "username": "user",
+    "password": "pass"
+  }
+}
+```
+
+2. Configure API subjects in `config.json` under `main`:
+```json
+{
+  "main": {
+    "apiSubjects": {
+      "subjectJobEvent": "cc.job.event",
+      "subjectNodeState": "cc.node.state"
+    }
+  }
+}
+```
+
+### Message Format
+
+Messages use **InfluxDB line protocol** format with the following structure:
+
+#### Job Events
+
+**Start Job:**
+```
+job,function=start_job event="{\"jobId\":123,\"user\":\"alice\",\"cluster\":\"test\", ...}" 1234567890000000000
+```
+
+**Stop Job:**
+```
+job,function=stop_job event="{\"jobId\":123,\"cluster\":\"test\",\"startTime\":1234567890,\"stopTime\":1234571490,\"jobState\":\"completed\"}" 1234571490000000000
+```
+
+**Tags:**
+- `function`: Either `start_job` or `stop_job`
+
+**Fields:**
+- `event`: JSON payload containing job data (see REST API documentation for schema)
+
+#### Node State Updates
+
+```json
+{
+  "cluster": "testcluster",
+  "nodes": [
+    {
+      "hostname": "node001",
+      "states": ["allocated"],
+      "cpusAllocated": 8,
+      "memoryAllocated": 16384,
+      "gpusAllocated": 0,
+      "jobsRunning": 1
+    }
+  ]
+}
+```
+
+### Implementation Notes
+
+- NATS API mirrors REST API functionality but uses messaging
+- Job start/stop events are processed asynchronously
+- Duplicate job detection is handled (same as REST API)
+- All validation rules from REST API apply
+- Messages are logged; no responses are sent back to publishers
+- If NATS client is unavailable, API subscriptions are skipped (logged as warning)
+
 ## Dependencies

 - Go 1.24.0+ (check go.mod for exact version)
 - Node.js (for frontend builds)
 - SQLite 3 (only supported database)
-- Optional: NATS server for metric ingestion
+- Optional: NATS server for NATS API integration
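The subjects and message format documented in the hunk above are enough to drive the API from any NATS client. Below is a minimal Go publisher sketch for a `start_job` event; the connection parameters and the JSON field names are taken from the examples above, and error handling is deliberately simplified.

```go
package main

import (
	"encoding/json"
	"fmt"
	"log"
	"strings"
	"time"

	"github.com/nats-io/nats.go"
)

func main() {
	nc, err := nats.Connect("nats://localhost:4222",
		nats.UserInfo("user", "pass"))
	if err != nil {
		log.Fatalf("connecting to NATS: %v", err)
	}
	defer nc.Close()

	// Build the JSON event payload, then embed it as an escaped string
	// field inside an InfluxDB line protocol record, as documented above.
	event, _ := json.Marshal(map[string]any{
		"jobId":     123,
		"user":      "alice",
		"cluster":   "test",
		"startTime": time.Now().Unix(),
	})
	escaped := strings.ReplaceAll(string(event), `"`, `\"`)
	msg := fmt.Sprintf("job,function=start_job event=\"%s\" %d",
		escaped, time.Now().UnixNano())

	if err := nc.Publish("cc.job.event", []byte(msg)); err != nil {
		log.Fatalf("publishing job event: %v", err)
	}
}
```

Note that, per the implementation notes, no response comes back on this subject; publishers should watch the backend logs to confirm ingestion.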
README.md: 60 lines changed
@@ -22,11 +22,12 @@ switching from PHP Symfony to a Golang based solution are explained
 ## Overview

 This is a Golang web backend for the ClusterCockpit job-specific performance
-monitoring framework. It provides a REST API for integrating ClusterCockpit with
-an HPC cluster batch system and external analysis scripts. Data exchange between
-the web front-end and the back-end is based on a GraphQL API. The web frontend
-is also served by the backend using [Svelte](https://svelte.dev/) components.
-Layout and styling are based on [Bootstrap 5](https://getbootstrap.com/) using
+monitoring framework. It provides a REST API and an optional NATS-based messaging
+API for integrating ClusterCockpit with an HPC cluster batch system and external
+analysis scripts. Data exchange between the web front-end and the back-end is
+based on a GraphQL API. The web frontend is also served by the backend using
+[Svelte](https://svelte.dev/) components. Layout and styling are based on
+[Bootstrap 5](https://getbootstrap.com/) using
 [Bootstrap Icons](https://icons.getbootstrap.com/).

 The backend uses [SQLite 3](https://sqlite.org/) as the relational SQL database.
@@ -35,6 +36,10 @@ databases, the only tested and supported setup is to use cc-metric-store as the
 metric data backend. Documentation on how to integrate ClusterCockpit with other
 time series databases will be added in the future.

+For real-time integration with HPC systems, the backend can subscribe to
+[NATS](https://nats.io/) subjects to receive job start/stop events and node
+state updates, providing an alternative to REST API polling.
+
 Completed batch jobs are stored in a file-based job archive according to
 [this specification](https://github.com/ClusterCockpit/cc-specifications/tree/master/job-archive).
 The backend supports authentication via local accounts, an external LDAP
@@ -130,27 +135,58 @@ ln -s <your-existing-job-archive> ./var/job-archive

 ## Project file structure

+- [`.github/`](https://github.com/ClusterCockpit/cc-backend/tree/master/.github)
+  GitHub Actions workflows and dependabot configuration for CI/CD.
 - [`api/`](https://github.com/ClusterCockpit/cc-backend/tree/master/api)
   contains the API schema files for the REST and GraphQL APIs. The REST API is
   documented in the OpenAPI 3.0 format in
-  [./api/openapi.yaml](./api/openapi.yaml).
+  [./api/swagger.yaml](./api/swagger.yaml). The GraphQL schema is in
+  [./api/schema.graphqls](./api/schema.graphqls).
 - [`cmd/cc-backend`](https://github.com/ClusterCockpit/cc-backend/tree/master/cmd/cc-backend)
-  contains `main.go` for the main application.
+  contains the main application entry point and CLI implementation.
 - [`configs/`](https://github.com/ClusterCockpit/cc-backend/tree/master/configs)
   contains documentation about configuration and command line options and required
-  environment variables. A sample configuration file is provided.
-- [`docs/`](https://github.com/ClusterCockpit/cc-backend/tree/master/docs)
-  contains more in-depth documentation.
+  environment variables. Sample configuration files are provided.
 - [`init/`](https://github.com/ClusterCockpit/cc-backend/tree/master/init)
   contains an example of setting up systemd for production use.
 - [`internal/`](https://github.com/ClusterCockpit/cc-backend/tree/master/internal)
   contains library source code that is not intended for use by others.
+  - [`api`](https://github.com/ClusterCockpit/cc-backend/tree/master/internal/api)
+    REST API handlers and NATS integration
+  - [`archiver`](https://github.com/ClusterCockpit/cc-backend/tree/master/internal/archiver)
+    Job archiving functionality
+  - [`auth`](https://github.com/ClusterCockpit/cc-backend/tree/master/internal/auth)
+    Authentication (local, LDAP, OIDC) and JWT token handling
+  - [`config`](https://github.com/ClusterCockpit/cc-backend/tree/master/internal/config)
+    Configuration management and validation
+  - [`graph`](https://github.com/ClusterCockpit/cc-backend/tree/master/internal/graph)
+    GraphQL schema and resolvers
+  - [`importer`](https://github.com/ClusterCockpit/cc-backend/tree/master/internal/importer)
+    Job data import and database initialization
+  - [`metricstore`](https://github.com/ClusterCockpit/cc-backend/tree/master/internal/metricstore)
+    In-memory metric data store with checkpointing and metric loading
+  - [`metricdispatch`](https://github.com/ClusterCockpit/cc-backend/tree/master/internal/metricdispatch)
+    Dispatches metric data loading to appropriate backends
+  - [`repository`](https://github.com/ClusterCockpit/cc-backend/tree/master/internal/repository)
+    Database repository layer for jobs and metadata
+  - [`routerConfig`](https://github.com/ClusterCockpit/cc-backend/tree/master/internal/routerConfig)
+    HTTP router configuration and middleware
+  - [`tagger`](https://github.com/ClusterCockpit/cc-backend/tree/master/internal/tagger)
+    Job classification and application detection
+  - [`taskmanager`](https://github.com/ClusterCockpit/cc-backend/tree/master/internal/taskmanager)
+    Background task management and scheduled jobs
 - [`pkg/`](https://github.com/ClusterCockpit/cc-backend/tree/master/pkg)
   contains Go packages that can be used by other projects.
+  - [`archive`](https://github.com/ClusterCockpit/cc-backend/tree/master/pkg/archive)
+    Job archive backend implementations (filesystem, S3)
+  - [`nats`](https://github.com/ClusterCockpit/cc-backend/tree/master/pkg/nats)
+    NATS client and message handling
 - [`tools/`](https://github.com/ClusterCockpit/cc-backend/tree/master/tools)
   Additional command line helper tools.
   - [`archive-manager`](https://github.com/ClusterCockpit/cc-backend/tree/master/tools/archive-manager)
-    Commands for getting infos about and existing job archive.
+    Commands for getting infos about an existing job archive.
+  - [`archive-migration`](https://github.com/ClusterCockpit/cc-backend/tree/master/tools/archive-migration)
+    Tool for migrating job archives between formats.
   - [`convert-pem-pubkey`](https://github.com/ClusterCockpit/cc-backend/tree/master/tools/convert-pem-pubkey)
     Tool to convert external pubkey for use in `cc-backend`.
   - [`gen-keypair`](https://github.com/ClusterCockpit/cc-backend/tree/master/tools/gen-keypair)
@@ -162,7 +198,7 @@ ln -s <your-existing-job-archive> ./var/job-archive
 - [`frontend`](https://github.com/ClusterCockpit/cc-backend/tree/master/web/frontend)
   Svelte components and static assets for the frontend UI
 - [`templates`](https://github.com/ClusterCockpit/cc-backend/tree/master/web/templates)
-  Server-side Go templates
+  Server-side Go templates, including monitoring views
 - [`gqlgen.yml`](https://github.com/ClusterCockpit/cc-backend/blob/master/gqlgen.yml)
   Configures the behaviour and generation of
   [gqlgen](https://github.com/99designs/gqlgen).
@@ -458,6 +458,7 @@ input JobFilter {
   state: [JobState!]
   metricStats: [MetricStatItem!]
   shared: String
+  schedule: String
   node: StringInput
 }

api/swagger.json: 1126 lines changed (diff suppressed; file too large)

api/swagger.yaml: 690 lines changed (diff suppressed; file too large)
@@ -15,8 +15,8 @@ import (
 	"github.com/ClusterCockpit/cc-backend/internal/config"
 	"github.com/ClusterCockpit/cc-backend/internal/repository"
 	"github.com/ClusterCockpit/cc-backend/pkg/archive"
-	cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
-	"github.com/ClusterCockpit/cc-lib/util"
+	cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger"
+	"github.com/ClusterCockpit/cc-lib/v2/util"
 )

 const envString = `
@@ -36,7 +36,7 @@ const configString = `
 	"short-running-jobs-duration": 300,
 	"resampling": {
 		"minimumPoints": 600,
-		"trigger": 180,
+		"trigger": 300,
 		"resolutions": [
 			240,
 			60
@@ -48,7 +48,7 @@ const configString = `
 	"emission-constant": 317
 },
 "cron": {
-	"commit-job-worker": "2m",
+	"commit-job-worker": "1m",
 	"duration-worker": "5m",
 	"footprint-worker": "10m"
 },
@@ -60,31 +60,7 @@ const configString = `
 	"jwts": {
 		"max-age": "2000h"
 	}
-	},
-	"clusters": [
-		{
-			"name": "name",
-			"metricDataRepository": {
-				"kind": "cc-metric-store",
-				"url": "http://localhost:8082",
-				"token": ""
-			},
-			"filterRanges": {
-				"numNodes": {
-					"from": 1,
-					"to": 64
-				},
-				"duration": {
-					"from": 0,
-					"to": 86400
-				},
-				"startTime": {
-					"from": "2023-01-01T00:00:00Z",
-					"to": null
-				}
 }
-		}
-	]
 }
 `

@@ -24,19 +24,18 @@ import (
 	"github.com/ClusterCockpit/cc-backend/internal/auth"
 	"github.com/ClusterCockpit/cc-backend/internal/config"
 	"github.com/ClusterCockpit/cc-backend/internal/importer"
-	"github.com/ClusterCockpit/cc-backend/internal/memorystore"
-	"github.com/ClusterCockpit/cc-backend/internal/metricdata"
+	"github.com/ClusterCockpit/cc-backend/internal/metricstore"
 	"github.com/ClusterCockpit/cc-backend/internal/repository"
 	"github.com/ClusterCockpit/cc-backend/internal/tagger"
 	"github.com/ClusterCockpit/cc-backend/internal/taskmanager"
 	"github.com/ClusterCockpit/cc-backend/pkg/archive"
 	"github.com/ClusterCockpit/cc-backend/pkg/nats"
 	"github.com/ClusterCockpit/cc-backend/web"
-	ccconf "github.com/ClusterCockpit/cc-lib/ccConfig"
-	cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
-	"github.com/ClusterCockpit/cc-lib/runtimeEnv"
-	"github.com/ClusterCockpit/cc-lib/schema"
-	"github.com/ClusterCockpit/cc-lib/util"
+	ccconf "github.com/ClusterCockpit/cc-lib/v2/ccConfig"
+	cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger"
+	"github.com/ClusterCockpit/cc-lib/v2/runtime"
+	"github.com/ClusterCockpit/cc-lib/v2/schema"
+	"github.com/ClusterCockpit/cc-lib/v2/util"
 	"github.com/google/gops/agent"
 	"github.com/joho/godotenv"

@@ -103,12 +102,7 @@ func initConfiguration() error {
 		return fmt.Errorf("main configuration must be present")
 	}

-	clustercfg := ccconf.GetPackageConfig("clusters")
-	if clustercfg == nil {
-		return fmt.Errorf("cluster configuration must be present")
-	}
-
-	config.Init(cfg, clustercfg)
+	config.Init(cfg)
 	return nil
 }

@@ -277,21 +271,14 @@ func initSubsystems() error {
 	// Initialize job archive
 	archiveCfg := ccconf.GetPackageConfig("archive")
 	if archiveCfg == nil {
+		cclog.Debug("Archive configuration not found, using default archive configuration")
 		archiveCfg = json.RawMessage(defaultArchiveConfig)
 	}
 	if err := archive.Init(archiveCfg, config.Keys.DisableArchive); err != nil {
 		return fmt.Errorf("initializing archive: %w", err)
 	}

-	// Initialize metricdata
-	// if err := metricdata.Init(); err != nil {
-	// 	return fmt.Errorf("initializing metricdata repository: %w", err)
-	// }
-
-	// Initialize upstream metricdata repositories for pull worker
-	if err := metricdata.InitUpstreamRepos(); err != nil {
-		return fmt.Errorf("initializing upstream metricdata repositories: %w", err)
-	}
+	// Note: metricstore.Init() is called later in runServer() with proper configuration

 	// Handle database re-initialization
 	if flagReinitDB {
@@ -316,6 +303,8 @@ func initSubsystems() error {

 	// Apply tags if requested
 	if flagApplyTags {
+		tagger.Init()
+
 		if err := tagger.RunTaggers(); err != nil {
 			return fmt.Errorf("running job taggers: %w", err)
 		}
@@ -330,9 +319,14 @@ func runServer(ctx context.Context) error {
 	// Initialize metric store if configuration is provided
 	mscfg := ccconf.GetPackageConfig("metric-store")
 	if mscfg != nil {
-		memorystore.Init(mscfg, &wg)
+		metricstore.Init(mscfg, &wg)
+
+		// Inject repository as NodeProvider to break import cycle
+		ms := metricstore.GetMemoryStore()
+		jobRepo := repository.GetJobRepository()
+		ms.SetNodeProvider(jobRepo)
 	} else {
-		cclog.Debug("Metric store configuration not found, skipping memorystore initialization")
+		return fmt.Errorf("missing metricstore configuration")
 	}

 	// Start archiver and task manager
@@ -375,7 +369,7 @@ func runServer(ctx context.Context) error {
 	case <-ctx.Done():
 	}

-	runtimeEnv.SystemdNotifiy(false, "Shutting down ...")
+	runtime.SystemdNotify(false, "Shutting down ...")
 	srv.Shutdown(ctx)
 	util.FsWatcherShutdown()
 	taskmanager.Shutdown()
@@ -385,24 +379,39 @@ func runServer(ctx context.Context) error {
 	if os.Getenv(envGOGC) == "" {
 		debug.SetGCPercent(25)
 	}
-	runtimeEnv.SystemdNotifiy(true, "running")
+	runtime.SystemdNotify(true, "running")

-	// Wait for completion or error
+	waitDone := make(chan struct{})
 	go func() {
 		wg.Wait()
+		close(waitDone)
+	}()
+
+	go func() {
+		<-waitDone
 		close(errChan)
 	}()

-	// Check for server startup errors
+	// Wait for either:
+	// 1. An error from server startup
+	// 2. Completion of all goroutines (normal shutdown or crash)
+	select {
+	case err := <-errChan:
+		// errChan will be closed when waitDone is closed, which happens
+		// when all goroutines complete (either from normal shutdown or error)
+		if err != nil {
+			return err
+		}
+	case <-time.After(100 * time.Millisecond):
+		// Give the server 100ms to start and report any immediate startup errors
+		// After that, just wait for normal shutdown completion
 		select {
 		case err := <-errChan:
 			if err != nil {
 				return err
 			}
-		case <-time.After(100 * time.Millisecond):
-			// Server started successfully, wait for completion
-			if err := <-errChan; err != nil {
-				return err
+		case <-waitDone:
+			// Normal shutdown completed
 		}
 	}

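The channel choreography in this hunk is easy to misread, so here is a self-contained sketch of the same startup-error-versus-clean-shutdown pattern. The channel names and the 100 ms window mirror the diff; the `start` callback stands in for the real HTTP server startup.

```go
package main

import (
	"fmt"
	"sync"
	"time"
)

func run(start func() error) error {
	var wg sync.WaitGroup
	errChan := make(chan error, 1)
	waitDone := make(chan struct{})

	wg.Add(1)
	go func() {
		defer wg.Done()
		if err := start(); err != nil {
			errChan <- err // immediate startup failure
		}
	}()

	// Close waitDone once all workers finish, then close errChan so any
	// blocked receive below unblocks with the zero value.
	go func() {
		wg.Wait()
		close(waitDone)
	}()
	go func() {
		<-waitDone
		close(errChan)
	}()

	// Give startup 100ms to surface an error; afterwards only a normal
	// shutdown (waitDone) or a late error can end the run.
	select {
	case err := <-errChan:
		return err
	case <-time.After(100 * time.Millisecond):
		select {
		case err := <-errChan:
			return err
		case <-waitDone:
			return nil // normal shutdown completed
		}
	}
}

func main() {
	err := run(func() error { return nil })
	fmt.Println("run returned:", err)
}
```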
@@ -29,12 +29,12 @@ import (
 	"github.com/ClusterCockpit/cc-backend/internal/config"
 	"github.com/ClusterCockpit/cc-backend/internal/graph"
 	"github.com/ClusterCockpit/cc-backend/internal/graph/generated"
-	"github.com/ClusterCockpit/cc-backend/internal/memorystore"
+	"github.com/ClusterCockpit/cc-backend/internal/metricstore"
 	"github.com/ClusterCockpit/cc-backend/internal/routerConfig"
 	"github.com/ClusterCockpit/cc-backend/pkg/nats"
 	"github.com/ClusterCockpit/cc-backend/web"
-	cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
-	"github.com/ClusterCockpit/cc-lib/runtimeEnv"
+	cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger"
+	"github.com/ClusterCockpit/cc-lib/v2/runtime"
 	"github.com/gorilla/handlers"
 	"github.com/gorilla/mux"
 	httpSwagger "github.com/swaggo/http-swagger"
@@ -345,7 +345,7 @@ func (s *Server) Start(ctx context.Context) error {
 	// Because this program will want to bind to a privileged port (like 80), the listener must
 	// be established first, then the user can be changed, and after that,
 	// the actual http server can be started.
-	if err := runtimeEnv.DropPrivileges(config.Keys.Group, config.Keys.User); err != nil {
+	if err := runtime.DropPrivileges(config.Keys.Group, config.Keys.User); err != nil {
 		return fmt.Errorf("dropping privileges: %w", err)
 	}

@@ -381,7 +381,7 @@ func (s *Server) Shutdown(ctx context.Context) {
 	}

 	// Archive all the metric store data
-	memorystore.Shutdown()
+	metricstore.Shutdown()

 	// Shutdown archiver with 10 second timeout for fast shutdown
 	if err := archiver.Shutdown(10 * time.Second); err != nil {
@@ -1,96 +1,22 @@
 {
 	"main": {
 		"addr": "127.0.0.1:8080",
-		"short-running-jobs-duration": 300,
-		"resampling": {
-			"minimumPoints": 600,
-			"trigger": 180,
-			"resolutions": [
-				240,
-				60
-			]
-		},
-		"apiAllowedIPs": [
-			"*"
-		],
-		"emission-constant": 317
+		"apiAllowedIPs": ["*"]
 	},
 	"cron": {
-		"commit-job-worker": "2m",
-		"duration-worker": "5m",
-		"footprint-worker": "10m"
-	},
-	"archive": {
-		"kind": "file",
-		"path": "./var/job-archive"
+		"commit-job-worker": "1m",
+		"duration-worker": "3m",
+		"footprint-worker": "5m"
 	},
 	"auth": {
 		"jwts": {
 			"max-age": "2000h"
 		}
 	},
-	"nats": {
-		"address": "nats://0.0.0.0:4222",
-		"username": "root",
-		"password": "root"
-	},
-	"clusters": [
-		{
-			"name": "fritz",
-			"filterRanges": {
-				"numNodes": {
-					"from": 1,
-					"to": 64
-				},
-				"duration": {
-					"from": 0,
-					"to": 86400
-				},
-				"startTime": {
-					"from": "2022-01-01T00:00:00Z",
-					"to": null
-				}
-			}
-		},
-		{
-			"name": "alex",
-			"filterRanges": {
-				"numNodes": {
-					"from": 1,
-					"to": 64
-				},
-				"duration": {
-					"from": 0,
-					"to": 86400
-				},
-				"startTime": {
-					"from": "2022-01-01T00:00:00Z",
-					"to": null
-				}
-			}
-		}
-	],
 	"metric-store": {
 		"checkpoints": {
-			"file-format": "avro",
-			"interval": "1h",
-			"directory": "./var/checkpoints",
-			"restore": "48h"
+			"interval": "1h"
 		},
-		"archive": {
-			"interval": "1h",
-			"directory": "./var/archive"
-		},
-		"retention-in-memory": "48h",
-		"subscriptions": [
-			{
-				"subscribe-to": "hpc-nats",
-				"cluster-tag": "fritz"
-			},
-			{
-				"subscribe-to": "hpc-nats",
-				"cluster-tag": "alex"
-			}
-		]
+		"retention-in-memory": "12h"
 	}
 }
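The config above is consumed section by section (`ccconf.GetPackageConfig("metric-store")` and friends in the hunks earlier). A minimal sketch of that pattern follows: parse the document once into raw per-package sections and hand each subsystem only its own JSON. This is an illustration of the idea, not cc-lib's actual `ccConfig` implementation.

```go
package main

import (
	"encoding/json"
	"fmt"
	"log"
)

var sections map[string]json.RawMessage

// initConfig splits one JSON document into raw top-level sections.
func initConfig(data []byte) error {
	return json.Unmarshal(data, &sections)
}

// getPackageConfig returns the raw JSON for one top-level key, or nil if
// the section is absent, so callers can fall back to defaults or fail.
func getPackageConfig(name string) json.RawMessage {
	return sections[name]
}

func main() {
	cfg := []byte(`{
	  "main": {"addr": "127.0.0.1:8080"},
	  "metric-store": {"retention-in-memory": "12h"}
	}`)
	if err := initConfig(cfg); err != nil {
		log.Fatal(err)
	}
	ms := getPackageConfig("metric-store")
	if ms == nil {
		log.Fatal("missing metricstore configuration") // mirrors runServer()
	}
	fmt.Println("metric-store section:", string(ms))
}
```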
@@ -5,45 +5,61 @@
 	"https-key-file": "/etc/letsencrypt/live/url/privkey.pem",
 	"user": "clustercockpit",
 	"group": "clustercockpit",
-	"validate": false,
 	"apiAllowedIPs": ["*"],
 	"short-running-jobs-duration": 300,
+	"enable-job-taggers": true,
 	"resampling": {
 		"minimumPoints": 600,
 		"trigger": 180,
-		"resolutions": [
-			240,
-			60
-		]
+		"resolutions": [240, 60]
+	},
+	"apiSubjects": {
+		"subjectJobEvent": "cc.job.event",
+		"subjectNodeState": "cc.node.state"
 	}
 },
+"nats": {
+	"address": "nats://0.0.0.0:4222",
+	"username": "root",
+	"password": "root"
+},
+"auth": {
+	"jwts": {
+		"max-age": "2000h"
+	}
+},
 "cron": {
-	"commit-job-worker": "2m",
+	"commit-job-worker": "1m",
 	"duration-worker": "5m",
 	"footprint-worker": "10m"
 },
 "archive": {
-	"kind": "file",
-	"path": "./var/job-archive"
+	"kind": "s3",
+	"endpoint": "http://x.x.x.x",
+	"bucket": "jobarchive",
+	"accessKey": "xx",
+	"secretKey": "xx",
+	"retention": {
+		"policy": "move",
+		"age": 365,
+		"location": "./var/archive"
+	}
 },
-"clusters": [
-	{
-		"name": "test",
-		"filterRanges": {
-			"numNodes": {
-				"from": 1,
-				"to": 64
-			},
-			"duration": {
-				"from": 0,
-				"to": 86400
-			},
-			"startTime": {
-				"from": "2022-01-01T00:00:00Z",
-				"to": null
-			}
-		}
-	}
-]
+"metric-store": {
+	"checkpoints": {
+		"interval": "1h"
+	},
+	"retention-in-memory": "12h",
+	"subscriptions": [
+		{
+			"subscribe-to": "hpc-nats",
+			"cluster-tag": "fritz"
+		},
+		{
+			"subscribe-to": "hpc-nats",
+			"cluster-tag": "alex"
+		}
+	]
+},
+"ui-file": "ui-config.json"
 }
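The `subscriptions` entries above point the metric store at a NATS subject carrying line protocol measurements. Below is a rough consumer sketch under those assumptions (subject "hpc-nats", credentials from the example config). The line splitting here is hand-rolled for illustration; the real backend has its own decoding utilities in `pkg/nats`.

```go
package main

import (
	"fmt"
	"log"
	"strings"

	"github.com/nats-io/nats.go"
)

func main() {
	nc, err := nats.Connect("nats://0.0.0.0:4222",
		nats.UserInfo("root", "root"))
	if err != nil {
		log.Fatalf("connecting to NATS: %v", err)
	}
	defer nc.Close()

	// Each message is expected to carry InfluxDB line protocol records:
	//   <measurement>,<tag>=<v>,... <field>=<v>,... <timestamp>
	_, err = nc.Subscribe("hpc-nats", func(m *nats.Msg) {
		for _, line := range strings.Split(string(m.Data), "\n") {
			parts := strings.SplitN(line, " ", 3)
			if len(parts) < 2 {
				continue // not a valid line protocol record
			}
			head := strings.SplitN(parts[0], ",", 2)
			tags := ""
			if len(head) == 2 {
				tags = head[1]
			}
			fmt.Printf("measurement=%s tags=%s fields=%s\n",
				head[0], tags, parts[1])
		}
	})
	if err != nil {
		log.Fatalf("subscribing: %v", err)
	}
	select {} // block forever; real code ties this to a context
}
```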
configs/tagger/README.md: new file, 419 lines

@@ -0,0 +1,419 @@
# Job Tagging Configuration

ClusterCockpit provides automatic job tagging functionality to classify and
categorize jobs based on configurable rules. The tagging system consists of two
main components:

1. **Application Detection** - Identifies which application a job is running
2. **Job Classification** - Analyzes job performance characteristics and applies classification tags

## Directory Structure

```
configs/tagger/
├── apps/             # Application detection patterns
│   ├── vasp.txt
│   ├── gromacs.txt
│   └── ...
└── jobclasses/       # Job classification rules
    ├── parameters.json
    ├── lowUtilization.json
    ├── highload.json
    └── ...
```

## Activating Tagger Rules

### Step 1: Copy Configuration Files

To activate tagging, review, adapt, and copy the configuration files from
`configs/tagger/` to `var/tagger/`:

```bash
# From the cc-backend root directory
mkdir -p var/tagger
cp -r configs/tagger/apps var/tagger/
cp -r configs/tagger/jobclasses var/tagger/
```

### Step 2: Enable Tagging in Configuration

Add or set the following configuration key in the `main` section of your `config.json`:

```json
{
  "enable-job-taggers": true
}
```

**Important**: Automatic tagging is disabled by default. You must explicitly
enable it by setting `enable-job-taggers: true` in the main configuration file.

### Step 3: Restart cc-backend

The tagger system automatically loads configuration from `./var/tagger/` at
startup. After copying the files and enabling the feature, restart cc-backend:

```bash
./cc-backend -server
```

### Step 4: Verify Configuration Loaded

Check the logs for messages indicating successful configuration loading:

```
[INFO] Setup file watch for ./var/tagger/apps
[INFO] Setup file watch for ./var/tagger/jobclasses
```

## How Tagging Works

### Automatic Tagging

When `enable-job-taggers` is set to `true` in the configuration, tags are
automatically applied when:

- **Job Start**: Application detection runs immediately when a job starts
- **Job Stop**: Job classification runs when a job completes

The system analyzes job metadata and metrics to determine appropriate tags.

**Note**: Automatic tagging only works for jobs that start or stop after the
feature is enabled. Existing jobs are not automatically retagged.

### Manual Tagging (Retroactive)

To apply tags to existing jobs in the database, use the `-apply-tags` command
line option:

```bash
./cc-backend -apply-tags
```

This processes all jobs in the database and applies current tagging rules. This
is useful when:

- You have existing jobs that were created before tagging was enabled
- You've added new tagging rules and want to apply them to historical data
- You've modified existing rules and want to re-evaluate all jobs

### Hot Reload

The tagger system watches the configuration directories for changes. You can
modify or add rules without restarting `cc-backend`:

- Changes to `var/tagger/apps/*` are detected automatically
- Changes to `var/tagger/jobclasses/*` are detected automatically

## Application Detection

Application detection identifies which software a job is running by matching
patterns in the job script.

### Configuration Format

Application patterns are stored in text files under `var/tagger/apps/`. Each
file contains one or more regular expression patterns (one per line) that match
against the job script.

**Example: `apps/vasp.txt`**

```
vasp
VASP
```

### How It Works

1. When a job starts, the system retrieves the job script from metadata
2. Each line in the app files is treated as a regex pattern
3. Patterns are matched case-insensitively against the lowercased job script
4. If a match is found, a tag of type `app` with the filename (without extension) is applied
5. Only the first matching application is tagged
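To make these steps concrete, here is a compact, runnable Go sketch of the matching loop they describe. The data layout and the `detectApp` function are illustrative, not the actual `internal/tagger` code.

```go
package main

import (
	"fmt"
	"regexp"
	"strings"
)

// appPatterns pairs a tag name (the pattern file name without ".txt")
// with the regex lines read from that file.
type appPatterns struct {
	tag      string
	patterns []string
}

// detectApp lowercases the job script, tries each pattern in file order,
// and returns the tag of the first match, or "" if nothing matches.
func detectApp(apps []appPatterns, jobScript string) string {
	script := strings.ToLower(jobScript)
	for _, app := range apps {
		for _, p := range app.patterns {
			re, err := regexp.Compile(strings.ToLower(p))
			if err != nil {
				continue // a loader would likely log and skip malformed patterns
			}
			if re.MatchString(script) {
				return app.tag // only the first matching application is tagged
			}
		}
	}
	return ""
}

func main() {
	apps := []appPatterns{
		{tag: "vasp", patterns: []string{"vasp"}},              // from apps/vasp.txt
		{tag: "python", patterns: []string{"python", `\.py`}},  // hypothetical apps/python.txt
	}
	fmt.Println(detectApp(apps, "mpirun ./vasp_std INCAR")) // prints "vasp"
}
```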
### Adding New Applications

1. Create a new file in `var/tagger/apps/` (e.g., `tensorflow.txt`)
2. Add regex patterns, one per line:

```
tensorflow
tf\.keras
import tensorflow
```

3. The file is automatically detected and loaded

**Note**: The tag name will be the filename without the `.txt` extension (e.g., `tensorflow`).

## Job Classification

Job classification analyzes completed jobs based on their metrics and properties
to identify performance issues or characteristics.

### Configuration Format

Job classification rules are defined in JSON files under
`var/tagger/jobclasses/`. Each rule file defines:

- **Metrics required**: Which job metrics to analyze
- **Requirements**: Pre-conditions that must be met
- **Variables**: Computed values used in the rule
- **Rule expression**: Boolean expression that determines if the rule matches
- **Hint template**: Message displayed when the rule matches

### Parameters File

`jobclasses/parameters.json` defines shared threshold values used across multiple rules:

```json
{
  "lowcpuload_threshold_factor": 0.9,
  "highmemoryusage_threshold_factor": 0.9,
  "job_min_duration_seconds": 600.0,
  "sampling_interval_seconds": 30.0
}
```

### Rule File Structure

**Example: `jobclasses/lowUtilization.json`**

```json
{
  "name": "Low resource utilization",
  "tag": "lowutilization",
  "parameters": ["job_min_duration_seconds"],
  "metrics": ["flops_any", "mem_bw"],
  "requirements": [
    "job.shared == \"none\"",
    "job.duration > job_min_duration_seconds"
  ],
  "variables": [
    {
      "name": "mem_bw_perc",
      "expr": "1.0 - (mem_bw.avg / mem_bw.limits.peak)"
    }
  ],
  "rule": "flops_any.avg < flops_any.limits.alert",
  "hint": "Average flop rate {{.flops_any.avg}} falls below threshold {{.flops_any.limits.alert}}"
}
```

#### Field Descriptions

| Field          | Description                                                                     |
| -------------- | ------------------------------------------------------------------------------- |
| `name`         | Human-readable description of the rule                                          |
| `tag`          | Tag identifier applied when the rule matches                                    |
| `parameters`   | List of parameter names from `parameters.json` to include in rule environment   |
| `metrics`      | List of metrics required for evaluation (must be present in job data)           |
| `requirements` | Boolean expressions that must all be true for the rule to be evaluated          |
| `variables`    | Named expressions computed before evaluating the main rule                      |
| `rule`         | Boolean expression that determines if the job matches this classification       |
| `hint`         | Go template string for generating a user-visible message                        |

### Expression Environment

Expressions in `requirements`, `variables`, and `rule` have access to:

**Job Properties:**

- `job.shared` - Shared node allocation type
- `job.duration` - Job runtime in seconds
- `job.numCores` - Number of CPU cores
- `job.numNodes` - Number of nodes
- `job.jobState` - Job completion state
- `job.numAcc` - Number of accelerators
- `job.smt` - SMT setting

**Metric Statistics (for each metric in `metrics`):**

- `<metric>.min` - Minimum value
- `<metric>.max` - Maximum value
- `<metric>.avg` - Average value
- `<metric>.limits.peak` - Peak limit from cluster config
- `<metric>.limits.normal` - Normal threshold
- `<metric>.limits.caution` - Caution threshold
- `<metric>.limits.alert` - Alert threshold

**Parameters:**

- All parameters listed in the `parameters` field

**Variables:**

- All variables defined in the `variables` array

### Expression Language

Rules use the [expr](https://github.com/expr-lang/expr) language for expressions. Supported operations:

- **Arithmetic**: `+`, `-`, `*`, `/`, `%`, `^`
- **Comparison**: `==`, `!=`, `<`, `<=`, `>`, `>=`
- **Logical**: `&&`, `||`, `!`
- **Functions**: Standard math functions (see expr documentation)

### Hint Templates

Hints use Go's `text/template` syntax. Variables from the evaluation environment are accessible:

```
{{.flops_any.avg}}   # Access metric average
{{.job.duration}}    # Access job property
{{.my_variable}}     # Access computed variable
```
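Since rules use the expr language and hints use `text/template`, a small self-contained Go program can show how one rule evaluates end to end. The environment below is hand-built for illustration and reuses the `memoryLeak.json` expressions defined in the next section; the real tagger assembles this environment from job data.

```go
package main

import (
	"fmt"
	"log"
	"strings"
	"text/template"

	"github.com/expr-lang/expr"
)

func main() {
	env := map[string]any{
		"job":      map[string]any{"duration": 7200},
		"mem_used": map[string]any{"min": 10.0, "max": 120.0},
		// Parameter from parameters.json (value illustrative).
		"memory_leak_slope_threshold": 0.01,
	}

	// Variable from the rule file: memory growth rate per second.
	growth, err := expr.Eval("(mem_used.max - mem_used.min) / job.duration", env)
	if err != nil {
		log.Fatal(err)
	}
	env["mem_growth"] = growth

	// The main rule expression.
	matched, err := expr.Eval("mem_growth > memory_leak_slope_threshold", env)
	if err != nil {
		log.Fatal(err)
	}

	if matched.(bool) {
		// Render the hint template against the same environment.
		tmpl := template.Must(template.New("hint").
			Parse("Memory usage grew by {{.mem_growth}} per second"))
		var hint strings.Builder
		if err := tmpl.Execute(&hint, env); err != nil {
			log.Fatal(err)
		}
		fmt.Println("tag: memory_leak:", hint.String())
	}
}
```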
### Adding New Classification Rules

1. Create a new JSON file in `var/tagger/jobclasses/` (e.g., `memoryLeak.json`)
2. Define the rule structure:

```json
{
  "name": "Memory Leak Detection",
  "tag": "memory_leak",
  "parameters": ["memory_leak_slope_threshold"],
  "metrics": ["mem_used"],
  "requirements": ["job.duration > 3600"],
  "variables": [
    {
      "name": "mem_growth",
      "expr": "(mem_used.max - mem_used.min) / job.duration"
    }
  ],
  "rule": "mem_growth > memory_leak_slope_threshold",
  "hint": "Memory usage grew by {{.mem_growth}} per second"
}
```

3. Add any new parameters to `parameters.json`
4. The file is automatically detected and loaded

## Configuration Paths

The tagger system reads from these paths (relative to cc-backend working directory):

- **Application patterns**: `./var/tagger/apps/`
- **Job classification rules**: `./var/tagger/jobclasses/`

These paths are defined as constants in the source code and cannot be changed without recompiling.

## Troubleshooting

### Tags Not Applied

1. **Check tagging is enabled**: Verify `enable-job-taggers: true` is set in `config.json`
2. **Check configuration exists**:

```bash
ls -la var/tagger/apps
ls -la var/tagger/jobclasses
```

3. **Check logs for errors**:

```bash
./cc-backend -server -loglevel debug
```

4. **Verify file permissions**: Ensure cc-backend can read the configuration files
5. **For existing jobs**: Use `./cc-backend -apply-tags` to retroactively tag jobs

### Rules Not Matching

1. **Enable debug logging**: Set `loglevel: debug` to see detailed rule evaluation
2. **Check requirements**: Ensure all requirements in the rule are satisfied
3. **Verify metrics exist**: Classification rules require job metrics to be available
4. **Check metric names**: Ensure metric names match those in your cluster configuration

### File Watch Not Working

If changes to configuration files aren't detected:

1. Restart cc-backend to reload all configuration
2. Check filesystem supports file watching (network filesystems may not)
3. Check logs for file watch setup messages

## Best Practices

1. **Start Simple**: Begin with basic rules and refine based on results
2. **Use Requirements**: Filter out irrelevant jobs early with requirements
3. **Test Incrementally**: Add one rule at a time and verify behavior
4. **Document Rules**: Use descriptive names and clear hint messages
5. **Share Parameters**: Define common thresholds in `parameters.json` for consistency
6. **Version Control**: Keep your `var/tagger/` configuration in version control
7. **Backup Before Changes**: Test new rules on a copy before deploying to production

## Examples

### Simple Application Detection

**File: `var/tagger/apps/python.txt`**

```
python
python3
\.py
```

This detects jobs running Python scripts.

### Complex Classification Rule

**File: `var/tagger/jobclasses/cpuImbalance.json`**

```json
{
  "name": "CPU Load Imbalance",
  "tag": "cpu_imbalance",
  "parameters": ["core_load_imbalance_threshold_factor"],
  "metrics": ["cpu_load"],
  "requirements": ["job.numCores > 1", "job.duration > 600"],
  "variables": [
    {
      "name": "load_variance",
      "expr": "(cpu_load.max - cpu_load.min) / cpu_load.avg"
    }
  ],
  "rule": "load_variance > core_load_imbalance_threshold_factor",
  "hint": "CPU load varies by {{printf \"%.1f%%\" (load_variance * 100)}} across cores"
}
```

This detects jobs where CPU load is unevenly distributed across cores.

## Reference

### Configuration Options

**Main Configuration (`config.json`)**:

- `enable-job-taggers` (boolean, default: `false`) - Enables automatic job tagging system
  - Must be set to `true` to activate automatic tagging on job start/stop events
  - Does not affect the `-apply-tags` command line option

**Command Line Options**:

- `-apply-tags` - Apply all tagging rules to existing jobs in the database
  - Works independently of `enable-job-taggers` configuration
  - Useful for retroactively tagging jobs or re-evaluating with updated rules

### Default Configuration Location

The example configurations are provided in:

- `configs/tagger/apps/` - Example application patterns (16 applications)
- `configs/tagger/jobclasses/` - Example classification rules (3 rules)

Copy these to `var/tagger/` and customize for your environment.

### Tag Types

- `app` - Application tags (e.g., "vasp", "gromacs")
- `jobClass` - Classification tags (e.g., "lowutilization", "highload")

Tags can be queried and filtered in the ClusterCockpit UI and API.
go.mod: 87 lines changed
@@ -10,16 +10,16 @@ tool (
 )

 require (
-	github.com/99designs/gqlgen v0.17.84
-	github.com/ClusterCockpit/cc-lib v1.0.2
+	github.com/99designs/gqlgen v0.17.85
+	github.com/ClusterCockpit/cc-lib/v2 v2.1.0
 	github.com/Masterminds/squirrel v1.5.4
-	github.com/aws/aws-sdk-go-v2 v1.41.0
-	github.com/aws/aws-sdk-go-v2/config v1.31.20
-	github.com/aws/aws-sdk-go-v2/credentials v1.18.24
-	github.com/aws/aws-sdk-go-v2/service/s3 v1.90.2
-	github.com/coreos/go-oidc/v3 v3.16.0
-	github.com/expr-lang/expr v1.17.6
-	github.com/go-co-op/gocron/v2 v2.18.2
+	github.com/aws/aws-sdk-go-v2 v1.41.1
+	github.com/aws/aws-sdk-go-v2/config v1.32.6
+	github.com/aws/aws-sdk-go-v2/credentials v1.19.7
+	github.com/aws/aws-sdk-go-v2/service/s3 v1.95.0
+	github.com/coreos/go-oidc/v3 v3.17.0
+	github.com/expr-lang/expr v1.17.7
+	github.com/go-co-op/gocron/v2 v2.19.0
 	github.com/go-ldap/ldap/v3 v3.4.12
 	github.com/golang-jwt/jwt/v5 v5.3.0
 	github.com/golang-migrate/migrate/v4 v4.19.1
@@ -31,18 +31,16 @@ require (
 	github.com/jmoiron/sqlx v1.4.0
 	github.com/joho/godotenv v1.5.1
 	github.com/linkedin/goavro/v2 v2.14.1
-	github.com/mattn/go-sqlite3 v1.14.32
+	github.com/mattn/go-sqlite3 v1.14.33
 	github.com/nats-io/nats.go v1.47.0
-	github.com/prometheus/client_golang v1.23.2
-	github.com/prometheus/common v0.67.4
 	github.com/qustavo/sqlhooks/v2 v2.1.0
 	github.com/santhosh-tekuri/jsonschema/v5 v5.3.1
 	github.com/stretchr/testify v1.11.1
 	github.com/swaggo/http-swagger v1.3.4
 	github.com/swaggo/swag v1.16.6
 	github.com/vektah/gqlparser/v2 v2.5.31
-	golang.org/x/crypto v0.45.0
-	golang.org/x/oauth2 v0.32.0
+	golang.org/x/crypto v0.46.0
+	golang.org/x/oauth2 v0.34.0
 	golang.org/x/time v0.14.0
 )

@@ -50,22 +48,22 @@ require (
|
|||||||
github.com/Azure/go-ntlmssp v0.0.0-20221128193559-754e69321358 // indirect
|
github.com/Azure/go-ntlmssp v0.0.0-20221128193559-754e69321358 // indirect
|
||||||
github.com/KyleBanks/depth v1.2.1 // indirect
|
github.com/KyleBanks/depth v1.2.1 // indirect
|
||||||
github.com/agnivade/levenshtein v1.2.1 // indirect
|
github.com/agnivade/levenshtein v1.2.1 // indirect
|
||||||
github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.7.3 // indirect
|
github.com/apapsch/go-jsonmerge/v2 v2.0.0 // indirect
|
||||||
github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.18.13 // indirect
|
github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.7.4 // indirect
|
||||||
github.com/aws/aws-sdk-go-v2/internal/configsources v1.4.13 // indirect
|
github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.18.17 // indirect
|
||||||
github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.7.13 // indirect
|
github.com/aws/aws-sdk-go-v2/internal/configsources v1.4.17 // indirect
|
||||||
|
github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.7.17 // indirect
|
||||||
github.com/aws/aws-sdk-go-v2/internal/ini v1.8.4 // indirect
|
github.com/aws/aws-sdk-go-v2/internal/ini v1.8.4 // indirect
|
||||||
github.com/aws/aws-sdk-go-v2/internal/v4a v1.4.13 // indirect
|
github.com/aws/aws-sdk-go-v2/internal/v4a v1.4.16 // indirect
|
||||||
github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.13.3 // indirect
|
github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.13.4 // indirect
|
||||||
github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.9.4 // indirect
|
github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.9.7 // indirect
|
||||||
github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.13.13 // indirect
|
github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.13.17 // indirect
|
||||||
github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.19.13 // indirect
|
github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.19.16 // indirect
|
||||||
github.com/aws/aws-sdk-go-v2/service/sso v1.30.3 // indirect
|
github.com/aws/aws-sdk-go-v2/service/signin v1.0.5 // indirect
|
||||||
github.com/aws/aws-sdk-go-v2/service/ssooidc v1.35.7 // indirect
|
github.com/aws/aws-sdk-go-v2/service/sso v1.30.9 // indirect
|
||||||
github.com/aws/aws-sdk-go-v2/service/sts v1.40.2 // indirect
|
github.com/aws/aws-sdk-go-v2/service/ssooidc v1.35.13 // indirect
|
||||||
|
github.com/aws/aws-sdk-go-v2/service/sts v1.41.6 // indirect
|
||||||
github.com/aws/smithy-go v1.24.0 // indirect
|
github.com/aws/smithy-go v1.24.0 // indirect
|
||||||
github.com/beorn7/perks v1.0.1 // indirect
|
|
||||||
github.com/cespare/xxhash/v2 v2.3.0 // indirect
|
|
||||||
github.com/cpuguy83/go-md2man/v2 v2.0.7 // indirect
|
github.com/cpuguy83/go-md2man/v2 v2.0.7 // indirect
|
||||||
github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect
|
github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect
|
||||||
github.com/felixge/httpsnoop v1.0.4 // indirect
|
github.com/felixge/httpsnoop v1.0.4 // indirect
|
||||||
@@ -85,28 +83,27 @@ require (
|
|||||||
github.com/go-viper/mapstructure/v2 v2.4.0 // indirect
|
github.com/go-viper/mapstructure/v2 v2.4.0 // indirect
|
||||||
github.com/goccy/go-yaml v1.19.0 // indirect
|
github.com/goccy/go-yaml v1.19.0 // indirect
|
||||||
github.com/golang/snappy v0.0.4 // indirect
|
github.com/golang/snappy v0.0.4 // indirect
|
||||||
|
github.com/google/go-cmp v0.7.0 // indirect
|
||||||
github.com/google/uuid v1.6.0 // indirect
|
github.com/google/uuid v1.6.0 // indirect
|
||||||
github.com/gorilla/securecookie v1.1.2 // indirect
|
github.com/gorilla/securecookie v1.1.2 // indirect
|
||||||
github.com/gorilla/websocket v1.5.3 // indirect
|
github.com/gorilla/websocket v1.5.3 // indirect
|
||||||
github.com/hashicorp/golang-lru/v2 v2.0.7 // indirect
|
github.com/hashicorp/golang-lru/v2 v2.0.7 // indirect
|
||||||
|
github.com/influxdata/influxdb-client-go/v2 v2.14.0 // indirect
|
||||||
|
github.com/influxdata/line-protocol v0.0.0-20210922203350-b1ad95c89adf // indirect
|
||||||
github.com/jonboulle/clockwork v0.5.0 // indirect
|
github.com/jonboulle/clockwork v0.5.0 // indirect
|
||||||
github.com/jpillora/backoff v1.0.0 // indirect
|
github.com/klauspost/compress v1.18.2 // indirect
|
||||||
github.com/json-iterator/go v1.1.12 // indirect
|
github.com/kr/pretty v0.3.1 // indirect
|
||||||
github.com/klauspost/compress v1.18.1 // indirect
|
|
||||||
github.com/lann/builder v0.0.0-20180802200727-47ae307949d0 // indirect
|
github.com/lann/builder v0.0.0-20180802200727-47ae307949d0 // indirect
|
||||||
github.com/lann/ps v0.0.0-20150810152359-62de8c46ede0 // indirect
|
github.com/lann/ps v0.0.0-20150810152359-62de8c46ede0 // indirect
|
||||||
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
|
github.com/nats-io/nkeys v0.4.12 // indirect
|
||||||
github.com/modern-go/reflect2 v1.0.2 // indirect
|
|
||||||
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect
|
|
||||||
github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f // indirect
|
|
||||||
github.com/nats-io/nkeys v0.4.11 // indirect
|
|
||||||
github.com/nats-io/nuid v1.0.1 // indirect
|
github.com/nats-io/nuid v1.0.1 // indirect
|
||||||
|
github.com/oapi-codegen/runtime v1.1.1 // indirect
|
||||||
github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect
|
github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect
|
||||||
github.com/prometheus/client_model v0.6.2 // indirect
|
github.com/prometheus/common v0.67.4 // indirect
|
||||||
github.com/prometheus/procfs v0.16.1 // indirect
|
|
||||||
github.com/robfig/cron/v3 v3.0.1 // indirect
|
github.com/robfig/cron/v3 v3.0.1 // indirect
|
||||||
github.com/russross/blackfriday/v2 v2.1.0 // indirect
|
github.com/russross/blackfriday/v2 v2.1.0 // indirect
|
||||||
github.com/sosodev/duration v1.3.1 // indirect
|
github.com/sosodev/duration v1.3.1 // indirect
|
||||||
|
github.com/stmcginnis/gofish v0.20.0 // indirect
|
||||||
github.com/stretchr/objx v0.5.2 // indirect
|
github.com/stretchr/objx v0.5.2 // indirect
|
||||||
github.com/swaggo/files v1.0.1 // indirect
|
github.com/swaggo/files v1.0.1 // indirect
|
||||||
github.com/urfave/cli/v2 v2.27.7 // indirect
|
github.com/urfave/cli/v2 v2.27.7 // indirect
|
||||||
@@ -114,13 +111,13 @@ require (
|
|||||||
github.com/xrash/smetrics v0.0.0-20250705151800-55b8f293f342 // indirect
|
github.com/xrash/smetrics v0.0.0-20250705151800-55b8f293f342 // indirect
|
||||||
go.yaml.in/yaml/v2 v2.4.3 // indirect
|
go.yaml.in/yaml/v2 v2.4.3 // indirect
|
||||||
go.yaml.in/yaml/v3 v3.0.4 // indirect
|
go.yaml.in/yaml/v3 v3.0.4 // indirect
|
||||||
golang.org/x/mod v0.30.0 // indirect
|
golang.org/x/exp v0.0.0-20250620022241-b7579e27df2b // indirect
|
||||||
golang.org/x/net v0.47.0 // indirect
|
golang.org/x/mod v0.31.0 // indirect
|
||||||
golang.org/x/sync v0.18.0 // indirect
|
golang.org/x/net v0.48.0 // indirect
|
||||||
golang.org/x/sys v0.38.0 // indirect
|
golang.org/x/sync v0.19.0 // indirect
|
||||||
golang.org/x/text v0.31.0 // indirect
|
golang.org/x/sys v0.39.0 // indirect
|
||||||
golang.org/x/tools v0.39.0 // indirect
|
golang.org/x/text v0.32.0 // indirect
|
||||||
google.golang.org/protobuf v1.36.10 // indirect
|
golang.org/x/tools v0.40.0 // indirect
|
||||||
gopkg.in/yaml.v3 v3.0.1 // indirect
|
gopkg.in/yaml.v3 v3.0.1 // indirect
|
||||||
sigs.k8s.io/yaml v1.6.0 // indirect
|
sigs.k8s.io/yaml v1.6.0 // indirect
|
||||||
)
|
)
|
||||||
|
|||||||
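The jump from `cc-lib v1.0.2` to `cc-lib/v2 v2.1.0` is a Go major-version bump: under semantic import versioning the `/v2` suffix becomes part of the module path, which is why the gqlgen.yml and test-file diffs below also touch every import site. A minimal sketch of the mechanical change (using `schema.Job`, which the gqlgen.yml below maps to this package):

```go
package main

import (
	"fmt"

	// Before the bump the schema package lived at the v1 path:
	//   "github.com/ClusterCockpit/cc-lib/schema"
	// After it, the major version is part of the import path:
	"github.com/ClusterCockpit/cc-lib/v2/schema"
)

func main() {
	// Types resolve exactly as before; only the import path changed.
	var job schema.Job
	fmt.Printf("%T\n", job)
}
```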
164	go.sum

@@ -1,11 +1,11 @@
 filippo.io/edwards25519 v1.1.0 h1:FNf4tywRC1HmFuKW5xopWpigGjJKiJSV0Cqo0cJWDaA=
 filippo.io/edwards25519 v1.1.0/go.mod h1:BxyFTGdWcka3PhytdK4V28tE5sGfRvvvRV7EaN4VDT4=
-github.com/99designs/gqlgen v0.17.84 h1:iVMdiStgUVx/BFkMb0J5GAXlqfqtQ7bqMCYK6v52kQ0=
+github.com/99designs/gqlgen v0.17.85 h1:EkGx3U2FDcxQm8YDLQSpXIAVmpDyZ3IcBMOJi2nH1S0=
-github.com/99designs/gqlgen v0.17.84/go.mod h1:qjoUqzTeiejdo+bwUg8unqSpeYG42XrcrQboGIezmFA=
+github.com/99designs/gqlgen v0.17.85/go.mod h1:yvs8s0bkQlRfqg03YXr3eR4OQUowVhODT/tHzCXnbOU=
 github.com/Azure/go-ntlmssp v0.0.0-20221128193559-754e69321358 h1:mFRzDkZVAjdal+s7s0MwaRv9igoPqLRdzOLzw/8Xvq8=
 github.com/Azure/go-ntlmssp v0.0.0-20221128193559-754e69321358/go.mod h1:chxPXzSsl7ZWRAuOIE23GDNzjWuZquvFlgA8xmpunjU=
-github.com/ClusterCockpit/cc-lib v1.0.2 h1:ZWn3oZkXgxrr3zSigBdlOOfayZ4Om4xL20DhmritPPg=
+github.com/ClusterCockpit/cc-lib/v2 v2.1.0 h1:B6l6h0IjfEuY9DU6aVM3fSsj24lQ1eudXK9QTKmJjqg=
-github.com/ClusterCockpit/cc-lib v1.0.2/go.mod h1:UGdOvXEnjFqlnPSxtvtFwO6BtXYW6NnXFoud9FtN93k=
+github.com/ClusterCockpit/cc-lib/v2 v2.1.0/go.mod h1:JuxMAuEOaLLNEnnL9U3ejha8kMvsSatLdKPZEgJw6iw=
 github.com/KyleBanks/depth v1.2.1 h1:5h8fQADFrWtarTdtDudMmGsC7GPbOAu6RVB3ffsVFHc=
 github.com/KyleBanks/depth v1.2.1/go.mod h1:jzSb9d0L43HxTQfT+oSA1EEp2q+ne2uh6XgeJcm8brE=
 github.com/Masterminds/squirrel v1.5.4 h1:uUcX/aBc8O7Fg9kaISIUsHXdKuqehiXAMQTYX8afzqM=
@@ -14,6 +14,7 @@ github.com/NVIDIA/go-nvml v0.13.0-1 h1:OLX8Jq3dONuPOQPC7rndB6+iDmDakw0XTYgzMxObk
 github.com/NVIDIA/go-nvml v0.13.0-1/go.mod h1:+KNA7c7gIBH7SKSJ1ntlwkfN80zdx8ovl4hrK3LmPt4=
 github.com/PuerkitoBio/goquery v1.11.0 h1:jZ7pwMQXIITcUXNH83LLk+txlaEy6NVOfTuP43xxfqw=
 github.com/PuerkitoBio/goquery v1.11.0/go.mod h1:wQHgxUOU3JGuj3oD/QFfxUdlzW6xPHfqyHre6VMY4DQ=
+github.com/RaveNoX/go-jsoncommentstrip v1.0.0/go.mod h1:78ihd09MekBnJnxpICcwzCMzGrKSKYe4AqU6PDYYpjk=
 github.com/agnivade/levenshtein v1.2.1 h1:EHBY3UOn1gwdy/VbFwgo4cxecRznFk7fKWN1KOX7eoM=
 github.com/agnivade/levenshtein v1.2.1/go.mod h1:QVVI16kDrtSuwcpd0p1+xMC6Z/VfhtCyDIjcwga4/DU=
 github.com/alexbrainman/sspi v0.0.0-20250919150558-7d374ff0d59e h1:4dAU9FXIyQktpoUAgOJK3OTFc/xug0PCXYCqU0FgDKI=
@@ -22,52 +23,57 @@ github.com/andreyvit/diff v0.0.0-20170406064948-c7f18ee00883 h1:bvNMNQO63//z+xNg
 github.com/andreyvit/diff v0.0.0-20170406064948-c7f18ee00883/go.mod h1:rCTlJbsFo29Kk6CurOXKm700vrz8f0KW0JNfpkRJY/8=
 github.com/andybalholm/cascadia v1.3.3 h1:AG2YHrzJIm4BZ19iwJ/DAua6Btl3IwJX+VI4kktS1LM=
 github.com/andybalholm/cascadia v1.3.3/go.mod h1:xNd9bqTn98Ln4DwST8/nG+H0yuB8Hmgu1YHNnWw0GeA=
+github.com/antithesishq/antithesis-sdk-go v0.5.0-default-no-op h1:Ucf+QxEKMbPogRO5guBNe5cgd9uZgfoJLOYs8WWhtjM=
+github.com/antithesishq/antithesis-sdk-go v0.5.0-default-no-op/go.mod h1:IUpT2DPAKh6i/YhSbt6Gl3v2yvUZjmKncl7U91fup7E=
 github.com/apapsch/go-jsonmerge/v2 v2.0.0 h1:axGnT1gRIfimI7gJifB699GoE/oq+F2MU7Dml6nw9rQ=
 github.com/apapsch/go-jsonmerge/v2 v2.0.0/go.mod h1:lvDnEdqiQrp0O42VQGgmlKpxL1AP2+08jFMw88y4klk=
 github.com/arbovm/levenshtein v0.0.0-20160628152529-48b4e1c0c4d0 h1:jfIu9sQUG6Ig+0+Ap1h4unLjW6YQJpKZVmUzxsD4E/Q=
 github.com/arbovm/levenshtein v0.0.0-20160628152529-48b4e1c0c4d0/go.mod h1:t2tdKJDJF9BV14lnkjHmOQgcvEKgtqs5a1N3LNdJhGE=
-github.com/aws/aws-sdk-go-v2 v1.41.0 h1:tNvqh1s+v0vFYdA1xq0aOJH+Y5cRyZ5upu6roPgPKd4=
+github.com/aws/aws-sdk-go-v2 v1.41.1 h1:ABlyEARCDLN034NhxlRUSZr4l71mh+T5KAeGh6cerhU=
-github.com/aws/aws-sdk-go-v2 v1.41.0/go.mod h1:MayyLB8y+buD9hZqkCW3kX1AKq07Y5pXxtgB+rRFhz0=
+github.com/aws/aws-sdk-go-v2 v1.41.1/go.mod h1:MayyLB8y+buD9hZqkCW3kX1AKq07Y5pXxtgB+rRFhz0=
-github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.7.3 h1:DHctwEM8P8iTXFxC/QK0MRjwEpWQeM9yzidCRjldUz0=
+github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.7.4 h1:489krEF9xIGkOaaX3CE/Be2uWjiXrkCH6gUX+bZA/BU=
-github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.7.3/go.mod h1:xdCzcZEtnSTKVDOmUZs4l/j3pSV6rpo1WXl5ugNsL8Y=
+github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.7.4/go.mod h1:IOAPF6oT9KCsceNTvvYMNHy0+kMF8akOjeDvPENWxp4=
-github.com/aws/aws-sdk-go-v2/config v1.31.20 h1:/jWF4Wu90EhKCgjTdy1DGxcbcbNrjfBHvksEL79tfQc=
+github.com/aws/aws-sdk-go-v2/config v1.32.6 h1:hFLBGUKjmLAekvi1evLi5hVvFQtSo3GYwi+Bx4lpJf8=
-github.com/aws/aws-sdk-go-v2/config v1.31.20/go.mod h1:95Hh1Tc5VYKL9NJ7tAkDcqeKt+MCXQB1hQZaRdJIZE0=
+github.com/aws/aws-sdk-go-v2/config v1.32.6/go.mod h1:lcUL/gcd8WyjCrMnxez5OXkO3/rwcNmvfno62tnXNcI=
-github.com/aws/aws-sdk-go-v2/credentials v1.18.24 h1:iJ2FmPT35EaIB0+kMa6TnQ+PwG5A1prEdAw+PsMzfHg=
+github.com/aws/aws-sdk-go-v2/credentials v1.19.7 h1:tHK47VqqtJxOymRrNtUXN5SP/zUTvZKeLx4tH6PGQc8=
-github.com/aws/aws-sdk-go-v2/credentials v1.18.24/go.mod h1:U91+DrfjAiXPDEGYhh/x29o4p0qHX5HDqG7y5VViv64=
+github.com/aws/aws-sdk-go-v2/credentials v1.19.7/go.mod h1:qOZk8sPDrxhf+4Wf4oT2urYJrYt3RejHSzgAquYeppw=
-github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.18.13 h1:T1brd5dR3/fzNFAQch/iBKeX07/ffu/cLu+q+RuzEWk=
+github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.18.17 h1:I0GyV8wiYrP8XpA70g1HBcQO1JlQxCMTW9npl5UbDHY=
-github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.18.13/go.mod h1:Peg/GBAQ6JDt+RoBf4meB1wylmAipb7Kg2ZFakZTlwk=
+github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.18.17/go.mod h1:tyw7BOl5bBe/oqvoIeECFJjMdzXoa/dfVz3QQ5lgHGA=
-github.com/aws/aws-sdk-go-v2/internal/configsources v1.4.13 h1:a+8/MLcWlIxo1lF9xaGt3J/u3yOZx+CdSveSNwjhD40=
+github.com/aws/aws-sdk-go-v2/internal/configsources v1.4.17 h1:xOLELNKGp2vsiteLsvLPwxC+mYmO6OZ8PYgiuPJzF8U=
-github.com/aws/aws-sdk-go-v2/internal/configsources v1.4.13/go.mod h1:oGnKwIYZ4XttyU2JWxFrwvhF6YKiK/9/wmE3v3Iu9K8=
+github.com/aws/aws-sdk-go-v2/internal/configsources v1.4.17/go.mod h1:5M5CI3D12dNOtH3/mk6minaRwI2/37ifCURZISxA/IQ=
-github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.7.13 h1:HBSI2kDkMdWz4ZM7FjwE7e/pWDEZ+nR95x8Ztet1ooY=
+github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.7.17 h1:WWLqlh79iO48yLkj1v3ISRNiv+3KdQoZ6JWyfcsyQik=
-github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.7.13/go.mod h1:YE94ZoDArI7awZqJzBAZ3PDD2zSfuP7w6P2knOzIn8M=
+github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.7.17/go.mod h1:EhG22vHRrvF8oXSTYStZhJc1aUgKtnJe+aOiFEV90cM=
 github.com/aws/aws-sdk-go-v2/internal/ini v1.8.4 h1:WKuaxf++XKWlHWu9ECbMlha8WOEGm0OUEZqm4K/Gcfk=
 github.com/aws/aws-sdk-go-v2/internal/ini v1.8.4/go.mod h1:ZWy7j6v1vWGmPReu0iSGvRiise4YI5SkR3OHKTZ6Wuc=
-github.com/aws/aws-sdk-go-v2/internal/v4a v1.4.13 h1:eg/WYAa12vqTphzIdWMzqYRVKKnCboVPRlvaybNCqPA=
+github.com/aws/aws-sdk-go-v2/internal/v4a v1.4.16 h1:CjMzUs78RDDv4ROu3JnJn/Ig1r6ZD7/T2DXLLRpejic=
-github.com/aws/aws-sdk-go-v2/internal/v4a v1.4.13/go.mod h1:/FDdxWhz1486obGrKKC1HONd7krpk38LBt+dutLcN9k=
+github.com/aws/aws-sdk-go-v2/internal/v4a v1.4.16/go.mod h1:uVW4OLBqbJXSHJYA9svT9BluSvvwbzLQ2Crf6UPzR3c=
-github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.13.3 h1:x2Ibm/Af8Fi+BH+Hsn9TXGdT+hKbDd5XOTZxTMxDk7o=
+github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.13.4 h1:0ryTNEdJbzUCEWkVXEXoqlXV72J5keC1GvILMOuD00E=
-github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.13.3/go.mod h1:IW1jwyrQgMdhisceG8fQLmQIydcT/jWY21rFhzgaKwo=
+github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.13.4/go.mod h1:HQ4qwNZh32C3CBeO6iJLQlgtMzqeG17ziAA/3KDJFow=
-github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.9.4 h1:NvMjwvv8hpGUILarKw7Z4Q0w1H9anXKsesMxtw++MA4=
+github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.9.7 h1:DIBqIrJ7hv+e4CmIk2z3pyKT+3B6qVMgRsawHiR3qso=
-github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.9.4/go.mod h1:455WPHSwaGj2waRSpQp7TsnpOnBfw8iDfPfbwl7KPJE=
+github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.9.7/go.mod h1:vLm00xmBke75UmpNvOcZQ/Q30ZFjbczeLFqGx5urmGo=
-github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.13.13 h1:kDqdFvMY4AtKoACfzIGD8A0+hbT41KTKF//gq7jITfM=
+github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.13.17 h1:RuNSMoozM8oXlgLG/n6WLaFGoea7/CddrCfIiSA+xdY=
-github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.13.13/go.mod h1:lmKuogqSU3HzQCwZ9ZtcqOc5XGMqtDK7OIc2+DxiUEg=
+github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.13.17/go.mod h1:F2xxQ9TZz5gDWsclCtPQscGpP0VUOc8RqgFM3vDENmU=
-github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.19.13 h1:zhBJXdhWIFZ1acfDYIhu4+LCzdUS2Vbcum7D01dXlHQ=
+github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.19.16 h1:NSbvS17MlI2lurYgXnCOLvCFX38sBW4eiVER7+kkgsU=
-github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.19.13/go.mod h1:JaaOeCE368qn2Hzi3sEzY6FgAZVCIYcC2nwbro2QCh8=
+github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.19.16/go.mod h1:SwT8Tmqd4sA6G1qaGdzWCJN99bUmPGHfRwwq3G5Qb+A=
-github.com/aws/aws-sdk-go-v2/service/s3 v1.90.2 h1:DhdbtDl4FdNlj31+xiRXANxEE+eC7n8JQz+/ilwQ8Uc=
+github.com/aws/aws-sdk-go-v2/service/s3 v1.95.0 h1:MIWra+MSq53CFaXXAywB2qg9YvVZifkk6vEGl/1Qor0=
-github.com/aws/aws-sdk-go-v2/service/s3 v1.90.2/go.mod h1:+wArOOrcHUevqdto9k1tKOF5++YTe9JEcPSc9Tx2ZSw=
+github.com/aws/aws-sdk-go-v2/service/s3 v1.95.0/go.mod h1:79S2BdqCJpScXZA2y+cpZuocWsjGjJINyXnOsf5DTz8=
-github.com/aws/aws-sdk-go-v2/service/sso v1.30.3 h1:NjShtS1t8r5LUfFVtFeI8xLAHQNTa7UI0VawXlrBMFQ=
+github.com/aws/aws-sdk-go-v2/service/signin v1.0.5 h1:VrhDvQib/i0lxvr3zqlUwLwJP4fpmpyD9wYG1vfSu+Y=
-github.com/aws/aws-sdk-go-v2/service/sso v1.30.3/go.mod h1:fKvyjJcz63iL/ftA6RaM8sRCtN4r4zl4tjL3qw5ec7k=
+github.com/aws/aws-sdk-go-v2/service/signin v1.0.5/go.mod h1:k029+U8SY30/3/ras4G/Fnv/b88N4mAfliNn08Dem4M=
-github.com/aws/aws-sdk-go-v2/service/ssooidc v1.35.7 h1:gTsnx0xXNQ6SBbymoDvcoRHL+q4l/dAFsQuKfDWSaGc=
+github.com/aws/aws-sdk-go-v2/service/sso v1.30.9 h1:v6EiMvhEYBoHABfbGB4alOYmCIrcgyPPiBE1wZAEbqk=
-github.com/aws/aws-sdk-go-v2/service/ssooidc v1.35.7/go.mod h1:klO+ejMvYsB4QATfEOIXk8WAEwN4N0aBfJpvC+5SZBo=
+github.com/aws/aws-sdk-go-v2/service/sso v1.30.9/go.mod h1:yifAsgBxgJWn3ggx70A3urX2AN49Y5sJTD1UQFlfqBw=
-github.com/aws/aws-sdk-go-v2/service/sts v1.40.2 h1:HK5ON3KmQV2HcAunnx4sKLB9aPf3gKGwVAf7xnx0QT0=
+github.com/aws/aws-sdk-go-v2/service/ssooidc v1.35.13 h1:gd84Omyu9JLriJVCbGApcLzVR3XtmC4ZDPcAI6Ftvds=
-github.com/aws/aws-sdk-go-v2/service/sts v1.40.2/go.mod h1:E19xDjpzPZC7LS2knI9E6BaRFDK43Eul7vd6rSq2HWk=
+github.com/aws/aws-sdk-go-v2/service/ssooidc v1.35.13/go.mod h1:sTGThjphYE4Ohw8vJiRStAcu3rbjtXRsdNB0TvZ5wwo=
+github.com/aws/aws-sdk-go-v2/service/sts v1.41.6 h1:5fFjR/ToSOzB2OQ/XqWpZBmNvmP/pJ1jOWYlFDJTjRQ=
+github.com/aws/aws-sdk-go-v2/service/sts v1.41.6/go.mod h1:qgFDZQSD/Kys7nJnVqYlWKnh0SSdMjAi0uSwON4wgYQ=
 github.com/aws/smithy-go v1.24.0 h1:LpilSUItNPFr1eY85RYgTIg5eIEPtvFbskaFcmmIUnk=
 github.com/aws/smithy-go v1.24.0/go.mod h1:LEj2LM3rBRQJxPZTB4KuzZkaZYnZPnvgIhb4pu07mx0=
 github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM=
 github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw=
+github.com/bmatcuk/doublestar v1.1.1/go.mod h1:UD6OnuiIn0yFxxA2le/rnRU1G4RaI4UvFv1sNto9p6w=
 github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs=
 github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
-github.com/coreos/go-oidc/v3 v3.16.0 h1:qRQUCFstKpXwmEjDQTIbyY/5jF00+asXzSkmkoa/mow=
+github.com/coreos/go-oidc/v3 v3.17.0 h1:hWBGaQfbi0iVviX4ibC7bk8OKT5qNr4klBaCHVNvehc=
-github.com/coreos/go-oidc/v3 v3.16.0/go.mod h1:wqPbKFrVnE90vty060SB40FCJ8fTHTxSwyXJqZH+sI8=
+github.com/coreos/go-oidc/v3 v3.17.0/go.mod h1:wqPbKFrVnE90vty060SB40FCJ8fTHTxSwyXJqZH+sI8=
 github.com/cpuguy83/go-md2man/v2 v2.0.7 h1:zbFlGlXEAKlwXpmvle3d8Oe3YnkKIK4xSRTd3sHPnBo=
 github.com/cpuguy83/go-md2man/v2 v2.0.7/go.mod h1:oOW0eioCTA6cOiMLiUPZOpcVxMig6NIQQ7OS05n1F4g=
 github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E=
@@ -77,8 +83,8 @@ github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1
 github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
 github.com/dgryski/trifles v0.0.0-20230903005119-f50d829f2e54 h1:SG7nF6SRlWhcT7cNTs5R6Hk4V2lcmLz2NsG2VnInyNo=
 github.com/dgryski/trifles v0.0.0-20230903005119-f50d829f2e54/go.mod h1:if7Fbed8SFyPtHLHbg49SI7NAdJiC5WIA09pe59rfAA=
-github.com/expr-lang/expr v1.17.6 h1:1h6i8ONk9cexhDmowO/A64VPxHScu7qfSl2k8OlINec=
+github.com/expr-lang/expr v1.17.7 h1:Q0xY/e/2aCIp8g9s/LGvMDCC5PxYlvHgDZRQ4y16JX8=
-github.com/expr-lang/expr v1.17.6/go.mod h1:8/vRC7+7HBzESEqt5kKpYXxrxkr31SaO8r40VO/1IT4=
+github.com/expr-lang/expr v1.17.7/go.mod h1:8/vRC7+7HBzESEqt5kKpYXxrxkr31SaO8r40VO/1IT4=
 github.com/felixge/httpsnoop v1.0.4 h1:NFTV2Zj1bL4mc9sqWACXbQFVBBg2W3GPvqp8/ESS2Wg=
 github.com/felixge/httpsnoop v1.0.4/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U=
 github.com/frankban/quicktest v1.11.0/go.mod h1:K+q6oSqb0W0Ininfk863uOk1lMy69l/P6txr3mVT54s=
@@ -89,8 +95,8 @@ github.com/fsnotify/fsnotify v1.9.0 h1:2Ml+OJNzbYCTzsxtv8vKSFD9PbJjmhYF14k/jKC7S
 github.com/fsnotify/fsnotify v1.9.0/go.mod h1:8jBTzvmWwFyi3Pb8djgCCO5IBqzKJ/Jwo8TRcHyHii0=
 github.com/go-asn1-ber/asn1-ber v1.5.8-0.20250403174932-29230038a667 h1:BP4M0CvQ4S3TGls2FvczZtj5Re/2ZzkV9VwqPHH/3Bo=
 github.com/go-asn1-ber/asn1-ber v1.5.8-0.20250403174932-29230038a667/go.mod h1:hEBeB/ic+5LoWskz+yKT7vGhhPYkProFKoKdwZRWMe0=
-github.com/go-co-op/gocron/v2 v2.18.2 h1:+5VU41FUXPWSPKLXZQ/77SGzUiPCcakU0v7ENc2H20Q=
+github.com/go-co-op/gocron/v2 v2.19.0 h1:OKf2y6LXPs/BgBI2fl8PxUpNAI1DA9Mg+hSeGOS38OU=
-github.com/go-co-op/gocron/v2 v2.18.2/go.mod h1:Zii6he+Zfgy5W9B+JKk/KwejFOW0kZTFvHtwIpR4aBI=
+github.com/go-co-op/gocron/v2 v2.19.0/go.mod h1:5lEiCKk1oVJV39Zg7/YG10OnaVrDAV5GGR6O0663k6U=
 github.com/go-jose/go-jose/v4 v4.1.3 h1:CVLmWDhDVRa6Mi/IgCgaopNosCaHz7zrMeF9MlZRkrs=
 github.com/go-jose/go-jose/v4 v4.1.3/go.mod h1:x4oUasVrzR7071A4TnHLGSPpNOm2a21K9Kf04k1rs08=
 github.com/go-ldap/ldap/v3 v3.4.12 h1:1b81mv7MagXZ7+1r7cLTWmyuTqVqdwbtJSjC0DAp9s4=
@@ -140,7 +146,8 @@ github.com/google/go-cmp v0.5.2/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/
 github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
 github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8=
 github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU=
-github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg=
+github.com/google/go-tpm v0.9.7 h1:u89J4tUUeDTlH8xxC3CTW7OHZjbjKoHdQ9W7gCUhtxA=
+github.com/google/go-tpm v0.9.7/go.mod h1:h9jEsEECg7gtLis0upRBQU+GhYVH6jMjrFxI8u6bVUY=
 github.com/google/gofuzz v1.2.0 h1:xRy4A+RhZaiKjJ1bPfwQ8sedCA+YS2YcCHW6ec7JMi0=
 github.com/google/gofuzz v1.2.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg=
 github.com/google/gops v0.3.28 h1:2Xr57tqKAmQYRAfG12E+yLcoa2Y42UJo2lOrUFL9ark=
@@ -190,12 +197,9 @@ github.com/joho/godotenv v1.5.1 h1:7eLL/+HRGLY0ldzfGMeQkb7vMd0as4CfYvUVzLqw0N0=
 github.com/joho/godotenv v1.5.1/go.mod h1:f4LDr5Voq0i2e/R5DDNOoa2zzDfwtkZa6DnEwAbqwq4=
 github.com/jonboulle/clockwork v0.5.0 h1:Hyh9A8u51kptdkR+cqRpT1EebBwTn1oK9YfGYbdFz6I=
 github.com/jonboulle/clockwork v0.5.0/go.mod h1:3mZlmanh0g2NDKO5TWZVJAfofYk64M7XN3SzBPjZF60=
-github.com/jpillora/backoff v1.0.0 h1:uvFg412JmmHBHw7iwprIxkPMI+sGQ4kzOWsMeHnm2EA=
+github.com/juju/gnuflag v0.0.0-20171113085948-2ce1bb71843d/go.mod h1:2PavIy+JPciBPrBUjwbNvtwB6RQlve+hkpll6QSNmOE=
-github.com/jpillora/backoff v1.0.0/go.mod h1:J/6gKK9jxlEcS3zixgDgUAsiuZ7yrSoa/FX5e0EB2j4=
+github.com/klauspost/compress v1.18.2 h1:iiPHWW0YrcFgpBYhsA6D1+fqHssJscY/Tm/y2Uqnapk=
-github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM=
+github.com/klauspost/compress v1.18.2/go.mod h1:R0h/fSBs8DE4ENlcrlib3PsXS61voFxhIs2DeRhCvJ4=
-github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo=
-github.com/klauspost/compress v1.18.1 h1:bcSGx7UbpBqMChDtsF28Lw6v/G94LPrrbMbdC3JH2co=
-github.com/klauspost/compress v1.18.1/go.mod h1:ZQFFVG+MdnR0P+l6wpXgIL4NTtwiKIdBnrBd8Nrxr+0=
 github.com/kr/pretty v0.2.1/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI=
 github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE=
 github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk=
@@ -214,27 +218,27 @@ github.com/linkedin/goavro/v2 v2.14.1 h1:/8VjDpd38PRsy02JS0jflAu7JZPfJcGTwqWgMkF
 github.com/linkedin/goavro/v2 v2.14.1/go.mod h1:KXx+erlq+RPlGSPmLF7xGo6SAbh8sCQ53x064+ioxhk=
 github.com/mattn/go-sqlite3 v1.10.0/go.mod h1:FPy6KqzDD04eiIsT53CuJW3U88zkxoIYsOqkbpncsNc=
 github.com/mattn/go-sqlite3 v1.14.22/go.mod h1:Uh1q+B4BYcTPb+yiD3kU8Ct7aC0hY9fxUwlHK0RXw+Y=
-github.com/mattn/go-sqlite3 v1.14.32 h1:JD12Ag3oLy1zQA+BNn74xRgaBbdhbNIDYvQUEuuErjs=
+github.com/mattn/go-sqlite3 v1.14.33 h1:A5blZ5ulQo2AtayQ9/limgHEkFreKj1Dv226a1K73s0=
-github.com/mattn/go-sqlite3 v1.14.32/go.mod h1:Uh1q+B4BYcTPb+yiD3kU8Ct7aC0hY9fxUwlHK0RXw+Y=
+github.com/mattn/go-sqlite3 v1.14.33/go.mod h1:Uh1q+B4BYcTPb+yiD3kU8Ct7aC0hY9fxUwlHK0RXw+Y=
-github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
+github.com/minio/highwayhash v1.0.4-0.20251030100505-070ab1a87a76 h1:KGuD/pM2JpL9FAYvBrnBBeENKZNh6eNtjqytV6TYjnk=
-github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg=
+github.com/minio/highwayhash v1.0.4-0.20251030100505-070ab1a87a76/go.mod h1:GGYsuwP/fPD6Y9hMiXuapVvlIUEhFhMTh0rxU3ik1LQ=
-github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
-github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9Gz0M=
-github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk=
 github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA=
 github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ=
-github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f h1:KUppIJq7/+SVif2QVs3tOP0zanoHgBEVAwHxUSIzRqU=
+github.com/nats-io/jwt/v2 v2.8.0 h1:K7uzyz50+yGZDO5o772eRE7atlcSEENpL7P+b74JV1g=
-github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f/go.mod h1:qRWi+5nqEBWmkhHvq77mSJWrCKwh8bxhgT7d/eI7P4U=
+github.com/nats-io/jwt/v2 v2.8.0/go.mod h1:me11pOkwObtcBNR8AiMrUbtVOUGkqYjMQZ6jnSdVUIA=
+github.com/nats-io/nats-server/v2 v2.12.3 h1:KRv+1n7lddMVgkJPQer+pt36TcO0ENxjilBmeWdjcHs=
+github.com/nats-io/nats-server/v2 v2.12.3/go.mod h1:MQXjG9WjyXKz9koWzUc3jYUMKD8x3CLmTNy91IQQz3Y=
 github.com/nats-io/nats.go v1.47.0 h1:YQdADw6J/UfGUd2Oy6tn4Hq6YHxCaJrVKayxxFqYrgM=
 github.com/nats-io/nats.go v1.47.0/go.mod h1:iRWIPokVIFbVijxuMQq4y9ttaBTMe0SFdlZfMDd+33g=
-github.com/nats-io/nkeys v0.4.11 h1:q44qGV008kYd9W1b1nEBkNzvnWxtRSQ7A8BoqRrcfa0=
+github.com/nats-io/nkeys v0.4.12 h1:nssm7JKOG9/x4J8II47VWCL1Ds29avyiQDRn0ckMvDc=
-github.com/nats-io/nkeys v0.4.11/go.mod h1:szDimtgmfOi9n25JpfIdGw12tZFYXqhGxjhVxsatHVE=
+github.com/nats-io/nkeys v0.4.12/go.mod h1:MT59A1HYcjIcyQDJStTfaOY6vhy9XTUjOFo+SVsvpBg=
 github.com/nats-io/nuid v1.0.1 h1:5iA8DT8V7q8WK2EScv2padNa/rTESc1KdnPw4TC2paw=
 github.com/nats-io/nuid v1.0.1/go.mod h1:19wcPz3Ph3q0Jbyiqsd0kePYG7A95tJPxeL+1OSON2c=
 github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e/go.mod h1:zD1mROLANZcx1PVRCS0qkT7pwLkGfwJo4zjcN/Tysno=
 github.com/oapi-codegen/runtime v1.1.1 h1:EXLHh0DXIJnWhdRPN2w4MXAzFyE4CskzhNLUmtpMYro=
 github.com/oapi-codegen/runtime v1.1.1/go.mod h1:SK9X900oXmPWilYR5/WKPzt3Kqxn/uS/+lbpREv+eCg=
 github.com/opentracing/opentracing-go v1.1.0/go.mod h1:UkNAQd3GIcIGf0SeVgPpRdFStlNbqXla1AfSYxPUl2o=
+github.com/pkg/diff v0.0.0-20210226163009-20ebb0f2a09e/go.mod h1:pJLUxLENpZxwdsKMEsNbx1VGcRFpLqf3715MtcvvzbA=
 github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
 github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 h1:Jamvg5psRIccs7FGNTlIRMkT8wgtp5eCXdBlqhYGL6U=
 github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
@@ -250,6 +254,7 @@ github.com/qustavo/sqlhooks/v2 v2.1.0 h1:54yBemHnGHp/7xgT+pxwmIlMSDNYKx5JW5dfRAi
 github.com/qustavo/sqlhooks/v2 v2.1.0/go.mod h1:aMREyKo7fOKTwiLuWPsaHRXEmtqG4yREztO0idF83AU=
 github.com/robfig/cron/v3 v3.0.1 h1:WdRxkvbJztn8LMz/QEvLN5sBU+xKpSqwwUO1Pjr4qDs=
 github.com/robfig/cron/v3 v3.0.1/go.mod h1:eQICP3HwyT7UooqI/z+Ov+PtYAWygg1TEWWzGIFLtro=
+github.com/rogpeppe/go-internal v1.9.0/go.mod h1:WtVeX8xhTBvf0smdhujwtBcq4Qrzq/fJaraNFVN+nFs=
 github.com/rogpeppe/go-internal v1.14.1 h1:UQB4HGPB6osV0SQTLymcB4TgvyWu6ZyliaW0tI/otEQ=
 github.com/rogpeppe/go-internal v1.14.1/go.mod h1:MaRKkUm5W0goXpeCfT7UZI6fk/L7L7so1lCWt35ZSgc=
 github.com/russross/blackfriday/v2 v2.1.0 h1:JIOH55/0cWyOuilr9/qlrm0BSXldqnqwMsf35Ld67mk=
@@ -260,6 +265,9 @@ github.com/sergi/go-diff v1.3.1 h1:xkr+Oxo4BOQKmkn/B9eMK0g5Kg/983T9DqqPHwYqD+8=
 github.com/sergi/go-diff v1.3.1/go.mod h1:aMJSSKb2lpPvRNec0+w3fl7LP9IOFzdc9Pa4NFbPK1I=
 github.com/sosodev/duration v1.3.1 h1:qtHBDMQ6lvMQsL15g4aopM4HEfOaYuhWBw3NPTtlqq4=
 github.com/sosodev/duration v1.3.1/go.mod h1:RQIBBX0+fMLc/D9+Jb/fwvVmo0eZvDDEERAikUR6SDg=
+github.com/spkg/bom v0.0.0-20160624110644-59b7046e48ad/go.mod h1:qLr4V1qq6nMqFKkMo8ZTx3f+BZEkzsRUY10Xsm2mwU0=
+github.com/stmcginnis/gofish v0.20.0 h1:hH2V2Qe898F2wWT1loApnkDUrXXiLKqbSlMaH3Y1n08=
+github.com/stmcginnis/gofish v0.20.0/go.mod h1:PzF5i8ecRG9A2ol8XT64npKUunyraJ+7t0kYMpQAtqU=
 github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
 github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw=
 github.com/stretchr/objx v0.5.2 h1:xuMeJ0Sdp5ZMRXx/aWO6RZxdr3beISkG5/G/aIRr3pY=
@@ -294,33 +302,33 @@ go.yaml.in/yaml/v3 v3.0.4 h1:tfq32ie2Jv2UxXFdLJdh3jXuOzWiL1fo0bu/FbuKpbc=
 go.yaml.in/yaml/v3 v3.0.4/go.mod h1:DhzuOOF2ATzADvBadXxruRBLzYTpT36CKvDb3+aBEFg=
 golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
 golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc=
-golang.org/x/crypto v0.45.0 h1:jMBrvKuj23MTlT0bQEOBcAE0mjg8mK9RXFhRH6nyF3Q=
+golang.org/x/crypto v0.46.0 h1:cKRW/pmt1pKAfetfu+RCEvjvZkA9RimPbh7bhFjGVBU=
-golang.org/x/crypto v0.45.0/go.mod h1:XTGrrkGJve7CYK7J8PEww4aY7gM3qMCElcJQ8n8JdX4=
+golang.org/x/crypto v0.46.0/go.mod h1:Evb/oLKmMraqjZ2iQTwDwvCtJkczlDuTmdJXoZVzqU0=
 golang.org/x/exp v0.0.0-20250620022241-b7579e27df2b h1:M2rDM6z3Fhozi9O7NWsxAkg/yqS/lQJ6PmkyIV3YP+o=
 golang.org/x/exp v0.0.0-20250620022241-b7579e27df2b/go.mod h1:3//PLf8L/X+8b4vuAfHzxeRUl04Adcb341+IGKfnqS8=
 golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4=
-golang.org/x/mod v0.30.0 h1:fDEXFVZ/fmCKProc/yAXXUijritrDzahmwwefnjoPFk=
+golang.org/x/mod v0.31.0 h1:HaW9xtz0+kOcWKwli0ZXy79Ix+UW/vOfmWI5QVd2tgI=
-golang.org/x/mod v0.30.0/go.mod h1:lAsf5O2EvJeSFMiBxXDki7sCgAxEUcZHXoXMKT4GJKc=
+golang.org/x/mod v0.31.0/go.mod h1:43JraMp9cGx1Rx3AqioxrbrhNsLl2l/iNAvuBkrezpg=
 golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
 golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=
 golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c=
 golang.org/x/net v0.7.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs=
-golang.org/x/net v0.47.0 h1:Mx+4dIFzqraBXUugkia1OOvlD6LemFo1ALMHjrXDOhY=
+golang.org/x/net v0.48.0 h1:zyQRTTrjc33Lhh0fBgT/H3oZq9WuvRR5gPC70xpDiQU=
-golang.org/x/net v0.47.0/go.mod h1:/jNxtkgq5yWUGYkaZGqo27cfGZ1c5Nen03aYrrKpVRU=
+golang.org/x/net v0.48.0/go.mod h1:+ndRgGjkh8FGtu1w1FGbEC31if4VrNVMuKTgcAAnQRY=
-golang.org/x/oauth2 v0.32.0 h1:jsCblLleRMDrxMN29H3z/k1KliIvpLgCkE6R8FXXNgY=
+golang.org/x/oauth2 v0.34.0 h1:hqK/t4AKgbqWkdkcAeI8XLmbK+4m4G5YeQRrmiotGlw=
-golang.org/x/oauth2 v0.32.0/go.mod h1:lzm5WQJQwKZ3nwavOZ3IS5Aulzxi68dUSgRHujetwEA=
+golang.org/x/oauth2 v0.34.0/go.mod h1:lzm5WQJQwKZ3nwavOZ3IS5Aulzxi68dUSgRHujetwEA=
 golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
 golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
-golang.org/x/sync v0.18.0 h1:kr88TuHDroi+UVf+0hZnirlk8o8T+4MrK6mr60WkH/I=
+golang.org/x/sync v0.19.0 h1:vV+1eWNmZ5geRlYjzm2adRgW2/mcpevXNg50YZtPCE4=
-golang.org/x/sync v0.18.0/go.mod h1:9KTHXmSnoGruLpwFjVSX0lNNA75CykiMECbovNTZqGI=
+golang.org/x/sync v0.19.0/go.mod h1:9KTHXmSnoGruLpwFjVSX0lNNA75CykiMECbovNTZqGI=
 golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
 golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
 golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
 golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
 golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
 golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
-golang.org/x/sys v0.38.0 h1:3yZWxaJjBmCWXqhN1qh02AkOnCQ1poK6oF+a7xWL6Gc=
+golang.org/x/sys v0.39.0 h1:CvCKL8MeisomCi6qNZ+wbb0DN9E5AATixKsvNtMoMFk=
-golang.org/x/sys v0.38.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks=
+golang.org/x/sys v0.39.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks=
 golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
 golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
 golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k=
@@ -328,19 +336,19 @@ golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
 golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
 golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ=
 golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8=
-golang.org/x/text v0.31.0 h1:aC8ghyu4JhP8VojJ2lEHBnochRno1sgL6nEi9WGFGMM=
+golang.org/x/text v0.32.0 h1:ZD01bjUt1FQ9WJ0ClOL5vxgxOI/sVCNgX1YtKwcY0mU=
-golang.org/x/text v0.31.0/go.mod h1:tKRAlv61yKIjGGHX/4tP1LTbc13YSec1pxVEWXzfoeM=
+golang.org/x/text v0.32.0/go.mod h1:o/rUWzghvpD5TXrTIBuJU77MTaN0ljMWE47kxGJQ7jY=
 golang.org/x/time v0.14.0 h1:MRx4UaLrDotUKUdCIqzPC48t1Y9hANFKIRpNx+Te8PI=
 golang.org/x/time v0.14.0/go.mod h1:eL/Oa2bBBK0TkX57Fyni+NgnyQQN4LitPmob2Hjnqw4=
 golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
 golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
 golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc=
-golang.org/x/tools v0.39.0 h1:ik4ho21kwuQln40uelmciQPp9SipgNDdrafrYA4TmQQ=
+golang.org/x/tools v0.40.0 h1:yLkxfA+Qnul4cs9QA3KnlFu0lVmd8JJfoq+E41uSutA=
-golang.org/x/tools v0.39.0/go.mod h1:JnefbkDPyD8UU2kI5fuf8ZX4/yUeh9W877ZeBONxUqQ=
+golang.org/x/tools v0.40.0/go.mod h1:Ik/tzLRlbscWpqqMRjyWYDisX8bG13FrdXp3o4Sr9lc=
 golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
 golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
-google.golang.org/protobuf v1.36.10 h1:AYd7cD/uASjIL6Q9LiTjz8JLcrh/88q5UObnmY3aOOE=
+google.golang.org/protobuf v1.36.11 h1:fV6ZwhNocDyBLK0dj+fg8ektcVegBBuEolpbTQyBNVE=
-google.golang.org/protobuf v1.36.10/go.mod h1:HTf+CrKn2C3g5S8VImy6tdcUvCska2kB7j23XfzDpco=
+google.golang.org/protobuf v1.36.11/go.mod h1:HTf+CrKn2C3g5S8VImy6tdcUvCska2kB7j23XfzDpco=
 gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
 gopkg.in/check.v1 v1.0.0-20200227125254-8fa46927fb4f/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
 gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk=
52	gqlgen.yml

@@ -52,51 +52,51 @@ models:
       - github.com/99designs/gqlgen/graphql.Int64
       - github.com/99designs/gqlgen/graphql.Int32
   Job:
-    model: "github.com/ClusterCockpit/cc-lib/schema.Job"
+    model: "github.com/ClusterCockpit/cc-lib/v2/schema.Job"
     fields:
       tags:
         resolver: true
       metaData:
         resolver: true
   Cluster:
-    model: "github.com/ClusterCockpit/cc-lib/schema.Cluster"
+    model: "github.com/ClusterCockpit/cc-lib/v2/schema.Cluster"
     fields:
       partitions:
         resolver: true
   # Node:
-  #   model: "github.com/ClusterCockpit/cc-lib/schema.Node"
+  #   model: "github.com/ClusterCockpit/cc-lib/v2/schema.Node"
   #   fields:
   #     metaData:
   #       resolver: true
-  NullableFloat: { model: "github.com/ClusterCockpit/cc-lib/schema.Float" }
+  NullableFloat: { model: "github.com/ClusterCockpit/cc-lib/v2/schema.Float" }
-  MetricScope: { model: "github.com/ClusterCockpit/cc-lib/schema.MetricScope" }
+  MetricScope: { model: "github.com/ClusterCockpit/cc-lib/v2/schema.MetricScope" }
-  MetricValue: { model: "github.com/ClusterCockpit/cc-lib/schema.MetricValue" }
+  MetricValue: { model: "github.com/ClusterCockpit/cc-lib/v2/schema.MetricValue" }
   JobStatistics:
-    { model: "github.com/ClusterCockpit/cc-lib/schema.JobStatistics" }
+    { model: "github.com/ClusterCockpit/cc-lib/v2/schema.JobStatistics" }
   GlobalMetricListItem:
-    { model: "github.com/ClusterCockpit/cc-lib/schema.GlobalMetricListItem" }
+    { model: "github.com/ClusterCockpit/cc-lib/v2/schema.GlobalMetricListItem" }
   ClusterSupport:
-    { model: "github.com/ClusterCockpit/cc-lib/schema.ClusterSupport" }
+    { model: "github.com/ClusterCockpit/cc-lib/v2/schema.ClusterSupport" }
-  Tag: { model: "github.com/ClusterCockpit/cc-lib/schema.Tag" }
+  Tag: { model: "github.com/ClusterCockpit/cc-lib/v2/schema.Tag" }
-  Resource: { model: "github.com/ClusterCockpit/cc-lib/schema.Resource" }
+  Resource: { model: "github.com/ClusterCockpit/cc-lib/v2/schema.Resource" }
-  JobState: { model: "github.com/ClusterCockpit/cc-lib/schema.JobState" }
+  JobState: { model: "github.com/ClusterCockpit/cc-lib/v2/schema.JobState" }
-  Node: { model: "github.com/ClusterCockpit/cc-lib/schema.Node" }
+  Node: { model: "github.com/ClusterCockpit/cc-lib/v2/schema.Node" }
   SchedulerState:
-    { model: "github.com/ClusterCockpit/cc-lib/schema.SchedulerState" }
+    { model: "github.com/ClusterCockpit/cc-lib/v2/schema.SchedulerState" }
   HealthState:
-    { model: "github.com/ClusterCockpit/cc-lib/schema.MonitoringState" }
+    { model: "github.com/ClusterCockpit/cc-lib/v2/schema.MonitoringState" }
-  JobMetric: { model: "github.com/ClusterCockpit/cc-lib/schema.JobMetric" }
+  JobMetric: { model: "github.com/ClusterCockpit/cc-lib/v2/schema.JobMetric" }
-  Series: { model: "github.com/ClusterCockpit/cc-lib/schema.Series" }
+  Series: { model: "github.com/ClusterCockpit/cc-lib/v2/schema.Series" }
   MetricStatistics:
-    { model: "github.com/ClusterCockpit/cc-lib/schema.MetricStatistics" }
+    { model: "github.com/ClusterCockpit/cc-lib/v2/schema.MetricStatistics" }
   MetricConfig:
-    { model: "github.com/ClusterCockpit/cc-lib/schema.MetricConfig" }
+    { model: "github.com/ClusterCockpit/cc-lib/v2/schema.MetricConfig" }
   SubClusterConfig:
-    { model: "github.com/ClusterCockpit/cc-lib/schema.SubClusterConfig" }
+    { model: "github.com/ClusterCockpit/cc-lib/v2/schema.SubClusterConfig" }
-  Accelerator: { model: "github.com/ClusterCockpit/cc-lib/schema.Accelerator" }
+  Accelerator: { model: "github.com/ClusterCockpit/cc-lib/v2/schema.Accelerator" }
-  Topology: { model: "github.com/ClusterCockpit/cc-lib/schema.Topology" }
+  Topology: { model: "github.com/ClusterCockpit/cc-lib/v2/schema.Topology" }
   FilterRanges:
-    { model: "github.com/ClusterCockpit/cc-lib/schema.FilterRanges" }
+    { model: "github.com/ClusterCockpit/cc-lib/v2/schema.FilterRanges" }
-  SubCluster: { model: "github.com/ClusterCockpit/cc-lib/schema.SubCluster" }
+  SubCluster: { model: "github.com/ClusterCockpit/cc-lib/v2/schema.SubCluster" }
-  StatsSeries: { model: "github.com/ClusterCockpit/cc-lib/schema.StatsSeries" }
+  StatsSeries: { model: "github.com/ClusterCockpit/cc-lib/v2/schema.StatsSeries" }
-  Unit: { model: "github.com/ClusterCockpit/cc-lib/schema.Unit" }
+  Unit: { model: "github.com/ClusterCockpit/cc-lib/v2/schema.Unit" }
@@ -17,26 +17,27 @@ import (
 	"strings"
 	"testing"
 	"time"
-	"sync"
 
 	"github.com/ClusterCockpit/cc-backend/internal/api"
 	"github.com/ClusterCockpit/cc-backend/internal/archiver"
 	"github.com/ClusterCockpit/cc-backend/internal/auth"
 	"github.com/ClusterCockpit/cc-backend/internal/config"
 	"github.com/ClusterCockpit/cc-backend/internal/graph"
-	"github.com/ClusterCockpit/cc-backend/internal/memorystore"
+	"github.com/ClusterCockpit/cc-backend/internal/metricdispatch"
+	"github.com/ClusterCockpit/cc-backend/internal/metricstore"
 	"github.com/ClusterCockpit/cc-backend/internal/repository"
 	"github.com/ClusterCockpit/cc-backend/pkg/archive"
-	ccconf "github.com/ClusterCockpit/cc-lib/ccConfig"
+	ccconf "github.com/ClusterCockpit/cc-lib/v2/ccConfig"
-	cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
+	cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger"
-	"github.com/ClusterCockpit/cc-lib/schema"
+	"github.com/ClusterCockpit/cc-lib/v2/schema"
 	"github.com/gorilla/mux"
 
 	_ "github.com/mattn/go-sqlite3"
 )
 
 func setup(t *testing.T) *api.RestAPI {
+	repository.ResetConnection()
+
 	const testconfig = `{
 	"main": {
 	"addr": "0.0.0.0:8080",
@@ -53,17 +54,7 @@ func setup(t *testing.T) *api.RestAPI {
 	"jwts": {
 	"max-age": "2m"
 	}
-	},
-	"clusters": [
-	{
-	"name": "testcluster",
-	"filterRanges": {
-	"numNodes": { "from": 1, "to": 64 },
-	"duration": { "from": 0, "to": 86400 },
-	"startTime": { "from": "2022-01-01T00:00:00Z", "to": null }
 	}
-	}
-	]
 	}`
 	const testclusterJSON = `{
 	"name": "testcluster",
@@ -155,11 +146,7 @@ func setup(t *testing.T) *api.RestAPI {
 
 	// Load and check main configuration
 	if cfg := ccconf.GetPackageConfig("main"); cfg != nil {
-		if clustercfg := ccconf.GetPackageConfig("clusters"); clustercfg != nil {
-			config.Init(cfg, clustercfg)
-		} else {
-			cclog.Abort("Cluster configuration must be present")
-		}
+		config.Init(cfg)
 	} else {
 		cclog.Abort("Main configuration must be present")
 	}
@@ -171,13 +158,7 @@ func setup(t *testing.T) *api.RestAPI {
 		t.Fatal(err)
 	}
 
-	// Initialize memorystore (optional - will return nil if not configured)
-	// For this test, we don't initialize it to test the nil handling
-	mscfg := ccconf.GetPackageConfig("metric-store")
-	if mscfg != nil {
-		var wg sync.WaitGroup
-		memorystore.Init(mscfg, &wg)
-	}
+	// metricstore initialization removed - it's initialized via callback in tests
 
 	archiver.Start(repository.GetJobRepository(), context.Background())
 
@@ -194,30 +175,45 @@ func setup(t *testing.T) *api.RestAPI {
 }
 
 func cleanup() {
-	// Gracefully shutdown archiver with timeout
 	if err := archiver.Shutdown(5 * time.Second); err != nil {
 		cclog.Warnf("Archiver shutdown timeout in tests: %v", err)
 	}
-
-	// Shutdown memorystore if it was initialized
-	memorystore.Shutdown()
 }
 
 /*
- * This function starts a job, stops it, and tests the REST API.
+ * This function starts a job, stops it, and then reads its data from the job-archive.
  * Do not run sub-tests in parallel! Tests should not be run in parallel at all, because
  * at least `setup` modifies global state.
 */
 func TestRestApi(t *testing.T) {
 	restapi := setup(t)
 	t.Cleanup(cleanup)
+	testData := schema.JobData{
+		"load_one": map[schema.MetricScope]*schema.JobMetric{
+			schema.MetricScopeNode: {
+				Unit:     schema.Unit{Base: "load"},
+				Timestep: 60,
+				Series: []schema.Series{
+					{
+						Hostname:   "host123",
+						Statistics: schema.MetricStatistics{Min: 0.1, Avg: 0.2, Max: 0.3},
+						Data:       []schema.Float{0.1, 0.1, 0.1, 0.2, 0.2, 0.2, 0.3, 0.3, 0.3},
+					},
+				},
+			},
+		},
+	}
+
+	metricstore.TestLoadDataCallback = func(job *schema.Job, metrics []string, scopes []schema.MetricScope, ctx context.Context, resolution int) (schema.JobData, error) {
+		return testData, nil
+	}
+
 	r := mux.NewRouter()
 	r.PathPrefix("/api").Subrouter()
 	r.StrictSlash(true)
 	restapi.MountAPIRoutes(r)
 
-	var TestJobId int64 = 123
+	var TestJobID int64 = 123
 	TestClusterName := "testcluster"
 	var TestStartTime int64 = 123456789
 
@@ -265,12 +261,18 @@ func TestRestApi(t *testing.T) {
 	if response.StatusCode != http.StatusCreated {
 		t.Fatal(response.Status, recorder.Body.String())
 	}
+	// resolver := graph.GetResolverInstance()
 	restapi.JobRepository.SyncJobs()
-	job, err := restapi.JobRepository.Find(&TestJobId, &TestClusterName, &TestStartTime)
+	job, err := restapi.JobRepository.Find(&TestJobID, &TestClusterName, &TestStartTime)
 	if err != nil {
 		t.Fatal(err)
 	}
 
+	// job.Tags, err = resolver.Job().Tags(ctx, job)
+	// if err != nil {
+	// 	t.Fatal(err)
+	// }
+
 	if job.JobID != 123 ||
 		job.User != "testuser" ||
 		job.Project != "testproj" ||
@@ -288,6 +290,10 @@ func TestRestApi(t *testing.T) {
 		job.StartTime != 123456789 {
 		t.Fatalf("unexpected job properties: %#v", job)
 	}
+
+	// if len(job.Tags) != 1 || job.Tags[0].Type != "testTagType" || job.Tags[0].Name != "testTagName" || job.Tags[0].Scope != "testuser" {
+	// 	t.Fatalf("unexpected tags: %#v", job.Tags)
+	// }
 	}); !ok {
 		return
 	}
@@ -301,6 +307,7 @@ func TestRestApi(t *testing.T) {
 	"stopTime": 123457789
 	}`
 
+	var stoppedJob *schema.Job
 	if ok := t.Run("StopJob", func(t *testing.T) {
 		req := httptest.NewRequest(http.MethodPost, "/jobs/stop_job/", bytes.NewBuffer([]byte(stopJobBody)))
 		recorder := httptest.NewRecorder()
@@ -314,7 +321,7 @@ func TestRestApi(t *testing.T) {
 	}
 
 	// Archiving happens asynchronously, will be completed in cleanup
-	job, err := restapi.JobRepository.Find(&TestJobId, &TestClusterName, &TestStartTime)
+	job, err := restapi.JobRepository.Find(&TestJobID, &TestClusterName, &TestStartTime)
 	if err != nil {
 		t.Fatal(err)
 	}
@@ -336,12 +343,21 @@ func TestRestApi(t *testing.T) {
 		t.Fatalf("unexpected job.metaData: %#v", job.MetaData)
 	}
 
+	stoppedJob = job
 	}); !ok {
 		return
 	}
 
-	// Note: We skip the CheckArchive test because without memorystore initialized,
-	// archiving will fail gracefully. This test now focuses on the REST API itself.
+	t.Run("CheckArchive", func(t *testing.T) {
+		data, err := metricdispatch.LoadData(stoppedJob, []string{"load_one"}, []schema.MetricScope{schema.MetricScopeNode}, context.Background(), 60)
+		if err != nil {
+			t.Fatal(err)
+		}
+
+		if !reflect.DeepEqual(data, testData) {
+			t.Fatal("unexpected data fetched from archive")
+		}
+	})
+
 	t.Run("CheckDoubleStart", func(t *testing.T) {
 		// Starting a job with the same jobId and cluster should only be allowed if the startTime is far appart!
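[Editor's note] The hunk above swaps real metric-store initialization for a package-level test callback (metricstore.TestLoadDataCallback) that the data loader consults instead of a live store. A minimal sketch of that test-seam pattern, with illustrative names (loader, LoadFunc) that are not part of cc-backend:

    package loader

    // LoadFunc mirrors the shape of a job-data loader: given a job ID and a
    // metric list, return per-metric series.
    type LoadFunc func(jobID int64, metrics []string) (map[string][]float64, error)

    // Load is the package-level seam. Production code assigns the real
    // implementation once at startup; tests overwrite it with a stub.
    var Load LoadFunc

    // In a test, the stub is installed and restored via t.Cleanup:
    //
    //	orig := loader.Load
    //	t.Cleanup(func() { loader.Load = orig })
    //	loader.Load = func(jobID int64, metrics []string) (map[string][]float64, error) {
    //		return map[string][]float64{"load_one": {0.1, 0.2, 0.3}}, nil
    //	}
    //	// exercise code that calls loader.Load; no metric store needed
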
@@ -13,7 +13,7 @@ import (
 
 	"github.com/ClusterCockpit/cc-backend/internal/repository"
 	"github.com/ClusterCockpit/cc-backend/pkg/archive"
-	"github.com/ClusterCockpit/cc-lib/schema"
+	"github.com/ClusterCockpit/cc-lib/v2/schema"
 )
 
 // GetClustersAPIResponse model
@@ -27,7 +27,7 @@ type GetClustersAPIResponse struct {
 // @description Get a list of all cluster configs. Specific cluster can be requested using query parameter.
 // @produce json
 // @param cluster query string false "Job Cluster"
-// @success 200 {object} api.GetClustersApiResponse "Array of clusters"
+// @success 200 {object} api.GetClustersAPIResponse "Array of clusters"
 // @failure 400 {object} api.ErrorResponse "Bad Request"
 // @failure 401 {object} api.ErrorResponse "Unauthorized"
 // @failure 403 {object} api.ErrorResponse "Forbidden"
internal/api/docs.go (1126 changes): diff suppressed because it is too large.
@@ -22,11 +22,11 @@ import (
 	"github.com/ClusterCockpit/cc-backend/internal/graph"
 	"github.com/ClusterCockpit/cc-backend/internal/graph/model"
 	"github.com/ClusterCockpit/cc-backend/internal/importer"
-	"github.com/ClusterCockpit/cc-backend/internal/metricdispatcher"
+	"github.com/ClusterCockpit/cc-backend/internal/metricdispatch"
 	"github.com/ClusterCockpit/cc-backend/internal/repository"
 	"github.com/ClusterCockpit/cc-backend/pkg/archive"
-	cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
+	cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger"
-	"github.com/ClusterCockpit/cc-lib/schema"
+	"github.com/ClusterCockpit/cc-lib/v2/schema"
 	"github.com/gorilla/mux"
 )
 
@@ -104,7 +104,7 @@ type JobMetricWithName struct {
 // @param items-per-page query int false "Items per page (Default: 25)"
 // @param page query int false "Page Number (Default: 1)"
 // @param with-metadata query bool false "Include metadata (e.g. jobScript) in response"
-// @success 200 {object} api.GetJobsApiResponse "Job array and page info"
+// @success 200 {object} api.GetJobsAPIResponse "Job array and page info"
 // @failure 400 {object} api.ErrorResponse "Bad Request"
 // @failure 401 {object} api.ErrorResponse "Unauthorized"
 // @failure 403 {object} api.ErrorResponse "Forbidden"
@@ -232,7 +232,7 @@ func (api *RestAPI) getJobs(rw http.ResponseWriter, r *http.Request) {
 // @produce json
 // @param id path int true "Database ID of Job"
 // @param all-metrics query bool false "Include all available metrics"
-// @success 200 {object} api.GetJobApiResponse "Job resource"
+// @success 200 {object} api.GetJobAPIResponse "Job resource"
 // @failure 400 {object} api.ErrorResponse "Bad Request"
 // @failure 401 {object} api.ErrorResponse "Unauthorized"
 // @failure 403 {object} api.ErrorResponse "Forbidden"
@@ -293,7 +293,7 @@ func (api *RestAPI) getCompleteJobByID(rw http.ResponseWriter, r *http.Request)
 	}
 
 	if r.URL.Query().Get("all-metrics") == "true" {
-		data, err = metricdispatcher.LoadData(job, nil, scopes, r.Context(), resolution)
+		data, err = metricdispatch.LoadData(job, nil, scopes, r.Context(), resolution)
 		if err != nil {
 			cclog.Warnf("REST: error while loading all-metrics job data for JobID %d on %s", job.JobID, job.Cluster)
 			return
@@ -324,8 +324,8 @@ func (api *RestAPI) getCompleteJobByID(rw http.ResponseWriter, r *http.Request)
 // @accept json
 // @produce json
 // @param id path int true "Database ID of Job"
-// @param request body api.GetJobApiRequest true "Array of metric names"
+// @param request body api.GetJobAPIRequest true "Array of metric names"
-// @success 200 {object} api.GetJobApiResponse "Job resource"
+// @success 200 {object} api.GetJobAPIResponse "Job resource"
 // @failure 400 {object} api.ErrorResponse "Bad Request"
 // @failure 401 {object} api.ErrorResponse "Unauthorized"
 // @failure 403 {object} api.ErrorResponse "Forbidden"
@@ -389,7 +389,7 @@ func (api *RestAPI) getJobByID(rw http.ResponseWriter, r *http.Request) {
 		resolution = max(resolution, mc.Timestep)
 	}
 
-	data, err := metricdispatcher.LoadData(job, metrics, scopes, r.Context(), resolution)
+	data, err := metricdispatch.LoadData(job, metrics, scopes, r.Context(), resolution)
 	if err != nil {
 		cclog.Warnf("REST: error while loading job data for JobID %d on %s", job.JobID, job.Cluster)
 		return
@@ -478,7 +478,7 @@ func (api *RestAPI) editMeta(rw http.ResponseWriter, r *http.Request) {
 // @accept json
 // @produce json
 // @param id path int true "Job Database ID"
-// @param request body api.TagJobApiRequest true "Array of tag-objects to add"
+// @param request body api.TagJobAPIRequest true "Array of tag-objects to add"
 // @success 200 {object} schema.Job "Updated job resource"
 // @failure 400 {object} api.ErrorResponse "Bad Request"
 // @failure 401 {object} api.ErrorResponse "Unauthorized"
@@ -542,7 +542,7 @@ func (api *RestAPI) tagJob(rw http.ResponseWriter, r *http.Request) {
 // @accept json
 // @produce json
 // @param id path int true "Job Database ID"
-// @param request body api.TagJobApiRequest true "Array of tag-objects to remove"
+// @param request body api.TagJobAPIRequest true "Array of tag-objects to remove"
 // @success 200 {object} schema.Job "Updated job resource"
 // @failure 400 {object} api.ErrorResponse "Bad Request"
 // @failure 401 {object} api.ErrorResponse "Unauthorized"
@@ -606,7 +606,7 @@ func (api *RestAPI) removeTagJob(rw http.ResponseWriter, r *http.Request) {
 // @description Tag wills be removed from respective archive files.
 // @accept json
 // @produce plain
-// @param request body api.TagJobApiRequest true "Array of tag-objects to remove"
+// @param request body api.TagJobAPIRequest true "Array of tag-objects to remove"
 // @success 200 {string} string "Success Response"
 // @failure 400 {object} api.ErrorResponse "Bad Request"
 // @failure 401 {object} api.ErrorResponse "Unauthorized"
@@ -650,7 +650,7 @@ func (api *RestAPI) removeTags(rw http.ResponseWriter, r *http.Request) {
 // @accept json
 // @produce json
 // @param request body schema.Job true "Job to add"
-// @success 201 {object} api.DefaultApiResponse "Job added successfully"
+// @success 201 {object} api.DefaultAPIResponse "Job added successfully"
 // @failure 400 {object} api.ErrorResponse "Bad Request"
 // @failure 401 {object} api.ErrorResponse "Unauthorized"
 // @failure 403 {object} api.ErrorResponse "Forbidden"
@@ -728,7 +728,7 @@ func (api *RestAPI) startJob(rw http.ResponseWriter, r *http.Request) {
 // @description Job to stop is specified by request body. All fields are required in this case.
 // @description Returns full job resource information according to 'Job' scheme.
 // @produce json
-// @param request body api.StopJobApiRequest true "All fields required"
+// @param request body api.StopJobAPIRequest true "All fields required"
 // @success 200 {object} schema.Job "Success message"
 // @failure 400 {object} api.ErrorResponse "Bad Request"
 // @failure 401 {object} api.ErrorResponse "Unauthorized"
@@ -754,7 +754,6 @@ func (api *RestAPI) stopJobByRequest(rw http.ResponseWriter, r *http.Request) {
 		return
 	}
 
-	// cclog.Printf("loading db job for stopJobByRequest... : stopJobApiRequest=%v", req)
 	job, err = api.JobRepository.Find(req.JobID, req.Cluster, req.StartTime)
 	if err != nil {
 		// Try cached jobs if not found in main repository
@@ -776,7 +775,7 @@ func (api *RestAPI) stopJobByRequest(rw http.ResponseWriter, r *http.Request) {
 // @description Job to remove is specified by database ID. This will not remove the job from the job archive.
 // @produce json
 // @param id path int true "Database ID of Job"
-// @success 200 {object} api.DefaultApiResponse "Success message"
+// @success 200 {object} api.DefaultAPIResponse "Success message"
 // @failure 400 {object} api.ErrorResponse "Bad Request"
 // @failure 401 {object} api.ErrorResponse "Unauthorized"
 // @failure 403 {object} api.ErrorResponse "Forbidden"
@@ -820,8 +819,8 @@ func (api *RestAPI) deleteJobByID(rw http.ResponseWriter, r *http.Request) {
 // @description Job to delete is specified by request body. All fields are required in this case.
 // @accept json
 // @produce json
-// @param request body api.DeleteJobApiRequest true "All fields required"
+// @param request body api.DeleteJobAPIRequest true "All fields required"
-// @success 200 {object} api.DefaultApiResponse "Success message"
+// @success 200 {object} api.DefaultAPIResponse "Success message"
 // @failure 400 {object} api.ErrorResponse "Bad Request"
 // @failure 401 {object} api.ErrorResponse "Unauthorized"
 // @failure 403 {object} api.ErrorResponse "Forbidden"
@@ -873,7 +872,7 @@ func (api *RestAPI) deleteJobByRequest(rw http.ResponseWriter, r *http.Request)
 // @description Remove all jobs with start time before timestamp. The jobs will not be removed from the job archive.
 // @produce json
 // @param ts path int true "Unix epoch timestamp"
-// @success 200 {object} api.DefaultApiResponse "Success message"
+// @success 200 {object} api.DefaultAPIResponse "Success message"
 // @failure 400 {object} api.ErrorResponse "Bad Request"
 // @failure 401 {object} api.ErrorResponse "Unauthorized"
 // @failure 403 {object} api.ErrorResponse "Forbidden"
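[Editor's note] One detail worth noting in getJobByID above: the requested resolution is widened with resolution = max(resolution, mc.Timestep), so a client can never request finer data than a metric's native timestep. A standalone illustration of that clamp; the function and names here are illustrative, not cc-backend API:

    package main

    import "fmt"

    // clampResolution widens a requested sampling resolution (seconds) to at
    // least the metric's native timestep, mirroring the max() clamp in the diff.
    func clampResolution(requested, nativeTimestep int) int {
        return max(requested, nativeTimestep) // max is a Go 1.21 builtin
    }

    func main() {
        fmt.Println(clampResolution(10, 60))  // 60: cannot go finer than the metric
        fmt.Println(clampResolution(600, 60)) // 600: coarser requests pass through
    }
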
@@ -15,8 +15,8 @@ import (
 	"strconv"
 	"strings"
 
-	"github.com/ClusterCockpit/cc-backend/internal/memorystore"
+	"github.com/ClusterCockpit/cc-backend/internal/metricstore"
-	cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
+	cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger"
 
 	"github.com/influxdata/line-protocol/v2/lineprotocol"
 )
@@ -58,7 +58,7 @@ func freeMetrics(rw http.ResponseWriter, r *http.Request) {
 		return
 	}
 
-	ms := memorystore.GetMemoryStore()
+	ms := metricstore.GetMemoryStore()
 	n := 0
 	for _, sel := range selectors {
 		bn, err := ms.Free(sel, to)
@@ -97,9 +97,9 @@ func writeMetrics(rw http.ResponseWriter, r *http.Request) {
 		return
 	}
 
-	ms := memorystore.GetMemoryStore()
+	ms := metricstore.GetMemoryStore()
 	dec := lineprotocol.NewDecoderWithBytes(bytes)
-	if err := memorystore.DecodeLine(dec, ms, r.URL.Query().Get("cluster")); err != nil {
+	if err := metricstore.DecodeLine(dec, ms, r.URL.Query().Get("cluster")); err != nil {
 		cclog.Errorf("/api/write error: %s", err.Error())
 		handleError(err, http.StatusBadRequest, rw)
 		return
@@ -129,7 +129,7 @@ func debugMetrics(rw http.ResponseWriter, r *http.Request) {
 		selector = strings.Split(raw, ":")
 	}
 
-	ms := memorystore.GetMemoryStore()
+	ms := metricstore.GetMemoryStore()
 	if err := ms.DebugDump(bufio.NewWriter(rw), selector); err != nil {
 		handleError(err, http.StatusBadRequest, rw)
 		return
@@ -162,7 +162,7 @@ func metricsHealth(rw http.ResponseWriter, r *http.Request) {
 
 	selector := []string{rawCluster, rawNode}
 
-	ms := memorystore.GetMemoryStore()
+	ms := metricstore.GetMemoryStore()
 	if err := ms.HealthCheck(bufio.NewWriter(rw), selector); err != nil {
 		handleError(err, http.StatusBadRequest, rw)
 		return
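[Editor's note] The writeMetrics handler above decodes InfluxDB line protocol from the request body and routes it by the "cluster" query parameter. A minimal client sketch pushing one sample; the host, port, route mount point, and the field name "value" are assumptions for illustration, and authentication is assumed disabled:

    package main

    import (
        "fmt"
        "net/http"
        "strings"
    )

    func main() {
        // One line-protocol record: measurement, one tag, one field, nanosecond timestamp.
        body := strings.NewReader("load_one,hostname=host123 value=0.2 1234567890000000000\n")

        resp, err := http.Post("http://localhost:8080/api/write?cluster=testcluster", "text/plain", body)
        if err != nil {
            panic(err)
        }
        defer resp.Body.Close()
        fmt.Println(resp.Status)
    }
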
@@ -6,9 +6,9 @@
 package api
 
 import (
-	"bytes"
 	"database/sql"
 	"encoding/json"
+	"strings"
 	"sync"
 	"time"
 
@@ -17,12 +17,48 @@ import (
 	"github.com/ClusterCockpit/cc-backend/internal/importer"
 	"github.com/ClusterCockpit/cc-backend/internal/repository"
 	"github.com/ClusterCockpit/cc-backend/pkg/nats"
-	cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
+	cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger"
-	"github.com/ClusterCockpit/cc-lib/schema"
+	lp "github.com/ClusterCockpit/cc-lib/v2/ccMessage"
+	"github.com/ClusterCockpit/cc-lib/v2/receivers"
+	"github.com/ClusterCockpit/cc-lib/v2/schema"
+	influx "github.com/influxdata/line-protocol/v2/lineprotocol"
 )
 
 // NatsAPI provides NATS subscription-based handlers for Job and Node operations.
-// It mirrors the functionality of the REST API but uses NATS messaging.
+// It mirrors the functionality of the REST API but uses NATS messaging with
+// InfluxDB line protocol as the message format.
+//
+// # Message Format
+//
+// All NATS messages use InfluxDB line protocol format (https://docs.influxdata.com/influxdb/v2.0/reference/syntax/line-protocol/)
+// with the following structure:
+//
+//	measurement,tag1=value1,tag2=value2 field1=value1,field2=value2 timestamp
+//
+// # Job Events
+//
+// Job start/stop events use the "job" measurement with a "function" tag to distinguish operations:
+//
+//	job,function=start_job event="{...JSON payload...}" <timestamp>
+//	job,function=stop_job event="{...JSON payload...}" <timestamp>
+//
+// The JSON payload in the "event" field follows the schema.Job or StopJobAPIRequest structure.
+//
+// Example job start message:
+//
+//	job,function=start_job event="{\"jobId\":1001,\"user\":\"testuser\",\"cluster\":\"testcluster\",...}" 1234567890000000000
+//
+// # Node State Events
+//
+// Node state updates use the "nodestate" measurement with cluster information:
+//
+//	nodestate event="{...JSON payload...}" <timestamp>
+//
+// The JSON payload follows the UpdateNodeStatesRequest structure.
+//
+// Example node state message:
+//
+//	nodestate event="{\"cluster\":\"testcluster\",\"nodes\":[{\"hostname\":\"node01\",\"states\":[\"idle\"]}]}" 1234567890000000000
 type NatsAPI struct {
 	JobRepository *repository.JobRepository
 	// RepositoryMutex protects job creation operations from race conditions
@@ -50,11 +86,7 @@ func (api *NatsAPI) StartSubscriptions() error {
 
 	s := config.Keys.APISubjects
 
-	if err := client.Subscribe(s.SubjectJobStart, api.handleStartJob); err != nil {
-		return err
-	}
-
-	if err := client.Subscribe(s.SubjectJobStop, api.handleStopJob); err != nil {
+	if err := client.Subscribe(s.SubjectJobEvent, api.handleJobEvent); err != nil {
 		return err
 	}
 
@@ -67,26 +99,96 @@ func (api *NatsAPI) StartSubscriptions() error {
 	return nil
 }
 
+// processJobEvent routes job event messages to the appropriate handler based on the "function" tag.
+// Validates that required tags and fields are present before processing.
+func (api *NatsAPI) processJobEvent(msg lp.CCMessage) {
+	function, ok := msg.GetTag("function")
+	if !ok {
+		cclog.Errorf("Job event is missing required tag 'function': measurement=%s", msg.Name())
+		return
+	}
+
+	switch function {
+	case "start_job":
+		v, ok := msg.GetEventValue()
+		if !ok {
+			cclog.Errorf("Job start event is missing event field with JSON payload")
+			return
+		}
+		api.handleStartJob(v)
+
+	case "stop_job":
+		v, ok := msg.GetEventValue()
+		if !ok {
+			cclog.Errorf("Job stop event is missing event field with JSON payload")
+			return
+		}
+		api.handleStopJob(v)
+
+	default:
+		cclog.Warnf("Unknown job event function '%s', expected 'start_job' or 'stop_job'", function)
+	}
+}
+
+// handleJobEvent processes job-related messages received via NATS using InfluxDB line protocol.
+// The message must be in line protocol format with measurement="job" and include:
+//   - tag "function" with value "start_job" or "stop_job"
+//   - field "event" containing JSON payload (schema.Job or StopJobAPIRequest)
+//
+// Example: job,function=start_job event="{\"jobId\":1001,...}" 1234567890000000000
+func (api *NatsAPI) handleJobEvent(subject string, data []byte) {
+	if len(data) == 0 {
+		cclog.Warnf("NATS %s: received empty message", subject)
+		return
+	}
+
+	d := influx.NewDecoderWithBytes(data)
+
+	for d.Next() {
+		m, err := receivers.DecodeInfluxMessage(d)
+		if err != nil {
+			cclog.Errorf("NATS %s: failed to decode InfluxDB line protocol message: %v", subject, err)
+			return
+		}
+
+		if !m.IsEvent() {
+			cclog.Debugf("NATS %s: received non-event message, skipping", subject)
+			continue
+		}
+
+		if m.Name() == "job" {
+			api.processJobEvent(m)
+		} else {
+			cclog.Debugf("NATS %s: unexpected measurement name '%s', expected 'job'", subject, m.Name())
+		}
+	}
+}
+
 // handleStartJob processes job start messages received via NATS.
-// Expected JSON payload follows the schema.Job structure.
-func (api *NatsAPI) handleStartJob(subject string, data []byte) {
+// The payload parameter contains JSON following the schema.Job structure.
+// Jobs are validated, checked for duplicates, and inserted into the database.
+func (api *NatsAPI) handleStartJob(payload string) {
+	if payload == "" {
+		cclog.Error("NATS start job: payload is empty")
+		return
+	}
 	req := schema.Job{
 		Shared:           "none",
 		MonitoringStatus: schema.MonitoringStatusRunningOrArchiving,
 	}
 
-	dec := json.NewDecoder(bytes.NewReader(data))
+	dec := json.NewDecoder(strings.NewReader(payload))
 	dec.DisallowUnknownFields()
 	if err := dec.Decode(&req); err != nil {
-		cclog.Errorf("NATS %s: parsing request failed: %v", subject, err)
+		cclog.Errorf("NATS start job: parsing request failed: %v", err)
 		return
 	}
 
-	cclog.Debugf("NATS %s: %s", subject, req.GoString())
+	cclog.Debugf("NATS start job: %s", req.GoString())
 	req.State = schema.JobStateRunning
 
 	if err := importer.SanityChecks(&req); err != nil {
-		cclog.Errorf("NATS %s: sanity check failed: %v", subject, err)
+		cclog.Errorf("NATS start job: sanity check failed: %v", err)
 		return
 	}
 
@@ -96,14 +198,14 @@ func (api *NatsAPI) handleStartJob(subject string, data []byte) {
 
 	jobs, err := api.JobRepository.FindAll(&req.JobID, &req.Cluster, nil)
 	if err != nil && err != sql.ErrNoRows {
-		cclog.Errorf("NATS %s: checking for duplicate failed: %v", subject, err)
+		cclog.Errorf("NATS start job: checking for duplicate failed: %v", err)
 		return
 	}
 	if err == nil {
 		for _, job := range jobs {
 			if (req.StartTime - job.StartTime) < secondsPerDay {
-				cclog.Errorf("NATS %s: job with jobId %d, cluster %s already exists (dbid: %d)",
+				cclog.Errorf("NATS start job: job with jobId %d, cluster %s already exists (dbid: %d)",
-					subject, req.JobID, req.Cluster, job.ID)
+					req.JobID, req.Cluster, job.ID)
 				return
 			}
 		}
@@ -111,14 +213,14 @@ func (api *NatsAPI) handleStartJob(subject string, data []byte) {
 
 	id, err := api.JobRepository.Start(&req)
 	if err != nil {
-		cclog.Errorf("NATS %s: insert into database failed: %v", subject, err)
+		cclog.Errorf("NATS start job: insert into database failed: %v", err)
 		return
 	}
 	unlockOnce.Do(api.RepositoryMutex.Unlock)
 
 	for _, tag := range req.Tags {
 		if _, err := api.JobRepository.AddTagOrCreate(nil, id, tag.Type, tag.Name, tag.Scope); err != nil {
-			cclog.Errorf("NATS %s: adding tag to new job %d failed: %v", subject, id, err)
+			cclog.Errorf("NATS start job: adding tag to new job %d failed: %v", id, err)
 			return
 		}
 	}
@@ -128,19 +230,24 @@ func (api *NatsAPI) handleStartJob(subject string, data []byte) {
 }
 
 // handleStopJob processes job stop messages received via NATS.
-// Expected JSON payload follows the StopJobAPIRequest structure.
-func (api *NatsAPI) handleStopJob(subject string, data []byte) {
+// The payload parameter contains JSON following the StopJobAPIRequest structure.
+// The job is marked as stopped in the database and archiving is triggered if monitoring is enabled.
+func (api *NatsAPI) handleStopJob(payload string) {
+	if payload == "" {
+		cclog.Error("NATS stop job: payload is empty")
+		return
+	}
 	var req StopJobAPIRequest
 
-	dec := json.NewDecoder(bytes.NewReader(data))
+	dec := json.NewDecoder(strings.NewReader(payload))
 	dec.DisallowUnknownFields()
 	if err := dec.Decode(&req); err != nil {
-		cclog.Errorf("NATS %s: parsing request failed: %v", subject, err)
+		cclog.Errorf("NATS job stop: parsing request failed: %v", err)
 		return
 	}
 
 	if req.JobID == nil {
-		cclog.Errorf("NATS %s: the field 'jobId' is required", subject)
+		cclog.Errorf("NATS job stop: the field 'jobId' is required")
 		return
 	}
 
@@ -148,28 +255,28 @@ func (api *NatsAPI) handleStopJob(subject string, data []byte) {
 	if err != nil {
 		cachedJob, cachedErr := api.JobRepository.FindCached(req.JobID, req.Cluster, req.StartTime)
 		if cachedErr != nil {
-			cclog.Errorf("NATS %s: finding job failed: %v (cached lookup also failed: %v)",
+			cclog.Errorf("NATS job stop: finding job failed: %v (cached lookup also failed: %v)",
-				subject, err, cachedErr)
+				err, cachedErr)
 			return
 		}
 		job = cachedJob
 	}
 
 	if job.State != schema.JobStateRunning {
-		cclog.Errorf("NATS %s: jobId %d (id %d) on %s: job has already been stopped (state is: %s)",
+		cclog.Errorf("NATS job stop: jobId %d (id %d) on %s: job has already been stopped (state is: %s)",
-			subject, job.JobID, job.ID, job.Cluster, job.State)
+			job.JobID, job.ID, job.Cluster, job.State)
 		return
 	}
 
 	if job.StartTime > req.StopTime {
-		cclog.Errorf("NATS %s: jobId %d (id %d) on %s: stopTime %d must be >= startTime %d",
+		cclog.Errorf("NATS job stop: jobId %d (id %d) on %s: stopTime %d must be >= startTime %d",
-			subject, job.JobID, job.ID, job.Cluster, req.StopTime, job.StartTime)
+			job.JobID, job.ID, job.Cluster, req.StopTime, job.StartTime)
 		return
 	}
 
 	if req.State != "" && !req.State.Valid() {
-		cclog.Errorf("NATS %s: jobId %d (id %d) on %s: invalid job state: %#v",
+		cclog.Errorf("NATS job stop: jobId %d (id %d) on %s: invalid job state: %#v",
-			subject, job.JobID, job.ID, job.Cluster, req.State)
+			job.JobID, job.ID, job.Cluster, req.State)
 		return
 	} else if req.State == "" {
 		req.State = schema.JobStateCompleted
@@ -182,8 +289,8 @@ func (api *NatsAPI) handleStopJob(subject string, data []byte) {
 
 	if err := api.JobRepository.Stop(*job.ID, job.Duration, job.State, job.MonitoringStatus); err != nil {
 		if err := api.JobRepository.StopCached(*job.ID, job.Duration, job.State, job.MonitoringStatus); err != nil {
-			cclog.Errorf("NATS %s: jobId %d (id %d) on %s: marking job as '%s' failed: %v",
+			cclog.Errorf("NATS job stop: jobId %d (id %d) on %s: marking job as '%s' failed: %v",
-				subject, job.JobID, job.ID, job.Cluster, job.State, err)
+				job.JobID, job.ID, job.Cluster, job.State, err)
 			return
 		}
 	}
@@ -198,15 +305,21 @@ func (api *NatsAPI) handleStopJob(subject string, data []byte) {
 	archiver.TriggerArchiving(job)
 }
 
-// handleNodeState processes node state update messages received via NATS.
-// Expected JSON payload follows the UpdateNodeStatesRequest structure.
-func (api *NatsAPI) handleNodeState(subject string, data []byte) {
+// processNodestateEvent extracts and processes node state data from the InfluxDB message.
+// Updates node states in the repository for all nodes in the payload.
+func (api *NatsAPI) processNodestateEvent(msg lp.CCMessage) {
+	v, ok := msg.GetEventValue()
+	if !ok {
+		cclog.Errorf("Nodestate event is missing event field with JSON payload")
+		return
+	}
+
 	var req UpdateNodeStatesRequest
 
-	dec := json.NewDecoder(bytes.NewReader(data))
+	dec := json.NewDecoder(strings.NewReader(v))
 	dec.DisallowUnknownFields()
 	if err := dec.Decode(&req); err != nil {
-		cclog.Errorf("NATS %s: parsing request failed: %v", subject, err)
+		cclog.Errorf("NATS nodestate: parsing request failed: %v", err)
 		return
 	}
 
@@ -224,8 +337,44 @@ func (api *NatsAPI) handleNodeState(subject string, data []byte) {
 			JobsRunning: node.JobsRunning,
 		}
 
-		repo.UpdateNodeState(node.Hostname, req.Cluster, &nodeState)
+		if err := repo.UpdateNodeState(node.Hostname, req.Cluster, &nodeState); err != nil {
+			cclog.Errorf("NATS nodestate: updating node state for %s on %s failed: %v",
+				node.Hostname, req.Cluster, err)
+		}
 	}
 
-	cclog.Debugf("NATS %s: updated %d node states for cluster %s", subject, len(req.Nodes), req.Cluster)
+	cclog.Debugf("NATS nodestate: updated %d node states for cluster %s", len(req.Nodes), req.Cluster)
+}
+
+// handleNodeState processes node state update messages received via NATS using InfluxDB line protocol.
+// The message must be in line protocol format with measurement="nodestate" and include:
+//   - field "event" containing JSON payload (UpdateNodeStatesRequest)
+//
+// Example: nodestate event="{\"cluster\":\"testcluster\",\"nodes\":[...]}" 1234567890000000000
+func (api *NatsAPI) handleNodeState(subject string, data []byte) {
+	if len(data) == 0 {
+		cclog.Warnf("NATS %s: received empty message", subject)
+		return
+	}
+
+	d := influx.NewDecoderWithBytes(data)
+
+	for d.Next() {
+		m, err := receivers.DecodeInfluxMessage(d)
+		if err != nil {
+			cclog.Errorf("NATS %s: failed to decode InfluxDB line protocol message: %v", subject, err)
+			return
+		}
+
+		if !m.IsEvent() {
+			cclog.Warnf("NATS %s: received non-event message, skipping", subject)
+			continue
+		}
+
+		if m.Name() == "nodestate" {
+			api.processNodestateEvent(m)
+		} else {
+			cclog.Warnf("NATS %s: unexpected measurement name '%s', expected 'nodestate'", subject, m.Name())
+		}
+	}
 }
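[Editor's note] The doc comment introduced in the diff above fixes the wire format for job events: one line-protocol record with measurement "job", a "function" tag, and the JSON payload in the "event" string field. A minimal publisher sketch using the standard nats.go client; the subject "cc.job.event" is a placeholder for whatever config.Keys.APISubjects.SubjectJobEvent is configured to, and the abbreviated payload is illustrative:

    package main

    import (
        "fmt"
        "strings"
        "time"

        "github.com/nats-io/nats.go"
    )

    func main() {
        nc, err := nats.Connect(nats.DefaultURL)
        if err != nil {
            panic(err)
        }
        defer nc.Close()

        // JSON payload following the schema.Job structure (abbreviated here).
        payload := `{"jobId":1001,"user":"testuser","cluster":"testcluster","startTime":1234567890}`
        // Double quotes inside a line-protocol string field must be escaped.
        escaped := strings.ReplaceAll(payload, `"`, `\"`)
        line := fmt.Sprintf("job,function=start_job event=\"%s\" %d", escaped, time.Now().UnixNano())

        if err := nc.Publish("cc.job.event", []byte(line)); err != nil {
            panic(err)
        }
    }
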
internal/api/nats_test.go (new file, 947 lines)
@@ -0,0 +1,947 @@
+// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
+// All rights reserved. This file is part of cc-backend.
+// Use of this source code is governed by a MIT-style
+// license that can be found in the LICENSE file.
+package api
+
+import (
+	"context"
+	"database/sql"
+	"encoding/json"
+	"fmt"
+	"os"
+	"path/filepath"
+	"testing"
+	"time"
+
+	"github.com/ClusterCockpit/cc-backend/internal/archiver"
+	"github.com/ClusterCockpit/cc-backend/internal/auth"
+	"github.com/ClusterCockpit/cc-backend/internal/config"
+	"github.com/ClusterCockpit/cc-backend/internal/graph"
+	"github.com/ClusterCockpit/cc-backend/internal/metricstore"
+	"github.com/ClusterCockpit/cc-backend/internal/repository"
+	"github.com/ClusterCockpit/cc-backend/pkg/archive"
+	ccconf "github.com/ClusterCockpit/cc-lib/v2/ccConfig"
+	cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger"
+	lp "github.com/ClusterCockpit/cc-lib/v2/ccMessage"
+	"github.com/ClusterCockpit/cc-lib/v2/schema"
+
+	_ "github.com/mattn/go-sqlite3"
+)
+
+func setupNatsTest(t *testing.T) *NatsAPI {
+	repository.ResetConnection()
+
+	const testconfig = `{
+	"main": {
+	"addr": "0.0.0.0:8080",
+	"validate": false,
+	"apiAllowedIPs": [
+	"*"
+	]
+	},
+	"archive": {
+	"kind": "file",
+	"path": "./var/job-archive"
+	},
+	"auth": {
+	"jwts": {
+	"max-age": "2m"
+	}
+	}
+	}`
+	const testclusterJSON = `{
+	"name": "testcluster",
+	"subClusters": [
+	{
+	"name": "sc1",
+	"nodes": "host123,host124,host125",
+	"processorType": "Intel Core i7-4770",
+	"socketsPerNode": 1,
+	"coresPerSocket": 4,
+	"threadsPerCore": 2,
+	"flopRateScalar": {
+	"unit": {
+	"prefix": "G",
+	"base": "F/s"
+	},
+	"value": 14
+	},
+	"flopRateSimd": {
+	"unit": {
+	"prefix": "G",
+	"base": "F/s"
+	},
+	"value": 112
+	},
+	"memoryBandwidth": {
+	"unit": {
+	"prefix": "G",
+	"base": "B/s"
+	},
+	"value": 24
+	},
+	"numberOfNodes": 70,
+	"topology": {
+	"node": [0, 1, 2, 3, 4, 5, 6, 7],
+	"socket": [[0, 1, 2, 3, 4, 5, 6, 7]],
+	"memoryDomain": [[0, 1, 2, 3, 4, 5, 6, 7]],
+	"die": [[0, 1, 2, 3, 4, 5, 6, 7]],
+	"core": [[0], [1], [2], [3], [4], [5], [6], [7]]
+	}
+	}
+	],
+	"metricConfig": [
+	{
+	"name": "load_one",
+	"unit": { "base": ""},
+	"scope": "node",
+	"timestep": 60,
+	"aggregation": "avg",
+	"peak": 8,
+	"normal": 0,
+	"caution": 0,
+	"alert": 0
+	}
+	]
+	}`
+
+	cclog.Init("info", true)
+	tmpdir := t.TempDir()
+	jobarchive := filepath.Join(tmpdir, "job-archive")
+	if err := os.Mkdir(jobarchive, 0o777); err != nil {
+		t.Fatal(err)
+	}
+
+	if err := os.WriteFile(filepath.Join(jobarchive, "version.txt"), fmt.Appendf(nil, "%d", 3), 0o666); err != nil {
+		t.Fatal(err)
+	}
+
+	if err := os.Mkdir(filepath.Join(jobarchive, "testcluster"), 0o777); err != nil {
+		t.Fatal(err)
+	}
+
+	if err := os.WriteFile(filepath.Join(jobarchive, "testcluster", "cluster.json"), []byte(testclusterJSON), 0o666); err != nil {
+		t.Fatal(err)
+	}
+
+	dbfilepath := filepath.Join(tmpdir, "test.db")
+	err := repository.MigrateDB(dbfilepath)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	cfgFilePath := filepath.Join(tmpdir, "config.json")
+	if err := os.WriteFile(cfgFilePath, []byte(testconfig), 0o666); err != nil {
+		t.Fatal(err)
+	}
+
+	ccconf.Init(cfgFilePath)
+
+	// Load and check main configuration
+	if cfg := ccconf.GetPackageConfig("main"); cfg != nil {
+		config.Init(cfg)
+	} else {
+		cclog.Abort("Main configuration must be present")
+	}
+	archiveCfg := fmt.Sprintf("{\"kind\": \"file\",\"path\": \"%s\"}", jobarchive)
+
+	repository.Connect("sqlite3", dbfilepath)
+
+	if err := archive.Init(json.RawMessage(archiveCfg), config.Keys.DisableArchive); err != nil {
+		t.Fatal(err)
+	}
+
+	// metricstore initialization removed - it's initialized via callback in tests
+
+	archiver.Start(repository.GetJobRepository(), context.Background())
+
+	if cfg := ccconf.GetPackageConfig("auth"); cfg != nil {
+		auth.Init(&cfg)
+	} else {
+		cclog.Warn("Authentication disabled due to missing configuration")
+		auth.Init(nil)
+	}
+
+	graph.Init()
+
+	return NewNatsAPI()
+}
+
+func cleanupNatsTest() {
+	if err := archiver.Shutdown(5 * time.Second); err != nil {
+		cclog.Warnf("Archiver shutdown timeout in tests: %v", err)
+	}
+}
+
+func TestNatsHandleStartJob(t *testing.T) {
+	natsAPI := setupNatsTest(t)
+	t.Cleanup(cleanupNatsTest)
+
+	tests := []struct {
+		name          string
+		payload       string
+		expectError   bool
+		validateJob   func(t *testing.T, job *schema.Job)
+		shouldFindJob bool
+	}{
+		{
+			name: "valid job start",
+			payload: `{
+				"jobId": 1001,
+				"user": "testuser1",
+				"project": "testproj1",
+				"cluster": "testcluster",
+				"partition": "main",
+				"walltime": 7200,
+				"numNodes": 1,
+				"numHwthreads": 8,
+				"numAcc": 0,
+				"shared": "none",
+				"monitoringStatus": 1,
+				"smt": 1,
+				"resources": [
+					{
+						"hostname": "host123",
+						"hwthreads": [0, 1, 2, 3, 4, 5, 6, 7]
+					}
+				],
+				"startTime": 1234567890
+			}`,
+			expectError:   false,
+			shouldFindJob: true,
+			validateJob: func(t *testing.T, job *schema.Job) {
+				if job.JobID != 1001 {
+					t.Errorf("expected JobID 1001, got %d", job.JobID)
+				}
+				if job.User != "testuser1" {
+					t.Errorf("expected user testuser1, got %s", job.User)
+				}
+				if job.State != schema.JobStateRunning {
+					t.Errorf("expected state running, got %s", job.State)
+				}
+			},
+		},
+		{
+			name: "invalid JSON",
+			payload: `{
+				"jobId": "not a number",
+				"user": "testuser2"
+			}`,
+			expectError:   true,
+			shouldFindJob: false,
+		},
+		{
+			name: "missing required fields",
+			payload: `{
+				"jobId": 1002
+			}`,
+			expectError:   true,
+			shouldFindJob: false,
+		},
+		{
+			name: "job with unknown fields (should fail due to DisallowUnknownFields)",
+			payload: `{
+				"jobId": 1003,
+				"user": "testuser3",
+				"project": "testproj3",
+				"cluster": "testcluster",
+				"partition": "main",
+				"walltime": 3600,
+				"numNodes": 1,
+				"numHwthreads": 8,
+				"unknownField": "should cause error",
+				"startTime": 1234567900
+			}`,
+			expectError:   true,
+			shouldFindJob: false,
+		},
+		{
+			name: "job with tags",
+			payload: `{
+				"jobId": 1004,
+				"user": "testuser4",
+				"project": "testproj4",
+				"cluster": "testcluster",
+				"partition": "main",
+				"walltime": 3600,
+				"numNodes": 1,
+				"numHwthreads": 8,
+				"numAcc": 0,
+				"shared": "none",
+				"monitoringStatus": 1,
+				"smt": 1,
+				"resources": [
+					{
+						"hostname": "host123",
+						"hwthreads": [0, 1, 2, 3]
+					}
+				],
+				"tags": [
+					{
+						"type": "test",
+						"name": "testtag",
+						"scope": "testuser4"
+					}
+				],
+				"startTime": 1234567910
+			}`,
+			expectError:   false,
+			shouldFindJob: true,
+			validateJob: func(t *testing.T, job *schema.Job) {
+				if job.JobID != 1004 {
+					t.Errorf("expected JobID 1004, got %d", job.JobID)
+				}
+			},
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			natsAPI.handleStartJob(tt.payload)
+			natsAPI.JobRepository.SyncJobs()
+
+			// Allow some time for async operations
+			time.Sleep(100 * time.Millisecond)
+
+			if tt.shouldFindJob {
+				// Extract jobId from payload
+				var payloadMap map[string]any
+				json.Unmarshal([]byte(tt.payload), &payloadMap)
+				jobID := int64(payloadMap["jobId"].(float64))
+				cluster := payloadMap["cluster"].(string)
+				startTime := int64(payloadMap["startTime"].(float64))
+
+				job, err := natsAPI.JobRepository.Find(&jobID, &cluster, &startTime)
+				if err != nil {
+					if !tt.expectError {
+						t.Fatalf("expected to find job, but got error: %v", err)
+					}
+					return
+				}
+
+				if tt.validateJob != nil {
+					tt.validateJob(t, job)
+				}
+			}
+		})
+	}
+}
+
+func TestNatsHandleStopJob(t *testing.T) {
+	natsAPI := setupNatsTest(t)
+	t.Cleanup(cleanupNatsTest)
+
+	// First, create a running job
+	startPayload := `{
+		"jobId": 2001,
+		"user": "testuser",
+		"project": "testproj",
+		"cluster": "testcluster",
+		"partition": "main",
+		"walltime": 3600,
+		"numNodes": 1,
+		"numHwthreads": 8,
+		"numAcc": 0,
+		"shared": "none",
+		"monitoringStatus": 1,
+		"smt": 1,
+		"resources": [
+			{
+				"hostname": "host123",
+				"hwthreads": [0, 1, 2, 3, 4, 5, 6, 7]
+			}
+		],
+		"startTime": 1234567890
+	}`
+
+	natsAPI.handleStartJob(startPayload)
+	natsAPI.JobRepository.SyncJobs()
+	time.Sleep(100 * time.Millisecond)
+
+	tests := []struct {
+		name         string
+		payload      string
+		expectError  bool
+		validateJob  func(t *testing.T, job *schema.Job)
+		setupJobFunc func() // Optional: create specific test job
+	}{
+		{
+			name: "valid job stop - completed",
+			payload: `{
+				"jobId": 2001,
+				"cluster": "testcluster",
+				"startTime": 1234567890,
+				"jobState": "completed",
+				"stopTime": 1234571490
+			}`,
+			expectError: false,
+			validateJob: func(t *testing.T, job *schema.Job) {
+				if job.State != schema.JobStateCompleted {
+					t.Errorf("expected state completed, got %s", job.State)
+				}
+				expectedDuration := int32(1234571490 - 1234567890)
+				if job.Duration != expectedDuration {
+					t.Errorf("expected duration %d, got %d", expectedDuration, job.Duration)
+				}
+			},
+		},
+		{
+			name: "valid job stop - failed",
+			setupJobFunc: func() {
+				startPayloadFailed := `{
+					"jobId": 2002,
+					"user": "testuser",
+					"project": "testproj",
+					"cluster": "testcluster",
+					"partition": "main",
+					"walltime": 3600,
+					"numNodes": 1,
+					"numHwthreads": 8,
+					"numAcc": 0,
+					"shared": "none",
+					"monitoringStatus": 1,
+					"smt": 1,
+					"resources": [
+						{
+							"hostname": "host123",
+							"hwthreads": [0, 1, 2, 3]
+						}
+					],
+					"startTime": 1234567900
+				}`
+				natsAPI.handleStartJob(startPayloadFailed)
+				natsAPI.JobRepository.SyncJobs()
+				time.Sleep(100 * time.Millisecond)
+			},
+			payload: `{
+				"jobId": 2002,
+				"cluster": "testcluster",
+				"startTime": 1234567900,
+				"jobState": "failed",
+				"stopTime": 1234569900
+			}`,
+			expectError: false,
+			validateJob: func(t *testing.T, job *schema.Job) {
+				if job.State != schema.JobStateFailed {
+					t.Errorf("expected state failed, got %s", job.State)
+				}
+			},
+		},
+		{
+			name: "invalid JSON",
+			payload: `{
+				"jobId": "not a number"
+			}`,
+			expectError: true,
+		},
+		{
+			name: "missing jobId",
+			payload: `{
+				"cluster": "testcluster",
+				"jobState": "completed",
+				"stopTime": 1234571490
+			}`,
+			expectError: true,
+		},
+		{
+			name: "invalid job state",
+			setupJobFunc: func() {
+				startPayloadInvalid := `{
+					"jobId": 2003,
+					"user": "testuser",
+					"project": "testproj",
+					"cluster": "testcluster",
+					"partition": "main",
+					"walltime": 3600,
+					"numNodes": 1,
+					"numHwthreads": 8,
+					"numAcc": 0,
+					"shared": "none",
+					"monitoringStatus": 1,
+					"smt": 1,
+					"resources": [
+						{
+							"hostname": "host123",
+							"hwthreads": [0, 1]
+						}
+					],
+					"startTime": 1234567910
+				}`
+				natsAPI.handleStartJob(startPayloadInvalid)
+				natsAPI.JobRepository.SyncJobs()
+				time.Sleep(100 * time.Millisecond)
+			},
+			payload: `{
+				"jobId": 2003,
+				"cluster": "testcluster",
+				"startTime": 1234567910,
+				"jobState": "invalid_state",
+				"stopTime": 1234571510
+			}`,
+			expectError: true,
+		},
+		{
+			name: "stopTime before startTime",
+			setupJobFunc: func() {
+				startPayloadTime := `{
+					"jobId": 2004,
+					"user": "testuser",
+					"project": "testproj",
+					"cluster": "testcluster",
+					"partition": "main",
+					"walltime": 3600,
+					"numNodes": 1,
+					"numHwthreads": 8,
+					"numAcc": 0,
+					"shared": "none",
+					"monitoringStatus": 1,
+					"smt": 1,
+					"resources": [
+						{
+							"hostname": "host123",
+							"hwthreads": [0]
+						}
+					],
+					"startTime": 1234567920
+				}`
+				natsAPI.handleStartJob(startPayloadTime)
+				natsAPI.JobRepository.SyncJobs()
+				time.Sleep(100 * time.Millisecond)
+			},
+			payload: `{
+				"jobId": 2004,
+				"cluster": "testcluster",
+				"startTime": 1234567920,
+				"jobState": "completed",
||||||
|
"stopTime": 1234567900
|
||||||
|
}`,
|
||||||
|
expectError: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "job not found",
|
||||||
|
payload: `{
|
||||||
|
"jobId": 99999,
|
||||||
|
"cluster": "testcluster",
|
||||||
|
"startTime": 1234567890,
|
||||||
|
"jobState": "completed",
|
||||||
|
"stopTime": 1234571490
|
||||||
|
}`,
|
||||||
|
expectError: true,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
testData := schema.JobData{
|
||||||
|
"load_one": map[schema.MetricScope]*schema.JobMetric{
|
||||||
|
schema.MetricScopeNode: {
|
||||||
|
Unit: schema.Unit{Base: "load"},
|
||||||
|
Timestep: 60,
|
||||||
|
Series: []schema.Series{
|
||||||
|
{
|
||||||
|
Hostname: "host123",
|
||||||
|
Statistics: schema.MetricStatistics{Min: 0.1, Avg: 0.2, Max: 0.3},
|
||||||
|
Data: []schema.Float{0.1, 0.1, 0.1, 0.2, 0.2, 0.2, 0.3, 0.3, 0.3},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
metricstore.TestLoadDataCallback = func(job *schema.Job, metrics []string, scopes []schema.MetricScope, ctx context.Context, resolution int) (schema.JobData, error) {
|
||||||
|
return testData, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, tt := range tests {
|
||||||
|
t.Run(tt.name, func(t *testing.T) {
|
||||||
|
if tt.setupJobFunc != nil {
|
||||||
|
tt.setupJobFunc()
|
||||||
|
}
|
||||||
|
|
||||||
|
natsAPI.handleStopJob(tt.payload)
|
||||||
|
|
||||||
|
// Allow some time for async operations
|
||||||
|
time.Sleep(100 * time.Millisecond)
|
||||||
|
|
||||||
|
if !tt.expectError && tt.validateJob != nil {
|
||||||
|
// Extract job details from payload
|
||||||
|
var payloadMap map[string]any
|
||||||
|
json.Unmarshal([]byte(tt.payload), &payloadMap)
|
||||||
|
jobID := int64(payloadMap["jobId"].(float64))
|
||||||
|
cluster := payloadMap["cluster"].(string)
|
||||||
|
|
||||||
|
var startTime *int64
|
||||||
|
if st, ok := payloadMap["startTime"]; ok {
|
||||||
|
t := int64(st.(float64))
|
||||||
|
startTime = &t
|
||||||
|
}
|
||||||
|
|
||||||
|
job, err := natsAPI.JobRepository.Find(&jobID, &cluster, startTime)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("expected to find job, but got error: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
tt.validateJob(t, job)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestNatsHandleNodeState(t *testing.T) {
|
||||||
|
natsAPI := setupNatsTest(t)
|
||||||
|
t.Cleanup(cleanupNatsTest)
|
||||||
|
|
||||||
|
tests := []struct {
|
||||||
|
name string
|
||||||
|
data []byte
|
||||||
|
expectError bool
|
||||||
|
validateFn func(t *testing.T)
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
name: "valid node state update",
|
||||||
|
data: []byte(`nodestate event="{\"cluster\":\"testcluster\",\"nodes\":[{\"hostname\":\"host123\",\"states\":[\"allocated\"],\"cpusAllocated\":8,\"memoryAllocated\":16384,\"gpusAllocated\":0,\"jobsRunning\":1}]}" 1234567890000000000`),
|
||||||
|
expectError: false,
|
||||||
|
validateFn: func(t *testing.T) {
|
||||||
|
// In a full test, we would verify the node state was updated in the database
|
||||||
|
// For now, just ensure no error occurred
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "multiple nodes",
|
||||||
|
data: []byte(`nodestate event="{\"cluster\":\"testcluster\",\"nodes\":[{\"hostname\":\"host123\",\"states\":[\"idle\"],\"cpusAllocated\":0,\"memoryAllocated\":0,\"gpusAllocated\":0,\"jobsRunning\":0},{\"hostname\":\"host124\",\"states\":[\"allocated\"],\"cpusAllocated\":4,\"memoryAllocated\":8192,\"gpusAllocated\":1,\"jobsRunning\":1}]}" 1234567890000000000`),
|
||||||
|
expectError: false,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "invalid JSON in event field",
|
||||||
|
data: []byte(`nodestate event="{\"cluster\":\"testcluster\",\"nodes\":\"not an array\"}" 1234567890000000000`),
|
||||||
|
expectError: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "empty nodes array",
|
||||||
|
data: []byte(`nodestate event="{\"cluster\":\"testcluster\",\"nodes\":[]}" 1234567890000000000`),
|
||||||
|
expectError: false, // Empty array should not cause error
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "invalid line protocol format",
|
||||||
|
data: []byte(`invalid line protocol format`),
|
||||||
|
expectError: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "empty data",
|
||||||
|
data: []byte(``),
|
||||||
|
expectError: false, // Should be handled gracefully with warning
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, tt := range tests {
|
||||||
|
t.Run(tt.name, func(t *testing.T) {
|
||||||
|
natsAPI.handleNodeState("test.subject", tt.data)
|
||||||
|
|
||||||
|
// Allow some time for async operations
|
||||||
|
time.Sleep(50 * time.Millisecond)
|
||||||
|
|
||||||
|
if tt.validateFn != nil {
|
||||||
|
tt.validateFn(t)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestNatsProcessJobEvent(t *testing.T) {
|
||||||
|
natsAPI := setupNatsTest(t)
|
||||||
|
t.Cleanup(cleanupNatsTest)
|
||||||
|
|
||||||
|
msgStartJob, err := lp.NewMessage(
|
||||||
|
"job",
|
||||||
|
map[string]string{"function": "start_job"},
|
||||||
|
nil,
|
||||||
|
map[string]any{
|
||||||
|
"event": `{
|
||||||
|
"jobId": 3001,
|
||||||
|
"user": "testuser",
|
||||||
|
"project": "testproj",
|
||||||
|
"cluster": "testcluster",
|
||||||
|
"partition": "main",
|
||||||
|
"walltime": 3600,
|
||||||
|
"numNodes": 1,
|
||||||
|
"numHwthreads": 8,
|
||||||
|
"numAcc": 0,
|
||||||
|
"shared": "none",
|
||||||
|
"monitoringStatus": 1,
|
||||||
|
"smt": 1,
|
||||||
|
"resources": [
|
||||||
|
{
|
||||||
|
"hostname": "host123",
|
||||||
|
"hwthreads": [0, 1, 2, 3]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"startTime": 1234567890
|
||||||
|
}`,
|
||||||
|
},
|
||||||
|
time.Now(),
|
||||||
|
)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("failed to create test message: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
msgMissingTag, err := lp.NewMessage(
|
||||||
|
"job",
|
||||||
|
map[string]string{},
|
||||||
|
nil,
|
||||||
|
map[string]any{
|
||||||
|
"event": `{}`,
|
||||||
|
},
|
||||||
|
time.Now(),
|
||||||
|
)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("failed to create test message: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
msgUnknownFunc, err := lp.NewMessage(
|
||||||
|
"job",
|
||||||
|
map[string]string{"function": "unknown_function"},
|
||||||
|
nil,
|
||||||
|
map[string]any{
|
||||||
|
"event": `{}`,
|
||||||
|
},
|
||||||
|
time.Now(),
|
||||||
|
)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("failed to create test message: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
tests := []struct {
|
||||||
|
name string
|
||||||
|
message lp.CCMessage
|
||||||
|
expectError bool
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
name: "start_job function",
|
||||||
|
message: msgStartJob,
|
||||||
|
expectError: false,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "missing function tag",
|
||||||
|
message: msgMissingTag,
|
||||||
|
expectError: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "unknown function",
|
||||||
|
message: msgUnknownFunc,
|
||||||
|
expectError: false,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, tt := range tests {
|
||||||
|
t.Run(tt.name, func(t *testing.T) {
|
||||||
|
natsAPI.processJobEvent(tt.message)
|
||||||
|
time.Sleep(50 * time.Millisecond)
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestNatsHandleJobEvent(t *testing.T) {
|
||||||
|
natsAPI := setupNatsTest(t)
|
||||||
|
t.Cleanup(cleanupNatsTest)
|
||||||
|
|
||||||
|
tests := []struct {
|
||||||
|
name string
|
||||||
|
data []byte
|
||||||
|
expectError bool
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
name: "valid influx line protocol",
|
||||||
|
data: []byte(`job,function=start_job event="{\"jobId\":4001,\"user\":\"testuser\",\"project\":\"testproj\",\"cluster\":\"testcluster\",\"partition\":\"main\",\"walltime\":3600,\"numNodes\":1,\"numHwthreads\":8,\"numAcc\":0,\"shared\":\"none\",\"monitoringStatus\":1,\"smt\":1,\"resources\":[{\"hostname\":\"host123\",\"hwthreads\":[0,1,2,3]}],\"startTime\":1234567890}" 1234567890000000000`),
|
||||||
|
expectError: false,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "invalid influx line protocol",
|
||||||
|
data: []byte(`invalid line protocol format`),
|
||||||
|
expectError: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "empty data",
|
||||||
|
data: []byte(``),
|
||||||
|
expectError: false, // Decoder should handle empty input gracefully
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, tt := range tests {
|
||||||
|
t.Run(tt.name, func(t *testing.T) {
|
||||||
|
// HandleJobEvent doesn't return errors, it logs them
|
||||||
|
// We're just ensuring it doesn't panic
|
||||||
|
natsAPI.handleJobEvent("test.subject", tt.data)
|
||||||
|
time.Sleep(50 * time.Millisecond)
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestNatsHandleJobEventEdgeCases(t *testing.T) {
|
||||||
|
natsAPI := setupNatsTest(t)
|
||||||
|
t.Cleanup(cleanupNatsTest)
|
||||||
|
|
||||||
|
tests := []struct {
|
||||||
|
name string
|
||||||
|
data []byte
|
||||||
|
expectError bool
|
||||||
|
description string
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
name: "non-event message (metric data)",
|
||||||
|
data: []byte(`job,function=start_job value=123.45 1234567890000000000`),
|
||||||
|
expectError: false,
|
||||||
|
description: "Should skip non-event messages gracefully",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "wrong measurement name",
|
||||||
|
data: []byte(`wrongmeasurement,function=start_job event="{}" 1234567890000000000`),
|
||||||
|
expectError: false,
|
||||||
|
description: "Should warn about unexpected measurement but not fail",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "missing event field",
|
||||||
|
data: []byte(`job,function=start_job other_field="value" 1234567890000000000`),
|
||||||
|
expectError: true,
|
||||||
|
description: "Should error when event field is missing",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "multiple measurements in one message",
|
||||||
|
data: []byte("job,function=start_job event=\"{}\" 1234567890000000000\njob,function=stop_job event=\"{}\" 1234567890000000000"),
|
||||||
|
expectError: false,
|
||||||
|
description: "Should process multiple lines",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "escaped quotes in JSON payload",
|
||||||
|
data: []byte(`job,function=start_job event="{\"jobId\":6001,\"user\":\"test\\\"user\",\"cluster\":\"test\"}" 1234567890000000000`),
|
||||||
|
expectError: true,
|
||||||
|
description: "Should handle escaped quotes (though JSON parsing may fail)",
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, tt := range tests {
|
||||||
|
t.Run(tt.name, func(t *testing.T) {
|
||||||
|
natsAPI.handleJobEvent("test.subject", tt.data)
|
||||||
|
time.Sleep(50 * time.Millisecond)
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestNatsHandleNodeStateEdgeCases(t *testing.T) {
|
||||||
|
natsAPI := setupNatsTest(t)
|
||||||
|
t.Cleanup(cleanupNatsTest)
|
||||||
|
|
||||||
|
tests := []struct {
|
||||||
|
name string
|
||||||
|
data []byte
|
||||||
|
expectError bool
|
||||||
|
description string
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
name: "missing cluster field in JSON",
|
||||||
|
data: []byte(`nodestate event="{\"nodes\":[]}" 1234567890000000000`),
|
||||||
|
expectError: true,
|
||||||
|
description: "Should fail when cluster is missing",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "malformed JSON with unescaped quotes",
|
||||||
|
data: []byte(`nodestate event="{\"cluster\":\"test"cluster\",\"nodes\":[]}" 1234567890000000000`),
|
||||||
|
expectError: true,
|
||||||
|
description: "Should fail on malformed JSON",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "unicode characters in hostname",
|
||||||
|
data: []byte(`nodestate event="{\"cluster\":\"testcluster\",\"nodes\":[{\"hostname\":\"host-ñ123\",\"states\":[\"idle\"],\"cpusAllocated\":0,\"memoryAllocated\":0,\"gpusAllocated\":0,\"jobsRunning\":0}]}" 1234567890000000000`),
|
||||||
|
expectError: false,
|
||||||
|
description: "Should handle unicode characters",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "very large node count",
|
||||||
|
data: []byte(`nodestate event="{\"cluster\":\"testcluster\",\"nodes\":[{\"hostname\":\"node1\",\"states\":[\"idle\"],\"cpusAllocated\":0,\"memoryAllocated\":0,\"gpusAllocated\":0,\"jobsRunning\":0},{\"hostname\":\"node2\",\"states\":[\"idle\"],\"cpusAllocated\":0,\"memoryAllocated\":0,\"gpusAllocated\":0,\"jobsRunning\":0},{\"hostname\":\"node3\",\"states\":[\"idle\"],\"cpusAllocated\":0,\"memoryAllocated\":0,\"gpusAllocated\":0,\"jobsRunning\":0}]}" 1234567890000000000`),
|
||||||
|
expectError: false,
|
||||||
|
description: "Should handle multiple nodes efficiently",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "timestamp in past",
|
||||||
|
data: []byte(`nodestate event="{\"cluster\":\"testcluster\",\"nodes\":[]}" 1000000000000000000`),
|
||||||
|
expectError: false,
|
||||||
|
description: "Should accept any valid timestamp",
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, tt := range tests {
|
||||||
|
t.Run(tt.name, func(t *testing.T) {
|
||||||
|
natsAPI.handleNodeState("test.subject", tt.data)
|
||||||
|
time.Sleep(50 * time.Millisecond)
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestNatsHandleStartJobDuplicatePrevention(t *testing.T) {
|
||||||
|
natsAPI := setupNatsTest(t)
|
||||||
|
t.Cleanup(cleanupNatsTest)
|
||||||
|
|
||||||
|
// Start a job
|
||||||
|
payload := `{
|
||||||
|
"jobId": 5001,
|
||||||
|
"user": "testuser",
|
||||||
|
"project": "testproj",
|
||||||
|
"cluster": "testcluster",
|
||||||
|
"partition": "main",
|
||||||
|
"walltime": 3600,
|
||||||
|
"numNodes": 1,
|
||||||
|
"numHwthreads": 8,
|
||||||
|
"numAcc": 0,
|
||||||
|
"shared": "none",
|
||||||
|
"monitoringStatus": 1,
|
||||||
|
"smt": 1,
|
||||||
|
"resources": [
|
||||||
|
{
|
||||||
|
"hostname": "host123",
|
||||||
|
"hwthreads": [0, 1, 2, 3]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"startTime": 1234567890
|
||||||
|
}`
|
||||||
|
|
||||||
|
natsAPI.handleStartJob(payload)
|
||||||
|
natsAPI.JobRepository.SyncJobs()
|
||||||
|
time.Sleep(100 * time.Millisecond)
|
||||||
|
|
||||||
|
// Try to start the same job again (within 24 hours)
|
||||||
|
duplicatePayload := `{
|
||||||
|
"jobId": 5001,
|
||||||
|
"user": "testuser",
|
||||||
|
"project": "testproj",
|
||||||
|
"cluster": "testcluster",
|
||||||
|
"partition": "main",
|
||||||
|
"walltime": 3600,
|
||||||
|
"numNodes": 1,
|
||||||
|
"numHwthreads": 8,
|
||||||
|
"numAcc": 0,
|
||||||
|
"shared": "none",
|
||||||
|
"monitoringStatus": 1,
|
||||||
|
"smt": 1,
|
||||||
|
"resources": [
|
||||||
|
{
|
||||||
|
"hostname": "host123",
|
||||||
|
"hwthreads": [0, 1, 2, 3]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"startTime": 1234567900
|
||||||
|
}`
|
||||||
|
|
||||||
|
natsAPI.handleStartJob(duplicatePayload)
|
||||||
|
natsAPI.JobRepository.SyncJobs()
|
||||||
|
time.Sleep(100 * time.Millisecond)
|
||||||
|
|
||||||
|
// Verify only one job exists
|
||||||
|
jobID := int64(5001)
|
||||||
|
cluster := "testcluster"
|
||||||
|
jobs, err := natsAPI.JobRepository.FindAll(&jobID, &cluster, nil)
|
||||||
|
if err != nil && err != sql.ErrNoRows {
|
||||||
|
t.Fatalf("unexpected error: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(jobs) != 1 {
|
||||||
|
t.Errorf("expected 1 job, got %d", len(jobs))
|
||||||
|
}
|
||||||
|
}
|
||||||
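The fixtures above pin down the wire format the NATS handlers consume: influx line protocol with measurement `job`, a `function` tag selecting the handler, and the job JSON backslash-escaped into an `event` field. As a minimal sketch of the producer side (the NATS server URL and subject name are assumptions for illustration, not taken from this repository), a scheduler adapter might publish a start event like this:

```go
package main

import (
	"fmt"
	"time"

	"github.com/nats-io/nats.go"
)

func main() {
	// Hypothetical server address; cc-backend's actual NATS settings live in its config.
	nc, err := nats.Connect("nats://localhost:4222")
	if err != nil {
		panic(err)
	}
	defer nc.Close()

	// Inner quotes of the JSON payload are backslash-escaped, exactly as in the
	// test fixtures above (e.g. event="{\"jobId\":4001,...}").
	event := `{\"jobId\":4001,\"user\":\"testuser\",\"cluster\":\"testcluster\",\"startTime\":1234567890}`
	line := fmt.Sprintf("job,function=start_job event=\"%s\" %d", event, time.Now().UnixNano())

	// "cc.events.jobs" is a placeholder subject; the real subject comes from
	// the subjectJobEvent configuration key introduced later in this changeset.
	if err := nc.Publish("cc.events.jobs", []byte(line)); err != nil {
		panic(err)
	}
}
```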
@@ -12,7 +12,7 @@ import (
 	"time"
 
 	"github.com/ClusterCockpit/cc-backend/internal/repository"
-	"github.com/ClusterCockpit/cc-lib/schema"
+	"github.com/ClusterCockpit/cc-lib/v2/schema"
 )
 
 type UpdateNodeStatesRequest struct {
@@ -47,7 +47,7 @@ func determineState(states []string) schema.SchedulerState {
 // @description Required query-parameter defines if all users or only users with additional special roles are returned.
 // @produce json
 // @param request body UpdateNodeStatesRequest true "Request body containing nodes and their states"
-// @success 200 {object} api.DefaultApiResponse "Success message"
+// @success 200 {object} api.DefaultAPIResponse "Success message"
 // @failure 400 {object} api.ErrorResponse "Bad Request"
 // @failure 401 {object} api.ErrorResponse "Unauthorized"
 // @failure 403 {object} api.ErrorResponse "Forbidden"
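The hunk above shows only the head of `UpdateNodeStatesRequest`; its fields are not part of this excerpt. Judging from the node-state payloads exercised by the NATS tests earlier, the request body presumably looks roughly like this (the exact field set is an inference, not confirmed by this diff):

```json
{
  "cluster": "testcluster",
  "nodes": [
    {
      "hostname": "host123",
      "states": ["allocated"],
      "cpusAllocated": 8,
      "memoryAllocated": 16384,
      "gpusAllocated": 0,
      "jobsRunning": 1
    }
  ]
}
```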
@@ -22,9 +22,9 @@ import (
 	"github.com/ClusterCockpit/cc-backend/internal/auth"
 	"github.com/ClusterCockpit/cc-backend/internal/config"
 	"github.com/ClusterCockpit/cc-backend/internal/repository"
-	cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
-	"github.com/ClusterCockpit/cc-lib/schema"
-	"github.com/ClusterCockpit/cc-lib/util"
+	cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger"
+	"github.com/ClusterCockpit/cc-lib/v2/schema"
+	"github.com/ClusterCockpit/cc-lib/v2/util"
 	"github.com/gorilla/mux"
 )
 
@@ -48,6 +48,7 @@ import (
 const (
 	noticeFilePath  = "./var/notice.txt"
 	noticeFilePerms = 0o644
+	maxNoticeLength = 10000 // Maximum allowed notice content length in characters
 )
 
 type RestAPI struct {
@@ -61,6 +62,7 @@ type RestAPI struct {
 	RepositoryMutex sync.Mutex
 }
 
+// New creates and initializes a new RestAPI instance with configured dependencies.
 func New() *RestAPI {
 	return &RestAPI{
 		JobRepository: repository.GetJobRepository(),
@@ -69,6 +71,8 @@ func New() *RestAPI {
 	}
 }
 
+// MountAPIRoutes registers REST API endpoints for job and cluster management.
+// These routes use JWT token authentication via the X-Auth-Token header.
 func (api *RestAPI) MountAPIRoutes(r *mux.Router) {
 	r.StrictSlash(true)
 	// REST API Uses TokenAuth
@@ -103,6 +107,8 @@ func (api *RestAPI) MountAPIRoutes(r *mux.Router) {
 	}
 }
 
+// MountUserAPIRoutes registers user-accessible REST API endpoints.
+// These are limited endpoints for regular users with JWT token authentication.
 func (api *RestAPI) MountUserAPIRoutes(r *mux.Router) {
 	r.StrictSlash(true)
 	// REST API Uses TokenAuth
@@ -112,6 +118,8 @@ func (api *RestAPI) MountUserAPIRoutes(r *mux.Router) {
 	r.HandleFunc("/jobs/metrics/{id}", api.getJobMetrics).Methods(http.MethodGet)
 }
 
+// MountMetricStoreAPIRoutes registers metric storage API endpoints.
+// These endpoints handle metric data ingestion and health checks with JWT token authentication.
 func (api *RestAPI) MountMetricStoreAPIRoutes(r *mux.Router) {
 	// REST API Uses TokenAuth
 	// Note: StrictSlash handles trailing slash variations automatically
@@ -126,6 +134,8 @@ func (api *RestAPI) MountMetricStoreAPIRoutes(r *mux.Router) {
 	r.HandleFunc("/api/healthcheck/", metricsHealth).Methods(http.MethodGet)
 }
 
+// MountConfigAPIRoutes registers configuration and user management endpoints.
+// These routes use session-based authentication and require admin privileges.
 func (api *RestAPI) MountConfigAPIRoutes(r *mux.Router) {
 	r.StrictSlash(true)
 	// Settings Frontend Uses SessionAuth
@@ -139,6 +149,8 @@ func (api *RestAPI) MountConfigAPIRoutes(r *mux.Router) {
 	}
 }
 
+// MountFrontendAPIRoutes registers frontend-specific API endpoints.
+// These routes support JWT generation and user configuration updates with session authentication.
 func (api *RestAPI) MountFrontendAPIRoutes(r *mux.Router) {
 	r.StrictSlash(true)
 	// Settings Frontend Uses SessionAuth
@@ -160,6 +172,8 @@ type DefaultAPIResponse struct {
 	Message string `json:"msg"`
 }
 
+// handleError writes a standardized JSON error response with the given status code.
+// It logs the error at WARN level and ensures proper Content-Type headers are set.
 func handleError(err error, statusCode int, rw http.ResponseWriter) {
 	cclog.Warnf("REST ERROR : %s", err.Error())
 	rw.Header().Add("Content-Type", "application/json")
@@ -172,15 +186,38 @@ func handleError(err error, statusCode int, rw http.ResponseWriter) {
 	}
 }
 
+// decode reads JSON from r into val with strict validation that rejects unknown fields.
 func decode(r io.Reader, val any) error {
 	dec := json.NewDecoder(r)
 	dec.DisallowUnknownFields()
 	return dec.Decode(val)
 }
 
-func (api *RestAPI) editNotice(rw http.ResponseWriter, r *http.Request) {
-	// SecuredCheck() only worked with TokenAuth: Removed
+// validatePathComponent checks if a path component contains potentially malicious patterns
+// that could be used for path traversal attacks. Returns an error if validation fails.
+func validatePathComponent(component, componentName string) error {
+	if strings.Contains(component, "..") ||
+		strings.Contains(component, "/") ||
+		strings.Contains(component, "\\") {
+		return fmt.Errorf("invalid %s", componentName)
+	}
+	return nil
+}
+
+// editNotice godoc
+// @summary Update system notice
+// @tags Config
+// @description Updates the notice.txt file content. Only admins are allowed. Content is limited to 10000 characters.
+// @accept mpfd
+// @produce plain
+// @param new-content formData string true "New notice content (max 10000 characters)"
+// @success 200 {string} string "Update Notice Content Success"
+// @failure 400 {object} ErrorResponse "Bad Request"
+// @failure 403 {object} ErrorResponse "Forbidden"
+// @failure 500 {object} ErrorResponse "Internal Server Error"
+// @security ApiKeyAuth
+// @router /notice/ [post]
+func (api *RestAPI) editNotice(rw http.ResponseWriter, r *http.Request) {
 	if user := repository.GetUserFromContext(r.Context()); !user.HasRole(schema.RoleAdmin) {
 		handleError(fmt.Errorf("only admins are allowed to update the notice.txt file"), http.StatusForbidden, rw)
 		return
@@ -189,9 +226,8 @@ func (api *RestAPI) editNotice(rw http.ResponseWriter, r *http.Request) {
 	// Get Value
 	newContent := r.FormValue("new-content")
 
-	// Validate content length to prevent DoS
-	if len(newContent) > 10000 {
-		handleError(fmt.Errorf("notice content exceeds maximum length of 10000 characters"), http.StatusBadRequest, rw)
+	if len(newContent) > maxNoticeLength {
+		handleError(fmt.Errorf("notice content exceeds maximum length of %d characters", maxNoticeLength), http.StatusBadRequest, rw)
 		return
 	}
 
@@ -203,7 +239,9 @@ func (api *RestAPI) editNotice(rw http.ResponseWriter, r *http.Request) {
 			handleError(fmt.Errorf("creating notice file failed: %w", err), http.StatusInternalServerError, rw)
 			return
 		}
-		ntxt.Close()
+		if err := ntxt.Close(); err != nil {
+			cclog.Warnf("Failed to close notice file: %v", err)
+		}
 	}
 
 	if err := os.WriteFile(noticeFilePath, []byte(newContent), noticeFilePerms); err != nil {
@@ -213,13 +251,30 @@ func (api *RestAPI) editNotice(rw http.ResponseWriter, r *http.Request) {
 
 	rw.Header().Set("Content-Type", "text/plain")
 	rw.WriteHeader(http.StatusOK)
+	var msg []byte
 	if newContent != "" {
-		rw.Write([]byte("Update Notice Content Success"))
+		msg = []byte("Update Notice Content Success")
 	} else {
-		rw.Write([]byte("Empty Notice Content Success"))
+		msg = []byte("Empty Notice Content Success")
+	}
+	if _, err := rw.Write(msg); err != nil {
+		cclog.Errorf("Failed to write response: %v", err)
 	}
 }
 
+// getJWT godoc
+// @summary Generate JWT token
+// @tags Frontend
+// @description Generates a JWT token for a user. Admins can generate tokens for any user, regular users only for themselves.
+// @accept mpfd
+// @produce plain
+// @param username formData string true "Username to generate JWT for"
+// @success 200 {string} string "JWT token"
+// @failure 403 {object} ErrorResponse "Forbidden"
+// @failure 404 {object} ErrorResponse "User Not Found"
+// @failure 500 {object} ErrorResponse "Internal Server Error"
+// @security ApiKeyAuth
+// @router /jwt/ [get]
 func (api *RestAPI) getJWT(rw http.ResponseWriter, r *http.Request) {
 	rw.Header().Set("Content-Type", "text/plain")
 	username := r.FormValue("username")
@@ -244,12 +299,22 @@ func (api *RestAPI) getJWT(rw http.ResponseWriter, r *http.Request) {
 	}
 
 	rw.WriteHeader(http.StatusOK)
-	rw.Write([]byte(jwt))
+	if _, err := rw.Write([]byte(jwt)); err != nil {
+		cclog.Errorf("Failed to write JWT response: %v", err)
+	}
 }
 
+// getRoles godoc
+// @summary Get available roles
+// @tags Config
+// @description Returns a list of valid user roles. Only admins are allowed.
+// @produce json
+// @success 200 {array} string "List of role names"
+// @failure 403 {object} ErrorResponse "Forbidden"
+// @failure 500 {object} ErrorResponse "Internal Server Error"
+// @security ApiKeyAuth
+// @router /roles/ [get]
 func (api *RestAPI) getRoles(rw http.ResponseWriter, r *http.Request) {
-	// SecuredCheck() only worked with TokenAuth: Removed
 
 	user := repository.GetUserFromContext(r.Context())
 	if !user.HasRole(schema.RoleAdmin) {
 		handleError(fmt.Errorf("only admins are allowed to fetch a list of roles"), http.StatusForbidden, rw)
@@ -268,6 +333,18 @@ func (api *RestAPI) getRoles(rw http.ResponseWriter, r *http.Request) {
 	}
 }
 
+// updateConfiguration godoc
+// @summary Update user configuration
+// @tags Frontend
+// @description Updates a user's configuration key-value pair.
+// @accept mpfd
+// @produce plain
+// @param key formData string true "Configuration key"
+// @param value formData string true "Configuration value"
+// @success 200 {string} string "success"
+// @failure 500 {object} ErrorResponse "Internal Server Error"
+// @security ApiKeyAuth
+// @router /configuration/ [post]
 func (api *RestAPI) updateConfiguration(rw http.ResponseWriter, r *http.Request) {
 	rw.Header().Set("Content-Type", "text/plain")
 	key, value := r.FormValue("key"), r.FormValue("value")
@@ -278,9 +355,25 @@ func (api *RestAPI) updateConfiguration(rw http.ResponseWriter, r *http.Request)
 	}
 
 	rw.WriteHeader(http.StatusOK)
-	rw.Write([]byte("success"))
+	if _, err := rw.Write([]byte("success")); err != nil {
+		cclog.Errorf("Failed to write response: %v", err)
+	}
 }
 
+// putMachineState godoc
+// @summary Store machine state
+// @tags Machine State
+// @description Stores machine state data for a specific cluster node. Validates cluster and host names to prevent path traversal.
+// @accept json
+// @produce plain
+// @param cluster path string true "Cluster name"
+// @param host path string true "Host name"
+// @success 201 "Created"
+// @failure 400 {object} ErrorResponse "Bad Request"
+// @failure 404 {object} ErrorResponse "Machine state not enabled"
+// @failure 500 {object} ErrorResponse "Internal Server Error"
+// @security ApiKeyAuth
+// @router /machine_state/{cluster}/{host} [put]
 func (api *RestAPI) putMachineState(rw http.ResponseWriter, r *http.Request) {
 	if api.MachineStateDir == "" {
 		handleError(fmt.Errorf("machine state not enabled"), http.StatusNotFound, rw)
@@ -291,13 +384,12 @@ func (api *RestAPI) putMachineState(rw http.ResponseWriter, r *http.Request) {
 	cluster := vars["cluster"]
 	host := vars["host"]
 
-	// Validate cluster and host to prevent path traversal attacks
-	if strings.Contains(cluster, "..") || strings.Contains(cluster, "/") || strings.Contains(cluster, "\\") {
-		handleError(fmt.Errorf("invalid cluster name"), http.StatusBadRequest, rw)
+	if err := validatePathComponent(cluster, "cluster name"); err != nil {
+		handleError(err, http.StatusBadRequest, rw)
 		return
 	}
-	if strings.Contains(host, "..") || strings.Contains(host, "/") || strings.Contains(host, "\\") {
-		handleError(fmt.Errorf("invalid host name"), http.StatusBadRequest, rw)
+	if err := validatePathComponent(host, "host name"); err != nil {
+		handleError(err, http.StatusBadRequest, rw)
 		return
 	}
 
@@ -323,6 +415,18 @@ func (api *RestAPI) putMachineState(rw http.ResponseWriter, r *http.Request) {
 	rw.WriteHeader(http.StatusCreated)
 }
 
+// getMachineState godoc
+// @summary Retrieve machine state
+// @tags Machine State
+// @description Retrieves stored machine state data for a specific cluster node. Validates cluster and host names to prevent path traversal.
+// @produce json
+// @param cluster path string true "Cluster name"
+// @param host path string true "Host name"
+// @success 200 {object} object "Machine state JSON data"
+// @failure 400 {object} ErrorResponse "Bad Request"
+// @failure 404 {object} ErrorResponse "Machine state not enabled or file not found"
+// @security ApiKeyAuth
+// @router /machine_state/{cluster}/{host} [get]
 func (api *RestAPI) getMachineState(rw http.ResponseWriter, r *http.Request) {
 	if api.MachineStateDir == "" {
 		handleError(fmt.Errorf("machine state not enabled"), http.StatusNotFound, rw)
@@ -333,13 +437,12 @@ func (api *RestAPI) getMachineState(rw http.ResponseWriter, r *http.Request) {
 	cluster := vars["cluster"]
 	host := vars["host"]
 
-	// Validate cluster and host to prevent path traversal attacks
-	if strings.Contains(cluster, "..") || strings.Contains(cluster, "/") || strings.Contains(cluster, "\\") {
-		handleError(fmt.Errorf("invalid cluster name"), http.StatusBadRequest, rw)
+	if err := validatePathComponent(cluster, "cluster name"); err != nil {
+		handleError(err, http.StatusBadRequest, rw)
 		return
 	}
-	if strings.Contains(host, "..") || strings.Contains(host, "/") || strings.Contains(host, "\\") {
-		handleError(fmt.Errorf("invalid host name"), http.StatusBadRequest, rw)
+	if err := validatePathComponent(host, "host name"); err != nil {
+		handleError(err, http.StatusBadRequest, rw)
 		return
 	}
 
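The refactor above replaces the duplicated inline `strings.Contains` checks with the shared `validatePathComponent` helper. A small self-contained sketch of its behavior (the helper is copied verbatim from the hunk; the sample inputs are illustrative, not taken from the repository's tests):

```go
package main

import (
	"fmt"
	"strings"
)

// Copied verbatim from the diff above.
func validatePathComponent(component, componentName string) error {
	if strings.Contains(component, "..") ||
		strings.Contains(component, "/") ||
		strings.Contains(component, "\\") {
		return fmt.Errorf("invalid %s", componentName)
	}
	return nil
}

func main() {
	for _, c := range []string{"testcluster", "../etc/passwd", "a/b", `a\b`} {
		if err := validatePathComponent(c, "cluster name"); err != nil {
			fmt.Printf("%q rejected: %v\n", c, err) // traversal patterns are refused
		} else {
			fmt.Printf("%q accepted\n", c) // plain names pass through
		}
	}
}
```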
@@ -11,8 +11,8 @@ import (
 	"net/http"
 
 	"github.com/ClusterCockpit/cc-backend/internal/repository"
-	cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
-	"github.com/ClusterCockpit/cc-lib/schema"
+	cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger"
+	"github.com/ClusterCockpit/cc-lib/v2/schema"
 	"github.com/gorilla/mux"
 )
 
@@ -31,7 +31,7 @@ type APIReturnedUser struct {
 // @description Required query-parameter defines if all users or only users with additional special roles are returned.
 // @produce json
 // @param not-just-user query bool true "If returned list should contain all users or only users with additional special roles"
-// @success 200 {array} api.ApiReturnedUser "List of users returned successfully"
+// @success 200 {array} api.APIReturnedUser "List of users returned successfully"
 // @failure 400 {string} string "Bad Request"
 // @failure 401 {string} string "Unauthorized"
 // @failure 403 {string} string "Forbidden"
@@ -106,7 +106,7 @@ Data is archived at the highest available resolution (typically 60s intervals).
 
 ```go
 // In archiver.go ArchiveJob() function
-jobData, err := metricdispatcher.LoadData(job, allMetrics, scopes, ctx, 300)
+jobData, err := metricdispatch.LoadData(job, allMetrics, scopes, ctx, 300)
 // 0 = highest resolution
 // 300 = 5-minute resolution
 ```
@@ -185,6 +185,6 @@ Internal state is protected by:
 ## Dependencies
 
 - `internal/repository`: Database operations for job metadata
-- `internal/metricdispatcher`: Loading metric data from various backends
+- `internal/metricdispatch`: Loading metric data from various backends
 - `pkg/archive`: Archive backend abstraction (filesystem, S3, SQLite)
 - `cc-lib/schema`: Job and metric data structures
@@ -54,8 +54,8 @@ import (
 	"time"
 
 	"github.com/ClusterCockpit/cc-backend/internal/repository"
-	cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
-	"github.com/ClusterCockpit/cc-lib/schema"
+	cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger"
+	"github.com/ClusterCockpit/cc-lib/v2/schema"
 	sq "github.com/Masterminds/squirrel"
 )
 
@@ -10,10 +10,10 @@ import (
 	"math"
 
 	"github.com/ClusterCockpit/cc-backend/internal/config"
-	"github.com/ClusterCockpit/cc-backend/internal/metricdispatcher"
+	"github.com/ClusterCockpit/cc-backend/internal/metricdispatch"
 	"github.com/ClusterCockpit/cc-backend/pkg/archive"
-	cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
-	"github.com/ClusterCockpit/cc-lib/schema"
+	cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger"
+	"github.com/ClusterCockpit/cc-lib/v2/schema"
 )
 
 // ArchiveJob archives a completed job's metric data to the configured archive backend.
@@ -60,7 +60,7 @@ func ArchiveJob(job *schema.Job, ctx context.Context) (*schema.Job, error) {
 		scopes = append(scopes, schema.MetricScopeAccelerator)
 	}
 
-	jobData, err := metricdispatcher.LoadData(job, allMetrics, scopes, ctx, 0) // 0 Resulotion-Value retrieves highest res (60s)
+	jobData, err := metricdispatch.LoadData(job, allMetrics, scopes, ctx, 0) // 0 Resulotion-Value retrieves highest res (60s)
 	if err != nil {
 		cclog.Error("Error wile loading job data for archiving")
 		return nil, err
@@ -25,9 +25,9 @@ import (
 
 	"github.com/ClusterCockpit/cc-backend/internal/config"
 	"github.com/ClusterCockpit/cc-backend/internal/repository"
-	cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
-	"github.com/ClusterCockpit/cc-lib/schema"
-	"github.com/ClusterCockpit/cc-lib/util"
+	cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger"
+	"github.com/ClusterCockpit/cc-lib/v2/schema"
+	"github.com/ClusterCockpit/cc-lib/v2/util"
 	"github.com/gorilla/sessions"
 )
 
@@ -14,8 +14,8 @@ import (
 	"strings"
 	"time"
 
-	cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
-	"github.com/ClusterCockpit/cc-lib/schema"
+	cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger"
+	"github.com/ClusterCockpit/cc-lib/v2/schema"
 	"github.com/golang-jwt/jwt/v5"
 )
 
@@ -12,8 +12,8 @@ import (
 	"net/http"
 	"os"
 
-	cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
-	"github.com/ClusterCockpit/cc-lib/schema"
+	cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger"
+	"github.com/ClusterCockpit/cc-lib/v2/schema"
 	"github.com/golang-jwt/jwt/v5"
 )
 
@@ -11,8 +11,8 @@ import (
 	"fmt"
 
 	"github.com/ClusterCockpit/cc-backend/internal/repository"
-	cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
-	"github.com/ClusterCockpit/cc-lib/schema"
+	cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger"
+	"github.com/ClusterCockpit/cc-lib/v2/schema"
 	"github.com/golang-jwt/jwt/v5"
 )
 
@@ -8,7 +8,7 @@ package auth
 import (
 	"testing"
 
-	"github.com/ClusterCockpit/cc-lib/schema"
+	"github.com/ClusterCockpit/cc-lib/v2/schema"
 	"github.com/golang-jwt/jwt/v5"
 )
 
@@ -237,7 +237,6 @@ func TestGetUserFromJWT_NoValidation(t *testing.T) {
 	}
 
 	user, err := getUserFromJWT(claims, false, schema.AuthToken, -1)
-
 	if err != nil {
 		t.Fatalf("Unexpected error: %v", err)
 	}
@@ -13,8 +13,8 @@ import (
 	"os"
 	"strings"
 
-	cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
-	"github.com/ClusterCockpit/cc-lib/schema"
+	cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger"
+	"github.com/ClusterCockpit/cc-lib/v2/schema"
 	"github.com/golang-jwt/jwt/v5"
 )
 
@@ -13,8 +13,8 @@ import (
 	"strings"
 
 	"github.com/ClusterCockpit/cc-backend/internal/repository"
-	cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
-	"github.com/ClusterCockpit/cc-lib/schema"
+	cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger"
+	"github.com/ClusterCockpit/cc-lib/v2/schema"
 	"github.com/go-ldap/ldap/v3"
 )
 
@@ -9,8 +9,8 @@ import (
 	"fmt"
 	"net/http"
 
-	cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
-	"github.com/ClusterCockpit/cc-lib/schema"
+	cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger"
+	"github.com/ClusterCockpit/cc-lib/v2/schema"
 	"golang.org/x/crypto/bcrypt"
 )
 
@@ -15,8 +15,8 @@ import (
 	"time"
 
 	"github.com/ClusterCockpit/cc-backend/internal/repository"
-	cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
-	"github.com/ClusterCockpit/cc-lib/schema"
+	cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger"
+	"github.com/ClusterCockpit/cc-lib/v2/schema"
 	"github.com/coreos/go-oidc/v3/oidc"
 	"github.com/gorilla/mux"
 	"golang.org/x/oauth2"
@@ -11,8 +11,8 @@ import (
 	"encoding/json"
 	"time"
 
-	cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
-	"github.com/ClusterCockpit/cc-lib/resampler"
+	cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger"
+	"github.com/ClusterCockpit/cc-lib/v2/resampler"
 )
 
 type ProgramConfig struct {
@@ -78,9 +78,6 @@ type ProgramConfig struct {
 
 	// If exists, will enable dynamic zoom in frontend metric plots using the configured values
 	EnableResampling *ResampleConfig `json:"resampling"`
-
-	// Global upstream metric repository configuration for metric pull workers
-	UpstreamMetricRepository *json.RawMessage `json:"upstreamMetricRepository,omitempty"`
 }
 
 type ResampleConfig struct {
@@ -93,8 +90,7 @@ type ResampleConfig struct {
 }
 
 type NATSConfig struct {
-	SubjectJobStart  string `json:"subjectJobStart"`
-	SubjectJobStop   string `json:"subjectJobStop"`
+	SubjectJobEvent  string `json:"subjectJobEvent"`
 	SubjectNodeState string `json:"subjectNodeState"`
 }
 
@@ -115,13 +111,6 @@ type FilterRanges struct {
 	StartTime *TimeRange `json:"startTime"`
 }
 
-type ClusterConfig struct {
-	Name         string        `json:"name"`
-	FilterRanges *FilterRanges `json:"filterRanges"`
-}
-
-var Clusters []*ClusterConfig
-
 var Keys ProgramConfig = ProgramConfig{
 	Addr:                  "localhost:8080",
 	DisableAuthentication: false,
@@ -135,7 +124,7 @@ var Keys ProgramConfig = ProgramConfig{
 	ShortRunningJobsDuration: 5 * 60,
 }
 
-func Init(mainConfig json.RawMessage, clusterConfig json.RawMessage) {
+func Init(mainConfig json.RawMessage) {
 	Validate(configSchema, mainConfig)
 	dec := json.NewDecoder(bytes.NewReader(mainConfig))
 	dec.DisallowUnknownFields()
@@ -143,17 +132,6 @@ func Init(mainConfig json.RawMessage) {
 		cclog.Abortf("Config Init: Could not decode config file '%s'.\nError: %s\n", mainConfig, err.Error())
 	}
 
-	Validate(clustersSchema, clusterConfig)
-	dec = json.NewDecoder(bytes.NewReader(clusterConfig))
-	dec.DisallowUnknownFields()
-	if err := dec.Decode(&Clusters); err != nil {
-		cclog.Abortf("Config Init: Could not decode config file '%s'.\nError: %s\n", mainConfig, err.Error())
-	}
-
-	if len(Clusters) < 1 {
-		cclog.Abort("Config Init: At least one cluster required in config. Exited with error.")
-	}
-
 	if Keys.EnableResampling != nil && Keys.EnableResampling.MinimumPoints > 0 {
 		resampler.SetMinimumRequiredPoints(Keys.EnableResampling.MinimumPoints)
 	}
@@ -8,19 +8,15 @@ package config
 import (
 	"testing"
 
-	ccconf "github.com/ClusterCockpit/cc-lib/ccConfig"
-	cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
+	ccconf "github.com/ClusterCockpit/cc-lib/v2/ccConfig"
+	cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger"
 )
 
 func TestInit(t *testing.T) {
 	fp := "../../configs/config.json"
 	ccconf.Init(fp)
 	if cfg := ccconf.GetPackageConfig("main"); cfg != nil {
-		if clustercfg := ccconf.GetPackageConfig("clusters"); clustercfg != nil {
-			Init(cfg, clustercfg)
-		} else {
-			cclog.Abort("Cluster configuration must be present")
-		}
+		Init(cfg)
 	} else {
 		cclog.Abort("Main configuration must be present")
 	}
@@ -34,11 +30,7 @@ func TestInitMinimal(t *testing.T) {
 	fp := "../../configs/config-demo.json"
 	ccconf.Init(fp)
 	if cfg := ccconf.GetPackageConfig("main"); cfg != nil {
-		if clustercfg := ccconf.GetPackageConfig("clusters"); clustercfg != nil {
-			Init(cfg, clustercfg)
-		} else {
-			cclog.Abort("Cluster configuration must be present")
-		}
+		Init(cfg)
 	} else {
 		cclog.Abort("Main configuration must be present")
 	}
@@ -120,103 +120,21 @@ var configSchema = `
 			},
 			"required": ["trigger", "resolutions"]
 		},
-		"upstreamMetricRepository": {
-			"description": "Global upstream metric repository configuration for metric pull workers",
-			"type": "object",
-			"properties": {
-				"kind": {
-					"type": "string",
-					"enum": ["influxdb", "prometheus", "cc-metric-store", "cc-metric-store-internal", "test"]
-				},
-				"url": {
-					"type": "string"
-				},
-				"token": {
-					"type": "string"
-				}
-			},
-			"required": ["kind"]
-		}
+		"apiSubjects": {
+			"description": "NATS subjects configuration for subscribing to job and node events.",
+			"type": "object",
+			"properties": {
+				"subjectJobEvent": {
+					"description": "NATS subject for job events (start_job, stop_job)",
+					"type": "string"
+				},
+				"subjectNodeState": {
+					"description": "NATS subject for node state updates",
+					"type": "string"
+				}
+			},
+			"required": ["subjectJobEvent", "subjectNodeState"]
+		}
 	},
 	"required": ["apiAllowedIPs"]
 }`
-
-var clustersSchema = `
-{
-	"type": "array",
-	"items": {
-		"type": "object",
-		"properties": {
-			"name": {
-				"description": "The name of the cluster.",
-				"type": "string"
-			},
-			"metricDataRepository": {
-				"description": "Type of the metric data repository for this cluster",
-				"type": "object",
-				"properties": {
-					"kind": {
-						"type": "string",
-						"enum": ["influxdb", "prometheus", "cc-metric-store", "cc-metric-store-internal", "test"]
-					},
-					"url": {
-						"type": "string"
-					},
-					"token": {
-						"type": "string"
-					}
-				},
-				"required": ["kind"]
-			},
-			"filterRanges": {
-				"description": "This option controls the slider ranges for the UI controls of numNodes, duration, and startTime.",
-				"type": "object",
-				"properties": {
-					"numNodes": {
-						"description": "UI slider range for number of nodes",
-						"type": "object",
-						"properties": {
-							"from": {
-								"type": "integer"
-							},
-							"to": {
-								"type": "integer"
-							}
-						},
-						"required": ["from", "to"]
-					},
-					"duration": {
-						"description": "UI slider range for duration",
-						"type": "object",
-						"properties": {
-							"from": {
-								"type": "integer"
-							},
-							"to": {
-								"type": "integer"
-							}
-						},
-						"required": ["from", "to"]
-					},
-					"startTime": {
-						"description": "UI slider range for start time",
-						"type": "object",
-						"properties": {
-							"from": {
-								"type": "string",
-								"format": "date-time"
-							},
-							"to": {
-								"type": "null"
-							}
-						},
-						"required": ["from", "to"]
-					}
-				},
-				"required": ["numNodes", "duration", "startTime"]
-			}
-		},
-		"required": ["name", "filterRanges"],
-		"minItems": 1
-	}
-}`
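For reference, a config fragment that would satisfy the new `apiSubjects` schema might look like the following (the subject names are made-up placeholders, not values taken from the repository):

```json
{
  "apiSubjects": {
    "subjectJobEvent": "cc.jobs.events",
    "subjectNodeState": "cc.nodes.state"
  }
}
```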
@@ -8,7 +8,7 @@ package config
 import (
 	"encoding/json"

-	cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
+	cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger"
 	"github.com/santhosh-tekuri/jsonschema/v5"
 )
File diff suppressed because it is too large
@@ -10,7 +10,7 @@ import (
 	"time"

 	"github.com/ClusterCockpit/cc-backend/internal/config"
-	"github.com/ClusterCockpit/cc-lib/schema"
+	"github.com/ClusterCockpit/cc-lib/v2/schema"
 )

 type ClusterMetricWithName struct {
@@ -82,6 +82,7 @@ type JobFilter struct {
 	State       []schema.JobState `json:"state,omitempty"`
 	MetricStats []*MetricStatItem `json:"metricStats,omitempty"`
 	Shared      *string           `json:"shared,omitempty"`
+	Schedule    *string           `json:"schedule,omitempty"`
 	Node        *StringInput      `json:"node,omitempty"`
 }
@@ -4,7 +4,7 @@ import (
 	"sync"

 	"github.com/ClusterCockpit/cc-backend/internal/repository"
-	cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
+	cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger"
 	"github.com/jmoiron/sqlx"
 )
@@ -3,7 +3,7 @@ package graph
 // This file will be automatically regenerated based on the schema, any resolver
 // implementations
 // will be copied through when generating and any unknown code will be moved to the end.
-// Code generated by github.com/99designs/gqlgen version v0.17.84
+// Code generated by github.com/99designs/gqlgen version v0.17.85

 import (
 	"context"
@@ -19,11 +19,11 @@ import (
 	"github.com/ClusterCockpit/cc-backend/internal/config"
 	"github.com/ClusterCockpit/cc-backend/internal/graph/generated"
 	"github.com/ClusterCockpit/cc-backend/internal/graph/model"
-	"github.com/ClusterCockpit/cc-backend/internal/metricdispatcher"
+	"github.com/ClusterCockpit/cc-backend/internal/metricdispatch"
 	"github.com/ClusterCockpit/cc-backend/internal/repository"
 	"github.com/ClusterCockpit/cc-backend/pkg/archive"
-	cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
-	"github.com/ClusterCockpit/cc-lib/schema"
+	cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger"
+	"github.com/ClusterCockpit/cc-lib/v2/schema"
 )

 // Partitions is the resolver for the partitions field.
@@ -283,7 +283,7 @@ func (r *mutationResolver) RemoveTagFromList(ctx context.Context, tagIds []strin
 	// Test Access: Admins && Admin Tag OR Everyone && Private Tag
 	if user.HasRole(schema.RoleAdmin) && (tscope == "global" || tscope == "admin") || user.Username == tscope {
 		// Remove from DB
-		if err = r.Repo.RemoveTagById(tid); err != nil {
+		if err = r.Repo.RemoveTagByID(tid); err != nil {
 			cclog.Warn("Error while removing tag")
 			return nil, err
 		} else {
@@ -484,7 +484,7 @@ func (r *queryResolver) JobMetrics(ctx context.Context, id string, metrics []str
 		return nil, err
 	}

-	data, err := metricdispatcher.LoadData(job, metrics, scopes, ctx, *resolution)
+	data, err := metricdispatch.LoadData(job, metrics, scopes, ctx, *resolution)
 	if err != nil {
 		cclog.Warn("Error while loading job data")
 		return nil, err
@@ -512,7 +512,7 @@ func (r *queryResolver) JobStats(ctx context.Context, id string, metrics []strin
 		return nil, err
 	}

-	data, err := metricdispatcher.LoadJobStats(job, metrics, ctx)
+	data, err := metricdispatch.LoadJobStats(job, metrics, ctx)
 	if err != nil {
 		cclog.Warnf("Error while loading jobStats data for job id %s", id)
 		return nil, err
@@ -537,7 +537,7 @@ func (r *queryResolver) ScopedJobStats(ctx context.Context, id string, metrics [
 		return nil, err
 	}

-	data, err := metricdispatcher.LoadScopedJobStats(job, metrics, scopes, ctx)
+	data, err := metricdispatch.LoadScopedJobStats(job, metrics, scopes, ctx)
 	if err != nil {
 		cclog.Warnf("Error while loading scopedJobStats data for job id %s", id)
 		return nil, err
@@ -590,10 +590,13 @@ func (r *queryResolver) Jobs(ctx context.Context, filter []*model.JobFilter, pag

 	// Note: Even if App-Default 'config.Keys.UiDefaults["job_list_usePaging"]' is set, always return hasNextPage boolean.
 	// Users can decide in frontend to use continuous scroll, even if app-default is paging!
+	// Skip if page.ItemsPerPage == -1 ("Load All" -> No Next Page required, Status Dashboards)
 	/*
 		Example Page 4 @ 10 IpP : Does item 41 exist?
 		Minimal Page 41 @ 1 IpP : If len(result) is 1, Page 5 @ 10 IpP exists.
 	*/
+	hasNextPage := false
+	if page.ItemsPerPage != -1 {
 		nextPage := &model.PageRequest{
 			ItemsPerPage: 1,
 			Page:         ((page.Page * page.ItemsPerPage) + 1),
@@ -603,8 +606,8 @@ func (r *queryResolver) Jobs(ctx context.Context, filter []*model.JobFilter, pag
 			cclog.Warn("Error while querying next jobs")
 			return nil, err
 		}
-	hasNextPage := len(nextJobs) == 1
+		hasNextPage = len(nextJobs) == 1
+	}

 	return &model.JobResultList{Items: jobs, Count: &count, HasNextPage: &hasNextPage}, nil
 }
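The `hasNextPage` change above replaces an unconditional look-ahead with one that is skipped for the "Load All" case (`ItemsPerPage == -1`). The probe itself asks for exactly one item just past the current window: if that item exists, a next page exists. A standalone sketch of the arithmetic (types and the probe function are simplifications, not the resolver's actual code):

```go
package main

import "fmt"

// pageRequest mirrors the shape used by the resolver (simplified).
type pageRequest struct {
	ItemsPerPage int
	Page         int
}

// hasNextPage reports whether a page after p exists, given a probe
// that returns how many items exist in a 1-item window.
func hasNextPage(p pageRequest, probe func(pageRequest) int) bool {
	// "Load All" (-1) never has a next page.
	if p.ItemsPerPage == -1 {
		return false
	}
	// Example: page 4 @ 10 items/page -> ask for item 41 as
	// "page 41 @ 1 item/page"; if it exists, page 5 exists.
	next := pageRequest{
		ItemsPerPage: 1,
		Page:         p.Page*p.ItemsPerPage + 1,
	}
	return probe(next) == 1
}

func main() {
	total := 45 // pretend the job table holds 45 rows
	probe := func(p pageRequest) int {
		// With 1 item per page, page N contains exactly item N.
		if p.Page <= total {
			return 1
		}
		return 0
	}
	fmt.Println(hasNextPage(pageRequest{ItemsPerPage: 10, Page: 4}, probe)) // true
	fmt.Println(hasNextPage(pageRequest{ItemsPerPage: 10, Page: 5}, probe)) // false
}
```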
@@ -702,7 +705,7 @@ func (r *queryResolver) JobsMetricStats(ctx context.Context, filter []*model.Job

 	res := []*model.JobStats{}
 	for _, job := range jobs {
-		data, err := metricdispatcher.LoadJobStats(job, metrics, ctx)
+		data, err := metricdispatch.LoadJobStats(job, metrics, ctx)
 		if err != nil {
 			cclog.Warnf("Error while loading comparison jobStats data for job id %d", job.JobID)
 			continue
@@ -753,13 +756,19 @@ func (r *queryResolver) NodeMetrics(ctx context.Context, cluster string, nodes [
 		return nil, errors.New("you need to be administrator or support staff for this query")
 	}

-	if metrics == nil {
-		for _, mc := range archive.GetCluster(cluster).MetricConfig {
-			metrics = append(metrics, mc.Name)
-		}
+	defaultMetrics := make([]string, 0)
+	for _, mc := range archive.GetCluster(cluster).MetricConfig {
+		defaultMetrics = append(defaultMetrics, mc.Name)
+	}
+	if metrics == nil {
+		metrics = defaultMetrics
+	} else {
+		metrics = slices.DeleteFunc(metrics, func(metric string) bool {
+			return !slices.Contains(defaultMetrics, metric) // Remove undefined metrics.
+		})
 	}

-	data, err := metricdispatcher.LoadNodeData(cluster, metrics, nodes, scopes, from, to, ctx)
+	data, err := metricdispatch.LoadNodeData(cluster, metrics, nodes, scopes, from, to, ctx)
 	if err != nil {
 		cclog.Warn("error while loading node data")
 		return nil, err
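The `NodeMetrics` hunk now builds the cluster's metric list once and, when the caller supplies its own list, drops anything the cluster does not define. That whitelist step is plain standard-library `slices` machinery; a self-contained sketch (metric names are placeholders):

```go
package main

import (
	"fmt"
	"slices"
)

func main() {
	// Metrics defined for the cluster (stand-ins for MetricConfig names).
	defaultMetrics := []string{"flops_any", "mem_bw", "cpu_load"}

	// Caller-requested metrics, possibly containing unknown names.
	requested := []string{"mem_bw", "gpu_util", "flops_any"}

	// Remove every requested metric the cluster does not define.
	requested = slices.DeleteFunc(requested, func(m string) bool {
		return !slices.Contains(defaultMetrics, m)
	})

	fmt.Println(requested) // [mem_bw flops_any]
}
```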
@@ -825,7 +834,7 @@ func (r *queryResolver) NodeMetricsList(ctx context.Context, cluster string, sub
 		}
 	}

-	data, err := metricdispatcher.LoadNodeListData(cluster, subCluster, nodes, metrics, scopes, *resolution, from, to, ctx)
+	data, err := metricdispatch.LoadNodeListData(cluster, subCluster, nodes, metrics, scopes, *resolution, from, to, ctx)
 	if err != nil {
 		cclog.Warn("error while loading node data (Resolver.NodeMetricsList")
 		return nil, err
@@ -880,7 +889,7 @@ func (r *queryResolver) ClusterMetrics(ctx context.Context, cluster string, metr

 	// 'nodes' == nil -> Defaults to all nodes of cluster for existing query workflow
 	scopes := []schema.MetricScope{"node"}
-	data, err := metricdispatcher.LoadNodeData(cluster, metrics, nil, scopes, from, to, ctx)
+	data, err := metricdispatch.LoadNodeData(cluster, metrics, nil, scopes, from, to, ctx)
 	if err != nil {
 		cclog.Warn("error while loading node data")
 		return nil, err
@@ -972,12 +981,10 @@ func (r *Resolver) Query() generated.QueryResolver { return &queryResolver{r} }
 // SubCluster returns generated.SubClusterResolver implementation.
 func (r *Resolver) SubCluster() generated.SubClusterResolver { return &subClusterResolver{r} }

-type (
-	clusterResolver     struct{ *Resolver }
-	jobResolver         struct{ *Resolver }
-	metricValueResolver struct{ *Resolver }
-	mutationResolver    struct{ *Resolver }
-	nodeResolver        struct{ *Resolver }
-	queryResolver       struct{ *Resolver }
-	subClusterResolver  struct{ *Resolver }
-)
+type clusterResolver struct{ *Resolver }
+type jobResolver struct{ *Resolver }
+type metricValueResolver struct{ *Resolver }
+type mutationResolver struct{ *Resolver }
+type nodeResolver struct{ *Resolver }
+type queryResolver struct{ *Resolver }
+type subClusterResolver struct{ *Resolver }
@@ -13,9 +13,9 @@ import (

 	"github.com/99designs/gqlgen/graphql"
 	"github.com/ClusterCockpit/cc-backend/internal/graph/model"
-	"github.com/ClusterCockpit/cc-backend/internal/metricdispatcher"
-	cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
-	"github.com/ClusterCockpit/cc-lib/schema"
+	"github.com/ClusterCockpit/cc-backend/internal/metricdispatch"
+	cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger"
+	"github.com/ClusterCockpit/cc-lib/v2/schema"
 )

 const MAX_JOBS_FOR_ANALYSIS = 500
@@ -55,7 +55,7 @@ func (r *queryResolver) rooflineHeatmap(
 	// 	resolution = max(resolution, mc.Timestep)
 	// }

-	jobdata, err := metricdispatcher.LoadData(job, []string{"flops_any", "mem_bw"}, []schema.MetricScope{schema.MetricScopeNode}, ctx, 0)
+	jobdata, err := metricdispatch.LoadData(job, []string{"flops_any", "mem_bw"}, []schema.MetricScope{schema.MetricScopeNode}, ctx, 0)
 	if err != nil {
 		cclog.Errorf("Error while loading roofline metrics for job %d", job.ID)
 		return nil, err
@@ -128,7 +128,7 @@ func (r *queryResolver) jobsFootprints(ctx context.Context, filter []*model.JobF
 			continue
 		}

-		if err := metricdispatcher.LoadAverages(job, metrics, avgs, ctx); err != nil {
+		if err := metricdispatch.LoadAverages(job, metrics, avgs, ctx); err != nil {
 			cclog.Error("Error while loading averages for footprint")
 			return nil, err
 		}
|||||||
@@ -2,6 +2,7 @@
|
|||||||
// All rights reserved. This file is part of cc-backend.
|
// All rights reserved. This file is part of cc-backend.
|
||||||
// Use of this source code is governed by a MIT-style
|
// Use of this source code is governed by a MIT-style
|
||||||
// license that can be found in the LICENSE file.
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
package importer
|
package importer
|
||||||
|
|
||||||
import (
|
import (
|
||||||
@@ -14,8 +15,8 @@ import (
|
|||||||
"github.com/ClusterCockpit/cc-backend/internal/config"
|
"github.com/ClusterCockpit/cc-backend/internal/config"
|
||||||
"github.com/ClusterCockpit/cc-backend/internal/repository"
|
"github.com/ClusterCockpit/cc-backend/internal/repository"
|
||||||
"github.com/ClusterCockpit/cc-backend/pkg/archive"
|
"github.com/ClusterCockpit/cc-backend/pkg/archive"
|
||||||
cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
|
cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger"
|
||||||
"github.com/ClusterCockpit/cc-lib/schema"
|
"github.com/ClusterCockpit/cc-lib/v2/schema"
|
||||||
)
|
)
|
||||||
|
|
||||||
// HandleImportFlag imports jobs from file pairs specified in a comma-separated flag string.
|
// HandleImportFlag imports jobs from file pairs specified in a comma-separated flag string.
|
||||||
|
|||||||
@@ -16,8 +16,8 @@ import (
 	"github.com/ClusterCockpit/cc-backend/internal/importer"
 	"github.com/ClusterCockpit/cc-backend/internal/repository"
 	"github.com/ClusterCockpit/cc-backend/pkg/archive"
-	ccconf "github.com/ClusterCockpit/cc-lib/ccConfig"
-	cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
+	ccconf "github.com/ClusterCockpit/cc-lib/v2/ccConfig"
+	cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger"
 )

 // copyFile copies a file from source path to destination path.
@@ -56,33 +56,8 @@ func setup(t *testing.T) *repository.JobRepository {
 	"archive": {
 		"kind": "file",
 		"path": "./var/job-archive"
-	},
-	"clusters": [
-		{
-			"name": "testcluster",
-			"filterRanges": {
-				"numNodes": { "from": 1, "to": 64 },
-				"duration": { "from": 0, "to": 86400 },
-				"startTime": { "from": "2022-01-01T00:00:00Z", "to": null }
-			}
-		},
-		{
-			"name": "fritz",
-			"filterRanges": {
-				"numNodes": { "from": 1, "to": 944 },
-				"duration": { "from": 0, "to": 86400 },
-				"startTime": { "from": "2022-01-01T00:00:00Z", "to": null }
-			}
-		},
-		{
-			"name": "taurus",
-			"filterRanges": {
-				"numNodes": { "from": 1, "to": 4000 },
-				"duration": { "from": 0, "to": 604800 },
-				"startTime": { "from": "2010-01-01T00:00:00Z", "to": null }
-			}
-		}
-	]}`
+	}
+}`

 	cclog.Init("info", true)
 	tmpdir := t.TempDir()
@@ -118,11 +93,7 @@ func setup(t *testing.T) *repository.JobRepository {

 	// Load and check main configuration
 	if cfg := ccconf.GetPackageConfig("main"); cfg != nil {
-		if clustercfg := ccconf.GetPackageConfig("clusters"); clustercfg != nil {
-			config.Init(cfg, clustercfg)
-		} else {
-			t.Fatal("Cluster configuration must be present")
-		}
+		config.Init(cfg)
 	} else {
 		t.Fatal("Main configuration must be present")
 	}
|
|||||||
@@ -22,8 +22,8 @@ import (
|
|||||||
|
|
||||||
"github.com/ClusterCockpit/cc-backend/internal/repository"
|
"github.com/ClusterCockpit/cc-backend/internal/repository"
|
||||||
"github.com/ClusterCockpit/cc-backend/pkg/archive"
|
"github.com/ClusterCockpit/cc-backend/pkg/archive"
|
||||||
cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
|
cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger"
|
||||||
"github.com/ClusterCockpit/cc-lib/schema"
|
"github.com/ClusterCockpit/cc-lib/v2/schema"
|
||||||
)
|
)
|
||||||
|
|
||||||
const (
|
const (
|
||||||
|
|||||||
@@ -2,12 +2,13 @@
|
|||||||
// All rights reserved. This file is part of cc-backend.
|
// All rights reserved. This file is part of cc-backend.
|
||||||
// Use of this source code is governed by a MIT-style
|
// Use of this source code is governed by a MIT-style
|
||||||
// license that can be found in the LICENSE file.
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
package importer
|
package importer
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"math"
|
"math"
|
||||||
|
|
||||||
ccunits "github.com/ClusterCockpit/cc-lib/ccUnits"
|
ccunits "github.com/ClusterCockpit/cc-lib/v2/ccUnits"
|
||||||
)
|
)
|
||||||
|
|
||||||
// getNormalizationFactor calculates the scaling factor needed to normalize a value
|
// getNormalizationFactor calculates the scaling factor needed to normalize a value
|
||||||
|
|||||||
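Both normalize hunks only track the `/v2` module path, but the function they sit next to deserves a concrete illustration: normalization rescales a raw value into a "nice" range and reports the factor that was applied. A dependency-free sketch of the idea (the real code works with ccunits prefixes; this toy version only handles powers of 1000):

```go
package main

import (
	"fmt"
	"math"
)

// normalizationFactor returns a power-of-1000 factor f such that
// value*f falls into [1, 1000), plus the number of 10^3 steps taken.
func normalizationFactor(value float64) (float64, int) {
	count := 0
	scale := 0
	for value > 1000.0 {
		value /= 1000.0
		count++
	}
	for value < 1.0 && value > 0.0 {
		value *= 1000.0
		count--
	}
	if count != 0 {
		scale = -count * 3
	}
	return math.Pow10(scale), count
}

func main() {
	// 1.23e12 bytes -> factor 1e-12, four prefix steps (e.g. toward Tera).
	f, steps := normalizationFactor(1.23e12)
	fmt.Println(f, steps) // 1e-12 4
}
```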
@@ -8,7 +8,7 @@ import (
 	"fmt"
 	"testing"

-	ccunits "github.com/ClusterCockpit/cc-lib/ccUnits"
+	ccunits "github.com/ClusterCockpit/cc-lib/v2/ccUnits"
 )

 // TestNormalizeFactor tests the normalization of large byte values to gigabyte prefix.
@@ -1,95 +0,0 @@
-// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
-// All rights reserved. This file is part of cc-backend.
-// Use of this source code is governed by a MIT-style
-// license that can be found in the LICENSE file.
-
-package memorystore
-
-const configSchema = `{
-	"type": "object",
-	"description": "Configuration specific to built-in metric-store.",
-	"properties": {
-		"checkpoints": {
-			"description": "Configuration for checkpointing the metrics within metric-store",
-			"type": "object",
-			"properties": {
-				"file-format": {
-					"description": "Specify the type of checkpoint file. There are 2 variants: 'avro' and 'json'. If nothing is specified, 'avro' is default.",
-					"type": "string"
-				},
-				"interval": {
-					"description": "Interval at which the metrics should be checkpointed.",
-					"type": "string"
-				},
-				"directory": {
-					"description": "Specify the parent directy in which the checkpointed files should be placed.",
-					"type": "string"
-				},
-				"restore": {
-					"description": "When cc-backend starts up, look for checkpointed files that are less than X hours old and load metrics from these selected checkpoint files.",
-					"type": "string"
-				}
-			}
-		},
-		"archive": {
-			"description": "Configuration for archiving the already checkpointed files.",
-			"type": "object",
-			"properties": {
-				"interval": {
-					"description": "Interval at which the checkpointed files should be archived.",
-					"type": "string"
-				},
-				"directory": {
-					"description": "Specify the parent directy in which the archived files should be placed.",
-					"type": "string"
-				}
-			}
-		},
-		"retention-in-memory": {
-			"description": "Keep the metrics within memory for given time interval. Retention for X hours, then the metrics would be freed.",
-			"type": "string"
-		},
-		"nats": {
-			"description": "Configuration for accepting published data through NATS.",
-			"type": "array",
-			"items": {
-				"type": "object",
-				"properties": {
-					"address": {
-						"description": "Address of the NATS server.",
-						"type": "string"
-					},
-					"username": {
-						"description": "Optional: If configured with username/password method.",
-						"type": "string"
-					},
-					"password": {
-						"description": "Optional: If configured with username/password method.",
-						"type": "string"
-					},
-					"creds-file-path": {
-						"description": "Optional: If configured with Credential File method. Path to your NATS cred file.",
-						"type": "string"
-					},
-					"subscriptions": {
-						"description": "Array of various subscriptions. Allows to subscibe to different subjects and publishers.",
-						"type": "array",
-						"items": {
-							"type": "object",
-							"properties": {
-								"subscribe-to": {
-									"description": "Channel name",
-									"type": "string"
-								},
-								"cluster-tag": {
-									"description": "Optional: Allow lines without a cluster tag, use this as default",
-									"type": "string"
-								}
-							}
-						}
-					}
-				}
-			}
-		}
-	}
-}`
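The deleted schema above documented the built-in metric-store options (checkpointing, archiving, in-memory retention, NATS input). A config fragment this schema would have accepted might have looked like the following (the interval and path values are illustrative, not taken from a shipped config):

```json
{
  "checkpoints": {
    "file-format": "avro",
    "interval": "12h",
    "directory": "./var/checkpoints",
    "restore": "48h"
  },
  "archive": {
    "interval": "24h",
    "directory": "./var/archive"
  },
  "retention-in-memory": "48h"
}
```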
File diff suppressed because it is too large
@@ -1,127 +0,0 @@
-// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
-// All rights reserved. This file is part of cc-backend.
-// Use of this source code is governed by a MIT-style
-// license that can be found in the LICENSE file.
-
-package metricdata
-
-import (
-	"context"
-	"encoding/json"
-	"fmt"
-	"time"
-
-	"github.com/ClusterCockpit/cc-backend/internal/config"
-	cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
-	"github.com/ClusterCockpit/cc-lib/schema"
-)
-
-type MetricDataRepository interface {
-	// Initialize this MetricDataRepository. One instance of
-	// this interface will only ever be responsible for one cluster.
-	Init(rawConfig json.RawMessage) error
-
-	// Return the JobData for the given job, only with the requested metrics.
-	LoadData(job *schema.Job, metrics []string, scopes []schema.MetricScope, ctx context.Context, resolution int) (schema.JobData, error)
-
-	// Return a map of metrics to a map of nodes to the metric statistics of the job. node scope only.
-	LoadStats(job *schema.Job, metrics []string, ctx context.Context) (map[string]map[string]schema.MetricStatistics, error)
-
-	// Return a map of metrics to a map of scopes to the scoped metric statistics of the job.
-	LoadScopedStats(job *schema.Job, metrics []string, scopes []schema.MetricScope, ctx context.Context) (schema.ScopedJobStats, error)
-
-	// Return a map of hosts to a map of metrics at the requested scopes (currently only node) for that node.
-	LoadNodeData(cluster string, metrics, nodes []string, scopes []schema.MetricScope, from, to time.Time, ctx context.Context) (map[string]map[string][]*schema.JobMetric, error)
-
-	// Return a map of hosts to a map of metrics to a map of scopes for multiple nodes.
-	LoadNodeListData(cluster, subCluster string, nodes, metrics []string, scopes []schema.MetricScope, resolution int, from, to time.Time, ctx context.Context) (map[string]schema.JobData, error)
-}
-
-var upstreamMetricDataRepo MetricDataRepository
-
-// func Init() error {
-// 	for _, cluster := range config.Clusters {
-// 		if cluster.MetricDataRepository != nil {
-// 			var kind struct {
-// 				Kind string `json:"kind"`
-// 			}
-// 			if err := json.Unmarshal(cluster.MetricDataRepository, &kind); err != nil {
-// 				cclog.Warn("Error while unmarshaling raw json MetricDataRepository")
-// 				return err
-// 			}
-//
-// 			var mdr MetricDataRepository
-// 			switch kind.Kind {
-// 			case "cc-metric-store":
-// 				mdr = &CCMetricStore{}
-// 			case "prometheus":
-// 				mdr = &PrometheusDataRepository{}
-// 			case "test":
-// 				mdr = &TestMetricDataRepository{}
-// 			default:
-// 				return fmt.Errorf("METRICDATA/METRICDATA > Unknown MetricDataRepository %v for cluster %v", kind.Kind, cluster.Name)
-// 			}
-//
-// 			if err := mdr.Init(cluster.MetricDataRepository); err != nil {
-// 				cclog.Errorf("Error initializing MetricDataRepository %v for cluster %v", kind.Kind, cluster.Name)
-// 				return err
-// 			}
-// 			metricDataRepos[cluster.Name] = mdr
-// 		}
-// 	}
-// 	return nil
-// }
-
-// func GetMetricDataRepo(cluster string) (MetricDataRepository, error) {
-// 	var err error
-// 	repo, ok := metricDataRepos[cluster]
-//
-// 	if !ok {
-// 		err = fmt.Errorf("METRICDATA/METRICDATA > no metric data repository configured for '%s'", cluster)
-// 	}
-//
-// 	return repo, err
-// }
-
-// InitUpstreamRepos initializes global upstream metric data repository for the pull worker
-func InitUpstreamRepos() error {
-	if config.Keys.UpstreamMetricRepository == nil {
-		return nil
-	}
-
-	var kind struct {
-		Kind string `json:"kind"`
-	}
-	if err := json.Unmarshal(*config.Keys.UpstreamMetricRepository, &kind); err != nil {
-		cclog.Warn("Error while unmarshaling raw json UpstreamMetricRepository")
-		return err
-	}
-
-	var mdr MetricDataRepository
-	switch kind.Kind {
-	case "cc-metric-store":
-		mdr = &CCMetricStore{}
-	case "prometheus":
-		mdr = &PrometheusDataRepository{}
-	case "test":
-		mdr = &TestMetricDataRepository{}
-	default:
-		return fmt.Errorf("METRICDATA/METRICDATA > Unknown UpstreamMetricRepository %v", kind.Kind)
-	}
-
-	if err := mdr.Init(*config.Keys.UpstreamMetricRepository); err != nil {
-		cclog.Errorf("Error initializing UpstreamMetricRepository %v", kind.Kind)
-		return err
-	}
-	upstreamMetricDataRepo = mdr
-	cclog.Infof("Initialized global upstream metric repository '%s'", kind.Kind)
-	return nil
-}
-
-// GetUpstreamMetricDataRepo returns the global upstream metric data repository
-func GetUpstreamMetricDataRepo() (MetricDataRepository, error) {
-	if upstreamMetricDataRepo == nil {
-		return nil, fmt.Errorf("METRICDATA/METRICDATA > no upstream metric data repository configured")
-	}
-	return upstreamMetricDataRepo, nil
-}
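The removed `metricdata.go` selected a repository implementation from the `"kind"` discriminator inside raw JSON: decode only the kind first, then hand the full raw config to the matching backend. A minimal sketch of that two-step factory, reduced to a toy interface (`Repo` and `newRepo` are invented names for the sketch, not part of the codebase):

```go
package main

import (
	"encoding/json"
	"fmt"
)

// Repo is a toy stand-in for the removed MetricDataRepository interface.
type Repo interface {
	Init(raw json.RawMessage) error
}

type ccMetricStore struct{}
type prometheusRepo struct{}

func (r *ccMetricStore) Init(json.RawMessage) error  { return nil }
func (r *prometheusRepo) Init(json.RawMessage) error { return nil }

// newRepo peeks at the "kind" field, then passes the complete raw
// config to the chosen implementation -- the same pattern the
// deleted InitUpstreamRepos used.
func newRepo(raw json.RawMessage) (Repo, error) {
	var kind struct {
		Kind string `json:"kind"`
	}
	if err := json.Unmarshal(raw, &kind); err != nil {
		return nil, err
	}
	var r Repo
	switch kind.Kind {
	case "cc-metric-store":
		r = &ccMetricStore{}
	case "prometheus":
		r = &prometheusRepo{}
	default:
		return nil, fmt.Errorf("unknown repository kind %q", kind.Kind)
	}
	return r, r.Init(raw)
}

func main() {
	raw := json.RawMessage(`{"kind": "prometheus", "url": "http://localhost:9090"}`)
	r, err := newRepo(raw)
	fmt.Println(r, err)
}
```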
@@ -1,588 +0,0 @@
-// Copyright (C) 2022 DKRZ
-// All rights reserved. This file is part of cc-backend.
-// Use of this source code is governed by a MIT-style
-// license that can be found in the LICENSE file.
-
-package metricdata
-
-import (
-	"bytes"
-	"context"
-	"encoding/json"
-	"errors"
-	"fmt"
-	"math"
-	"net/http"
-	"os"
-	"regexp"
-	"sort"
-	"strings"
-	"sync"
-	"text/template"
-	"time"
-
-	"github.com/ClusterCockpit/cc-backend/pkg/archive"
-	cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
-	"github.com/ClusterCockpit/cc-lib/schema"
-	promapi "github.com/prometheus/client_golang/api"
-	promv1 "github.com/prometheus/client_golang/api/prometheus/v1"
-	promcfg "github.com/prometheus/common/config"
-	promm "github.com/prometheus/common/model"
-)
-
-type PrometheusDataRepositoryConfig struct {
-	Url       string            `json:"url"`
-	Username  string            `json:"username,omitempty"`
-	Suffix    string            `json:"suffix,omitempty"`
-	Templates map[string]string `json:"query-templates"`
-}
-
-type PrometheusDataRepository struct {
-	client      promapi.Client
-	queryClient promv1.API
-	suffix      string
-	templates   map[string]*template.Template
-}
-
-type PromQLArgs struct {
-	Nodes string
-}
-
-type Trie map[rune]Trie
-
-var logOnce sync.Once
-
-func contains(s []schema.MetricScope, str schema.MetricScope) bool {
-	for _, v := range s {
-		if v == str {
-			return true
-		}
-	}
-	return false
-}
-
-func MinMaxMean(data []schema.Float) (float64, float64, float64) {
-	if len(data) == 0 {
-		return 0.0, 0.0, 0.0
-	}
-	min := math.MaxFloat64
-	max := -math.MaxFloat64
-	var sum float64
-	var n float64
-	for _, val := range data {
-		if val.IsNaN() {
-			continue
-		}
-		sum += float64(val)
-		n += 1
-		if float64(val) > max {
-			max = float64(val)
-		}
-		if float64(val) < min {
-			min = float64(val)
-		}
-	}
-	return min, max, sum / n
-}
-
-// Rewritten from
-// https://github.com/ermanh/trieregex/blob/master/trieregex/trieregex.py
-func nodeRegex(nodes []string) string {
-	root := Trie{}
-	// add runes of each compute node to trie
-	for _, node := range nodes {
-		_trie := root
-		for _, c := range node {
-			if _, ok := _trie[c]; !ok {
-				_trie[c] = Trie{}
-			}
-			_trie = _trie[c]
-		}
-		_trie['*'] = Trie{}
-	}
-	// recursively build regex from rune trie
-	var trieRegex func(trie Trie, reset bool) string
-	trieRegex = func(trie Trie, reset bool) string {
-		if reset == true {
-			trie = root
-		}
-		if len(trie) == 0 {
-			return ""
-		}
-		if len(trie) == 1 {
-			for key, _trie := range trie {
-				if key == '*' {
-					return ""
-				}
-				return regexp.QuoteMeta(string(key)) + trieRegex(_trie, false)
-			}
-		} else {
-			sequences := []string{}
-			for key, _trie := range trie {
-				if key != '*' {
-					sequences = append(sequences, regexp.QuoteMeta(string(key))+trieRegex(_trie, false))
-				}
-			}
-			sort.Slice(sequences, func(i, j int) bool {
-				return (-len(sequences[i]) < -len(sequences[j])) || (sequences[i] < sequences[j])
-			})
-			var result string
-			// single edge from this tree node
-			if len(sequences) == 1 {
-				result = sequences[0]
-				if len(result) > 1 {
-					result = "(?:" + result + ")"
-				}
-				// multiple edges, each length 1
-			} else if s := strings.Join(sequences, ""); len(s) == len(sequences) {
-				// char or numeric range
-				if len(s)-1 == int(s[len(s)-1])-int(s[0]) {
-					result = fmt.Sprintf("[%c-%c]", s[0], s[len(s)-1])
-					// char or numeric set
-				} else {
-					result = "[" + s + "]"
-				}
-				// multiple edges of different lengths
-			} else {
-				result = "(?:" + strings.Join(sequences, "|") + ")"
-			}
-			if _, ok := trie['*']; ok {
-				result += "?"
-			}
-			return result
-		}
-		return ""
-	}
-	return trieRegex(root, true)
-}
-
-func (pdb *PrometheusDataRepository) Init(rawConfig json.RawMessage) error {
-	var config PrometheusDataRepositoryConfig
-	// parse config
-	if err := json.Unmarshal(rawConfig, &config); err != nil {
-		cclog.Warn("Error while unmarshaling raw json config")
-		return err
-	}
-	// support basic authentication
-	var rt http.RoundTripper = nil
-	if prom_pw := os.Getenv("PROMETHEUS_PASSWORD"); prom_pw != "" && config.Username != "" {
-		prom_pw := promcfg.Secret(prom_pw)
-		rt = promcfg.NewBasicAuthRoundTripper(promcfg.NewInlineSecret(config.Username), promcfg.NewInlineSecret(string(prom_pw)), promapi.DefaultRoundTripper)
-	} else {
-		if config.Username != "" {
-			return errors.New("METRICDATA/PROMETHEUS > Prometheus username provided, but PROMETHEUS_PASSWORD not set")
-		}
-	}
-	// init client
-	client, err := promapi.NewClient(promapi.Config{
-		Address:      config.Url,
-		RoundTripper: rt,
-	})
-	if err != nil {
-		cclog.Error("Error while initializing new prometheus client")
-		return err
-	}
-	// init query client
-	pdb.client = client
-	pdb.queryClient = promv1.NewAPI(pdb.client)
-	// site config
-	pdb.suffix = config.Suffix
-	// init query templates
-	pdb.templates = make(map[string]*template.Template)
-	for metric, templ := range config.Templates {
-		pdb.templates[metric], err = template.New(metric).Parse(templ)
-		if err == nil {
-			cclog.Debugf("Added PromQL template for %s: %s", metric, templ)
-		} else {
-			cclog.Warnf("Failed to parse PromQL template %s for metric %s", templ, metric)
-		}
-	}
-	return nil
-}
-
-// TODO: respect scope argument
-func (pdb *PrometheusDataRepository) FormatQuery(
-	metric string,
-	scope schema.MetricScope,
-	nodes []string,
-	cluster string,
-) (string, error) {
-	args := PromQLArgs{}
-	if len(nodes) > 0 {
-		args.Nodes = fmt.Sprintf("(%s)%s", nodeRegex(nodes), pdb.suffix)
-	} else {
-		args.Nodes = fmt.Sprintf(".*%s", pdb.suffix)
-	}
-
-	buf := &bytes.Buffer{}
-	if templ, ok := pdb.templates[metric]; ok {
-		err := templ.Execute(buf, args)
-		if err != nil {
-			return "", errors.New(fmt.Sprintf("METRICDATA/PROMETHEUS > Error compiling template %v", templ))
-		} else {
-			query := buf.String()
-			cclog.Debugf("PromQL: %s", query)
-			return query, nil
-		}
-	} else {
-		return "", errors.New(fmt.Sprintf("METRICDATA/PROMETHEUS > No PromQL for metric %s configured.", metric))
-	}
-}
-
-// Convert PromAPI row to CC schema.Series
-func (pdb *PrometheusDataRepository) RowToSeries(
-	from time.Time,
-	step int64,
-	steps int64,
-	row *promm.SampleStream,
-) schema.Series {
-	ts := from.Unix()
-	hostname := strings.TrimSuffix(string(row.Metric["exported_instance"]), pdb.suffix)
-	// init array of expected length with NaN
-	values := make([]schema.Float, steps+1)
-	for i := range values {
-		values[i] = schema.NaN
-	}
-	// copy recorded values from prom sample pair
-	for _, v := range row.Values {
-		idx := (v.Timestamp.Unix() - ts) / step
-		values[idx] = schema.Float(v.Value)
-	}
-	min, max, mean := MinMaxMean(values)
-	// output struct
-	return schema.Series{
-		Hostname: hostname,
-		Data:     values,
-		Statistics: schema.MetricStatistics{
-			Avg: mean,
-			Min: min,
-			Max: max,
-		},
-	}
-}
-
-func (pdb *PrometheusDataRepository) LoadData(
-	job *schema.Job,
-	metrics []string,
-	scopes []schema.MetricScope,
-	ctx context.Context,
-	resolution int,
-) (schema.JobData, error) {
-	// TODO respect requested scope
-	if len(scopes) == 0 || !contains(scopes, schema.MetricScopeNode) {
-		scopes = append(scopes, schema.MetricScopeNode)
-	}
-
-	jobData := make(schema.JobData)
-	// parse job specs
-	nodes := make([]string, len(job.Resources))
-	for i, resource := range job.Resources {
-		nodes[i] = resource.Hostname
-	}
-	from := time.Unix(job.StartTime, 0)
-	to := time.Unix(job.StartTime+int64(job.Duration), 0)
-
-	for _, scope := range scopes {
-		if scope != schema.MetricScopeNode {
-			logOnce.Do(func() {
-				cclog.Infof("Scope '%s' requested, but not yet supported: Will return 'node' scope only.", scope)
-			})
-			continue
-		}
-
-		for _, metric := range metrics {
-			metricConfig := archive.GetMetricConfig(job.Cluster, metric)
-			if metricConfig == nil {
-				cclog.Warnf("Error in LoadData: Metric %s for cluster %s not configured", metric, job.Cluster)
-				return nil, errors.New("Prometheus config error")
-			}
-			query, err := pdb.FormatQuery(metric, scope, nodes, job.Cluster)
-			if err != nil {
-				cclog.Warn("Error while formatting prometheus query")
-				return nil, err
-			}
-
-			// ranged query over all job nodes
-			r := promv1.Range{
-				Start: from,
-				End:   to,
-				Step:  time.Duration(metricConfig.Timestep * 1e9),
-			}
-			result, warnings, err := pdb.queryClient.QueryRange(ctx, query, r)
-			if err != nil {
-				cclog.Errorf("Prometheus query error in LoadData: %v\nQuery: %s", err, query)
-				return nil, errors.New("Prometheus query error")
-			}
-			if len(warnings) > 0 {
-				cclog.Warnf("Warnings: %v\n", warnings)
-			}
-
-			// init data structures
-			if _, ok := jobData[metric]; !ok {
-				jobData[metric] = make(map[schema.MetricScope]*schema.JobMetric)
-			}
-			jobMetric, ok := jobData[metric][scope]
-			if !ok {
-				jobMetric = &schema.JobMetric{
-					Unit:     metricConfig.Unit,
-					Timestep: metricConfig.Timestep,
-					Series:   make([]schema.Series, 0),
-				}
-			}
-			step := int64(metricConfig.Timestep)
-			steps := int64(to.Sub(from).Seconds()) / step
-			// iter rows of host, metric, values
-			for _, row := range result.(promm.Matrix) {
-				jobMetric.Series = append(jobMetric.Series,
-					pdb.RowToSeries(from, step, steps, row))
-			}
-			// only add metric if at least one host returned data
-			if !ok && len(jobMetric.Series) > 0 {
-				jobData[metric][scope] = jobMetric
-			}
-			// sort by hostname to get uniform coloring
-			sort.Slice(jobMetric.Series, func(i, j int) bool {
-				return (jobMetric.Series[i].Hostname < jobMetric.Series[j].Hostname)
-			})
-		}
-	}
-	return jobData, nil
-}
-
-// TODO change implementation to precomputed/cached stats
-func (pdb *PrometheusDataRepository) LoadStats(
-	job *schema.Job,
-	metrics []string,
-	ctx context.Context,
-) (map[string]map[string]schema.MetricStatistics, error) {
-	// map of metrics of nodes of stats
-	stats := map[string]map[string]schema.MetricStatistics{}
-
-	data, err := pdb.LoadData(job, metrics, []schema.MetricScope{schema.MetricScopeNode}, ctx, 0 /*resolution here*/)
-	if err != nil {
-		cclog.Warn("Error while loading job for stats")
-		return nil, err
-	}
-	for metric, metricData := range data {
-		stats[metric] = make(map[string]schema.MetricStatistics)
-		for _, series := range metricData[schema.MetricScopeNode].Series {
-			stats[metric][series.Hostname] = series.Statistics
-		}
-	}
-
-	return stats, nil
-}
-
-func (pdb *PrometheusDataRepository) LoadNodeData(
-	cluster string,
-	metrics, nodes []string,
-	scopes []schema.MetricScope,
-	from, to time.Time,
-	ctx context.Context,
-) (map[string]map[string][]*schema.JobMetric, error) {
-	t0 := time.Now()
-	// Map of hosts of metrics of value slices
-	data := make(map[string]map[string][]*schema.JobMetric)
-	// query db for each metric
-	// TODO: scopes seems to be always empty
-	if len(scopes) == 0 || !contains(scopes, schema.MetricScopeNode) {
-		scopes = append(scopes, schema.MetricScopeNode)
-	}
-	for _, scope := range scopes {
-		if scope != schema.MetricScopeNode {
-			logOnce.Do(func() {
-				cclog.Infof("Note: Scope '%s' requested, but not yet supported: Will return 'node' scope only.", scope)
-			})
-			continue
-		}
-		for _, metric := range metrics {
-			metricConfig := archive.GetMetricConfig(cluster, metric)
-			if metricConfig == nil {
-				cclog.Warnf("Error in LoadNodeData: Metric %s for cluster %s not configured", metric, cluster)
-				return nil, errors.New("Prometheus config error")
-			}
-			query, err := pdb.FormatQuery(metric, scope, nodes, cluster)
-			if err != nil {
-				cclog.Warn("Error while formatting prometheus query")
-				return nil, err
-			}
-
-			// ranged query over all nodes
-			r := promv1.Range{
-				Start: from,
-				End:   to,
-				Step:  time.Duration(metricConfig.Timestep * 1e9),
-			}
-			result, warnings, err := pdb.queryClient.QueryRange(ctx, query, r)
-			if err != nil {
-				cclog.Errorf("Prometheus query error in LoadNodeData: %v\n", err)
-				return nil, errors.New("Prometheus query error")
-			}
-			if len(warnings) > 0 {
-				cclog.Warnf("Warnings: %v\n", warnings)
-			}
-
-			step := int64(metricConfig.Timestep)
-			steps := int64(to.Sub(from).Seconds()) / step
-
-			// iter rows of host, metric, values
-			for _, row := range result.(promm.Matrix) {
-				hostname := strings.TrimSuffix(string(row.Metric["exported_instance"]), pdb.suffix)
-				hostdata, ok := data[hostname]
-				if !ok {
-					hostdata = make(map[string][]*schema.JobMetric)
-					data[hostname] = hostdata
-				}
-				// output per host and metric
-				hostdata[metric] = append(hostdata[metric], &schema.JobMetric{
-					Unit:     metricConfig.Unit,
-					Timestep: metricConfig.Timestep,
-					Series:   []schema.Series{pdb.RowToSeries(from, step, steps, row)},
-				},
-				)
-			}
-		}
-	}
-	t1 := time.Since(t0)
-	cclog.Debugf("LoadNodeData of %v nodes took %s", len(data), t1)
-	return data, nil
-}
-
-// Implemented by NHR@FAU; Used in Job-View StatsTable
-func (pdb *PrometheusDataRepository) LoadScopedStats(
-	job *schema.Job,
-	metrics []string,
-	scopes []schema.MetricScope,
-	ctx context.Context,
-) (schema.ScopedJobStats, error) {
-	// Assumption: pdb.loadData() only returns series node-scope - use node scope for statsTable
-	scopedJobStats := make(schema.ScopedJobStats)
-	data, err := pdb.LoadData(job, metrics, []schema.MetricScope{schema.MetricScopeNode}, ctx, 0 /*resolution here*/)
-	if err != nil {
-		cclog.Warn("Error while loading job for scopedJobStats")
-		return nil, err
-	}
-
-	for metric, metricData := range data {
-		for _, scope := range scopes {
-			if scope != schema.MetricScopeNode {
-				logOnce.Do(func() {
-					cclog.Infof("Note: Scope '%s' requested, but not yet supported: Will return 'node' scope only.", scope)
-				})
-				continue
-			}
-
-			if _, ok := scopedJobStats[metric]; !ok {
-				scopedJobStats[metric] = make(map[schema.MetricScope][]*schema.ScopedStats)
-			}
-
-			if _, ok := scopedJobStats[metric][scope]; !ok {
-				scopedJobStats[metric][scope] = make([]*schema.ScopedStats, 0)
-			}
-
-			for _, series := range metricData[scope].Series {
-				scopedJobStats[metric][scope] = append(scopedJobStats[metric][scope], &schema.ScopedStats{
-					Hostname: series.Hostname,
-					Data:     &series.Statistics,
-				})
-			}
-		}
-	}
-
-	return scopedJobStats, nil
-}
-
-// Implemented by NHR@FAU; Used in NodeList-View
-func (pdb *PrometheusDataRepository) LoadNodeListData(
-	cluster, subCluster string,
-	nodes []string,
-	metrics []string,
-	scopes []schema.MetricScope,
-	resolution int,
-	from, to time.Time,
-	ctx context.Context,
-) (map[string]schema.JobData, error) {
-	// Assumption: pdb.loadData() only returns series node-scope - use node scope for NodeList
-
-	// Fetch Data, based on pdb.LoadNodeData()
-	t0 := time.Now()
-	// Map of hosts of jobData
-	data := make(map[string]schema.JobData)
-
-	// query db for each metric
-	// TODO: scopes seems to be always empty
-	if len(scopes) == 0 || !contains(scopes, schema.MetricScopeNode) {
-		scopes = append(scopes, schema.MetricScopeNode)
-	}
-
-	for _, scope := range scopes {
-		if scope != schema.MetricScopeNode {
-			logOnce.Do(func() {
-				cclog.Infof("Note: Scope '%s' requested, but not yet supported: Will return 'node' scope only.", scope)
-			})
-			continue
-		}
-
-		for _, metric := range metrics {
-			metricConfig := archive.GetMetricConfig(cluster, metric)
-			if metricConfig == nil {
-				cclog.Warnf("Error in LoadNodeListData: Metric %s for cluster %s not configured", metric, cluster)
-				return nil, errors.New("Prometheus config error")
-			}
-			query, err := pdb.FormatQuery(metric, scope, nodes, cluster)
-			if err != nil {
-				cclog.Warn("Error while formatting prometheus query")
-				return nil, err
-			}
-
-			// ranged query over all nodes
-			r := promv1.Range{
-				Start: from,
-				End:   to,
-				Step:  time.Duration(metricConfig.Timestep * 1e9),
-			}
-			result, warnings, err := pdb.queryClient.QueryRange(ctx, query, r)
-			if err != nil {
-				cclog.Errorf("Prometheus query error in LoadNodeData: %v\n", err)
-				return nil, errors.New("Prometheus query error")
-			}
-			if len(warnings) > 0 {
-				cclog.Warnf("Warnings: %v\n", warnings)
-			}
-
-			step := int64(metricConfig.Timestep)
-			steps := int64(to.Sub(from).Seconds()) / step
-
-			// iter rows of host, metric, values
-			for _, row := range result.(promm.Matrix) {
-				hostname := strings.TrimSuffix(string(row.Metric["exported_instance"]), pdb.suffix)
-
-				hostdata, ok := data[hostname]
-				if !ok {
-					hostdata = make(schema.JobData)
-					data[hostname] = hostdata
-				}
-
-				metricdata, ok := hostdata[metric]
-				if !ok {
-					metricdata = make(map[schema.MetricScope]*schema.JobMetric)
-					data[hostname][metric] = metricdata
-				}
-
-				// output per host, metric and scope
-				scopeData, ok := metricdata[scope]
-				if !ok {
-					scopeData = &schema.JobMetric{
-						Unit:     metricConfig.Unit,
-						Timestep: metricConfig.Timestep,
-						Series:   []schema.Series{pdb.RowToSeries(from, step, steps, row)},
-					}
-					data[hostname][metric][scope] = scopeData
-				}
-			}
-		}
-	}
-	t1 := time.Since(t0)
-	cclog.Debugf("LoadNodeListData of %v nodes took %s", len(data), t1)
-	return data, nil
-}
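The deleted repository built its PromQL per metric from `text/template` strings, substituting a node-matching regex for `{{.Nodes}}` (produced by the trie-based `nodeRegex` to keep instance selectors compact). A self-contained sketch of that rendering step (the template string, node regex, and suffix are illustrative, not from a shipped config):

```go
package main

import (
	"bytes"
	"fmt"
	"text/template"
)

type promQLArgs struct {
	Nodes string // regex matching the exported_instance label
}

func main() {
	// Per-metric template, as it might appear under "query-templates".
	const templ = `avg by (exported_instance) (rate(flops_any{exported_instance=~"{{.Nodes}}"}[30s]))`

	t := template.Must(template.New("flops_any").Parse(templ))

	// A compact node regex of the kind nodeRegex() produced,
	// plus a configured hostname suffix.
	args := promQLArgs{Nodes: "(node0[1-4])\\.cluster"}

	buf := &bytes.Buffer{}
	if err := t.Execute(buf, args); err != nil {
		panic(err)
	}
	fmt.Println(buf.String())
}
```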
@@ -1,74 +0,0 @@
-// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
-// All rights reserved. This file is part of cc-backend.
-// Use of this source code is governed by a MIT-style
-// license that can be found in the LICENSE file.
-
-package metricdata
-
-import (
-	"context"
-	"encoding/json"
-	"time"
-
-	"github.com/ClusterCockpit/cc-lib/schema"
-)
-
-var TestLoadDataCallback func(job *schema.Job, metrics []string, scopes []schema.MetricScope, ctx context.Context, resolution int) (schema.JobData, error) = func(job *schema.Job, metrics []string, scopes []schema.MetricScope, ctx context.Context, resolution int) (schema.JobData, error) {
-	panic("TODO")
-}
-
-// TestMetricDataRepository is only a mock for unit-testing.
-type TestMetricDataRepository struct{}
-
-func (tmdr *TestMetricDataRepository) Init(_ json.RawMessage) error {
-	return nil
-}
-
-func (tmdr *TestMetricDataRepository) LoadData(
-	job *schema.Job,
-	metrics []string,
-	scopes []schema.MetricScope,
-	ctx context.Context,
-	resolution int,
-) (schema.JobData, error) {
-	return TestLoadDataCallback(job, metrics, scopes, ctx, resolution)
-}
-
-func (tmdr *TestMetricDataRepository) LoadStats(
-	job *schema.Job,
-	metrics []string,
-	ctx context.Context,
-) (map[string]map[string]schema.MetricStatistics, error) {
-	panic("TODO")
-}
-
-func (tmdr *TestMetricDataRepository) LoadScopedStats(
-	job *schema.Job,
-	metrics []string,
-	scopes []schema.MetricScope,
-	ctx context.Context,
-) (schema.ScopedJobStats, error) {
-	panic("TODO")
-}
-
-func (tmdr *TestMetricDataRepository) LoadNodeData(
-	cluster string,
-	metrics, nodes []string,
-	scopes []schema.MetricScope,
-	from, to time.Time,
-	ctx context.Context,
-) (map[string]map[string][]*schema.JobMetric, error) {
-	panic("TODO")
-}
-
-func (tmdr *TestMetricDataRepository) LoadNodeListData(
-	cluster, subCluster string,
-	nodes []string,
-	metrics []string,
-	scopes []schema.MetricScope,
-	resolution int,
-	from, to time.Time,
-	ctx context.Context,
-) (map[string]schema.JobData, error) {
-	panic("TODO")
-}
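The deleted mock illustrates a useful seam: `LoadData` delegated to a package-level callback (`TestLoadDataCallback`) so unit tests could stub metric data without a live backend. A generic sketch of the pattern, detached from the removed types (the fake data and names here are invented):

```go
package main

import (
	"context"
	"fmt"
)

// loadFunc is the callback seam: production code calls through the
// variable; tests replace it (mirrors TestLoadDataCallback).
var loadFunc = func(ctx context.Context, job string) (map[string][]float64, error) {
	panic("not wired up")
}

func loadForJob(ctx context.Context, job string) (map[string][]float64, error) {
	return loadFunc(ctx, job)
}

func main() {
	// In a test, install a stub before exercising the code under test.
	loadFunc = func(ctx context.Context, job string) (map[string][]float64, error) {
		return map[string][]float64{"flops_any": {1.0, 2.0, 3.0}}, nil
	}
	data, err := loadForJob(context.Background(), "job-42")
	fmt.Println(data, err)
}
```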
@@ -3,7 +3,7 @@
 // Use of this source code is governed by a MIT-style
 // license that can be found in the LICENSE file.

-// Package metricdispatcher provides a unified interface for loading and caching job metric data.
+// Package metricdispatch provides a unified interface for loading and caching job metric data.
 //
 // This package serves as a central dispatcher that routes metric data requests to the appropriate
 // backend based on job state. For running jobs, data is fetched from the metric store (e.g., cc-metric-store).
@@ -29,13 +29,13 @@
 //
 // The primary entry point is LoadData, which automatically handles both running and archived jobs:
 //
-//	jobData, err := metricdispatcher.LoadData(job, metrics, scopes, ctx, resolution)
+//	jobData, err := metricdispatch.LoadData(job, metrics, scopes, ctx, resolution)
 //	if err != nil {
 //	    // Handle error
 //	}
 //
 // For statistics only, use LoadJobStats, LoadScopedJobStats, or LoadAverages depending on the required format.
-package metricdispatcher
+package metricdispatch

 import (
 	"context"
@@ -44,12 +44,12 @@ import (
 	"time"

 	"github.com/ClusterCockpit/cc-backend/internal/config"
-	"github.com/ClusterCockpit/cc-backend/internal/memorystore"
+	"github.com/ClusterCockpit/cc-backend/internal/metricstore"
 	"github.com/ClusterCockpit/cc-backend/pkg/archive"
-	cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
-	"github.com/ClusterCockpit/cc-lib/lrucache"
-	"github.com/ClusterCockpit/cc-lib/resampler"
-	"github.com/ClusterCockpit/cc-lib/schema"
+	cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger"
+	"github.com/ClusterCockpit/cc-lib/v2/lrucache"
+	"github.com/ClusterCockpit/cc-lib/v2/resampler"
+	"github.com/ClusterCockpit/cc-lib/v2/schema"
 )

 // cache is an LRU cache with 128 MB capacity for storing loaded job metric data.
@@ -109,7 +109,7 @@ func LoadData(job *schema.Job,
 		}
 	}

-	jd, err = memorystore.LoadData(job, metrics, scopes, ctx, resolution)
+	jd, err = metricstore.LoadData(job, metrics, scopes, ctx, resolution)
 	if err != nil {
 		if len(jd) != 0 {
 			cclog.Warnf("partial error loading metrics from store for job %d (user: %s, project: %s): %s",
@@ -238,7 +238,7 @@ func LoadAverages(
 		return archive.LoadAveragesFromArchive(job, metrics, data) // #166 change also here?
 	}

-	stats, err := memorystore.LoadStats(job, metrics, ctx)
+	stats, err := metricstore.LoadStats(job, metrics, ctx)
 	if err != nil {
 		cclog.Errorf("failed to load statistics from metric store for job %d (user: %s, project: %s): %s",
 			job.JobID, job.User, job.Project, err.Error())
@@ -275,7 +275,7 @@ func LoadScopedJobStats(
 		return archive.LoadScopedStatsFromArchive(job, metrics, scopes)
 	}

-	scopedStats, err := memorystore.LoadScopedStats(job, metrics, scopes, ctx)
|
scopedStats, err := metricstore.LoadScopedStats(job, metrics, scopes, ctx)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
cclog.Errorf("failed to load scoped statistics from metric store for job %d (user: %s, project: %s): %s",
|
cclog.Errorf("failed to load scoped statistics from metric store for job %d (user: %s, project: %s): %s",
|
||||||
job.JobID, job.User, job.Project, err.Error())
|
job.JobID, job.User, job.Project, err.Error())
|
||||||
@@ -299,7 +299,7 @@ func LoadJobStats(
|
|||||||
|
|
||||||
data := make(map[string]schema.MetricStatistics, len(metrics))
|
data := make(map[string]schema.MetricStatistics, len(metrics))
|
||||||
|
|
||||||
stats, err := memorystore.LoadStats(job, metrics, ctx)
|
stats, err := metricstore.LoadStats(job, metrics, ctx)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
cclog.Errorf("failed to load statistics from metric store for job %d (user: %s, project: %s): %s",
|
cclog.Errorf("failed to load statistics from metric store for job %d (user: %s, project: %s): %s",
|
||||||
job.JobID, job.User, job.Project, err.Error())
|
job.JobID, job.User, job.Project, err.Error())
|
||||||
@@ -348,7 +348,7 @@ func LoadNodeData(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
data, err := memorystore.LoadNodeData(cluster, metrics, nodes, scopes, from, to, ctx)
|
data, err := metricstore.LoadNodeData(cluster, metrics, nodes, scopes, from, to, ctx)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
if len(data) != 0 {
|
if len(data) != 0 {
|
||||||
cclog.Warnf("partial error loading node data from metric store for cluster %s: %s", cluster, err.Error())
|
cclog.Warnf("partial error loading node data from metric store for cluster %s: %s", cluster, err.Error())
|
||||||
@@ -385,7 +385,7 @@ func LoadNodeListData(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
data, err := memorystore.LoadNodeListData(cluster, subCluster, nodes, metrics, scopes, resolution, from, to, ctx)
|
data, err := metricstore.LoadNodeListData(cluster, subCluster, nodes, metrics, scopes, resolution, from, to, ctx)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
if len(data) != 0 {
|
if len(data) != 0 {
|
||||||
cclog.Warnf("partial error loading node list data from metric store for cluster %s, subcluster %s: %s",
|
cclog.Warnf("partial error loading node list data from metric store for cluster %s, subcluster %s: %s",
|
||||||
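Taken together, these hunks reroute the dispatcher from memorystore to metricstore without changing the caller-facing API. A minimal caller-side sketch (not part of this changeset; function and type names are taken from the hunks above, the metric name and resolution are invented):

    package main

    import (
    	"context"
    	"fmt"

    	"github.com/ClusterCockpit/cc-backend/internal/metricdispatch"
    	"github.com/ClusterCockpit/cc-lib/v2/schema"
    )

    func printMetricCount(job *schema.Job) error {
    	// LoadData picks the backend itself: metricstore for running jobs,
    	// the file archive for completed ones (per the package doc above).
    	jobData, err := metricdispatch.LoadData(job, []string{"flops_any"},
    		[]schema.MetricScope{schema.MetricScopeNode}, context.Background(), 600)
    	if err != nil {
    		return err
    	}
    	fmt.Printf("loaded %d metrics\n", len(jobData))
    	return nil
    }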
@@ -3,12 +3,12 @@
 // Use of this source code is governed by a MIT-style
 // license that can be found in the LICENSE file.
 
-package metricdispatcher
+package metricdispatch
 
 import (
 	"testing"
 
-	"github.com/ClusterCockpit/cc-lib/schema"
+	"github.com/ClusterCockpit/cc-lib/v2/schema"
 )
 
 func TestDeepCopy(t *testing.T) {

@@ -3,15 +3,15 @@
 // Use of this source code is governed by a MIT-style
 // license that can be found in the LICENSE file.
 
-package memorystore
+package metricstore
 
 import (
 	"errors"
 	"fmt"
 	"math"
 
-	"github.com/ClusterCockpit/cc-lib/schema"
-	"github.com/ClusterCockpit/cc-lib/util"
+	"github.com/ClusterCockpit/cc-lib/v2/schema"
+	"github.com/ClusterCockpit/cc-lib/v2/util"
 )
 
 var (
@@ -129,7 +129,6 @@ func FetchData(req APIQueryRequest) (*APIQueryResponse, error) {
 		return nil, fmt.Errorf("memorystore not initialized")
 	}
 
-
 	response := APIQueryResponse{
 		Results: make([][]APIMetricData, 0, len(req.Queries)),
 	}

@@ -3,7 +3,7 @@
 // Use of this source code is governed by a MIT-style
 // license that can be found in the LICENSE file.
 
-package memorystore
+package metricstore
 
 import (
 	"archive/zip"
@@ -18,13 +18,13 @@ import (
 	"sync/atomic"
 	"time"
 
-	cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
+	cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger"
 )
 
 func Archiving(wg *sync.WaitGroup, ctx context.Context) {
 	go func() {
 		defer wg.Done()
-		d, err := time.ParseDuration(Keys.Archive.Interval)
+		d, err := time.ParseDuration(Keys.Archive.ArchiveInterval)
 		if err != nil {
 			cclog.Fatalf("[METRICSTORE]> error parsing archive interval duration: %v\n", err)
 		}

@@ -3,7 +3,7 @@
 // Use of this source code is governed by a MIT-style
 // license that can be found in the LICENSE file.
 
-package memorystore
+package metricstore
 
 import (
 	"bufio"
@@ -19,13 +19,15 @@ import (
 	"sync/atomic"
 	"time"
 
-	cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
-	"github.com/ClusterCockpit/cc-lib/schema"
+	cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger"
+	"github.com/ClusterCockpit/cc-lib/v2/schema"
 	"github.com/linkedin/goavro/v2"
 )
 
-var NumAvroWorkers int = DefaultAvroWorkers
-var startUp bool = true
+var (
+	NumAvroWorkers int  = DefaultAvroWorkers
+	startUp        bool = true
+)
 
 func (as *AvroStore) ToCheckpoint(dir string, dumpAll bool) (int, error) {
 	levels := make([]*AvroLevel, 0)

@@ -3,7 +3,7 @@
 // Use of this source code is governed by a MIT-style
 // license that can be found in the LICENSE file.
 
-package memorystore
+package metricstore
 
 import (
 	"context"
@@ -11,7 +11,7 @@ import (
 	"strconv"
 	"sync"
 
-	cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
+	cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger"
 )
 
 func DataStaging(wg *sync.WaitGroup, ctx context.Context) {
@@ -30,8 +30,51 @@ func DataStaging(wg *sync.WaitGroup, ctx context.Context) {
 		for {
 			select {
 			case <-ctx.Done():
-				return
-			case val := <-LineProtocolMessages:
+				// Drain any remaining messages in channel before exiting
+				for {
+					select {
+					case val, ok := <-LineProtocolMessages:
+						if !ok {
+							// Channel closed
+							return
+						}
+						// Process remaining message
+						freq, err := GetMetricFrequency(val.MetricName)
+						if err != nil {
+							continue
+						}
+
+						metricName := ""
+						for _, selectorName := range val.Selector {
+							metricName += selectorName + SelectorDelimiter
+						}
+						metricName += val.MetricName
+
+						var selector []string
+						selector = append(selector, val.Cluster, val.Node, strconv.FormatInt(freq, 10))
+
+						if !stringSlicesEqual(oldSelector, selector) {
+							avroLevel = avroStore.root.findAvroLevelOrCreate(selector)
+							if avroLevel == nil {
+								cclog.Errorf("Error creating or finding the level with cluster : %s, node : %s, metric : %s\n", val.Cluster, val.Node, val.MetricName)
+							}
+							oldSelector = slices.Clone(selector)
+						}
+
+						if avroLevel != nil {
+							avroLevel.addMetric(metricName, val.Value, val.Timestamp, int(freq))
+						}
+					default:
+						// No more messages, exit
+						return
+					}
+				}
+			case val, ok := <-LineProtocolMessages:
+				if !ok {
+					// Channel closed, exit gracefully
+					return
+				}
+
 				// Fetch the frequency of the metric from the global configuration
 				freq, err := GetMetricFrequency(val.MetricName)
 				if err != nil {
@@ -65,9 +108,11 @@ func DataStaging(wg *sync.WaitGroup, ctx context.Context) {
 					oldSelector = slices.Clone(selector)
 				}
 
+				if avroLevel != nil {
 					avroLevel.addMetric(metricName, val.Value, val.Timestamp, int(freq))
 				}
 			}
+			}
 		}
 	}()
 }
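The new ctx.Done() branch implements a drain-then-exit pattern: after cancellation the worker keeps receiving until the channel is empty or closed, so no queued line-protocol message is silently dropped. The pattern in isolation (a sketch, independent of the avro types above):

    package main

    import (
    	"context"
    	"fmt"
    )

    // drain consumes remaining items after cancellation; the default
    // case fires once the channel is empty, ending the goroutine.
    func drain(ctx context.Context, ch <-chan int) {
    	for {
    		select {
    		case <-ctx.Done():
    			for {
    				select {
    				case v, ok := <-ch:
    					if !ok {
    						return // channel closed
    					}
    					fmt.Println("draining", v)
    				default:
    					return // nothing left in the buffer
    				}
    			}
    		case v, ok := <-ch:
    			if !ok {
    				return
    			}
    			fmt.Println("processing", v)
    		}
    	}
    }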
@@ -3,12 +3,12 @@
 // Use of this source code is governed by a MIT-style
 // license that can be found in the LICENSE file.
 
-package memorystore
+package metricstore
 
 import (
 	"sync"
 
-	"github.com/ClusterCockpit/cc-lib/schema"
+	"github.com/ClusterCockpit/cc-lib/v2/schema"
 )
 
 var (

@@ -3,13 +3,13 @@
 // Use of this source code is governed by a MIT-style
 // license that can be found in the LICENSE file.
 
-package memorystore
+package metricstore
 
 import (
 	"errors"
 	"sync"
 
-	"github.com/ClusterCockpit/cc-lib/schema"
+	"github.com/ClusterCockpit/cc-lib/v2/schema"
 )
 
 // BufferCap is the default buffer capacity.

@@ -3,7 +3,7 @@
 // Use of this source code is governed by a MIT-style
 // license that can be found in the LICENSE file.
 
-package memorystore
+package metricstore
 
 import (
 	"bufio"
@@ -23,8 +23,8 @@ import (
 	"sync/atomic"
 	"time"
 
-	cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
-	"github.com/ClusterCockpit/cc-lib/schema"
+	cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger"
+	"github.com/ClusterCockpit/cc-lib/v2/schema"
 	"github.com/linkedin/goavro/v2"
 )
 
@@ -408,7 +408,6 @@ func (m *MemoryStore) FromCheckpointFiles(dir string, from int64) (int, error) {
 		return m.FromCheckpoint(dir, from, altFormat)
 	}
 
-	cclog.Print("[METRICSTORE]> No valid checkpoint files found in the directory")
 	return 0, nil
 }

@@ -3,7 +3,7 @@
 // Use of this source code is governed by a MIT-style
 // license that can be found in the LICENSE file.
 
-package memorystore
+package metricstore
 
 import (
 	"fmt"
@@ -19,36 +19,49 @@ const (
 	DefaultAvroCheckpointInterval = time.Minute
 )
 
-type MetricStoreConfig struct {
-	// Number of concurrent workers for checkpoint and archive operations.
-	// If not set or 0, defaults to min(runtime.NumCPU()/2+1, 10)
-	NumWorkers  int `json:"num-workers"`
-	Checkpoints struct {
-		FileFormat string `json:"file-format"`
-		Interval   string `json:"interval"`
-		RootDir    string `json:"directory"`
-		Restore    string `json:"restore"`
-	} `json:"checkpoints"`
-	Debug struct {
-		DumpToFile string `json:"dump-to-file"`
-		EnableGops bool   `json:"gops"`
-	} `json:"debug"`
-	RetentionInMemory string `json:"retention-in-memory"`
-	Archive           struct {
-		Interval      string `json:"interval"`
-		RootDir       string `json:"directory"`
-		DeleteInstead bool   `json:"delete-instead"`
-	} `json:"archive"`
-	Subscriptions []struct {
-		// Channel name
-		SubscribeTo string `json:"subscribe-to"`
-
-		// Allow lines without a cluster tag, use this as default, optional
-		ClusterTag string `json:"cluster-tag"`
-	} `json:"subscriptions"`
-}
-
-var Keys MetricStoreConfig
+type Checkpoints struct {
+	FileFormat string `json:"file-format"`
+	Interval   string `json:"interval"`
+	RootDir    string `json:"directory"`
+}
+
+type Debug struct {
+	DumpToFile string `json:"dump-to-file"`
+	EnableGops bool   `json:"gops"`
+}
+
+type Archive struct {
+	ArchiveInterval string `json:"interval"`
+	RootDir         string `json:"directory"`
+	DeleteInstead   bool   `json:"delete-instead"`
+}
+
+type Subscriptions []struct {
+	// Channel name
+	SubscribeTo string `json:"subscribe-to"`
+
+	// Allow lines without a cluster tag, use this as default, optional
+	ClusterTag string `json:"cluster-tag"`
+}
+
+type MetricStoreConfig struct {
+	// Number of concurrent workers for checkpoint and archive operations.
+	// If not set or 0, defaults to min(runtime.NumCPU()/2+1, 10)
+	NumWorkers        int            `json:"num-workers"`
+	RetentionInMemory string         `json:"retention-in-memory"`
+	MemoryCap         int            `json:"memory-cap"`
+	Checkpoints       Checkpoints    `json:"checkpoints"`
+	Debug             *Debug         `json:"debug"`
+	Archive           *Archive       `json:"archive"`
+	Subscriptions     *Subscriptions `json:"nats-subscriptions"`
+}
+
+var Keys MetricStoreConfig = MetricStoreConfig{
+	Checkpoints: Checkpoints{
+		FileFormat: "avro",
+		RootDir:    "./var/checkpoints",
+	},
+}
 
 // AggregationStrategy for aggregation over multiple values at different cpus/sockets/..., not time!
 type AggregationStrategy int

internal/metricstore/configSchema.go (new file, 77 lines)
@@ -0,0 +1,77 @@
+// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
+// All rights reserved. This file is part of cc-backend.
+// Use of this source code is governed by a MIT-style
+// license that can be found in the LICENSE file.
+
+package metricstore
+
+const configSchema = `{
+	"type": "object",
+	"description": "Configuration specific to built-in metric-store.",
+	"properties": {
+		"num-workers": {
+			"description": "Number of concurrent workers for checkpoint and archive operations",
+			"type": "integer"
+		},
+		"checkpoints": {
+			"description": "Configuration for checkpointing the metrics within metric-store",
+			"type": "object",
+			"properties": {
+				"file-format": {
+					"description": "Specify the type of checkpoint file. There are 2 variants: 'avro' and 'json'. If nothing is specified, 'avro' is the default.",
+					"type": "string"
+				},
+				"interval": {
+					"description": "Interval at which the metrics should be checkpointed.",
+					"type": "string"
+				},
+				"directory": {
+					"description": "Specify the parent directory in which the checkpoint files should be placed.",
+					"type": "string"
+				}
+			},
+			"required": ["interval"]
+		},
+		"archive": {
+			"description": "Configuration for archiving the already checkpointed files.",
+			"type": "object",
+			"properties": {
+				"interval": {
+					"description": "Interval at which the checkpointed files should be archived.",
+					"type": "string"
+				},
+				"directory": {
+					"description": "Specify the directory in which the archived files should be placed.",
+					"type": "string"
+				}
+			},
+			"required": ["interval", "directory"]
+		},
+		"retention-in-memory": {
+			"description": "Keep the metrics within memory for the given time interval. Retention for X hours, then the metrics are freed.",
+			"type": "string"
+		},
+		"memory-cap": {
+			"description": "Upper memory capacity limit used by metricstore in GB",
+			"type": "integer"
+		},
+		"nats-subscriptions": {
+			"description": "Array of subscriptions. Allows subscribing to different subjects and publishers.",
+			"type": "array",
+			"items": {
+				"type": "object",
+				"properties": {
+					"subscribe-to": {
+						"description": "Channel name",
+						"type": "string"
+					},
+					"cluster-tag": {
+						"description": "Optional: Allow lines without a cluster tag, use this as default",
+						"type": "string"
+					}
+				}
+			}
+		}
+	},
+	"required": ["checkpoints", "retention-in-memory"]
+}`
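A configuration that satisfies this schema and the restructured MetricStoreConfig could look like the following (a sketch; the keys follow the JSON tags in the hunks above, the concrete values are invented):

    package main

    import (
    	"encoding/json"
    	"fmt"
    )

    // Hypothetical example config; mirrors the json tags of MetricStoreConfig.
    const example = `{
    	"retention-in-memory": "48h",
    	"memory-cap": 16,
    	"checkpoints": { "file-format": "avro", "interval": "1h", "directory": "./var/checkpoints" },
    	"archive": { "interval": "24h", "directory": "./var/archive" },
    	"nats-subscriptions": [ { "subscribe-to": "hpc-metrics", "cluster-tag": "fritz" } ]
    }`

    func main() {
    	// Decoded generically here; the real code decodes into MetricStoreConfig
    	// with DisallowUnknownFields (see the Init hunk further down).
    	var cfg map[string]any
    	if err := json.Unmarshal([]byte(example), &cfg); err != nil {
    		panic(err)
    	}
    	fmt.Println(cfg["retention-in-memory"]) // 48h
    }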
@@ -3,7 +3,7 @@
 // Use of this source code is governed by a MIT-style
 // license that can be found in the LICENSE file.
 
-package memorystore
+package metricstore
 
 import (
 	"bufio"

@@ -3,7 +3,7 @@
 // Use of this source code is governed by a MIT-style
 // license that can be found in the LICENSE file.
 
-package memorystore
+package metricstore
 
 import (
 	"bufio"

@@ -3,13 +3,13 @@
 // Use of this source code is governed by a MIT-style
 // license that can be found in the LICENSE file.
 
-package memorystore
+package metricstore
 
 import (
 	"sync"
 	"unsafe"
 
-	"github.com/ClusterCockpit/cc-lib/util"
+	"github.com/ClusterCockpit/cc-lib/v2/util"
 )
 
 // Could also be called "node" as this forms a node in a tree structure.
@@ -72,6 +72,29 @@ func (l *Level) findLevelOrCreate(selector []string, nMetrics int) *Level {
 	return child.findLevelOrCreate(selector[1:], nMetrics)
 }
 
+func (l *Level) collectPaths(currentDepth, targetDepth int, currentPath []string, results *[][]string) {
+	l.lock.RLock()
+	defer l.lock.RUnlock()
+
+	for key, child := range l.children {
+		if child == nil {
+			continue
+		}
+
+		// We explicitly make a new slice and copy data to avoid sharing underlying arrays between siblings
+		newPath := make([]string, len(currentPath))
+		copy(newPath, currentPath)
+		newPath = append(newPath, key)
+
+		// Check depth, and just return if depth is reached
+		if currentDepth+1 == targetDepth {
+			*results = append(*results, newPath)
+		} else {
+			child.collectPaths(currentDepth+1, targetDepth, newPath, results)
+		}
+	}
+}
+
 func (l *Level) free(t int64) (int, error) {
 	l.lock.Lock()
 	defer l.lock.Unlock()
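collectPaths is a depth-limited walk over the level tree; with targetDepth 2 it yields [cluster, host] pairs, which is what the retention code below relies on. The same idea on a plain map tree (a sketch, not tied to the Level type or its locking):

    package main

    import "fmt"

    type node struct{ children map[string]*node }

    func collect(n *node, depth, target int, path []string, out *[][]string) {
    	for key, child := range n.children {
    		// Fresh copy, so sibling paths never share a backing array.
    		p := append(append([]string{}, path...), key)
    		if depth+1 == target {
    			*out = append(*out, p)
    		} else if child != nil {
    			collect(child, depth+1, target, p, out)
    		}
    	}
    }

    func main() {
    	root := &node{children: map[string]*node{
    		"fritz": {children: map[string]*node{"f0201": {}, "f0202": {}}},
    	}}
    	var paths [][]string
    	collect(root, 0, 2, nil, &paths)
    	fmt.Println(paths) // e.g. [[fritz f0201] [fritz f0202]]
    }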
@@ -3,7 +3,7 @@
 // Use of this source code is governed by a MIT-style
 // license that can be found in the LICENSE file.
 
-package memorystore
+package metricstore
 
 import (
 	"context"
@@ -12,8 +12,8 @@ import (
 	"time"
 
 	"github.com/ClusterCockpit/cc-backend/pkg/nats"
-	cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
-	"github.com/ClusterCockpit/cc-lib/schema"
+	cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger"
+	"github.com/ClusterCockpit/cc-lib/v2/schema"
 	"github.com/influxdata/line-protocol/v2/lineprotocol"
 )
 
@@ -29,29 +29,30 @@ func ReceiveNats(ms *MemoryStore,
 	}
 
 	var wg sync.WaitGroup
 
 	msgs := make(chan []byte, workers*2)
 
-	for _, sc := range Keys.Subscriptions {
+	for _, sc := range *Keys.Subscriptions {
 		clusterTag := sc.ClusterTag
 		if workers > 1 {
 			wg.Add(workers)
 
 			for range workers {
 				go func() {
+					defer wg.Done()
 					for m := range msgs {
 						dec := lineprotocol.NewDecoderWithBytes(m)
 						if err := DecodeLine(dec, ms, clusterTag); err != nil {
 							cclog.Errorf("error: %s", err.Error())
 						}
 					}
-
-					wg.Done()
 				}()
 			}
 
 			nc.Subscribe(sc.SubscribeTo, func(subject string, data []byte) {
-				msgs <- data
+				select {
+				case msgs <- data:
+				case <-ctx.Done():
+				}
 			})
 		} else {
 			nc.Subscribe(sc.SubscribeTo, func(subject string, data []byte) {
@@ -64,7 +65,11 @@ func ReceiveNats(ms *MemoryStore,
 		cclog.Infof("NATS subscription to '%s' established", sc.SubscribeTo)
 	}
 
+	go func() {
+		<-ctx.Done()
 		close(msgs)
+	}()
+
 	wg.Wait()
 
 	return nil
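The changed Subscribe callback replaces an unconditional send, which could block forever once the workers exit, with a select against ctx.Done(); closing msgs likewise moves into a goroutine that waits for cancellation so it runs exactly once. The pattern in isolation (a sketch with an invented payload):

    package main

    import (
    	"context"
    	"fmt"
    	"sync"
    )

    func main() {
    	ctx, cancel := context.WithCancel(context.Background())
    	msgs := make(chan []byte, 4)

    	var wg sync.WaitGroup
    	wg.Add(1)
    	go func() {
    		defer wg.Done() // defer guarantees Done even if the loop body panics
    		for m := range msgs {
    			fmt.Println("decoded", len(m), "bytes")
    		}
    	}()

    	publish := func(data []byte) {
    		select {
    		case msgs <- data: // normal path
    		case <-ctx.Done(): // shutdown: drop instead of blocking forever
    		}
    	}
    	publish([]byte("cpu_load,cluster=fritz value=1.0"))

    	go func() { <-ctx.Done(); close(msgs) }() // close once, after cancel
    	cancel()
    	wg.Wait()
    }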
@@ -3,7 +3,7 @@
 // Use of this source code is governed by a MIT-style
 // license that can be found in the LICENSE file.
 
-// Package memorystore provides an efficient in-memory time-series metric storage system
+// Package metricstore provides an efficient in-memory time-series metric storage system
 // with support for hierarchical data organization, checkpointing, and archiving.
 //
 // The package organizes metrics in a tree structure (cluster → host → component) and
@@ -17,7 +17,7 @@
 //   - Concurrent checkpoint/archive workers
 //   - Support for sum and average aggregation
 //   - NATS integration for metric ingestion
-package memorystore
+package metricstore
 
 import (
 	"bytes"
@@ -25,15 +25,16 @@ import (
 	"encoding/json"
 	"errors"
 	"runtime"
+	"slices"
 	"sync"
 	"time"
 
 	"github.com/ClusterCockpit/cc-backend/internal/config"
 	"github.com/ClusterCockpit/cc-backend/pkg/archive"
-	cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
-	"github.com/ClusterCockpit/cc-lib/resampler"
-	"github.com/ClusterCockpit/cc-lib/schema"
-	"github.com/ClusterCockpit/cc-lib/util"
+	cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger"
+	"github.com/ClusterCockpit/cc-lib/v2/resampler"
+	"github.com/ClusterCockpit/cc-lib/v2/schema"
+	"github.com/ClusterCockpit/cc-lib/v2/util"
 )
 
 var (
@@ -44,6 +45,15 @@ var (
 	shutdownFunc context.CancelFunc
 )
 
+// NodeProvider provides information about nodes currently in use by running jobs.
+// This interface allows metricstore to query job information without directly
+// depending on the repository package, breaking the import cycle.
+type NodeProvider interface {
+	// GetUsedNodes returns a map of cluster names to sorted lists of unique hostnames
+	// that are currently in use by jobs that started before the given timestamp.
+	GetUsedNodes(ts int64) (map[string][]string, error)
+}
+
 type Metric struct {
 	Name  string
 	Value schema.Float
@@ -53,6 +63,7 @@ type Metric struct {
 type MemoryStore struct {
 	Metrics map[string]MetricConfig
 	root    Level
+	nodeProvider NodeProvider // Injected dependency for querying running jobs
 }
 
 func Init(rawConfig json.RawMessage, wg *sync.WaitGroup) {
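Any type with a matching GetUsedNodes method satisfies the new interface; the wiring happens via SetNodeProvider further down. A minimal provider sketch (the interface signature is copied from the hunk above, the static data is invented; the real implementation would query running jobs):

    package main

    import "fmt"

    // NodeProvider mirrors the interface added in this diff.
    type NodeProvider interface {
    	GetUsedNodes(ts int64) (map[string][]string, error)
    }

    // staticProvider is a hypothetical stand-in for the repository-backed
    // implementation; busy nodes are hard-coded for illustration.
    type staticProvider struct{}

    func (staticProvider) GetUsedNodes(ts int64) (map[string][]string, error) {
    	return map[string][]string{"fritz": {"f0201", "f0202"}}, nil
    }

    func main() {
    	var p NodeProvider = staticProvider{}
    	used, _ := p.GetUsedNodes(1700000000)
    	fmt.Println(used["fritz"]) // [f0201 f0202]
    }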
@@ -61,7 +72,7 @@ func Init(rawConfig json.RawMessage, wg *sync.WaitGroup) {
 	if rawConfig != nil {
 		config.Validate(configSchema, rawConfig)
 		dec := json.NewDecoder(bytes.NewReader(rawConfig))
-		// dec.DisallowUnknownFields()
+		dec.DisallowUnknownFields()
 		if err := dec.Decode(&Keys); err != nil {
 			cclog.Abortf("[METRICSTORE]> Metric Store Config Init: Could not decode config file '%s'.\nError: %s\n", rawConfig, err.Error())
 		}
@@ -74,7 +85,7 @@ func Init(rawConfig json.RawMessage, wg *sync.WaitGroup) {
 	cclog.Debugf("[METRICSTORE]> Using %d workers for checkpoint/archive operations\n", Keys.NumWorkers)
 
 	// Helper function to add metric configuration
-	addMetricConfig := func(mc schema.MetricConfig) {
+	addMetricConfig := func(mc *schema.MetricConfig) {
 		agg, err := AssignAggregationStrategy(mc.Aggregation)
 		if err != nil {
 			cclog.Warnf("Could not find aggregation strategy for metric config '%s': %s", mc.Name, err.Error())
@@ -88,7 +99,7 @@ func Init(rawConfig json.RawMessage, wg *sync.WaitGroup) {
 
 	for _, c := range archive.Clusters {
 		for _, mc := range c.MetricConfig {
-			addMetricConfig(*mc)
+			addMetricConfig(mc)
 		}
 
 		for _, sc := range c.SubClusters {
@@ -103,7 +114,7 @@ func Init(rawConfig json.RawMessage, wg *sync.WaitGroup) {
 
 	ms := GetMemoryStore()
 
-	d, err := time.ParseDuration(Keys.Checkpoints.Restore)
+	d, err := time.ParseDuration(Keys.RetentionInMemory)
 	if err != nil {
 		cclog.Fatal(err)
 	}
@@ -128,11 +139,21 @@ func Init(rawConfig json.RawMessage, wg *sync.WaitGroup) {
 
 	ctx, shutdown := context.WithCancel(context.Background())
 
-	wg.Add(4)
+	retentionGoroutines := 1
+	checkpointingGoroutines := 1
+	dataStagingGoroutines := 1
+	archivingGoroutines := 0
+	if Keys.Archive != nil {
+		archivingGoroutines = 1
+	}
+	totalGoroutines := retentionGoroutines + checkpointingGoroutines + dataStagingGoroutines + archivingGoroutines
+	wg.Add(totalGoroutines)
 
 	Retention(wg, ctx)
 	Checkpointing(wg, ctx)
+	if Keys.Archive != nil {
 		Archiving(wg, ctx)
+	}
 	DataStaging(wg, ctx)
 
 	// Note: Signal handling has been removed from this function.
@@ -141,10 +162,12 @@ func Init(rawConfig json.RawMessage, wg *sync.WaitGroup) {
 	// Store the shutdown function for later use by Shutdown()
 	shutdownFunc = shutdown
 
+	if Keys.Subscriptions != nil {
 		err = ReceiveNats(ms, 1, ctx)
 		if err != nil {
 			cclog.Fatal(err)
 		}
+	}
 }
 
 // InitMetrics creates a new, initialized instance of a MemoryStore.
@@ -177,24 +200,29 @@ func InitMetrics(metrics map[string]MetricConfig) {
 
 func GetMemoryStore() *MemoryStore {
 	if msInstance == nil {
-		return nil
+		cclog.Fatalf("[METRICSTORE]> MemoryStore not initialized!")
 	}
 
 	return msInstance
 }
 
-func Shutdown() {
-	// Check if memorystore was initialized
-	if msInstance == nil {
-		cclog.Debug("[METRICSTORE]> MemoryStore not initialized, skipping shutdown")
-		return
-	}
-
-	// Cancel the context to signal all background goroutines to stop
+// SetNodeProvider sets the NodeProvider implementation for the MemoryStore.
+// This must be called during initialization to provide job state information
+// for selective buffer retention during Free operations.
+// If not set, the Free function will fall back to freeing all buffers.
+func (ms *MemoryStore) SetNodeProvider(provider NodeProvider) {
+	ms.nodeProvider = provider
+}
+
+func Shutdown() {
 	if shutdownFunc != nil {
 		shutdownFunc()
 	}
 
+	if Keys.Checkpoints.FileFormat != "json" {
+		close(LineProtocolMessages)
+	}
+
 	cclog.Infof("[METRICSTORE]> Writing to '%s'...\n", Keys.Checkpoints.RootDir)
 	var files int
 	var err error
@@ -205,7 +233,6 @@ func Shutdown() {
 		files, err = ms.ToCheckpoint(Keys.Checkpoints.RootDir, lastCheckpoint.Unix(), time.Now().Unix())
 	} else {
 		files, err = GetAvroStore().ToCheckpoint(Keys.Checkpoints.RootDir, true)
-		close(LineProtocolMessages)
 	}
 
 	if err != nil {
@@ -214,15 +241,6 @@ func Shutdown() {
 	cclog.Infof("[METRICSTORE]> Done! (%d files written)\n", files)
 }
 
-func getName(m *MemoryStore, i int) string {
-	for key, val := range m.Metrics {
-		if val.offset == i {
-			return key
-		}
-	}
-	return ""
-}
-
 func Retention(wg *sync.WaitGroup, ctx context.Context) {
 	ms := GetMemoryStore()
 
@@ -250,7 +268,8 @@ func Retention(wg *sync.WaitGroup, ctx context.Context) {
 		case <-ticker.C:
 			t := time.Now().Add(-d)
 			cclog.Infof("[METRICSTORE]> start freeing buffers (older than %s)...\n", t.Format(time.RFC3339))
-			freed, err := ms.Free(nil, t.Unix())
+
+			freed, err := Free(ms, t)
 			if err != nil {
 				cclog.Errorf("[METRICSTORE]> freeing up buffers failed: %s\n", err.Error())
 			} else {
@@ -261,6 +280,104 @@ func Retention(wg *sync.WaitGroup, ctx context.Context) {
 	}()
 }
 
+func Free(ms *MemoryStore, t time.Time) (int, error) {
+	// If no NodeProvider is configured, free all buffers older than t
+	if ms.nodeProvider == nil {
+		return ms.Free(nil, t.Unix())
+	}
+
+	excludeSelectors, err := ms.nodeProvider.GetUsedNodes(t.Unix())
+	if err != nil {
+		return 0, err
+	}
+
+	// excludeSelectors := make(map[string][]string, 0)
+
+	// excludeSelectors := map[string][]string{
+	// 	"alex":  {"a0122", "a0123", "a0225"},
+	// 	"fritz": {"f0201", "f0202"},
+	// }
+
+	switch lenMap := len(excludeSelectors); lenMap {
+
+	// If the length of the map returned by GetUsedNodes() is 0,
+	// then use the default Free method with a nil selector
+	case 0:
+		return ms.Free(nil, t.Unix())
+
+	// Else formulate selectors, exclude those from the map
+	// and free the rest of the selectors
+	default:
+		selectors := GetSelectors(ms, excludeSelectors)
+		return FreeSelected(ms, selectors, t)
+	}
+}
+
+// FreeSelected frees specific selectors. Used when we want to retain some specific nodes
+// beyond the retention time.
+func FreeSelected(ms *MemoryStore, selectors [][]string, t time.Time) (int, error) {
+	freed := 0
+
+	for _, selector := range selectors {
+		freedBuffers, err := ms.Free(selector, t.Unix())
+		if err != nil {
+			cclog.Errorf("error while freeing selected buffers: %#v", err)
+		}
+		freed += freedBuffers
+	}
+
+	return freed, nil
+}
+
+// GetSelectors populates all the second-to-last levels - meaning nodes.
+// From that we can exclude the specific selectors/nodes we want to retain.
+func GetSelectors(ms *MemoryStore, excludeSelectors map[string][]string) [][]string {
+	allSelectors := ms.GetPaths(2)
+
+	filteredSelectors := make([][]string, 0, len(allSelectors))
+
+	for _, path := range allSelectors {
+		if len(path) < 2 {
+			continue
+		}
+
+		key := path[0]   // The "Key" (Level 1)
+		value := path[1] // The "Value" (Level 2)
+
+		exclude := false
+
+		// Check if the key exists in our exclusion map
+		if excludedValues, exists := excludeSelectors[key]; exists {
+			// The key exists, now check if the specific value is in the exclusion list
+			if slices.Contains(excludedValues, value) {
+				exclude = true
+			}
+		}
+
+		if !exclude {
+			filteredSelectors = append(filteredSelectors, path)
+		}
+	}
+
+	// fmt.Printf("All selectors: %#v\n\n", allSelectors)
+	// fmt.Printf("filteredSelectors: %#v\n\n", filteredSelectors)
+
+	return filteredSelectors
+}
+
+// GetPaths returns a list of lists (paths) to the specified depth.
+func (ms *MemoryStore) GetPaths(targetDepth int) [][]string {
+	var results [][]string
+
+	// Start recursion. Initial path is empty.
+	// We treat Root as depth 0.
+	ms.root.collectPaths(0, targetDepth, []string{}, &results)
+
+	return results
+}
+
 // Write all values in `metrics` to the level specified by `selector` for time `ts`.
 // Look at `findLevelOrCreate` for how selectors work.
 func (m *MemoryStore) Write(selector []string, ts int64, metrics []Metric) error {
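Put together, the retention path now goes ms.GetPaths(2) → filter against GetUsedNodes → ms.Free per remaining selector. A compact trace of just the filtering step (a sketch with invented cluster and host names):

    package main

    import (
    	"fmt"
    	"slices"
    )

    func main() {
    	all := [][]string{{"fritz", "f0201"}, {"fritz", "f0303"}, {"alex", "a0122"}}
    	exclude := map[string][]string{"fritz": {"f0201"}} // nodes with running jobs

    	var freeable [][]string
    	for _, p := range all {
    		if used, ok := exclude[p[0]]; ok && slices.Contains(used, p[1]) {
    			continue // keep buffers for busy nodes past the retention time
    		}
    		freeable = append(freeable, p)
    	}
    	fmt.Println(freeable) // [[fritz f0303] [alex a0122]]
    }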
@@ -3,12 +3,12 @@
 // Use of this source code is governed by a MIT-style
 // license that can be found in the LICENSE file.
 
-package memorystore
+package metricstore
 
 import (
 	"testing"
 
-	"github.com/ClusterCockpit/cc-lib/schema"
+	"github.com/ClusterCockpit/cc-lib/v2/schema"
 )
 
 func TestAssignAggregationStrategy(t *testing.T) {

@@ -3,7 +3,7 @@
 // Use of this source code is governed by a MIT-style
 // license that can be found in the LICENSE file.
 
-package memorystore
+package metricstore
 
 import (
 	"context"
@@ -13,10 +13,13 @@ import (
 	"time"
 
 	"github.com/ClusterCockpit/cc-backend/pkg/archive"
-	cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
-	"github.com/ClusterCockpit/cc-lib/schema"
+	cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger"
+	"github.com/ClusterCockpit/cc-lib/v2/schema"
 )
 
+// TestLoadDataCallback allows tests to override LoadData behavior
+var TestLoadDataCallback func(job *schema.Job, metrics []string, scopes []schema.MetricScope, ctx context.Context, resolution int) (schema.JobData, error)
+
 func LoadData(
 	job *schema.Job,
 	metrics []string,
@@ -24,6 +27,10 @@ func LoadData(
 	ctx context.Context,
 	resolution int,
 ) (schema.JobData, error) {
+	if TestLoadDataCallback != nil {
+		return TestLoadDataCallback(job, metrics, scopes, ctx, resolution)
+	}
+
 	queries, assignedScope, err := buildQueries(job, metrics, scopes, int64(resolution))
 	if err != nil {
 		cclog.Errorf("Error while building queries for jobId %d, Metrics %v, Scopes %v: %s", job.JobID, metrics, scopes, err.Error())
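With the callback in place, a test can stub out the whole query path without a running metric store. A sketch of how a test might use it (the canned response is invented; nil is restored afterwards so other tests hit the real path):

    package metricstore

    import (
    	"context"
    	"testing"

    	"github.com/ClusterCockpit/cc-lib/v2/schema"
    )

    func TestLoadDataStub(t *testing.T) {
    	TestLoadDataCallback = func(job *schema.Job, metrics []string,
    		scopes []schema.MetricScope, ctx context.Context, resolution int,
    	) (schema.JobData, error) {
    		return schema.JobData{}, nil // canned response, no store needed
    	}
    	defer func() { TestLoadDataCallback = nil }() // restore real path

    	if _, err := LoadData(&schema.Job{}, nil, nil, context.Background(), 60); err != nil {
    		t.Fatal(err)
    	}
    }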
@@ -3,13 +3,13 @@
 // Use of this source code is governed by a MIT-style
 // license that can be found in the LICENSE file.
 
-package memorystore
+package metricstore
 
 import (
 	"errors"
 	"math"
 
-	"github.com/ClusterCockpit/cc-lib/util"
+	"github.com/ClusterCockpit/cc-lib/v2/util"
 )
 
 type Stats struct {

@@ -12,7 +12,7 @@ import (
 	"sync"
 	"time"
 
-	cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
+	cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger"
 	"github.com/jmoiron/sqlx"
 	"github.com/mattn/go-sqlite3"
 	"github.com/qustavo/sqlhooks/v2"
@@ -115,3 +115,26 @@ func GetConnection() *DBConnection {
 
 	return dbConnInstance
 }
+
+// ResetConnection closes the current database connection and resets the connection state.
+// This function is intended for testing purposes only to allow test isolation.
+func ResetConnection() error {
+	if dbConnInstance != nil && dbConnInstance.DB != nil {
+		if err := dbConnInstance.DB.Close(); err != nil {
+			return fmt.Errorf("failed to close database connection: %w", err)
+		}
+	}
+
+	dbConnInstance = nil
+	dbConnOnce = sync.Once{}
+	jobRepoInstance = nil
+	jobRepoOnce = sync.Once{}
+	nodeRepoInstance = nil
+	nodeRepoOnce = sync.Once{}
+	userRepoInstance = nil
+	userRepoOnce = sync.Once{}
+	userCfgRepoInstance = nil
+	userCfgRepoOnce = sync.Once{}
+
+	return nil
+}
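ResetConnection resets every repository singleton plus its sync.Once guard, so a later Connect starts from a clean slate. Typical use between tests might look like this (a sketch; Connect's exact signature is assumed, not confirmed by this diff):

    package repository

    import "testing"

    func TestIsolationExample(t *testing.T) {
    	// Each test gets its own database file and fresh singletons.
    	Connect("sqlite3", t.TempDir()+"/test.db") // signature assumed

    	t.Cleanup(func() {
    		if err := ResetConnection(); err != nil {
    			t.Fatalf("reset failed: %v", err)
    		}
    	})
    }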
@@ -2,13 +2,14 @@
|
|||||||
// All rights reserved. This file is part of cc-backend.
|
// All rights reserved. This file is part of cc-backend.
|
||||||
// Use of this source code is governed by a MIT-style
|
// Use of this source code is governed by a MIT-style
|
||||||
// license that can be found in the LICENSE file.
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
package repository
|
package repository
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
|
cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger"
|
||||||
)
|
)
|
||||||
|
|
||||||
// Hooks satisfies the sqlhook.Hooks interface
|
// Hooks satisfies the sqlhook.Hooks interface
|
||||||
|
|||||||
274
internal/repository/hooks_test.go
Normal file
274
internal/repository/hooks_test.go
Normal file
@@ -0,0 +1,274 @@
|
|||||||
|
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||||
|
// All rights reserved. This file is part of cc-backend.
|
||||||
|
// Use of this source code is governed by a MIT-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
package repository
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"testing"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/ClusterCockpit/cc-lib/v2/schema"
|
||||||
|
_ "github.com/mattn/go-sqlite3"
|
||||||
|
"github.com/stretchr/testify/assert"
|
||||||
|
"github.com/stretchr/testify/require"
|
||||||
|
)
|
||||||
|
|
||||||
|
type MockJobHook struct {
|
||||||
|
startCalled bool
|
||||||
|
stopCalled bool
|
||||||
|
startJobs []*schema.Job
|
||||||
|
stopJobs []*schema.Job
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m *MockJobHook) JobStartCallback(job *schema.Job) {
|
||||||
|
m.startCalled = true
|
||||||
|
m.startJobs = append(m.startJobs, job)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m *MockJobHook) JobStopCallback(job *schema.Job) {
|
||||||
|
m.stopCalled = true
|
||||||
|
m.stopJobs = append(m.stopJobs, job)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestRegisterJobHook(t *testing.T) {
|
||||||
|
t.Run("register single hook", func(t *testing.T) {
|
||||||
|
hooks = nil
|
||||||
|
mock := &MockJobHook{}
|
||||||
|
|
||||||
|
RegisterJobHook(mock)
|
||||||
|
|
||||||
|
assert.NotNil(t, hooks)
|
||||||
|
assert.Len(t, hooks, 1)
|
||||||
|
assert.Equal(t, mock, hooks[0])
|
||||||
|
|
||||||
|
hooks = nil
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("register multiple hooks", func(t *testing.T) {
|
||||||
|
hooks = nil
|
||||||
|
mock1 := &MockJobHook{}
|
||||||
|
mock2 := &MockJobHook{}
|
||||||
|
|
||||||
|
RegisterJobHook(mock1)
|
||||||
|
RegisterJobHook(mock2)
|
||||||
|
|
||||||
|
assert.Len(t, hooks, 2)
|
||||||
|
assert.Equal(t, mock1, hooks[0])
|
||||||
|
assert.Equal(t, mock2, hooks[1])
|
||||||
|
|
||||||
|
hooks = nil
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("register nil hook does not add to hooks", func(t *testing.T) {
|
||||||
|
hooks = nil
|
||||||
|
RegisterJobHook(nil)
|
||||||
|
|
||||||
|
if hooks != nil {
|
||||||
|
assert.Len(t, hooks, 0, "Nil hook should not be added")
|
||||||
|
}
|
||||||
|
|
||||||
|
hooks = nil
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestCallJobStartHooks(t *testing.T) {
|
||||||
|
t.Run("call start hooks with single job", func(t *testing.T) {
|
||||||
|
hooks = nil
|
||||||
|
mock := &MockJobHook{}
|
||||||
|
RegisterJobHook(mock)
|
||||||
|
|
||||||
|
job := &schema.Job{
|
||||||
|
JobID: 123,
|
||||||
|
User: "testuser",
|
||||||
|
Cluster: "testcluster",
|
||||||
|
}
|
||||||
|
|
||||||
|
CallJobStartHooks([]*schema.Job{job})
|
||||||
|
|
||||||
|
assert.True(t, mock.startCalled)
|
||||||
|
assert.False(t, mock.stopCalled)
|
||||||
|
assert.Len(t, mock.startJobs, 1)
|
||||||
|
assert.Equal(t, int64(123), mock.startJobs[0].JobID)
|
||||||
|
|
||||||
|
hooks = nil
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("call start hooks with multiple jobs", func(t *testing.T) {
|
||||||
|
hooks = nil
|
||||||
|
mock := &MockJobHook{}
|
||||||
|
RegisterJobHook(mock)
|
||||||
|
|
||||||
|
jobs := []*schema.Job{
|
||||||
|
{JobID: 1, User: "user1", Cluster: "cluster1"},
|
||||||
|
{JobID: 2, User: "user2", Cluster: "cluster2"},
|
||||||
|
{JobID: 3, User: "user3", Cluster: "cluster3"},
|
||||||
|
}
|
||||||
|
|
||||||
|
CallJobStartHooks(jobs)
|
||||||
|
|
||||||
|
assert.True(t, mock.startCalled)
|
||||||
|
assert.Len(t, mock.startJobs, 3)
|
||||||
|
assert.Equal(t, int64(1), mock.startJobs[0].JobID)
|
||||||
|
assert.Equal(t, int64(2), mock.startJobs[1].JobID)
|
||||||
|
assert.Equal(t, int64(3), mock.startJobs[2].JobID)
|
||||||
|
|
||||||
|
hooks = nil
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("call start hooks with multiple registered hooks", func(t *testing.T) {
|
||||||
|
hooks = nil
|
||||||
|
mock1 := &MockJobHook{}
|
||||||
|
mock2 := &MockJobHook{}
|
||||||
|
RegisterJobHook(mock1)
|
||||||
|
RegisterJobHook(mock2)
|
||||||
|
		job := &schema.Job{
			JobID: 456, User: "testuser", Cluster: "testcluster",
		}

		CallJobStartHooks([]*schema.Job{job})

		assert.True(t, mock1.startCalled)
		assert.True(t, mock2.startCalled)
		assert.Len(t, mock1.startJobs, 1)
		assert.Len(t, mock2.startJobs, 1)

		hooks = nil
	})

	t.Run("call start hooks with nil hooks", func(t *testing.T) {
		hooks = nil

		job := &schema.Job{
			JobID: 789, User: "testuser", Cluster: "testcluster",
		}

		CallJobStartHooks([]*schema.Job{job})

		hooks = nil
	})

	t.Run("call start hooks with empty job list", func(t *testing.T) {
		hooks = nil
		mock := &MockJobHook{}
		RegisterJobHook(mock)

		CallJobStartHooks([]*schema.Job{})

		assert.False(t, mock.startCalled)
		assert.Len(t, mock.startJobs, 0)

		hooks = nil
	})
}

func TestCallJobStopHooks(t *testing.T) {
	t.Run("call stop hooks with single job", func(t *testing.T) {
		hooks = nil
		mock := &MockJobHook{}
		RegisterJobHook(mock)

		job := &schema.Job{
			JobID:   123,
			User:    "testuser",
			Cluster: "testcluster",
		}

		CallJobStopHooks(job)

		assert.True(t, mock.stopCalled)
		assert.False(t, mock.startCalled)
		assert.Len(t, mock.stopJobs, 1)
		assert.Equal(t, int64(123), mock.stopJobs[0].JobID)

		hooks = nil
	})

	t.Run("call stop hooks with multiple registered hooks", func(t *testing.T) {
		hooks = nil
		mock1 := &MockJobHook{}
		mock2 := &MockJobHook{}
		RegisterJobHook(mock1)
		RegisterJobHook(mock2)

		job := &schema.Job{
			JobID: 456, User: "testuser", Cluster: "testcluster",
		}

		CallJobStopHooks(job)

		assert.True(t, mock1.stopCalled)
		assert.True(t, mock2.stopCalled)
		assert.Len(t, mock1.stopJobs, 1)
		assert.Len(t, mock2.stopJobs, 1)

		hooks = nil
	})

	t.Run("call stop hooks with nil hooks", func(t *testing.T) {
		hooks = nil

		job := &schema.Job{
			JobID: 789, User: "testuser", Cluster: "testcluster",
		}

		CallJobStopHooks(job)

		hooks = nil
	})
}

func TestSQLHooks(t *testing.T) {
	_ = setup(t)

	t.Run("hooks log queries in debug mode", func(t *testing.T) {
		h := &Hooks{}

		ctx := context.Background()
		query := "SELECT * FROM job WHERE job_id = ?"
		args := []any{123}

		ctxWithTime, err := h.Before(ctx, query, args...)
		require.NoError(t, err)
		assert.NotNil(t, ctxWithTime)

		beginTime := ctxWithTime.Value("begin")
		require.NotNil(t, beginTime)
		_, ok := beginTime.(time.Time)
		assert.True(t, ok, "Begin time should be time.Time")

		time.Sleep(10 * time.Millisecond)

		ctxAfter, err := h.After(ctxWithTime, query, args...)
		require.NoError(t, err)
		assert.NotNil(t, ctxAfter)
	})
}

func TestHookIntegration(t *testing.T) {
	t.Run("hooks are called during job lifecycle", func(t *testing.T) {
		hooks = nil
		mock := &MockJobHook{}
		RegisterJobHook(mock)

		job := &schema.Job{
			JobID:   999,
			User:    "integrationuser",
			Cluster: "integrationcluster",
		}

		CallJobStartHooks([]*schema.Job{job})
		assert.True(t, mock.startCalled)
		assert.Equal(t, 1, len(mock.startJobs))

		CallJobStopHooks(job)
		assert.True(t, mock.stopCalled)
		assert.Equal(t, 1, len(mock.stopJobs))

		assert.Equal(t, mock.startJobs[0].JobID, mock.stopJobs[0].JobID)

		hooks = nil
	})
}
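The MockJobHook used in these tests records which callbacks fired; its definition appears earlier in the test file and is not part of this excerpt. For orientation, a minimal sketch of what such a mock could look like — the callback method names JobStartCallback and JobStopCallback are assumptions for illustration, not taken from this diff:

	// Hypothetical sketch: method names are assumed, not confirmed by this diff.
	type MockJobHook struct {
		startCalled bool          // set once the start callback fires
		stopCalled  bool          // set once the stop callback fires
		startJobs   []*schema.Job // jobs passed to the start callback
		stopJobs    []*schema.Job // jobs passed to the stop callback
	}

	func (m *MockJobHook) JobStartCallback(job *schema.Job) {
		m.startCalled = true
		m.startJobs = append(m.startJobs, job)
	}

	func (m *MockJobHook) JobStopCallback(job *schema.Job) {
		m.stopCalled = true
		m.stopJobs = append(m.stopJobs, job)
	}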
@@ -66,31 +66,47 @@ import (
 	"fmt"
 	"maps"
 	"math"
+	"sort"
 	"strconv"
 	"sync"
 	"time"
 
 	"github.com/ClusterCockpit/cc-backend/pkg/archive"
-	cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
-	"github.com/ClusterCockpit/cc-lib/lrucache"
-	"github.com/ClusterCockpit/cc-lib/schema"
+	cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger"
+	"github.com/ClusterCockpit/cc-lib/v2/lrucache"
+	"github.com/ClusterCockpit/cc-lib/v2/schema"
 	sq "github.com/Masterminds/squirrel"
 	"github.com/jmoiron/sqlx"
 )
 
 var (
+	// jobRepoOnce ensures singleton initialization of the JobRepository
 	jobRepoOnce     sync.Once
+	// jobRepoInstance holds the single instance of JobRepository
 	jobRepoInstance *JobRepository
 )
 
+// JobRepository provides database access for job-related operations.
+// It implements the repository pattern to abstract database interactions
+// and provides caching for improved performance.
+//
+// The repository is a singleton initialized via GetJobRepository().
+// All database queries use prepared statements via stmtCache for efficiency.
+// Frequently accessed data (metadata, energy footprints) is cached in an LRU cache.
 type JobRepository struct {
-	DB        *sqlx.DB
-	stmtCache *sq.StmtCache
-	cache     *lrucache.Cache
-	driver    string
-	Mutex     sync.Mutex
+	DB        *sqlx.DB        // Database connection pool
+	stmtCache *sq.StmtCache   // Prepared statement cache for query optimization
+	cache     *lrucache.Cache // LRU cache for metadata and footprint data
+	driver    string          // Database driver name (e.g., "sqlite3")
+	Mutex     sync.Mutex      // Mutex for thread-safe operations
 }
 
+// GetJobRepository returns the singleton instance of JobRepository.
+// The repository is initialized lazily on first access with database connection,
+// prepared statement cache, and LRU cache configured from repoConfig.
+//
+// This function is thread-safe and ensures only one instance is created.
+// It must be called after Connect() has established a database connection.
 func GetJobRepository() *JobRepository {
 	jobRepoOnce.Do(func() {
 		db := GetConnection()
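The singleton documented above is the intended way to obtain a repository handle. A minimal usage sketch, assuming Connect() has already established the database connection (the initialization order comes from the doc comment, the call site is illustrative):

	// Sketch only: must run after Connect() per the doc comment above.
	repo := GetJobRepository() // safe to call concurrently; initialized exactly once
	if err := repo.Optimize(); err != nil {
		cclog.Errorf("database optimization failed: %v", err)
	}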
@@ -106,6 +122,8 @@ func GetJobRepository() *JobRepository {
 	return jobRepoInstance
 }
 
+// jobColumns defines the standard set of columns selected from the job table.
+// Used consistently across all job queries to ensure uniform data retrieval.
 var jobColumns []string = []string{
 	"job.id", "job.job_id", "job.hpc_user", "job.project", "job.cluster", "job.subcluster",
 	"job.start_time", "job.cluster_partition", "job.array_job_id", "job.num_nodes",
@@ -114,6 +132,8 @@ var jobColumns []string = []string{
 	"job.footprint", "job.energy",
 }
 
+// jobCacheColumns defines columns from the job_cache table, mirroring jobColumns.
+// Used for queries against cached job data for performance optimization.
 var jobCacheColumns []string = []string{
 	"job_cache.id", "job_cache.job_id", "job_cache.hpc_user", "job_cache.project", "job_cache.cluster",
 	"job_cache.subcluster", "job_cache.start_time", "job_cache.cluster_partition",
@@ -123,6 +143,14 @@ var jobCacheColumns []string = []string{
 	"job_cache.footprint", "job_cache.energy",
 }
 
+// scanJob converts a database row into a schema.Job struct.
+// It handles JSON unmarshaling of resources and footprint fields,
+// and calculates accurate duration for running jobs.
+//
+// Parameters:
+//   - row: Database row implementing Scan() interface (sql.Row or sql.Rows)
+//
+// Returns the populated Job struct or an error if scanning or unmarshaling fails.
 func scanJob(row interface{ Scan(...any) error }) (*schema.Job, error) {
 	job := &schema.Job{}
 
@@ -155,27 +183,51 @@ func scanJob(row interface{ Scan(...any) error }) (*schema.Job, error) {
 	return job, nil
 }
 
+// Optimize performs database optimization by running VACUUM command.
+// This reclaims unused space and defragments the database file.
+// Should be run periodically during maintenance windows.
 func (r *JobRepository) Optimize() error {
 	if _, err := r.DB.Exec(`VACUUM`); err != nil {
-		return err
+		cclog.Errorf("Error while executing VACUUM: %v", err)
+		return fmt.Errorf("failed to optimize database: %w", err)
 	}
 	return nil
 }
 
+// Flush removes all data from job-related tables (jobtag, tag, job).
+// WARNING: This is a destructive operation that deletes all job data.
+// Use with extreme caution, typically only for testing or complete resets.
 func (r *JobRepository) Flush() error {
 	if _, err := r.DB.Exec(`DELETE FROM jobtag`); err != nil {
-		return err
+		cclog.Errorf("Error while deleting from jobtag table: %v", err)
+		return fmt.Errorf("failed to flush jobtag table: %w", err)
 	}
 	if _, err := r.DB.Exec(`DELETE FROM tag`); err != nil {
-		return err
+		cclog.Errorf("Error while deleting from tag table: %v", err)
+		return fmt.Errorf("failed to flush tag table: %w", err)
 	}
 	if _, err := r.DB.Exec(`DELETE FROM job`); err != nil {
-		return err
+		cclog.Errorf("Error while deleting from job table: %v", err)
+		return fmt.Errorf("failed to flush job table: %w", err)
 	}
 	return nil
}
 
+// FetchMetadata retrieves and unmarshals the metadata JSON for a job.
+// Metadata is cached with a 24-hour TTL to improve performance.
+//
+// The metadata field stores arbitrary key-value pairs associated with a job,
+// such as tags, labels, or custom attributes added by external systems.
+//
+// Parameters:
+//   - job: Job struct with valid ID field, metadata will be populated in job.MetaData
+//
+// Returns the metadata map or an error if the job is nil or database query fails.
 func (r *JobRepository) FetchMetadata(job *schema.Job) (map[string]string, error) {
+	if job == nil {
+		return nil, fmt.Errorf("job cannot be nil")
+	}
+
 	start := time.Now()
 	cachekey := fmt.Sprintf("metadata:%d", job.ID)
 	if cached := r.cache.Get(cachekey, nil); cached != nil {
@@ -185,8 +237,8 @@ func (r *JobRepository) FetchMetadata(job *schema.Job) (map[string]string, error
 
 	if err := sq.Select("job.meta_data").From("job").Where("job.id = ?", job.ID).
 		RunWith(r.stmtCache).QueryRow().Scan(&job.RawMetaData); err != nil {
-		cclog.Warn("Error while scanning for job metadata")
-		return nil, err
+		cclog.Warnf("Error while scanning for job metadata (ID=%d): %v", job.ID, err)
+		return nil, fmt.Errorf("failed to fetch metadata for job %d: %w", job.ID, err)
 	}
 
 	if len(job.RawMetaData) == 0 {
@@ -194,8 +246,8 @@ func (r *JobRepository) FetchMetadata(job *schema.Job) (map[string]string, error
 	}
 
 	if err := json.Unmarshal(job.RawMetaData, &job.MetaData); err != nil {
-		cclog.Warn("Error while unmarshaling raw metadata json")
-		return nil, err
+		cclog.Warnf("Error while unmarshaling raw metadata json (ID=%d): %v", job.ID, err)
+		return nil, fmt.Errorf("failed to unmarshal metadata for job %d: %w", job.ID, err)
 	}
 
 	r.cache.Put(cachekey, job.MetaData, len(job.RawMetaData), 24*time.Hour)
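Taken together, the FetchMetadata changes add a nil guard, wrapped errors, and ID-tagged log lines around the existing 24-hour LRU cache. A usage sketch, assuming repo is the handle from GetJobRepository() and with the metadata key chosen purely for illustration:

	// Sketch only: the "slurm_info" key is hypothetical.
	meta, err := repo.FetchMetadata(job) // first call queries the DB; repeats hit the cache (24h TTL)
	if err != nil {
		return err
	}
	if v, ok := meta["slurm_info"]; ok {
		cclog.Debugf("job %d metadata: %s", job.JobID, v)
	}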
@@ -203,13 +255,27 @@ func (r *JobRepository) FetchMetadata(job *schema.Job) (map[string]string, error
 	return job.MetaData, nil
 }
 
+// UpdateMetadata adds or updates a single metadata key-value pair for a job.
+// The entire metadata map is re-marshaled and stored, and the cache is invalidated.
+// Also triggers archive metadata update via archive.UpdateMetadata.
+//
+// Parameters:
+//   - job: Job struct with valid ID, existing metadata will be fetched if not present
+//   - key: Metadata key to set
+//   - val: Metadata value to set
+//
+// Returns an error if the job is nil, metadata fetch fails, or database update fails.
 func (r *JobRepository) UpdateMetadata(job *schema.Job, key, val string) (err error) {
+	if job == nil {
+		return fmt.Errorf("job cannot be nil")
+	}
+
 	cachekey := fmt.Sprintf("metadata:%d", job.ID)
 	r.cache.Del(cachekey)
 	if job.MetaData == nil {
 		if _, err = r.FetchMetadata(job); err != nil {
 			cclog.Warnf("Error while fetching metadata for job, DB ID '%v'", job.ID)
-			return err
+			return fmt.Errorf("failed to fetch metadata for job %d: %w", job.ID, err)
 		}
 	}
 
@@ -224,7 +290,7 @@ func (r *JobRepository) UpdateMetadata(job *schema.Job, key, val string) (err er
 
 	if job.RawMetaData, err = json.Marshal(job.MetaData); err != nil {
 		cclog.Warnf("Error while marshaling metadata for job, DB ID '%v'", job.ID)
-		return err
+		return fmt.Errorf("failed to marshal metadata for job %d: %w", job.ID, err)
 	}
 
 	if _, err = sq.Update("job").
@@ -232,20 +298,34 @@ func (r *JobRepository) UpdateMetadata(job *schema.Job, key, val string) (err er
 		Where("job.id = ?", job.ID).
 		RunWith(r.stmtCache).Exec(); err != nil {
 		cclog.Warnf("Error while updating metadata for job, DB ID '%v'", job.ID)
-		return err
+		return fmt.Errorf("failed to update metadata in database for job %d: %w", job.ID, err)
 	}
 
 	r.cache.Put(cachekey, job.MetaData, len(job.RawMetaData), 24*time.Hour)
 	return archive.UpdateMetadata(job, job.MetaData)
 }
 
+// FetchFootprint retrieves and unmarshals the performance footprint JSON for a job.
+// Unlike FetchMetadata, footprints are NOT cached as they can be large and change frequently.
+//
+// The footprint contains summary statistics (avg/min/max) for monitored metrics,
+// stored as JSON with keys like "cpu_load_avg", "mem_used_max", etc.
+//
+// Parameters:
+//   - job: Job struct with valid ID, footprint will be populated in job.Footprint
+//
+// Returns the footprint map or an error if the job is nil or database query fails.
 func (r *JobRepository) FetchFootprint(job *schema.Job) (map[string]float64, error) {
+	if job == nil {
+		return nil, fmt.Errorf("job cannot be nil")
+	}
+
 	start := time.Now()
 
 	if err := sq.Select("job.footprint").From("job").Where("job.id = ?", job.ID).
 		RunWith(r.stmtCache).QueryRow().Scan(&job.RawFootprint); err != nil {
-		cclog.Warn("Error while scanning for job footprint")
-		return nil, err
+		cclog.Warnf("Error while scanning for job footprint (ID=%d): %v", job.ID, err)
+		return nil, fmt.Errorf("failed to fetch footprint for job %d: %w", job.ID, err)
 	}
 
 	if len(job.RawFootprint) == 0 {
@@ -253,15 +333,29 @@ func (r *JobRepository) FetchFootprint(job *schema.Job) (map[string]float64, err
 	}
 
 	if err := json.Unmarshal(job.RawFootprint, &job.Footprint); err != nil {
-		cclog.Warn("Error while unmarshaling raw footprint json")
-		return nil, err
+		cclog.Warnf("Error while unmarshaling raw footprint json (ID=%d): %v", job.ID, err)
+		return nil, fmt.Errorf("failed to unmarshal footprint for job %d: %w", job.ID, err)
 	}
 
 	cclog.Debugf("Timer FetchFootprint %s", time.Since(start))
 	return job.Footprint, nil
 }
 
+// FetchEnergyFootprint retrieves and unmarshals the energy footprint JSON for a job.
+// Energy footprints are cached with a 24-hour TTL as they are frequently accessed but rarely change.
+//
+// The energy footprint contains calculated energy consumption (in kWh) per metric,
+// stored as JSON with keys like "power_avg", "acc_power_avg", etc.
+//
+// Parameters:
+//   - job: Job struct with valid ID, energy footprint will be populated in job.EnergyFootprint
+//
+// Returns the energy footprint map or an error if the job is nil or database query fails.
 func (r *JobRepository) FetchEnergyFootprint(job *schema.Job) (map[string]float64, error) {
+	if job == nil {
+		return nil, fmt.Errorf("job cannot be nil")
+	}
+
 	start := time.Now()
 	cachekey := fmt.Sprintf("energyFootprint:%d", job.ID)
 	if cached := r.cache.Get(cachekey, nil); cached != nil {
@@ -271,8 +365,8 @@ func (r *JobRepository) FetchEnergyFootprint(job *schema.Job) (map[string]float6
 
 	if err := sq.Select("job.energy_footprint").From("job").Where("job.id = ?", job.ID).
 		RunWith(r.stmtCache).QueryRow().Scan(&job.RawEnergyFootprint); err != nil {
-		cclog.Warn("Error while scanning for job energy_footprint")
-		return nil, err
+		cclog.Warnf("Error while scanning for job energy_footprint (ID=%d): %v", job.ID, err)
+		return nil, fmt.Errorf("failed to fetch energy footprint for job %d: %w", job.ID, err)
 	}
 
 	if len(job.RawEnergyFootprint) == 0 {
@@ -280,8 +374,8 @@ func (r *JobRepository) FetchEnergyFootprint(job *schema.Job) (map[string]float6
 	}
 
 	if err := json.Unmarshal(job.RawEnergyFootprint, &job.EnergyFootprint); err != nil {
-		cclog.Warn("Error while unmarshaling raw energy footprint json")
-		return nil, err
+		cclog.Warnf("Error while unmarshaling raw energy footprint json (ID=%d): %v", job.ID, err)
+		return nil, fmt.Errorf("failed to unmarshal energy footprint for job %d: %w", job.ID, err)
 	}
 
 	r.cache.Put(cachekey, job.EnergyFootprint, len(job.EnergyFootprint), 24*time.Hour)
@@ -289,6 +383,18 @@ func (r *JobRepository) FetchEnergyFootprint(job *schema.Job) (map[string]float6
 	return job.EnergyFootprint, nil
 }
 
+// DeleteJobsBefore removes jobs older than the specified start time.
+// Optionally preserves tagged jobs to protect important data from deletion.
+// Cache entries for deleted jobs are automatically invalidated.
+//
+// This is typically used for data retention policies and cleanup operations.
+// WARNING: This is a destructive operation that permanently deletes job records.
+//
+// Parameters:
+//   - startTime: Unix timestamp, jobs with start_time < this value will be deleted
+//   - omitTagged: If true, skip jobs that have associated tags (jobtag entries)
+//
+// Returns the count of deleted jobs or an error if the operation fails.
 func (r *JobRepository) DeleteJobsBefore(startTime int64, omitTagged bool) (int, error) {
 	var cnt int
 	q := sq.Select("count(*)").From("job").Where("job.start_time < ?", startTime)
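The retention use case named in the DeleteJobsBefore comment can be sketched as follows; the 90-day window is an example value, not a project default, and repo stands for the GetJobRepository() handle:

	// Sketch only: retention window chosen for illustration.
	cutoff := time.Now().AddDate(0, 0, -90).Unix() // jobs started before this are purged
	deleted, err := repo.DeleteJobsBefore(cutoff, true) // true: spare tagged jobs
	if err != nil {
		return err
	}
	cclog.Infof("retention cleanup removed %d jobs", deleted)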
@@ -344,6 +450,13 @@ func (r *JobRepository) DeleteJobsBefore(startTime int64, omitTagged bool) (int,
 	return cnt, err
 }
 
+// DeleteJobByID permanently removes a single job by its database ID.
+// Cache entries for the deleted job are automatically invalidated.
+//
+// Parameters:
+//   - id: Database ID (primary key) of the job to delete
+//
+// Returns an error if the deletion fails.
 func (r *JobRepository) DeleteJobByID(id int64) error {
 	// Invalidate cache entries before deletion
 	r.cache.Del(fmt.Sprintf("metadata:%d", id))
@@ -361,7 +474,29 @@ func (r *JobRepository) DeleteJobByID(id int64) error {
 	return err
 }
 
+// FindUserOrProjectOrJobname attempts to interpret a search term as a job ID,
+// username, project ID, or job name by querying the database.
+//
+// Search logic (in priority order):
+//  1. If searchterm is numeric, treat as job ID (returned immediately)
+//  2. Try exact match in job.hpc_user column (username)
+//  3. Try LIKE match in hpc_user.name column (real name)
+//  4. Try exact match in job.project column (project ID)
+//  5. If no matches, return searchterm as jobname for GraphQL query
+//
+// This powers the searchbar functionality for flexible job searching.
+// Requires authenticated user for database lookups (returns empty if user is nil).
+//
+// Parameters:
+//   - user: Authenticated user context, required for database access
+//   - searchterm: Search string to interpret
+//
+// Returns up to one non-empty value among (jobid, username, project, jobname).
 func (r *JobRepository) FindUserOrProjectOrJobname(user *schema.User, searchterm string) (jobid string, username string, project string, jobname string) {
+	if searchterm == "" {
+		return "", "", "", ""
+	}
+
 	if _, err := strconv.Atoi(searchterm); err == nil { // Return empty on successful conversion: parent method will redirect for integer jobId
 		return searchterm, "", "", ""
 	} else { // Has to have letters and logged-in user for other guesses
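The priority order above means a single searchbar term yields at most one non-empty return value. Illustrative calls with hypothetical inputs, again assuming repo is the repository handle:

	// Sketch only: inputs and outcomes are illustrative.
	jobid, _, _, _ := repo.FindUserOrProjectOrJobname(user, "12345")
	// jobid == "12345": numeric terms short-circuit as job IDs

	_, username, _, _ := repo.FindUserOrProjectOrJobname(user, "alice")
	// username == "alice" if it matches job.hpc_user; otherwise the term falls
	// through to real-name, then project, and is finally returned as jobname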
@@ -392,7 +527,24 @@ var (
 	ErrForbidden = errors.New("not authorized")
 )
 
+// FindColumnValue performs a generic column lookup in a database table with role-based access control.
+// Only users with admin, support, or manager roles can execute this query.
+//
+// Parameters:
+//   - user: User context for authorization check
+//   - searchterm: Value to search for (exact match or LIKE pattern)
+//   - table: Database table name to query
+//   - selectColumn: Column name to return in results
+//   - whereColumn: Column name to filter on
+//   - isLike: If true, use LIKE with wildcards; if false, use exact equality
+//
+// Returns the first matching value, ErrForbidden if user lacks permission,
+// or ErrNotFound if no matches are found.
 func (r *JobRepository) FindColumnValue(user *schema.User, searchterm string, table string, selectColumn string, whereColumn string, isLike bool) (result string, err error) {
+	if user == nil {
+		return "", fmt.Errorf("user cannot be nil")
+	}
+
 	compareStr := " = ?"
 	query := searchterm
 	if isLike {
@@ -403,17 +555,11 @@ func (r *JobRepository) FindColumnValue(user *schema.User, searchterm string, ta
 		theQuery := sq.Select(table+"."+selectColumn).Distinct().From(table).
 			Where(table+"."+whereColumn+compareStr, query)
 
-		// theSql, args, theErr := theQuery.ToSql()
-		// if theErr != nil {
-		// 	cclog.Warn("Error while converting query to sql")
-		// 	return "", err
-		// }
-		// cclog.Debugf("SQL query (FindColumnValue): `%s`, args: %#v", theSql, args)
-
 		err := theQuery.RunWith(r.stmtCache).QueryRow().Scan(&result)
 
 		if err != nil && err != sql.ErrNoRows {
-			return "", err
+			cclog.Warnf("Error while querying FindColumnValue (table=%s, column=%s): %v", table, selectColumn, err)
+			return "", fmt.Errorf("failed to find column value: %w", err)
 		} else if err == nil {
 			return result, nil
 		}
@@ -424,22 +570,40 @@ func (r *JobRepository) FindColumnValue(user *schema.User, searchterm string, ta
 	}
 }
 
+// FindColumnValues performs a generic column lookup returning multiple matches with role-based access control.
+// Similar to FindColumnValue but returns all matching values instead of just the first.
+// Only users with admin, support, or manager roles can execute this query.
+//
+// Parameters:
+//   - user: User context for authorization check
+//   - query: Search pattern (always uses LIKE with wildcards)
+//   - table: Database table name to query
+//   - selectColumn: Column name to return in results
+//   - whereColumn: Column name to filter on
+//
+// Returns a slice of matching values, ErrForbidden if user lacks permission,
+// or ErrNotFound if no matches are found.
 func (r *JobRepository) FindColumnValues(user *schema.User, query string, table string, selectColumn string, whereColumn string) (results []string, err error) {
+	if user == nil {
+		return nil, fmt.Errorf("user cannot be nil")
+	}
+
 	emptyResult := make([]string, 0)
 	if user.HasAnyRole([]schema.Role{schema.RoleAdmin, schema.RoleSupport, schema.RoleManager}) {
 		rows, err := sq.Select(table+"."+selectColumn).Distinct().From(table).
 			Where(table+"."+whereColumn+" LIKE ?", fmt.Sprint("%", query, "%")).
 			RunWith(r.stmtCache).Query()
 		if err != nil && err != sql.ErrNoRows {
-			return emptyResult, err
+			cclog.Errorf("Error while querying FindColumnValues (table=%s, column=%s): %v", table, selectColumn, err)
+			return emptyResult, fmt.Errorf("failed to find column values: %w", err)
 		} else if err == nil {
+			defer rows.Close()
 			for rows.Next() {
 				var result string
 				err := rows.Scan(&result)
 				if err != nil {
-					rows.Close()
-					cclog.Warnf("Error while scanning rows: %v", err)
-					return emptyResult, err
+					cclog.Warnf("Error while scanning rows in FindColumnValues: %v", err)
+					return emptyResult, fmt.Errorf("failed to scan column value: %w", err)
 				}
 				results = append(results, result)
 			}
@@ -453,6 +617,13 @@ func (r *JobRepository) FindColumnValues(user *schema.User, query string, table
 	}
 }
 
+// Partitions returns a list of distinct cluster partitions for a given cluster.
+// Results are cached with a 1-hour TTL to improve performance.
+//
+// Parameters:
+//   - cluster: Cluster name to query partitions for
+//
+// Returns a slice of partition names or an error if the database query fails.
 func (r *JobRepository) Partitions(cluster string) ([]string, error) {
 	var err error
 	start := time.Now()
@@ -481,8 +652,8 @@ func (r *JobRepository) AllocatedNodes(cluster string) (map[string]map[string]in
 		Where("job.cluster = ?", cluster).
 		RunWith(r.stmtCache).Query()
 	if err != nil {
-		cclog.Error("Error while running query")
-		return nil, err
+		cclog.Errorf("Error while running AllocatedNodes query for cluster=%s: %v", cluster, err)
+		return nil, fmt.Errorf("failed to query allocated nodes for cluster %s: %w", cluster, err)
 	}
 
 	var raw []byte
@@ -492,12 +663,12 @@ func (r *JobRepository) AllocatedNodes(cluster string) (map[string]map[string]in
 		var resources []*schema.Resource
 		var subcluster string
 		if err := rows.Scan(&raw, &subcluster); err != nil {
-			cclog.Warn("Error while scanning rows")
-			return nil, err
+			cclog.Warnf("Error while scanning rows in AllocatedNodes: %v", err)
+			return nil, fmt.Errorf("failed to scan allocated nodes row: %w", err)
 		}
 		if err := json.Unmarshal(raw, &resources); err != nil {
-			cclog.Warn("Error while unmarshaling raw resources json")
-			return nil, err
+			cclog.Warnf("Error while unmarshaling raw resources json in AllocatedNodes: %v", err)
+			return nil, fmt.Errorf("failed to unmarshal resources in AllocatedNodes: %w", err)
 		}
 
 		hosts, ok := subclusters[subcluster]
@@ -515,7 +686,19 @@ func (r *JobRepository) AllocatedNodes(cluster string) (map[string]map[string]in
 	return subclusters, nil
 }
 
-// FIXME: Set duration to requested walltime?
+// StopJobsExceedingWalltimeBy marks running jobs as failed if they exceed their walltime limit.
+// This is typically called periodically to clean up stuck or orphaned jobs.
+//
+// Jobs are marked with:
+//   - monitoring_status: MonitoringStatusArchivingFailed
+//   - duration: 0
+//   - job_state: JobStateFailed
+//
+// Parameters:
+//   - seconds: Grace period beyond walltime before marking as failed
+//
+// Returns an error if the database update fails.
+// Logs the number of jobs marked as failed if any were affected.
 func (r *JobRepository) StopJobsExceedingWalltimeBy(seconds int) error {
 	start := time.Now()
 	currentTime := time.Now().Unix()
@@ -528,14 +711,14 @@ func (r *JobRepository) StopJobsExceedingWalltimeBy(seconds int) error {
 		Where("(? - job.start_time) > (job.walltime + ?)", currentTime, seconds).
 		RunWith(r.DB).Exec()
 	if err != nil {
-		cclog.Warn("Error while stopping jobs exceeding walltime")
-		return err
+		cclog.Warnf("Error while stopping jobs exceeding walltime: %v", err)
+		return fmt.Errorf("failed to stop jobs exceeding walltime: %w", err)
 	}
 
 	rowsAffected, err := res.RowsAffected()
 	if err != nil {
-		cclog.Warn("Error while fetching affected rows after stopping due to exceeded walltime")
-		return err
+		cclog.Warnf("Error while fetching affected rows after stopping due to exceeded walltime: %v", err)
+		return fmt.Errorf("failed to get rows affected count: %w", err)
 	}
 
 	if rowsAffected > 0 {
@@ -545,24 +728,31 @@ func (r *JobRepository) StopJobsExceedingWalltimeBy(seconds int) error {
 	return nil
 }
 
+// FindJobIdsByTag returns all job database IDs associated with a specific tag.
+//
+// Parameters:
+//   - tagID: Database ID of the tag to search for
+//
+// Returns a slice of job IDs or an error if the query fails.
 func (r *JobRepository) FindJobIdsByTag(tagID int64) ([]int64, error) {
 	query := sq.Select("job.id").From("job").
 		Join("jobtag ON jobtag.job_id = job.id").
 		Where(sq.Eq{"jobtag.tag_id": tagID}).Distinct()
 	rows, err := query.RunWith(r.stmtCache).Query()
 	if err != nil {
-		cclog.Error("Error while running query")
-		return nil, err
+		cclog.Errorf("Error while running FindJobIdsByTag query for tagID=%d: %v", tagID, err)
+		return nil, fmt.Errorf("failed to find job IDs by tag %d: %w", tagID, err)
 	}
+	defer rows.Close()
 
 	jobIds := make([]int64, 0, 100)
 
 	for rows.Next() {
 		var jobID int64
 
 		if err := rows.Scan(&jobID); err != nil {
-			rows.Close()
-			cclog.Warn("Error while scanning rows")
-			return nil, err
+			cclog.Warnf("Error while scanning rows in FindJobIdsByTag: %v", err)
+			return nil, fmt.Errorf("failed to scan job ID in FindJobIdsByTag: %w", err)
 		}
 
 		jobIds = append(jobIds, jobID)
@@ -571,7 +761,13 @@ func (r *JobRepository) FindJobIdsByTag(tagID int64) ([]int64, error) {
 	return jobIds, nil
 }
 
-// FIXME: Reconsider filtering short jobs with harcoded threshold
+// FindRunningJobs returns all currently running jobs for a specific cluster.
+// Filters out short-running jobs based on repoConfig.MinRunningJobDuration threshold.
+//
+// Parameters:
+//   - cluster: Cluster name to filter jobs
+//
+// Returns a slice of running job objects or an error if the query fails.
 func (r *JobRepository) FindRunningJobs(cluster string) ([]*schema.Job, error) {
 	query := sq.Select(jobColumns...).From("job").
 		Where("job.cluster = ?", cluster).
@@ -580,8 +776,8 @@ func (r *JobRepository) FindRunningJobs(cluster string) ([]*schema.Job, error) {
 
 	rows, err := query.RunWith(r.stmtCache).Query()
 	if err != nil {
-		cclog.Error("Error while running query")
-		return nil, err
+		cclog.Errorf("Error while running FindRunningJobs query for cluster=%s: %v", cluster, err)
+		return nil, fmt.Errorf("failed to find running jobs for cluster %s: %w", cluster, err)
 	}
 	defer rows.Close()
 
@@ -589,16 +785,22 @@ func (r *JobRepository) FindRunningJobs(cluster string) ([]*schema.Job, error) {
 	for rows.Next() {
 		job, err := scanJob(rows)
 		if err != nil {
-			cclog.Warn("Error while scanning rows")
-			return nil, err
+			cclog.Warnf("Error while scanning rows in FindRunningJobs: %v", err)
+			return nil, fmt.Errorf("failed to scan job in FindRunningJobs: %w", err)
 		}
 		jobs = append(jobs, job)
 	}
 
-	cclog.Infof("Return job count %d", len(jobs))
+	cclog.Debugf("JobRepository.FindRunningJobs(): Return job count %d (cluster: %s)", len(jobs), cluster)
 	return jobs, nil
 }
 
+// UpdateDuration recalculates and updates the duration field for all running jobs.
+// Called periodically to keep job durations current without querying individual jobs.
+//
+// Duration is calculated as: current_time - job.start_time
+//
+// Returns an error if the database update fails.
 func (r *JobRepository) UpdateDuration() error {
 	stmnt := sq.Update("job").
 		Set("duration", sq.Expr("? - job.start_time", time.Now().Unix())).
@@ -606,12 +808,23 @@ func (r *JobRepository) UpdateDuration() error {
 
 	_, err := stmnt.RunWith(r.stmtCache).Exec()
 	if err != nil {
-		return err
+		cclog.Errorf("Error while updating duration for running jobs: %v", err)
+		return fmt.Errorf("failed to update duration for running jobs: %w", err)
 	}
 
 	return nil
 }
 
+// FindJobsBetween returns jobs within a specified time range.
+// If startTimeBegin is 0, returns all jobs before startTimeEnd.
+// Optionally excludes tagged jobs from results.
+//
+// Parameters:
+//   - startTimeBegin: Unix timestamp for range start (use 0 for unbounded start)
+//   - startTimeEnd: Unix timestamp for range end
+//   - omitTagged: If true, exclude jobs with associated tags
+//
+// Returns a slice of jobs or an error if the time range is invalid or query fails.
 func (r *JobRepository) FindJobsBetween(startTimeBegin int64, startTimeEnd int64, omitTagged bool) ([]*schema.Job, error) {
 	var query sq.SelectBuilder
 
@@ -633,8 +846,8 @@ func (r *JobRepository) FindJobsBetween(startTimeBegin int64, startTimeEnd int64
 
 	rows, err := query.RunWith(r.stmtCache).Query()
 	if err != nil {
-		cclog.Error("Error while running query")
-		return nil, err
+		cclog.Errorf("Error while running FindJobsBetween query: %v", err)
+		return nil, fmt.Errorf("failed to find jobs between %d and %d: %w", startTimeBegin, startTimeEnd, err)
 	}
 	defer rows.Close()
 
@@ -642,16 +855,24 @@ func (r *JobRepository) FindJobsBetween(startTimeBegin int64, startTimeEnd int64
 	for rows.Next() {
 		job, err := scanJob(rows)
 		if err != nil {
-			cclog.Warn("Error while scanning rows")
-			return nil, err
+			cclog.Warnf("Error while scanning rows in FindJobsBetween: %v", err)
+			return nil, fmt.Errorf("failed to scan job in FindJobsBetween: %w", err)
 		}
 		jobs = append(jobs, job)
 	}
 
-	cclog.Infof("Return job count %d", len(jobs))
+	cclog.Debugf("JobRepository.FindJobsBetween(): Return job count %d (omitTagged: %v)", len(jobs), omitTagged)
 	return jobs, nil
 }
 
+// UpdateMonitoringStatus updates the monitoring status for a job and invalidates its cache entries.
+// Cache invalidation affects both metadata and energy footprint to ensure consistency.
+//
+// Parameters:
+//   - job: Database ID of the job to update
+//   - monitoringStatus: New monitoring status value (see schema.MonitoringStatus constants)
+//
+// Returns an error if the database update fails.
 func (r *JobRepository) UpdateMonitoringStatus(job int64, monitoringStatus int32) (err error) {
 	// Invalidate cache entries as monitoring status affects job state
 	r.cache.Del(fmt.Sprintf("metadata:%d", job))
@@ -661,18 +882,37 @@ func (r *JobRepository) UpdateMonitoringStatus(job int64, monitoringStatus int32
 		Set("monitoring_status", monitoringStatus).
 		Where("job.id = ?", job)
 
-	_, err = stmt.RunWith(r.stmtCache).Exec()
-	return err
+	if _, err = stmt.RunWith(r.stmtCache).Exec(); err != nil {
+		cclog.Errorf("Error while updating monitoring status for job %d: %v", job, err)
+		return fmt.Errorf("failed to update monitoring status for job %d: %w", job, err)
+	}
+	return nil
 }
 
+// Execute runs a Squirrel UpdateBuilder statement against the database.
+// This is a generic helper for executing pre-built update queries.
+//
+// Parameters:
+//   - stmt: Squirrel UpdateBuilder with prepared update query
+//
+// Returns an error if the execution fails.
 func (r *JobRepository) Execute(stmt sq.UpdateBuilder) error {
 	if _, err := stmt.RunWith(r.stmtCache).Exec(); err != nil {
-		return err
+		cclog.Errorf("Error while executing statement: %v", err)
+		return fmt.Errorf("failed to execute update statement: %w", err)
 	}
 
 	return nil
 }
 
+// MarkArchived adds monitoring status update to an existing UpdateBuilder statement.
+// This is a builder helper used when constructing multi-field update queries.
+//
+// Parameters:
+//   - stmt: Existing UpdateBuilder to modify
+//   - monitoringStatus: Monitoring status value to set
+//
+// Returns the modified UpdateBuilder for method chaining.
 func (r *JobRepository) MarkArchived(
 	stmt sq.UpdateBuilder,
 	monitoringStatus int32,
@@ -680,11 +920,22 @@ func (r *JobRepository) MarkArchived(
 	return stmt.Set("monitoring_status", monitoringStatus)
 }
 
+// UpdateEnergy calculates and updates the energy consumption for a job.
+// This is called for running jobs during intermediate updates or when archiving.
+//
+// Energy calculation formula:
+//   - For "power" metrics: Energy (kWh) = (Power_avg * NumNodes * Duration_hours) / 1000
+//   - For "energy" metrics: Currently not implemented (would need sum statistics)
+//
+// The calculation accounts for:
+//   - Multi-node jobs: Multiplies by NumNodes to get total cluster energy
+//   - Shared jobs: Node average is already based on partial resources, so NumNodes=1
+//   - Unit conversion: Watts * hours / 1000 = kilowatt-hours (kWh)
+//   - Rounding: Results rounded to 2 decimal places
 func (r *JobRepository) UpdateEnergy(
 	stmt sq.UpdateBuilder,
 	jobMeta *schema.Job,
 ) (sq.UpdateBuilder, error) {
-	/* Note: Only Called for Running Jobs during Intermediate Update or on Archiving */
 	sc, err := archive.GetSubCluster(jobMeta.Cluster, jobMeta.SubCluster)
 	if err != nil {
 		cclog.Errorf("cannot get subcluster: %s", err.Error())
@@ -692,25 +943,27 @@ func (r *JobRepository) UpdateEnergy(
 	}
 	energyFootprint := make(map[string]float64)
 
-	// Total Job Energy Outside Loop
+	// Accumulate total energy across all energy-related metrics
 	totalEnergy := 0.0
 	for _, fp := range sc.EnergyFootprint {
-		// Always Init Metric Energy Inside Loop
+		// Calculate energy for this specific metric
 		metricEnergy := 0.0
 		if i, err := archive.MetricIndex(sc.MetricConfig, fp); err == nil {
-			// Note: For DB data, calculate and save as kWh
 			switch sc.MetricConfig[i].Energy {
-			case "energy": // this metric has energy as unit (Joules or Wh)
+			case "energy": // Metric already in energy units (Joules or Wh)
 				cclog.Warnf("Update EnergyFootprint for Job %d and Metric %s on cluster %s: Set to 'energy' in cluster.json: Not implemented, will return 0.0", jobMeta.JobID, jobMeta.Cluster, fp)
-				// FIXME: Needs sum as stats type
-			case "power": // this metric has power as unit (Watt)
-				// Energy: Power (in Watts) * Time (in Seconds)
-				// Unit: (W * (s / 3600)) / 1000 = kWh
-				// Round 2 Digits: round(Energy * 100) / 100
-				// Here: (All-Node Metric Average * Number of Nodes) * (Job Duration in Seconds / 3600) / 1000
-				// Note: Shared Jobs handled correctly since "Node Average" is based on partial resources, while "numNodes" factor is 1
+				// FIXME: Needs sum as stats type to accumulate energy values over time
+			case "power": // Metric in power units (Watts)
+				// Energy (kWh) = Power (W) × Time (h) / 1000
+				// Formula: (avg_power_per_node * num_nodes) * (duration_sec / 3600) / 1000
+				//
+				// Breakdown:
+				//   LoadJobStat(jobMeta, fp, "avg") = average power per node (W)
+				//   jobMeta.NumNodes = number of nodes (1 for shared jobs)
+				//   jobMeta.Duration / 3600.0 = duration in hours
+				//   / 1000.0 = convert Wh to kWh
 				rawEnergy := ((LoadJobStat(jobMeta, fp, "avg") * float64(jobMeta.NumNodes)) * (float64(jobMeta.Duration) / 3600.0)) / 1000.0
-				metricEnergy = math.Round(rawEnergy*100.0) / 100.0
+				metricEnergy = math.Round(rawEnergy*100.0) / 100.0 // Round to 2 decimal places
 			}
 		} else {
 			cclog.Warnf("Error while collecting energy metric %s for job, DB ID '%v', return '0.0'", fp, jobMeta.ID)
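To make the documented formula concrete, a worked example with hypothetical numbers: a job averaging 250 W per node on 4 nodes for 7200 s yields (250 × 4) × (7200 / 3600) / 1000 = 2.0 kWh, rounded to two decimal places. The same calculation in Go:

	// Worked example with hypothetical values, mirroring the formula above.
	avgPowerPerNode := 250.0 // W, what LoadJobStat(jobMeta, fp, "avg") would return
	numNodes := 4.0
	durationSec := 7200.0

	rawEnergy := (avgPowerPerNode * numNodes) * (durationSec / 3600.0) / 1000.0
	energy := math.Round(rawEnergy*100.0) / 100.0 // 2.0 kWh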
@@ -718,8 +971,6 @@ func (r *JobRepository) UpdateEnergy(
 
 		energyFootprint[fp] = metricEnergy
 		totalEnergy += metricEnergy
-
-		// cclog.Infof("Metric %s Average %f -> %f kWh | Job %d Total -> %f kWh", fp, LoadJobStat(jobMeta, fp, "avg"), energy, jobMeta.JobID, totalEnergy)
 	}
 
 	var rawFootprint []byte
@@ -731,11 +982,19 @@ func (r *JobRepository) UpdateEnergy(
 	return stmt.Set("energy_footprint", string(rawFootprint)).Set("energy", (math.Round(totalEnergy*100.0) / 100.0)), nil
 }
 
+// UpdateFootprint calculates and updates the performance footprint for a job.
+// This is called for running jobs during intermediate updates or when archiving.
+//
+// A footprint is a summary statistic (avg/min/max) for each monitored metric.
+// The specific statistic type is defined in the cluster config's Footprint field.
+// Results are stored as JSON with keys like "metric_avg", "metric_max", etc.
+//
+// Example: For a "cpu_load" metric with Footprint="avg", this stores
+// the average CPU load across all nodes as "cpu_load_avg": 85.3
 func (r *JobRepository) UpdateFootprint(
 	stmt sq.UpdateBuilder,
 	jobMeta *schema.Job,
 ) (sq.UpdateBuilder, error) {
-	/* Note: Only Called for Running Jobs during Intermediate Update or on Archiving */
 	sc, err := archive.GetSubCluster(jobMeta.Cluster, jobMeta.SubCluster)
 	if err != nil {
 		cclog.Errorf("cannot get subcluster: %s", err.Error())
@@ -743,7 +1002,10 @@ func (r *JobRepository) UpdateFootprint(
 	}
 	footprint := make(map[string]float64)
 
+	// Build footprint map with metric_stattype as keys
 	for _, fp := range sc.Footprint {
+		// Determine which statistic to use: avg, min, or max
+		// First check global metric config, then cluster-specific config
 		var statType string
 		for _, gm := range archive.GlobalMetricList {
 			if gm.Name == fp {
@@ -751,15 +1013,18 @@ func (r *JobRepository) UpdateFootprint(
 			}
 		}
 
+		// Validate statistic type
 		if statType != "avg" && statType != "min" && statType != "max" {
 			cclog.Warnf("unknown statType for footprint update: %s", statType)
 			return stmt, fmt.Errorf("unknown statType for footprint update: %s", statType)
 		}
 
+		// Override with cluster-specific config if available
 		if i, err := archive.MetricIndex(sc.MetricConfig, fp); err != nil {
 			statType = sc.MetricConfig[i].Footprint
 		}
 
+		// Store as "metric_stattype": value (e.g., "cpu_load_avg": 85.3)
 		name := fmt.Sprintf("%s_%s", fp, statType)
 		footprint[name] = LoadJobStat(jobMeta, fp, statType)
 	}
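Following the key scheme documented above ("metric_stattype"), a job's footprint column might end up holding content like the following; metric names and values are hypothetical:

	// Hypothetical footprint content after UpdateFootprint runs.
	footprint := map[string]float64{
		"cpu_load_avg":  85.3,  // "avg" stat per the footprint config
		"mem_used_max":  42.7,  // "max" stat
		"flops_any_avg": 118.6,
	}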
@@ -772,3 +1037,84 @@ func (r *JobRepository) UpdateFootprint(
 
 	return stmt.Set("footprint", string(rawFootprint)), nil
 }
+
+// GetUsedNodes returns a map of cluster names to sorted lists of unique hostnames
+// that are currently in use by jobs that started before the given timestamp and
+// are still in running state.
+//
+// The timestamp parameter (ts) is compared against job.start_time to find
+// relevant jobs. Returns an error if the database query fails or row iteration
+// encounters errors. Individual row parsing errors are logged but don't fail
+// the entire operation.
+func (r *JobRepository) GetUsedNodes(ts int64) (map[string][]string, error) {
+	// Note: Query expects index on (job_state, start_time) for optimal performance
+	q := sq.Select("job.cluster", "job.resources").From("job").
+		Where("job.start_time < ?", ts).
+		Where(sq.Eq{"job.job_state": "running"})
+
+	rows, err := q.RunWith(r.stmtCache).Query()
+	if err != nil {
+		queryString, queryVars, _ := q.ToSql()
+		return nil, fmt.Errorf("query failed [%s] %v: %w", queryString, queryVars, err)
+	}
+	defer rows.Close()
+
+	// Use a map of sets for efficient deduplication
+	nodeSet := make(map[string]map[string]struct{})
+
+	var (
+		cluster      string
+		rawResources []byte
+		resources    []*schema.Resource
+		skippedRows  int
+	)
+
+	for rows.Next() {
+		if err := rows.Scan(&cluster, &rawResources); err != nil {
+			cclog.Warnf("Error scanning job row in GetUsedNodes: %v", err)
+			skippedRows++
+			continue
+		}
+
+		resources = resources[:0] // Clear slice, keep capacity
+		if err := json.Unmarshal(rawResources, &resources); err != nil {
+			cclog.Warnf("Error unmarshaling resources for cluster %s: %v", cluster, err)
+			skippedRows++
+			continue
+		}
+
+		if len(resources) == 0 {
+			cclog.Debugf("Job in cluster %s has no resources", cluster)
+			continue
+		}
+
+		if _, ok := nodeSet[cluster]; !ok {
+			nodeSet[cluster] = make(map[string]struct{})
+		}
+
+		for _, res := range resources {
+			nodeSet[cluster][res.Hostname] = struct{}{}
+		}
+	}
+
+	if err := rows.Err(); err != nil {
+		return nil, fmt.Errorf("error iterating rows: %w", err)
+	}
+
+	if skippedRows > 0 {
+		cclog.Warnf("GetUsedNodes: Skipped %d rows due to parsing errors", skippedRows)
+	}
+
+	// Convert sets to sorted slices
+	nodeList := make(map[string][]string, len(nodeSet))
+	for cluster, nodes := range nodeSet {
+		list := make([]string, 0, len(nodes))
+		for node := range nodes {
+			list = append(list, node)
+		}
+		sort.Strings(list)
+		nodeList[cluster] = list
+	}
+
+	return nodeList, nil
+}
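A usage sketch for the new GetUsedNodes helper, assuming the caller wants all nodes occupied by currently running jobs and that repo is the GetJobRepository() handle:

	// Sketch only: caller-side handling is illustrative.
	nodesByCluster, err := repo.GetUsedNodes(time.Now().Unix())
	if err != nil {
		return err
	}
	for cluster, nodes := range nodesByCluster {
		cclog.Infof("%s: %d nodes in use", cluster, len(nodes)) // sorted, deduplicated hostnames
	}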
@@ -9,8 +9,8 @@ import (
 	"encoding/json"
 	"fmt"
 
-	cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
-	"github.com/ClusterCockpit/cc-lib/schema"
+	cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger"
+	"github.com/ClusterCockpit/cc-lib/v2/schema"
 	sq "github.com/Masterminds/squirrel"
 )
Some files were not shown because too many files have changed in this diff.