This commit is contained in:
Christoph Kluge
2026-01-26 15:53:02 +01:00
4 changed files with 178 additions and 37 deletions

View File

@@ -1,6 +1,6 @@
TARGET = ./cc-backend TARGET = ./cc-backend
FRONTEND = ./web/frontend FRONTEND = ./web/frontend
VERSION = 1.4.4 VERSION = 1.5.0
GIT_HASH := $(shell git rev-parse --short HEAD || echo 'development') GIT_HASH := $(shell git rev-parse --short HEAD || echo 'development')
CURRENT_TIME = $(shell date +"%Y-%m-%d:T%H:%M:%S") CURRENT_TIME = $(shell date +"%Y-%m-%d:T%H:%M:%S")
LD_FLAGS = '-s -X main.date=${CURRENT_TIME} -X main.version=${VERSION} -X main.commit=${GIT_HASH}' LD_FLAGS = '-s -X main.date=${CURRENT_TIME} -X main.version=${VERSION} -X main.commit=${GIT_HASH}'
@@ -46,7 +46,7 @@ $(TARGET): $(SVELTE_TARGETS)
frontend: frontend:
$(info ===> BUILD frontend) $(info ===> BUILD frontend)
cd web/frontend && npm install && npm run build cd web/frontend && npm ci && npm run build
swagger: swagger:
$(info ===> GENERATE swagger) $(info ===> GENERATE swagger)

View File

@@ -1,42 +1,183 @@
# `cc-backend` version 1.4.4 # `cc-backend` version 1.5.0
Supports job archive version 2 and database version 8. Supports job archive version 3 and database version 10.
This is a bug fix release of `cc-backend`, the API backend and frontend This is a feature release of `cc-backend`, the API backend and frontend
implementation of ClusterCockpit. implementation of ClusterCockpit.
For release-specific notes visit the [ClusterCockpit Documentation](https://clustercockpit.org/docs/release/). For release-specific notes visit the [ClusterCockpit Documentation](https://clustercockpit.org/docs/release/).
## Breaking changes ## Breaking changes
The option `apiAllowedIPs` is now a required configuration attribute in ### Configuration changes
`config.json`. This option restricts access to the admin API.
To retain the previous behavior that the API is per default accessible from - **JSON attribute naming**: All JSON configuration attributes now use `kebab-case`
everywhere set: style consistently (e.g., `api-allowed-ips` instead of `apiAllowedIPs`).
Update your `config.json` accordingly.
- **Removed `disable-archive` option**: This obsolete configuration option has been removed.
- **Removed `clusters` config section**: The separate clusters configuration section
has been removed. Cluster information is now derived from the job archive.
- **`api-allowed-ips` (formerly `apiAllowedIPs`) is now optional**: If not specified, defaults to secure settings.
### Architecture changes
- **MetricStore moved**: The `metricstore` package has been moved from `internal/`
to `pkg/` as it is now part of the public API.
- **MySQL/MariaDB support removed**: Only SQLite is now supported as the database backend.
- **Archive to Cleanup renaming**: Archive-related functions have been refactored
and renamed to "Cleanup" for clarity.
### Dependency changes
- **cc-lib v2**: Switched to cc-lib version 2 with updated APIs
- **cclib NATS client**: Now using the cclib NATS client implementation
- Removed obsolete `util.Float` usage from cclib
## Major new features
### NATS API Integration
- **Real-time job events**: Subscribe to job start/stop events via NATS
- **Node state updates**: Receive real-time node state changes via NATS
- **Configurable subjects**: NATS API subjects are now configurable via `api-subjects`
- **Deadlock fixes**: Improved NATS client stability and graceful shutdown
### Public Dashboard
- **Public-facing interface**: New public dashboard route for external users
- **DoubleMetricPlot component**: New visualization component for comparing metrics
- **Improved layout**: Reviewed and optimized dashboard layouts for better readability
### Enhanced Node Management
- **Node state tracking**: New node table in database with timestamp tracking
- **Node state filtering**: Filter jobs by node state in systems view
- **Node metrics improvements**: Better handling of node-level metrics and data
- **Node list enhancements**: Improved paging, filtering, and continuous scroll support
### MetricStore Improvements
- **Memory tracking worker**: New worker for CCMS memory usage tracking
- **Dynamic retention**: Support for cluster/subcluster-specific retention times
- **Improved compression**: Transparent compression for job archive imports
- **Parallel processing**: Parallelized Iter function in all archive backends
### Job Tagging System
- **Job tagger option**: Enable automatic job tagging via configuration flag
- **Application detection**: Automatic detection of applications (MATLAB, GROMACS, etc.)
- **Job classification**: Automatic detection of pathological jobs
- **omitTagged flag**: Option to exclude tagged jobs from retention/cleanup operations
### Archive Backends
- **S3 backend**: Full support for S3-compatible object storage
- **SQLite backend**: Full support for SQLite backend using blobs
- **Performance improvements**: Fixed performance bugs in archive backends
- **Better error handling**: Improved error messages and fallback handling
## New features and improvements
### Frontend
- **Loading indicators**: Added loading indicators to status detail and job lists
- **Job info layout**: Reviewed and improved job info row layout
- **Metric selection**: Enhanced metric selection with drag-and-drop fixes
- **Filter presets**: Moved list filter presets to the URL for easy sharing
- **Job comparison**: Improved job comparison views and plots
- **Subcluster reactivity**: Job list now reacts to subcluster filter changes
- **Frontend dependencies**: Bumped frontend dependencies to latest versions
- **Svelte 5 compatibility**: Fixed Svelte state warnings and compatibility issues
### Backend
- **Progress bars**: Import function now shows progress during long operations
- **Better logging**: Improved logging with appropriate log levels throughout
- **Graceful shutdown**: Fixed shutdown timeout bugs and hanging issues
- **Configuration defaults**: Sensible defaults for most configuration options
- **Documentation**: Extensive documentation improvements across packages
### API improvements
- **Role-based metric visibility**: Metrics can now have role-based access control
- **Job exclusivity filter**: New filter for exclusive vs. shared jobs
- **Improved error messages**: Better error messages and documentation in REST API
- **GraphQL enhancements**: Improved GraphQL queries and resolvers
### Performance
- **Database indices**: Optimized SQLite indices for better query performance
- **Job cache**: Introduced caching table for faster job inserts
- **Parallel imports**: Archive imports now run in parallel where possible
- **External tool integration**: Optimized use of external tools (fd) for better performance
### Developer experience
- **AI agent guidelines**: Added documentation for AI coding agents (AGENTS.md, CLAUDE.md)
- **Example API payloads**: Added example JSON API payloads for testing
- **Unit tests**: Added more unit tests for NATS API and other components
- **Test improvements**: Better test coverage and test data
## Bug fixes
- Fixed nodelist paging issues
- Fixed metric select drag and drop functionality
- Fixed render race conditions in nodeList
- Fixed tag count grouping including type
- Fixed wrong metricstore schema (missing comma)
- Fixed configuration issues causing shutdown hangs
- Fixed deadlock when NATS is not configured
- Fixed archive backend performance bugs
- Fixed continuous scroll buildup on refresh
- Improved footprint calculation logic
- Fixed polar plot data query decoupling
- Fixed missing resolution parameter handling
- Fixed node table initialization fallback
## Configuration changes
### New configuration options
```json ```json
"apiAllowedIPs": [ {
"*" "main": {
] "enable-job-taggers": true,
"resampling": {
"minimum-points": 600,
"trigger": 180,
"resolutions": [240, 60]
},
"api-subjects": {
"subject-job-event": "cc.job.event",
"subject-node-state": "cc.node.state"
}
},
"nats": {
"address": "nats://0.0.0.0:4222",
"username": "root",
"password": "root"
},
"cron": {
"commit-job-worker": "1m",
"duration-worker": "5m",
"footprint-worker": "10m"
},
"metric-store": {
"cleanup": {
"mode": "archive",
"interval": "48h",
"directory": "./var/archive"
}
}
}
``` ```
## Breaking changes for minor release 1.4.x ## Migration notes
- You need to perform a database migration. Depending on your database size the - Review and update your `config.json` to use kebab-case attribute names
migration might require several hours! - If using NATS, configure the new `nats` and `api-subjects` sections
- You need to adapt the `cluster.json` configuration files in the job-archive, - If using S3 archive backend, configure the new `archive` section options
add new required attributes to the metric list and after that edit - Test the new public dashboard at `/public` route
`./job-archive/version.txt` to version 2. Only metrics that have the footprint - Review cron worker configuration if you need different frequencies
attribute set can be filtered and show up in the footprint UI and polar plot.
- Continuous scrolling is default now in all job lists. You can change this back
to paging globally, also every user can configure to use paging or continuous
scrolling individually.
- Tags have a scope now. Existing tags will get global scope in the database
migration.
## New features
- Enable to delete tags from the web interface
## Known issues ## Known issues

View File

@@ -136,7 +136,7 @@ func (s *Server) init() error {
userapi := s.router.PathPrefix("/userapi").Subrouter() userapi := s.router.PathPrefix("/userapi").Subrouter()
configapi := s.router.PathPrefix("/config").Subrouter() configapi := s.router.PathPrefix("/config").Subrouter()
frontendapi := s.router.PathPrefix("/frontend").Subrouter() frontendapi := s.router.PathPrefix("/frontend").Subrouter()
metricstoreapi := s.router.PathPrefix("/metricstore").Subrouter() metricstoreapi := s.router.PathPrefix("/api").Subrouter()
if !config.Keys.DisableAuthentication { if !config.Keys.DisableAuthentication {
// Create login failure handler (used by both /login and /jwt-login) // Create login failure handler (used by both /login and /jwt-login)

View File

@@ -123,15 +123,15 @@ func (api *RestAPI) MountUserAPIRoutes(r *mux.Router) {
func (api *RestAPI) MountMetricStoreAPIRoutes(r *mux.Router) { func (api *RestAPI) MountMetricStoreAPIRoutes(r *mux.Router) {
// REST API Uses TokenAuth // REST API Uses TokenAuth
// Note: StrictSlash handles trailing slash variations automatically // Note: StrictSlash handles trailing slash variations automatically
r.HandleFunc("/api/free", freeMetrics).Methods(http.MethodPost) r.HandleFunc("/free", freeMetrics).Methods(http.MethodPost)
r.HandleFunc("/api/write", writeMetrics).Methods(http.MethodPost) r.HandleFunc("/write", writeMetrics).Methods(http.MethodPost)
r.HandleFunc("/api/debug", debugMetrics).Methods(http.MethodGet) r.HandleFunc("/debug", debugMetrics).Methods(http.MethodGet)
r.HandleFunc("/api/healthcheck", metricsHealth).Methods(http.MethodGet) r.HandleFunc("/healthcheck", metricsHealth).Methods(http.MethodGet)
// Same endpoints but with trailing slash // Same endpoints but with trailing slash
r.HandleFunc("/api/free/", freeMetrics).Methods(http.MethodPost) r.HandleFunc("/free/", freeMetrics).Methods(http.MethodPost)
r.HandleFunc("/api/write/", writeMetrics).Methods(http.MethodPost) r.HandleFunc("/write/", writeMetrics).Methods(http.MethodPost)
r.HandleFunc("/api/debug/", debugMetrics).Methods(http.MethodGet) r.HandleFunc("/debug/", debugMetrics).Methods(http.MethodGet)
r.HandleFunc("/api/healthcheck/", metricsHealth).Methods(http.MethodGet) r.HandleFunc("/healthcheck/", metricsHealth).Methods(http.MethodGet)
} }
// MountConfigAPIRoutes registers configuration and user management endpoints. // MountConfigAPIRoutes registers configuration and user management endpoints.