mirror of
https://github.com/ClusterCockpit/cc-backend
synced 2026-03-20 06:47:30 +01:00
Merge branch 'dev' into status_dashboard
This commit is contained in:
15
.github/dependabot.yml
vendored
Normal file
15
.github/dependabot.yml
vendored
Normal file
@@ -0,0 +1,15 @@
|
||||
# To get started with Dependabot version updates, you'll need to specify which
|
||||
# package ecosystems to update and where the package manifests are located.
|
||||
# Please see the documentation for all configuration options:
|
||||
# https://docs.github.com/code-security/dependabot/dependabot-version-updates/configuration-options-for-the-dependabot.yml-file
|
||||
|
||||
version: 2
|
||||
updates:
|
||||
- package-ecosystem: "gomod"
|
||||
directory: "/"
|
||||
schedule:
|
||||
interval: "weekly"
|
||||
- package-ecosystem: "npm"
|
||||
directory: "/web/frontend"
|
||||
schedule:
|
||||
interval: "weekly"
|
||||
2
.gitignore
vendored
2
.gitignore
vendored
@@ -27,3 +27,5 @@ test_ccms_write_api.sh
|
||||
/.vscode/*
|
||||
dist/
|
||||
*.db
|
||||
.idea
|
||||
tools/archive-migration/archive-migration
|
||||
|
||||
14
Makefile
14
Makefile
@@ -1,6 +1,4 @@
|
||||
TARGET = ./cc-backend
|
||||
VAR = ./var
|
||||
CFG = config.json .env
|
||||
FRONTEND = ./web/frontend
|
||||
VERSION = 1.4.4
|
||||
GIT_HASH := $(shell git rev-parse --short HEAD || echo 'development')
|
||||
@@ -42,7 +40,7 @@ SVELTE_SRC = $(wildcard $(FRONTEND)/src/*.svelte) \
|
||||
|
||||
.NOTPARALLEL:
|
||||
|
||||
$(TARGET): $(VAR) $(CFG) $(SVELTE_TARGETS)
|
||||
$(TARGET): $(SVELTE_TARGETS)
|
||||
$(info ===> BUILD cc-backend)
|
||||
@go build -ldflags=${LD_FLAGS} ./cmd/cc-backend
|
||||
|
||||
@@ -68,7 +66,7 @@ distclean:
|
||||
@$(MAKE) clean
|
||||
$(info ===> DISTCLEAN)
|
||||
@rm -rf $(FRONTEND)/node_modules
|
||||
@rm -rf $(VAR)
|
||||
@rm -rf ./var
|
||||
|
||||
test:
|
||||
$(info ===> TESTING)
|
||||
@@ -84,14 +82,6 @@ tags:
|
||||
$(VAR):
|
||||
@mkdir -p $(VAR)
|
||||
|
||||
config.json:
|
||||
$(info ===> Initialize config.json file)
|
||||
@cp configs/config.json config.json
|
||||
|
||||
.env:
|
||||
$(info ===> Initialize .env file)
|
||||
@cp configs/env-template.txt .env
|
||||
|
||||
$(SVELTE_TARGETS): $(SVELTE_SRC)
|
||||
$(info ===> BUILD frontend)
|
||||
cd web/frontend && npm install && npm run build
|
||||
|
||||
@@ -1,5 +1,8 @@
|
||||
# NOTE
|
||||
|
||||
While we do our best to keep the master branch in a usable state, there is no guarantee the master branch works.
|
||||
Please do not use it for production!
|
||||
|
||||
Please have a look at the [Release
|
||||
Notes](https://github.com/ClusterCockpit/cc-backend/blob/master/ReleaseNotes.md)
|
||||
for breaking changes!
|
||||
|
||||
@@ -2,6 +2,9 @@
|
||||
// All rights reserved. This file is part of cc-backend.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// Package main provides the entry point for the ClusterCockpit backend server.
|
||||
// This file defines all command-line flags and their default values.
|
||||
package main
|
||||
|
||||
import "flag"
|
||||
|
||||
@@ -2,6 +2,10 @@
|
||||
// All rights reserved. This file is part of cc-backend.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// Package main provides the entry point for the ClusterCockpit backend server.
|
||||
// This file contains bootstrap logic for initializing the environment,
|
||||
// creating default configuration files, and setting up the database.
|
||||
package main
|
||||
|
||||
import (
|
||||
@@ -24,18 +28,10 @@ SESSION_KEY="67d829bf61dc5f87a73fd814e2c9f629"
|
||||
|
||||
const configString = `
|
||||
{
|
||||
"main": {
|
||||
"addr": "127.0.0.1:8080",
|
||||
"archive": {
|
||||
"kind": "file",
|
||||
"path": "./var/job-archive"
|
||||
},
|
||||
"jwts": {
|
||||
"max-age": "2000h"
|
||||
},
|
||||
"apiAllowedIPs": [
|
||||
"*"
|
||||
],
|
||||
"enable-resampling": {
|
||||
"short-running-jobs-duration": 300,
|
||||
"resampling": {
|
||||
"trigger": 30,
|
||||
"resolutions": [
|
||||
600,
|
||||
@@ -44,30 +40,49 @@ const configString = `
|
||||
60
|
||||
]
|
||||
},
|
||||
"clusters": [
|
||||
{
|
||||
"name": "name",
|
||||
"metricDataRepository": {
|
||||
"kind": "cc-metric-store",
|
||||
"url": "http://localhost:8082",
|
||||
"token": ""
|
||||
},
|
||||
"filterRanges": {
|
||||
"numNodes": {
|
||||
"from": 1,
|
||||
"to": 64
|
||||
},
|
||||
"duration": {
|
||||
"from": 0,
|
||||
"to": 86400
|
||||
},
|
||||
"startTime": {
|
||||
"from": "2023-01-01T00:00:00Z",
|
||||
"to": null
|
||||
}
|
||||
}
|
||||
}
|
||||
]
|
||||
"apiAllowedIPs": [
|
||||
"*"
|
||||
],
|
||||
"emission-constant": 317
|
||||
},
|
||||
"cron": {
|
||||
"commit-job-worker": "2m",
|
||||
"duration-worker": "5m",
|
||||
"footprint-worker": "10m"
|
||||
},
|
||||
"archive": {
|
||||
"kind": "file",
|
||||
"path": "./var/job-archive"
|
||||
},
|
||||
"auth": {
|
||||
"jwts": {
|
||||
"max-age": "2000h"
|
||||
}
|
||||
},
|
||||
"clusters": [
|
||||
{
|
||||
"name": "name",
|
||||
"metricDataRepository": {
|
||||
"kind": "cc-metric-store",
|
||||
"url": "http://localhost:8082",
|
||||
"token": ""
|
||||
},
|
||||
"filterRanges": {
|
||||
"numNodes": {
|
||||
"from": 1,
|
||||
"to": 64
|
||||
},
|
||||
"duration": {
|
||||
"from": 0,
|
||||
"to": 86400
|
||||
},
|
||||
"startTime": {
|
||||
"from": "2023-01-01T00:00:00Z",
|
||||
"to": null
|
||||
}
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
`
|
||||
|
||||
|
||||
@@ -2,9 +2,14 @@
|
||||
// All rights reserved. This file is part of cc-backend.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// Package main provides the entry point for the ClusterCockpit backend server.
|
||||
// It orchestrates initialization of all subsystems including configuration,
|
||||
// database, authentication, and the HTTP server.
|
||||
package main
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"os"
|
||||
@@ -13,6 +18,7 @@ import (
|
||||
"strings"
|
||||
"sync"
|
||||
"syscall"
|
||||
"time"
|
||||
|
||||
"github.com/ClusterCockpit/cc-backend/internal/archiver"
|
||||
"github.com/ClusterCockpit/cc-backend/internal/auth"
|
||||
@@ -46,90 +52,108 @@ const logoString = `
|
||||
|_|
|
||||
`
|
||||
|
||||
// Environment variable names
|
||||
const (
|
||||
envGOGC = "GOGC"
|
||||
)
|
||||
|
||||
// Default configurations
|
||||
const (
|
||||
defaultArchiveConfig = `{"kind":"file","path":"./var/job-archive"}`
|
||||
)
|
||||
|
||||
var (
|
||||
date string
|
||||
commit string
|
||||
version string
|
||||
)
|
||||
|
||||
func main() {
|
||||
cliInit()
|
||||
func printVersion() {
|
||||
fmt.Print(logoString)
|
||||
fmt.Printf("Version:\t%s\n", version)
|
||||
fmt.Printf("Git hash:\t%s\n", commit)
|
||||
fmt.Printf("Build time:\t%s\n", date)
|
||||
fmt.Printf("SQL db version:\t%d\n", repository.Version)
|
||||
fmt.Printf("Job archive version:\t%d\n", archive.Version)
|
||||
}
|
||||
|
||||
if flagVersion {
|
||||
fmt.Print(logoString)
|
||||
fmt.Printf("Version:\t%s\n", version)
|
||||
fmt.Printf("Git hash:\t%s\n", commit)
|
||||
fmt.Printf("Build time:\t%s\n", date)
|
||||
fmt.Printf("SQL db version:\t%d\n", repository.Version)
|
||||
fmt.Printf("Job archive version:\t%d\n", archive.Version)
|
||||
os.Exit(0)
|
||||
func initGops() error {
|
||||
if !flagGops {
|
||||
return nil
|
||||
}
|
||||
|
||||
cclog.Init(flagLogLevel, flagLogDateTime)
|
||||
|
||||
// If init flag set, run tasks here before any file dependencies cause errors
|
||||
if flagInit {
|
||||
initEnv()
|
||||
cclog.Exit("Successfully setup environment!\n" +
|
||||
"Please review config.json and .env and adjust it to your needs.\n" +
|
||||
"Add your job-archive at ./var/job-archive.")
|
||||
if err := agent.Listen(agent.Options{}); err != nil {
|
||||
return fmt.Errorf("starting gops agent: %w", err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// See https://github.com/google/gops (Runtime overhead is almost zero)
|
||||
if flagGops {
|
||||
if err := agent.Listen(agent.Options{}); err != nil {
|
||||
cclog.Abortf("Could not start gops agent with 'gops/agent.Listen(agent.Options{})'. Application startup failed, exited.\nError: %s\n", err.Error())
|
||||
}
|
||||
func loadEnvironment() error {
|
||||
if err := godotenv.Load(); err != nil {
|
||||
return fmt.Errorf("loading .env file: %w", err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
err := godotenv.Load()
|
||||
if err != nil {
|
||||
cclog.Abortf("Could not parse existing .env file at location './.env'. Application startup failed, exited.\nError: %s\n", err.Error())
|
||||
}
|
||||
|
||||
// Initialize sub-modules and handle command line flags.
|
||||
// The order here is important!
|
||||
func initConfiguration() error {
|
||||
ccconf.Init(flagConfigFile)
|
||||
|
||||
// Load and check main configuration
|
||||
if cfg := ccconf.GetPackageConfig("main"); cfg != nil {
|
||||
if clustercfg := ccconf.GetPackageConfig("clusters"); clustercfg != nil {
|
||||
config.Init(cfg, clustercfg)
|
||||
} else {
|
||||
cclog.Abort("Cluster configuration must be present")
|
||||
}
|
||||
} else {
|
||||
cclog.Abort("Main configuration must be present")
|
||||
cfg := ccconf.GetPackageConfig("main")
|
||||
if cfg == nil {
|
||||
return fmt.Errorf("main configuration must be present")
|
||||
}
|
||||
|
||||
clustercfg := ccconf.GetPackageConfig("clusters")
|
||||
if clustercfg == nil {
|
||||
return fmt.Errorf("cluster configuration must be present")
|
||||
}
|
||||
|
||||
config.Init(cfg, clustercfg)
|
||||
return nil
|
||||
}
|
||||
|
||||
func initDatabase() error {
|
||||
repository.Connect(config.Keys.DBDriver, config.Keys.DB)
|
||||
return nil
|
||||
}
|
||||
|
||||
func handleDatabaseCommands() error {
|
||||
if flagMigrateDB {
|
||||
err := repository.MigrateDB(config.Keys.DBDriver, config.Keys.DB)
|
||||
if err != nil {
|
||||
cclog.Abortf("MigrateDB Failed: Could not migrate '%s' database at location '%s' to version %d.\nError: %s\n", config.Keys.DBDriver, config.Keys.DB, repository.Version, err.Error())
|
||||
return fmt.Errorf("migrating database to version %d: %w", repository.Version, err)
|
||||
}
|
||||
cclog.Exitf("MigrateDB Success: Migrated '%s' database at location '%s' to version %d.\n", config.Keys.DBDriver, config.Keys.DB, repository.Version)
|
||||
cclog.Exitf("MigrateDB Success: Migrated '%s' database at location '%s' to version %d.\n",
|
||||
config.Keys.DBDriver, config.Keys.DB, repository.Version)
|
||||
}
|
||||
|
||||
if flagRevertDB {
|
||||
err := repository.RevertDB(config.Keys.DBDriver, config.Keys.DB)
|
||||
if err != nil {
|
||||
cclog.Abortf("RevertDB Failed: Could not revert '%s' database at location '%s' to version %d.\nError: %s\n", config.Keys.DBDriver, config.Keys.DB, (repository.Version - 1), err.Error())
|
||||
return fmt.Errorf("reverting database to version %d: %w", repository.Version-1, err)
|
||||
}
|
||||
cclog.Exitf("RevertDB Success: Reverted '%s' database at location '%s' to version %d.\n", config.Keys.DBDriver, config.Keys.DB, (repository.Version - 1))
|
||||
cclog.Exitf("RevertDB Success: Reverted '%s' database at location '%s' to version %d.\n",
|
||||
config.Keys.DBDriver, config.Keys.DB, repository.Version-1)
|
||||
}
|
||||
|
||||
if flagForceDB {
|
||||
err := repository.ForceDB(config.Keys.DBDriver, config.Keys.DB)
|
||||
if err != nil {
|
||||
cclog.Abortf("ForceDB Failed: Could not force '%s' database at location '%s' to version %d.\nError: %s\n", config.Keys.DBDriver, config.Keys.DB, repository.Version, err.Error())
|
||||
return fmt.Errorf("forcing database to version %d: %w", repository.Version, err)
|
||||
}
|
||||
cclog.Exitf("ForceDB Success: Forced '%s' database at location '%s' to version %d.\n", config.Keys.DBDriver, config.Keys.DB, repository.Version)
|
||||
cclog.Exitf("ForceDB Success: Forced '%s' database at location '%s' to version %d.\n",
|
||||
config.Keys.DBDriver, config.Keys.DB, repository.Version)
|
||||
}
|
||||
|
||||
repository.Connect(config.Keys.DBDriver, config.Keys.DB)
|
||||
return nil
|
||||
}
|
||||
|
||||
func handleUserCommands() error {
|
||||
if config.Keys.DisableAuthentication && (flagNewUser != "" || flagDelUser != "") {
|
||||
return fmt.Errorf("--add-user and --del-user can only be used if authentication is enabled")
|
||||
}
|
||||
|
||||
if !config.Keys.DisableAuthentication {
|
||||
|
||||
if cfg := ccconf.GetPackageConfig("auth"); cfg != nil {
|
||||
auth.Init(&cfg)
|
||||
} else {
|
||||
@@ -137,157 +161,318 @@ func main() {
|
||||
auth.Init(nil)
|
||||
}
|
||||
|
||||
if flagNewUser != "" {
|
||||
parts := strings.SplitN(flagNewUser, ":", 3)
|
||||
if len(parts) != 3 || len(parts[0]) == 0 {
|
||||
cclog.Abortf("Add User: Could not parse supplied argument format: No changes.\n"+
|
||||
"Want: <username>:[admin,support,manager,api,user]:<password>\n"+
|
||||
"Have: %s\n", flagNewUser)
|
||||
}
|
||||
// Check for default security keys
|
||||
checkDefaultSecurityKeys()
|
||||
|
||||
ur := repository.GetUserRepository()
|
||||
if err := ur.AddUser(&schema.User{
|
||||
Username: parts[0], Projects: make([]string, 0), Password: parts[2], Roles: strings.Split(parts[1], ","),
|
||||
}); err != nil {
|
||||
cclog.Abortf("Add User: Could not add new user authentication for '%s' and roles '%s'.\nError: %s\n", parts[0], parts[1], err.Error())
|
||||
} else {
|
||||
cclog.Printf("Add User: Added new user '%s' with roles '%s'.\n", parts[0], parts[1])
|
||||
if flagNewUser != "" {
|
||||
if err := addUser(flagNewUser); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
if flagDelUser != "" {
|
||||
ur := repository.GetUserRepository()
|
||||
if err := ur.DelUser(flagDelUser); err != nil {
|
||||
cclog.Abortf("Delete User: Could not delete user '%s' from DB.\nError: %s\n", flagDelUser, err.Error())
|
||||
} else {
|
||||
cclog.Printf("Delete User: Deleted user '%s' from DB.\n", flagDelUser)
|
||||
if err := delUser(flagDelUser); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
authHandle := auth.GetAuthInstance()
|
||||
|
||||
if flagSyncLDAP {
|
||||
if authHandle.LdapAuth == nil {
|
||||
cclog.Abort("Sync LDAP: LDAP authentication is not configured, could not synchronize. No changes, exited.")
|
||||
if err := syncLDAP(authHandle); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := authHandle.LdapAuth.Sync(); err != nil {
|
||||
cclog.Abortf("Sync LDAP: Could not synchronize, failed with error.\nError: %s\n", err.Error())
|
||||
}
|
||||
cclog.Print("Sync LDAP: LDAP synchronization successfull.")
|
||||
}
|
||||
|
||||
if flagGenJWT != "" {
|
||||
ur := repository.GetUserRepository()
|
||||
user, err := ur.GetUser(flagGenJWT)
|
||||
if err != nil {
|
||||
cclog.Abortf("JWT: Could not get supplied user '%s' from DB. No changes, exited.\nError: %s\n", flagGenJWT, err.Error())
|
||||
if err := generateJWT(authHandle, flagGenJWT); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if !user.HasRole(schema.RoleApi) {
|
||||
cclog.Warnf("JWT: User '%s' does not have the role 'api'. REST API endpoints will return error!\n", user.Username)
|
||||
}
|
||||
|
||||
jwt, err := authHandle.JwtAuth.ProvideJWT(user)
|
||||
if err != nil {
|
||||
cclog.Abortf("JWT: User '%s' found in DB, but failed to provide JWT.\nError: %s\n", user.Username, err.Error())
|
||||
}
|
||||
|
||||
cclog.Printf("JWT: Successfully generated JWT for user '%s': %s\n", user.Username, jwt)
|
||||
}
|
||||
|
||||
} else if flagNewUser != "" || flagDelUser != "" {
|
||||
cclog.Abort("Error: Arguments '--add-user' and '--del-user' can only be used if authentication is enabled. No changes, exited.")
|
||||
}
|
||||
|
||||
if archiveCfg := ccconf.GetPackageConfig("archive"); archiveCfg != nil {
|
||||
err = archive.Init(archiveCfg, config.Keys.DisableArchive)
|
||||
} else {
|
||||
err = archive.Init(json.RawMessage("{\"kind\":\"file\",\"path\":\"./var/job-archive\"}"), config.Keys.DisableArchive)
|
||||
return nil
|
||||
}
|
||||
|
||||
// checkDefaultSecurityKeys warns if default JWT keys are detected
|
||||
func checkDefaultSecurityKeys() {
|
||||
// Default JWT public key from init.go
|
||||
defaultJWTPublic := "kzfYrYy+TzpanWZHJ5qSdMj5uKUWgq74BWhQG6copP0="
|
||||
|
||||
if os.Getenv("JWT_PUBLIC_KEY") == defaultJWTPublic {
|
||||
cclog.Warn("Using default JWT keys - not recommended for production environments")
|
||||
}
|
||||
}
|
||||
|
||||
func addUser(userSpec string) error {
|
||||
parts := strings.SplitN(userSpec, ":", 3)
|
||||
if len(parts) != 3 || len(parts[0]) == 0 {
|
||||
return fmt.Errorf("invalid user format, want: <username>:[admin,support,manager,api,user]:<password>, have: %s", userSpec)
|
||||
}
|
||||
|
||||
ur := repository.GetUserRepository()
|
||||
if err := ur.AddUser(&schema.User{
|
||||
Username: parts[0],
|
||||
Projects: make([]string, 0),
|
||||
Password: parts[2],
|
||||
Roles: strings.Split(parts[1], ","),
|
||||
}); err != nil {
|
||||
return fmt.Errorf("adding user '%s' with roles '%s': %w", parts[0], parts[1], err)
|
||||
}
|
||||
|
||||
cclog.Infof("Add User: Added new user '%s' with roles '%s'", parts[0], parts[1])
|
||||
return nil
|
||||
}
|
||||
|
||||
func delUser(username string) error {
|
||||
ur := repository.GetUserRepository()
|
||||
if err := ur.DelUser(username); err != nil {
|
||||
return fmt.Errorf("deleting user '%s': %w", username, err)
|
||||
}
|
||||
cclog.Infof("Delete User: Deleted user '%s' from DB", username)
|
||||
return nil
|
||||
}
|
||||
|
||||
func syncLDAP(authHandle *auth.Authentication) error {
|
||||
if authHandle.LdapAuth == nil {
|
||||
return fmt.Errorf("LDAP authentication is not configured")
|
||||
}
|
||||
|
||||
if err := authHandle.LdapAuth.Sync(); err != nil {
|
||||
return fmt.Errorf("synchronizing LDAP: %w", err)
|
||||
}
|
||||
|
||||
cclog.Print("Sync LDAP: LDAP synchronization successfull.")
|
||||
return nil
|
||||
}
|
||||
|
||||
func generateJWT(authHandle *auth.Authentication, username string) error {
|
||||
ur := repository.GetUserRepository()
|
||||
user, err := ur.GetUser(username)
|
||||
if err != nil {
|
||||
cclog.Abortf("Init: Failed to initialize archive.\nError: %s\n", err.Error())
|
||||
return fmt.Errorf("getting user '%s': %w", username, err)
|
||||
}
|
||||
|
||||
if !user.HasRole(schema.RoleApi) {
|
||||
cclog.Warnf("JWT: User '%s' does not have the role 'api'. REST API endpoints will return error!\n", user.Username)
|
||||
}
|
||||
|
||||
jwt, err := authHandle.JwtAuth.ProvideJWT(user)
|
||||
if err != nil {
|
||||
return fmt.Errorf("generating JWT for user '%s': %w", user.Username, err)
|
||||
}
|
||||
|
||||
cclog.Infof("JWT: Successfully generated JWT for user '%s': %s", user.Username, jwt)
|
||||
return nil
|
||||
}
|
||||
|
||||
func initSubsystems() error {
|
||||
// Initialize job archive
|
||||
archiveCfg := ccconf.GetPackageConfig("archive")
|
||||
if archiveCfg == nil {
|
||||
archiveCfg = json.RawMessage(defaultArchiveConfig)
|
||||
}
|
||||
if err := archive.Init(archiveCfg, config.Keys.DisableArchive); err != nil {
|
||||
return fmt.Errorf("initializing archive: %w", err)
|
||||
}
|
||||
|
||||
// Initialize metricdata
|
||||
if err := metricdata.Init(); err != nil {
|
||||
cclog.Abortf("Init: Failed to initialize metricdata repository.\nError %s\n", err.Error())
|
||||
return fmt.Errorf("initializing metricdata repository: %w", err)
|
||||
}
|
||||
|
||||
// Handle database re-initialization
|
||||
if flagReinitDB {
|
||||
if err := importer.InitDB(); err != nil {
|
||||
cclog.Abortf("Init DB: Failed to re-initialize repository DB.\nError: %s\n", err.Error())
|
||||
} else {
|
||||
cclog.Print("Init DB: Sucessfully re-initialized repository DB.")
|
||||
return fmt.Errorf("re-initializing repository DB: %w", err)
|
||||
}
|
||||
cclog.Print("Init DB: Successfully re-initialized repository DB.")
|
||||
}
|
||||
|
||||
// Handle job import
|
||||
if flagImportJob != "" {
|
||||
if err := importer.HandleImportFlag(flagImportJob); err != nil {
|
||||
cclog.Abortf("Import Job: Job import failed.\nError: %s\n", err.Error())
|
||||
} else {
|
||||
cclog.Printf("Import Job: Imported Job '%s' into DB.\n", flagImportJob)
|
||||
return fmt.Errorf("importing job: %w", err)
|
||||
}
|
||||
cclog.Infof("Import Job: Imported Job '%s' into DB", flagImportJob)
|
||||
}
|
||||
|
||||
// Initialize taggers
|
||||
if config.Keys.EnableJobTaggers {
|
||||
tagger.Init()
|
||||
}
|
||||
|
||||
// Apply tags if requested
|
||||
if flagApplyTags {
|
||||
if err := tagger.RunTaggers(); err != nil {
|
||||
cclog.Abortf("Running job taggers.\nError: %s\n", err.Error())
|
||||
return fmt.Errorf("running job taggers: %w", err)
|
||||
}
|
||||
}
|
||||
|
||||
if !flagServer {
|
||||
cclog.Exit("No errors, server flag not set. Exiting cc-backend.")
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func runServer(ctx context.Context) error {
|
||||
var wg sync.WaitGroup
|
||||
|
||||
// Metric Store starts after all flags have been processes
|
||||
// Start metric store if enabled
|
||||
if memorystore.InternalCCMSFlag {
|
||||
if mscfg := ccconf.GetPackageConfig("metric-store"); mscfg != nil {
|
||||
memorystore.Init(mscfg, &wg)
|
||||
} else {
|
||||
cclog.Abort("Metric Store configuration must be present")
|
||||
mscfg := ccconf.GetPackageConfig("metric-store")
|
||||
if mscfg == nil {
|
||||
return fmt.Errorf("metric store configuration must be present")
|
||||
}
|
||||
memorystore.Init(mscfg, &wg)
|
||||
}
|
||||
archiver.Start(repository.GetJobRepository())
|
||||
|
||||
taskManager.Start(ccconf.GetPackageConfig("cron"),
|
||||
ccconf.GetPackageConfig("archive"))
|
||||
// Start archiver and task manager
|
||||
archiver.Start(repository.GetJobRepository(), ctx)
|
||||
taskManager.Start(ccconf.GetPackageConfig("cron"), ccconf.GetPackageConfig("archive"))
|
||||
|
||||
// Initialize web UI
|
||||
cfg := ccconf.GetPackageConfig("ui")
|
||||
web.Init(cfg)
|
||||
|
||||
serverInit()
|
||||
// Initialize HTTP server
|
||||
srv, err := NewServer(version, commit, date)
|
||||
if err != nil {
|
||||
return fmt.Errorf("creating server: %w", err)
|
||||
}
|
||||
|
||||
// Channel to collect errors from server
|
||||
errChan := make(chan error, 1)
|
||||
|
||||
// Start HTTP server
|
||||
wg.Add(1)
|
||||
go func() {
|
||||
defer wg.Done()
|
||||
serverStart()
|
||||
if err := srv.Start(ctx); err != nil {
|
||||
errChan <- err
|
||||
}
|
||||
}()
|
||||
|
||||
// Handle shutdown signals
|
||||
wg.Add(1)
|
||||
sigs := make(chan os.Signal, 1)
|
||||
signal.Notify(sigs, syscall.SIGINT, syscall.SIGTERM)
|
||||
go func() {
|
||||
defer wg.Done()
|
||||
<-sigs
|
||||
select {
|
||||
case <-sigs:
|
||||
cclog.Info("Shutdown signal received")
|
||||
case <-ctx.Done():
|
||||
}
|
||||
|
||||
runtimeEnv.SystemdNotifiy(false, "Shutting down ...")
|
||||
|
||||
serverShutdown()
|
||||
|
||||
srv.Shutdown(ctx)
|
||||
util.FsWatcherShutdown()
|
||||
|
||||
taskManager.Shutdown()
|
||||
}()
|
||||
|
||||
if os.Getenv("GOGC") == "" {
|
||||
// Set GC percent if not configured
|
||||
if os.Getenv(envGOGC) == "" {
|
||||
debug.SetGCPercent(25)
|
||||
}
|
||||
runtimeEnv.SystemdNotifiy(true, "running")
|
||||
wg.Wait()
|
||||
|
||||
// Wait for completion or error
|
||||
go func() {
|
||||
wg.Wait()
|
||||
close(errChan)
|
||||
}()
|
||||
|
||||
// Check for server startup errors
|
||||
select {
|
||||
case err := <-errChan:
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
case <-time.After(100 * time.Millisecond):
|
||||
// Server started successfully, wait for completion
|
||||
if err := <-errChan; err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
cclog.Print("Graceful shutdown completed!")
|
||||
return nil
|
||||
}
|
||||
|
||||
func run() error {
|
||||
cliInit()
|
||||
|
||||
if flagVersion {
|
||||
printVersion()
|
||||
return nil
|
||||
}
|
||||
|
||||
// Initialize logger
|
||||
cclog.Init(flagLogLevel, flagLogDateTime)
|
||||
|
||||
// Handle init flag
|
||||
if flagInit {
|
||||
initEnv()
|
||||
cclog.Exit("Successfully setup environment!\n" +
|
||||
"Please review config.json and .env and adjust it to your needs.\n" +
|
||||
"Add your job-archive at ./var/job-archive.")
|
||||
}
|
||||
|
||||
// Initialize gops agent
|
||||
if err := initGops(); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Initialize subsystems in dependency order:
|
||||
// 1. Load environment variables from .env file (contains sensitive configuration)
|
||||
// 2. Load configuration from config.json (may reference environment variables)
|
||||
// 3. Handle database migration commands if requested
|
||||
// 4. Initialize database connection (requires config for connection string)
|
||||
// 5. Handle user commands if requested (requires database and authentication config)
|
||||
// 6. Initialize subsystems like archive and metrics (require config and database)
|
||||
|
||||
// Load environment and configuration
|
||||
if err := loadEnvironment(); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := initConfiguration(); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Handle database migration (migrate, revert, force)
|
||||
if err := handleDatabaseCommands(); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Initialize database
|
||||
if err := initDatabase(); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Handle user commands (add, delete, sync, JWT)
|
||||
if err := handleUserCommands(); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Initialize subsystems (archive, metrics, taggers)
|
||||
if err := initSubsystems(); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Exit if start server is not requested
|
||||
if !flagServer {
|
||||
cclog.Exit("No errors, server flag not set. Exiting cc-backend.")
|
||||
}
|
||||
|
||||
// Run server with context
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
defer cancel()
|
||||
|
||||
return runServer(ctx)
|
||||
}
|
||||
|
||||
func main() {
|
||||
if err := run(); err != nil {
|
||||
cclog.Error(err.Error())
|
||||
os.Exit(1)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2,6 +2,9 @@
|
||||
// All rights reserved. This file is part of cc-backend.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
// Package main provides the entry point for the ClusterCockpit backend server.
|
||||
// This file contains HTTP server setup, routing configuration, and
|
||||
// authentication middleware integration.
|
||||
package main
|
||||
|
||||
import (
|
||||
@@ -36,11 +39,19 @@ import (
|
||||
httpSwagger "github.com/swaggo/http-swagger"
|
||||
)
|
||||
|
||||
var (
|
||||
var buildInfo web.Build
|
||||
|
||||
// Environment variable names
|
||||
const (
|
||||
envDebug = "DEBUG"
|
||||
)
|
||||
|
||||
// Server encapsulates the HTTP server state and dependencies
|
||||
type Server struct {
|
||||
router *mux.Router
|
||||
server *http.Server
|
||||
apiHandle *api.RestApi
|
||||
)
|
||||
}
|
||||
|
||||
func onFailureResponse(rw http.ResponseWriter, r *http.Request, err error) {
|
||||
rw.Header().Add("Content-Type", "application/json")
|
||||
@@ -51,25 +62,31 @@ func onFailureResponse(rw http.ResponseWriter, r *http.Request, err error) {
|
||||
})
|
||||
}
|
||||
|
||||
func serverInit() {
|
||||
// NewServer creates and initializes a new Server instance
|
||||
func NewServer(version, commit, buildDate string) (*Server, error) {
|
||||
buildInfo = web.Build{Version: version, Hash: commit, Buildtime: buildDate}
|
||||
|
||||
s := &Server{
|
||||
router: mux.NewRouter(),
|
||||
}
|
||||
|
||||
if err := s.init(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return s, nil
|
||||
}
|
||||
|
||||
func (s *Server) init() error {
|
||||
// Setup the http.Handler/Router used by the server
|
||||
graph.Init()
|
||||
resolver := graph.GetResolverInstance()
|
||||
graphQLServer := handler.New(
|
||||
generated.NewExecutableSchema(generated.Config{Resolvers: resolver}))
|
||||
|
||||
// graphQLServer.AddTransport(transport.SSE{})
|
||||
graphQLServer.AddTransport(transport.POST{})
|
||||
// graphQLServer.AddTransport(transport.Websocket{
|
||||
// KeepAlivePingInterval: 10 * time.Second,
|
||||
// Upgrader: websocket.Upgrader{
|
||||
// CheckOrigin: func(r *http.Request) bool {
|
||||
// return true
|
||||
// },
|
||||
// },
|
||||
// })
|
||||
|
||||
if os.Getenv("DEBUG") != "1" {
|
||||
if os.Getenv(envDebug) != "1" {
|
||||
// Having this handler means that a error message is returned via GraphQL instead of the connection simply beeing closed.
|
||||
// The problem with this is that then, no more stacktrace is printed to stderr.
|
||||
graphQLServer.SetRecoverFunc(func(ctx context.Context, err any) error {
|
||||
@@ -86,73 +103,56 @@ func serverInit() {
|
||||
|
||||
authHandle := auth.GetAuthInstance()
|
||||
|
||||
apiHandle = api.New()
|
||||
|
||||
router = mux.NewRouter()
|
||||
buildInfo := web.Build{Version: version, Hash: commit, Buildtime: date}
|
||||
s.apiHandle = api.New()
|
||||
|
||||
info := map[string]any{}
|
||||
info["hasOpenIDConnect"] = false
|
||||
|
||||
if auth.Keys.OpenIDConfig != nil {
|
||||
openIDConnect := auth.NewOIDC(authHandle)
|
||||
openIDConnect.RegisterEndpoints(router)
|
||||
openIDConnect.RegisterEndpoints(s.router)
|
||||
info["hasOpenIDConnect"] = true
|
||||
}
|
||||
|
||||
router.HandleFunc("/login", func(rw http.ResponseWriter, r *http.Request) {
|
||||
s.router.HandleFunc("/login", func(rw http.ResponseWriter, r *http.Request) {
|
||||
rw.Header().Add("Content-Type", "text/html; charset=utf-8")
|
||||
cclog.Debugf("##%v##", info)
|
||||
web.RenderTemplate(rw, "login.tmpl", &web.Page{Title: "Login", Build: buildInfo, Infos: info})
|
||||
}).Methods(http.MethodGet)
|
||||
router.HandleFunc("/imprint", func(rw http.ResponseWriter, r *http.Request) {
|
||||
s.router.HandleFunc("/imprint", func(rw http.ResponseWriter, r *http.Request) {
|
||||
rw.Header().Add("Content-Type", "text/html; charset=utf-8")
|
||||
web.RenderTemplate(rw, "imprint.tmpl", &web.Page{Title: "Imprint", Build: buildInfo})
|
||||
})
|
||||
router.HandleFunc("/privacy", func(rw http.ResponseWriter, r *http.Request) {
|
||||
s.router.HandleFunc("/privacy", func(rw http.ResponseWriter, r *http.Request) {
|
||||
rw.Header().Add("Content-Type", "text/html; charset=utf-8")
|
||||
web.RenderTemplate(rw, "privacy.tmpl", &web.Page{Title: "Privacy", Build: buildInfo})
|
||||
})
|
||||
|
||||
secured := router.PathPrefix("/").Subrouter()
|
||||
securedapi := router.PathPrefix("/api").Subrouter()
|
||||
userapi := router.PathPrefix("/userapi").Subrouter()
|
||||
configapi := router.PathPrefix("/config").Subrouter()
|
||||
frontendapi := router.PathPrefix("/frontend").Subrouter()
|
||||
metricstoreapi := router.PathPrefix("/metricstore").Subrouter()
|
||||
secured := s.router.PathPrefix("/").Subrouter()
|
||||
securedapi := s.router.PathPrefix("/api").Subrouter()
|
||||
userapi := s.router.PathPrefix("/userapi").Subrouter()
|
||||
configapi := s.router.PathPrefix("/config").Subrouter()
|
||||
frontendapi := s.router.PathPrefix("/frontend").Subrouter()
|
||||
metricstoreapi := s.router.PathPrefix("/metricstore").Subrouter()
|
||||
|
||||
if !config.Keys.DisableAuthentication {
|
||||
router.Handle("/login", authHandle.Login(
|
||||
// On success: Handled within Login()
|
||||
// On failure:
|
||||
func(rw http.ResponseWriter, r *http.Request, err error) {
|
||||
rw.Header().Add("Content-Type", "text/html; charset=utf-8")
|
||||
rw.WriteHeader(http.StatusUnauthorized)
|
||||
web.RenderTemplate(rw, "login.tmpl", &web.Page{
|
||||
Title: "Login failed - ClusterCockpit",
|
||||
MsgType: "alert-warning",
|
||||
Message: err.Error(),
|
||||
Build: buildInfo,
|
||||
Infos: info,
|
||||
})
|
||||
})).Methods(http.MethodPost)
|
||||
// Create login failure handler (used by both /login and /jwt-login)
|
||||
loginFailureHandler := func(rw http.ResponseWriter, r *http.Request, err error) {
|
||||
rw.Header().Add("Content-Type", "text/html; charset=utf-8")
|
||||
rw.WriteHeader(http.StatusUnauthorized)
|
||||
web.RenderTemplate(rw, "login.tmpl", &web.Page{
|
||||
Title: "Login failed - ClusterCockpit",
|
||||
MsgType: "alert-warning",
|
||||
Message: err.Error(),
|
||||
Build: buildInfo,
|
||||
Infos: info,
|
||||
})
|
||||
}
|
||||
|
||||
router.Handle("/jwt-login", authHandle.Login(
|
||||
// On success: Handled within Login()
|
||||
// On failure:
|
||||
func(rw http.ResponseWriter, r *http.Request, err error) {
|
||||
rw.Header().Add("Content-Type", "text/html; charset=utf-8")
|
||||
rw.WriteHeader(http.StatusUnauthorized)
|
||||
web.RenderTemplate(rw, "login.tmpl", &web.Page{
|
||||
Title: "Login failed - ClusterCockpit",
|
||||
MsgType: "alert-warning",
|
||||
Message: err.Error(),
|
||||
Build: buildInfo,
|
||||
Infos: info,
|
||||
})
|
||||
}))
|
||||
s.router.Handle("/login", authHandle.Login(loginFailureHandler)).Methods(http.MethodPost)
|
||||
s.router.Handle("/jwt-login", authHandle.Login(loginFailureHandler))
|
||||
|
||||
router.Handle("/logout", authHandle.Logout(
|
||||
s.router.Handle("/logout", authHandle.Logout(
|
||||
http.HandlerFunc(func(rw http.ResponseWriter, r *http.Request) {
|
||||
rw.Header().Add("Content-Type", "text/html; charset=utf-8")
|
||||
rw.WriteHeader(http.StatusOK)
|
||||
@@ -226,8 +226,8 @@ func serverInit() {
|
||||
}
|
||||
|
||||
if flagDev {
|
||||
router.Handle("/playground", playground.Handler("GraphQL playground", "/query"))
|
||||
router.PathPrefix("/swagger/").Handler(httpSwagger.Handler(
|
||||
s.router.Handle("/playground", playground.Handler("GraphQL playground", "/query"))
|
||||
s.router.PathPrefix("/swagger/").Handler(httpSwagger.Handler(
|
||||
httpSwagger.URL("http://" + config.Keys.Addr + "/swagger/doc.json"))).Methods(http.MethodGet)
|
||||
}
|
||||
secured.Handle("/query", graphQLServer)
|
||||
@@ -239,67 +239,46 @@ func serverInit() {
|
||||
|
||||
// Mount all /monitoring/... and /api/... routes.
|
||||
routerConfig.SetupRoutes(secured, buildInfo)
|
||||
apiHandle.MountApiRoutes(securedapi)
|
||||
apiHandle.MountUserApiRoutes(userapi)
|
||||
apiHandle.MountConfigApiRoutes(configapi)
|
||||
apiHandle.MountFrontendApiRoutes(frontendapi)
|
||||
s.apiHandle.MountApiRoutes(securedapi)
|
||||
s.apiHandle.MountUserApiRoutes(userapi)
|
||||
s.apiHandle.MountConfigApiRoutes(configapi)
|
||||
s.apiHandle.MountFrontendApiRoutes(frontendapi)
|
||||
|
||||
if memorystore.InternalCCMSFlag {
|
||||
apiHandle.MountMetricStoreApiRoutes(metricstoreapi)
|
||||
s.apiHandle.MountMetricStoreApiRoutes(metricstoreapi)
|
||||
}
|
||||
|
||||
if config.Keys.EmbedStaticFiles {
|
||||
if i, err := os.Stat("./var/img"); err == nil {
|
||||
if i.IsDir() {
|
||||
cclog.Info("Use local directory for static images")
|
||||
router.PathPrefix("/img/").Handler(http.StripPrefix("/img/", http.FileServer(http.Dir("./var/img"))))
|
||||
s.router.PathPrefix("/img/").Handler(http.StripPrefix("/img/", http.FileServer(http.Dir("./var/img"))))
|
||||
}
|
||||
}
|
||||
router.PathPrefix("/").Handler(http.StripPrefix("/", web.ServeFiles()))
|
||||
s.router.PathPrefix("/").Handler(http.StripPrefix("/", web.ServeFiles()))
|
||||
} else {
|
||||
router.PathPrefix("/").Handler(http.FileServer(http.Dir(config.Keys.StaticFiles)))
|
||||
s.router.PathPrefix("/").Handler(http.FileServer(http.Dir(config.Keys.StaticFiles)))
|
||||
}
|
||||
|
||||
router.Use(handlers.CompressHandler)
|
||||
router.Use(handlers.RecoveryHandler(handlers.PrintRecoveryStack(true)))
|
||||
router.Use(handlers.CORS(
|
||||
s.router.Use(handlers.CompressHandler)
|
||||
s.router.Use(handlers.RecoveryHandler(handlers.PrintRecoveryStack(true)))
|
||||
s.router.Use(handlers.CORS(
|
||||
handlers.AllowCredentials(),
|
||||
handlers.AllowedHeaders([]string{"X-Requested-With", "Content-Type", "Authorization", "Origin"}),
|
||||
handlers.AllowedMethods([]string{"GET", "POST", "HEAD", "OPTIONS"}),
|
||||
handlers.AllowedOrigins([]string{"*"})))
|
||||
|
||||
// secured.NotFoundHandler = http.HandlerFunc(func(rw http.ResponseWriter, r *http.Request) {
|
||||
// page := web.Page{
|
||||
// Title: "ClusterCockpit - Not Found",
|
||||
// Build: buildInfo,
|
||||
// }
|
||||
// rw.Header().Add("Content-Type", "text/html; charset=utf-8")
|
||||
// web.RenderTemplate(rw, "404.tmpl", &page)
|
||||
// })
|
||||
|
||||
// secured.NotFoundHandler = http.HandlerFunc(http.NotFound)
|
||||
// router.NotFoundHandler = router.NewRoute().HandlerFunc(http.NotFound).GetHandler()
|
||||
|
||||
// printEndpoints(router)
|
||||
return nil
|
||||
}
|
||||
|
||||
// func printEndpoints(r *mux.Router) {
|
||||
// r.Walk(func(route *mux.Route, router *mux.Router, ancestors []*mux.Route) error {
|
||||
// path, err := route.GetPathTemplate()
|
||||
// if err != nil {
|
||||
// path = "nopath"
|
||||
// }
|
||||
// methods, err := route.GetMethods()
|
||||
// if err != nil {
|
||||
// methods = append(methods, "nomethod")
|
||||
// }
|
||||
// fmt.Printf("%v %s\n", methods, path)
|
||||
// return nil
|
||||
// })
|
||||
// }
|
||||
// Server timeout defaults (in seconds)
|
||||
const (
|
||||
defaultReadTimeout = 20
|
||||
defaultWriteTimeout = 20
|
||||
)
|
||||
|
||||
func serverStart() {
|
||||
handler := handlers.CustomLoggingHandler(io.Discard, router, func(_ io.Writer, params handlers.LogFormatterParams) {
|
||||
func (s *Server) Start(ctx context.Context) error {
|
||||
handler := handlers.CustomLoggingHandler(io.Discard, s.router, func(_ io.Writer, params handlers.LogFormatterParams) {
|
||||
if strings.HasPrefix(params.Request.RequestURI, "/api/") {
|
||||
cclog.Debugf("%s %s (%d, %.02fkb, %dms)",
|
||||
params.Request.Method, params.URL.RequestURI(),
|
||||
@@ -313,9 +292,13 @@ func serverStart() {
|
||||
}
|
||||
})
|
||||
|
||||
server = &http.Server{
|
||||
ReadTimeout: 20 * time.Second,
|
||||
WriteTimeout: 20 * time.Second,
|
||||
// Use configurable timeouts with defaults
|
||||
readTimeout := time.Duration(defaultReadTimeout) * time.Second
|
||||
writeTimeout := time.Duration(defaultWriteTimeout) * time.Second
|
||||
|
||||
s.server = &http.Server{
|
||||
ReadTimeout: readTimeout,
|
||||
WriteTimeout: writeTimeout,
|
||||
Handler: handler,
|
||||
Addr: config.Keys.Addr,
|
||||
}
|
||||
@@ -323,7 +306,7 @@ func serverStart() {
|
||||
// Start http or https server
|
||||
listener, err := net.Listen("tcp", config.Keys.Addr)
|
||||
if err != nil {
|
||||
cclog.Abortf("Server Start: Starting http listener on '%s' failed.\nError: %s\n", config.Keys.Addr, err.Error())
|
||||
return fmt.Errorf("starting listener on '%s': %w", config.Keys.Addr, err)
|
||||
}
|
||||
|
||||
if !strings.HasSuffix(config.Keys.Addr, ":80") && config.Keys.RedirectHTTPTo != "" {
|
||||
@@ -336,7 +319,7 @@ func serverStart() {
|
||||
cert, err := tls.LoadX509KeyPair(
|
||||
config.Keys.HTTPSCertFile, config.Keys.HTTPSKeyFile)
|
||||
if err != nil {
|
||||
cclog.Abortf("Server Start: Loading X509 keypair failed. Check options 'https-cert-file' and 'https-key-file' in 'config.json'.\nError: %s\n", err.Error())
|
||||
return fmt.Errorf("loading X509 keypair (check 'https-cert-file' and 'https-key-file' in config.json): %w", err)
|
||||
}
|
||||
listener = tls.NewListener(listener, &tls.Config{
|
||||
Certificates: []tls.Certificate{cert},
|
||||
@@ -347,32 +330,51 @@ func serverStart() {
|
||||
MinVersion: tls.VersionTLS12,
|
||||
PreferServerCipherSuites: true,
|
||||
})
|
||||
cclog.Printf("HTTPS server listening at %s...\n", config.Keys.Addr)
|
||||
cclog.Infof("HTTPS server listening at %s...", config.Keys.Addr)
|
||||
} else {
|
||||
cclog.Printf("HTTP server listening at %s...\n", config.Keys.Addr)
|
||||
cclog.Infof("HTTP server listening at %s...", config.Keys.Addr)
|
||||
}
|
||||
//
|
||||
// Because this program will want to bind to a privileged port (like 80), the listener must
|
||||
// be established first, then the user can be changed, and after that,
|
||||
// the actual http server can be started.
|
||||
if err := runtimeEnv.DropPrivileges(config.Keys.Group, config.Keys.User); err != nil {
|
||||
cclog.Abortf("Server Start: Error while preparing server start.\nError: %s\n", err.Error())
|
||||
return fmt.Errorf("dropping privileges: %w", err)
|
||||
}
|
||||
|
||||
if err = server.Serve(listener); err != nil && err != http.ErrServerClosed {
|
||||
cclog.Abortf("Server Start: Starting server failed.\nError: %s\n", err.Error())
|
||||
// Handle context cancellation for graceful shutdown
|
||||
go func() {
|
||||
<-ctx.Done()
|
||||
shutdownCtx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
|
||||
defer cancel()
|
||||
if err := s.server.Shutdown(shutdownCtx); err != nil {
|
||||
cclog.Errorf("Server shutdown error: %v", err)
|
||||
}
|
||||
}()
|
||||
|
||||
if err = s.server.Serve(listener); err != nil && err != http.ErrServerClosed {
|
||||
return fmt.Errorf("server failed: %w", err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func serverShutdown() {
|
||||
func (s *Server) Shutdown(ctx context.Context) {
|
||||
// Create a shutdown context with timeout
|
||||
shutdownCtx, cancel := context.WithTimeout(ctx, 30*time.Second)
|
||||
defer cancel()
|
||||
|
||||
// First shut down the server gracefully (waiting for all ongoing requests)
|
||||
server.Shutdown(context.Background())
|
||||
if err := s.server.Shutdown(shutdownCtx); err != nil {
|
||||
cclog.Errorf("Server shutdown error: %v", err)
|
||||
}
|
||||
|
||||
// Archive all the metric store data
|
||||
if memorystore.InternalCCMSFlag {
|
||||
memorystore.Shutdown()
|
||||
}
|
||||
|
||||
// Then, wait for any async archivings still pending...
|
||||
archiver.WaitForArchiving()
|
||||
// Shutdown archiver with 10 second timeout for fast shutdown
|
||||
if err := archiver.Shutdown(10 * time.Second); err != nil {
|
||||
cclog.Warnf("Archiver shutdown: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -4,16 +4,9 @@
|
||||
"short-running-jobs-duration": 300,
|
||||
"resampling": {
|
||||
"trigger": 30,
|
||||
"resolutions": [
|
||||
600,
|
||||
300,
|
||||
120,
|
||||
60
|
||||
]
|
||||
"resolutions": [600, 300, 120, 60]
|
||||
},
|
||||
"apiAllowedIPs": [
|
||||
"*"
|
||||
],
|
||||
"apiAllowedIPs": ["*"],
|
||||
"emission-constant": 317
|
||||
},
|
||||
"cron": {
|
||||
@@ -88,6 +81,6 @@
|
||||
"directory": "./var/archive"
|
||||
},
|
||||
"retention-in-memory": "48h"
|
||||
},
|
||||
"ui-file": "./configs/uiConfig.json"
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
88
go.mod
88
go.mod
@@ -5,14 +5,18 @@ go 1.24.0
|
||||
toolchain go1.24.1
|
||||
|
||||
require (
|
||||
github.com/99designs/gqlgen v0.17.78
|
||||
github.com/ClusterCockpit/cc-lib v0.10.1
|
||||
github.com/99designs/gqlgen v0.17.84
|
||||
github.com/ClusterCockpit/cc-lib v0.11.0
|
||||
github.com/Masterminds/squirrel v1.5.4
|
||||
github.com/coreos/go-oidc/v3 v3.12.0
|
||||
github.com/aws/aws-sdk-go-v2 v1.39.6
|
||||
github.com/aws/aws-sdk-go-v2/config v1.31.20
|
||||
github.com/aws/aws-sdk-go-v2/credentials v1.18.24
|
||||
github.com/aws/aws-sdk-go-v2/service/s3 v1.90.2
|
||||
github.com/coreos/go-oidc/v3 v3.16.0
|
||||
github.com/expr-lang/expr v1.17.6
|
||||
github.com/go-co-op/gocron/v2 v2.16.0
|
||||
github.com/go-ldap/ldap/v3 v3.4.10
|
||||
github.com/go-sql-driver/mysql v1.9.0
|
||||
github.com/go-co-op/gocron/v2 v2.17.0
|
||||
github.com/go-ldap/ldap/v3 v3.4.12
|
||||
github.com/go-sql-driver/mysql v1.9.3
|
||||
github.com/golang-jwt/jwt/v5 v5.2.2
|
||||
github.com/golang-migrate/migrate/v4 v4.18.2
|
||||
github.com/google/gops v0.3.28
|
||||
@@ -23,18 +27,19 @@ require (
|
||||
github.com/jmoiron/sqlx v1.4.0
|
||||
github.com/joho/godotenv v1.5.1
|
||||
github.com/linkedin/goavro/v2 v2.14.0
|
||||
github.com/mattn/go-sqlite3 v1.14.24
|
||||
github.com/nats-io/nats.go v1.46.1
|
||||
github.com/mattn/go-sqlite3 v1.14.32
|
||||
github.com/nats-io/nats.go v1.47.0
|
||||
github.com/prometheus/client_golang v1.23.2
|
||||
github.com/prometheus/common v0.66.1
|
||||
github.com/qustavo/sqlhooks/v2 v2.1.0
|
||||
github.com/santhosh-tekuri/jsonschema/v5 v5.3.1
|
||||
github.com/stretchr/testify v1.11.1
|
||||
github.com/swaggo/http-swagger v1.3.4
|
||||
github.com/swaggo/swag v1.16.6
|
||||
github.com/vektah/gqlparser/v2 v2.5.30
|
||||
golang.org/x/crypto v0.42.0
|
||||
golang.org/x/oauth2 v0.30.0
|
||||
golang.org/x/time v0.13.0
|
||||
github.com/vektah/gqlparser/v2 v2.5.31
|
||||
golang.org/x/crypto v0.44.0
|
||||
golang.org/x/oauth2 v0.32.0
|
||||
golang.org/x/time v0.14.0
|
||||
)
|
||||
|
||||
require (
|
||||
@@ -42,18 +47,40 @@ require (
|
||||
github.com/Azure/go-ntlmssp v0.0.0-20221128193559-754e69321358 // indirect
|
||||
github.com/KyleBanks/depth v1.2.1 // indirect
|
||||
github.com/agnivade/levenshtein v1.2.1 // indirect
|
||||
github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.7.3 // indirect
|
||||
github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.18.13 // indirect
|
||||
github.com/aws/aws-sdk-go-v2/internal/configsources v1.4.13 // indirect
|
||||
github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.7.13 // indirect
|
||||
github.com/aws/aws-sdk-go-v2/internal/ini v1.8.4 // indirect
|
||||
github.com/aws/aws-sdk-go-v2/internal/v4a v1.4.13 // indirect
|
||||
github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.13.3 // indirect
|
||||
github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.9.4 // indirect
|
||||
github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.13.13 // indirect
|
||||
github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.19.13 // indirect
|
||||
github.com/aws/aws-sdk-go-v2/service/sso v1.30.3 // indirect
|
||||
github.com/aws/aws-sdk-go-v2/service/ssooidc v1.35.7 // indirect
|
||||
github.com/aws/aws-sdk-go-v2/service/sts v1.40.2 // indirect
|
||||
github.com/aws/smithy-go v1.23.2 // indirect
|
||||
github.com/beorn7/perks v1.0.1 // indirect
|
||||
github.com/cespare/xxhash/v2 v2.3.0 // indirect
|
||||
github.com/cpuguy83/go-md2man/v2 v2.0.7 // indirect
|
||||
github.com/davecgh/go-spew v1.1.1 // indirect
|
||||
github.com/felixge/httpsnoop v1.0.4 // indirect
|
||||
github.com/fsnotify/fsnotify v1.9.0 // indirect
|
||||
github.com/go-asn1-ber/asn1-ber v1.5.7 // indirect
|
||||
github.com/go-jose/go-jose/v4 v4.0.5 // indirect
|
||||
github.com/go-openapi/jsonpointer v0.21.1 // indirect
|
||||
github.com/go-openapi/jsonreference v0.21.0 // indirect
|
||||
github.com/go-openapi/spec v0.21.0 // indirect
|
||||
github.com/go-openapi/swag v0.23.1 // indirect
|
||||
github.com/go-asn1-ber/asn1-ber v1.5.8-0.20250403174932-29230038a667 // indirect
|
||||
github.com/go-jose/go-jose/v4 v4.1.3 // indirect
|
||||
github.com/go-openapi/jsonpointer v0.22.3 // indirect
|
||||
github.com/go-openapi/jsonreference v0.21.3 // indirect
|
||||
github.com/go-openapi/spec v0.22.1 // indirect
|
||||
github.com/go-openapi/swag/conv v0.25.4 // indirect
|
||||
github.com/go-openapi/swag/jsonname v0.25.4 // indirect
|
||||
github.com/go-openapi/swag/jsonutils v0.25.4 // indirect
|
||||
github.com/go-openapi/swag/loading v0.25.4 // indirect
|
||||
github.com/go-openapi/swag/stringutils v0.25.4 // indirect
|
||||
github.com/go-openapi/swag/typeutils v0.25.4 // indirect
|
||||
github.com/go-openapi/swag/yamlutils v0.25.4 // indirect
|
||||
github.com/go-viper/mapstructure/v2 v2.4.0 // indirect
|
||||
github.com/goccy/go-yaml v1.19.0 // indirect
|
||||
github.com/golang/snappy v0.0.4 // indirect
|
||||
github.com/google/uuid v1.6.0 // indirect
|
||||
github.com/gorilla/securecookie v1.1.2 // indirect
|
||||
@@ -62,37 +89,38 @@ require (
|
||||
github.com/hashicorp/go-multierror v1.1.1 // indirect
|
||||
github.com/hashicorp/golang-lru/v2 v2.0.7 // indirect
|
||||
github.com/jonboulle/clockwork v0.5.0 // indirect
|
||||
github.com/josharian/intern v1.0.0 // indirect
|
||||
github.com/jpillora/backoff v1.0.0 // indirect
|
||||
github.com/json-iterator/go v1.1.12 // indirect
|
||||
github.com/klauspost/compress v1.18.0 // indirect
|
||||
github.com/klauspost/compress v1.18.1 // indirect
|
||||
github.com/lann/builder v0.0.0-20180802200727-47ae307949d0 // indirect
|
||||
github.com/lann/ps v0.0.0-20150810152359-62de8c46ede0 // indirect
|
||||
github.com/mailru/easyjson v0.9.0 // indirect
|
||||
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
|
||||
github.com/modern-go/reflect2 v1.0.2 // indirect
|
||||
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect
|
||||
github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f // indirect
|
||||
github.com/nats-io/nkeys v0.4.11 // indirect
|
||||
github.com/nats-io/nuid v1.0.1 // indirect
|
||||
github.com/pmezard/go-difflib v1.0.0 // indirect
|
||||
github.com/prometheus/client_model v0.6.2 // indirect
|
||||
github.com/prometheus/procfs v0.16.1 // indirect
|
||||
github.com/robfig/cron/v3 v3.0.1 // indirect
|
||||
github.com/russross/blackfriday/v2 v2.1.0 // indirect
|
||||
github.com/sosodev/duration v1.3.1 // indirect
|
||||
github.com/stretchr/objx v0.5.2 // indirect
|
||||
github.com/swaggo/files v1.0.1 // indirect
|
||||
github.com/urfave/cli/v2 v2.27.7 // indirect
|
||||
github.com/urfave/cli/v3 v3.6.1 // indirect
|
||||
github.com/xrash/smetrics v0.0.0-20250705151800-55b8f293f342 // indirect
|
||||
go.uber.org/atomic v1.11.0 // indirect
|
||||
go.yaml.in/yaml/v2 v2.4.2 // indirect
|
||||
golang.org/x/exp v0.0.0-20250620022241-b7579e27df2b // indirect
|
||||
golang.org/x/mod v0.27.0 // indirect
|
||||
golang.org/x/net v0.43.0 // indirect
|
||||
golang.org/x/sync v0.17.0 // indirect
|
||||
golang.org/x/sys v0.36.0 // indirect
|
||||
golang.org/x/text v0.29.0 // indirect
|
||||
golang.org/x/tools v0.36.0 // indirect
|
||||
google.golang.org/protobuf v1.36.8 // indirect
|
||||
go.yaml.in/yaml/v2 v2.4.3 // indirect
|
||||
go.yaml.in/yaml/v3 v3.0.4 // indirect
|
||||
golang.org/x/mod v0.30.0 // indirect
|
||||
golang.org/x/net v0.47.0 // indirect
|
||||
golang.org/x/sync v0.18.0 // indirect
|
||||
golang.org/x/sys v0.38.0 // indirect
|
||||
golang.org/x/text v0.31.0 // indirect
|
||||
golang.org/x/tools v0.39.0 // indirect
|
||||
google.golang.org/protobuf v1.36.10 // indirect
|
||||
gopkg.in/yaml.v3 v3.0.1 // indirect
|
||||
sigs.k8s.io/yaml v1.6.0 // indirect
|
||||
)
|
||||
|
||||
236
go.sum
236
go.sum
@@ -1,13 +1,13 @@
|
||||
filippo.io/edwards25519 v1.1.0 h1:FNf4tywRC1HmFuKW5xopWpigGjJKiJSV0Cqo0cJWDaA=
|
||||
filippo.io/edwards25519 v1.1.0/go.mod h1:BxyFTGdWcka3PhytdK4V28tE5sGfRvvvRV7EaN4VDT4=
|
||||
github.com/99designs/gqlgen v0.17.78 h1:bhIi7ynrc3js2O8wu1sMQj1YHPENDt3jQGyifoBvoVI=
|
||||
github.com/99designs/gqlgen v0.17.78/go.mod h1:yI/o31IauG2kX0IsskM4R894OCCG1jXJORhtLQqB7Oc=
|
||||
github.com/99designs/gqlgen v0.17.84 h1:iVMdiStgUVx/BFkMb0J5GAXlqfqtQ7bqMCYK6v52kQ0=
|
||||
github.com/99designs/gqlgen v0.17.84/go.mod h1:qjoUqzTeiejdo+bwUg8unqSpeYG42XrcrQboGIezmFA=
|
||||
github.com/Azure/go-ansiterm v0.0.0-20230124172434-306776ec8161 h1:L/gRVlceqvL25UVaW/CKtUDjefjrs0SPonmDGUVOYP0=
|
||||
github.com/Azure/go-ansiterm v0.0.0-20230124172434-306776ec8161/go.mod h1:xomTg63KZ2rFqZQzSB4Vz2SUXa1BpHTVz9L5PTmPC4E=
|
||||
github.com/Azure/go-ntlmssp v0.0.0-20221128193559-754e69321358 h1:mFRzDkZVAjdal+s7s0MwaRv9igoPqLRdzOLzw/8Xvq8=
|
||||
github.com/Azure/go-ntlmssp v0.0.0-20221128193559-754e69321358/go.mod h1:chxPXzSsl7ZWRAuOIE23GDNzjWuZquvFlgA8xmpunjU=
|
||||
github.com/ClusterCockpit/cc-lib v0.10.1 h1:tjGEH8mFGgznYxO8BKLiiar0eZR1Oytk8x5iIQHZR5s=
|
||||
github.com/ClusterCockpit/cc-lib v0.10.1/go.mod h1:nvTZuxFCTwlos8I1rL5O1RPab7vRtkU8E/PGiaF6pQA=
|
||||
github.com/ClusterCockpit/cc-lib v0.11.0 h1:66YkTOxWUak7nB3r7dJEm2q+B0uPRPGj0mwXZHXpOuA=
|
||||
github.com/ClusterCockpit/cc-lib v0.11.0/go.mod h1:0LKjDJs813/NMmaSJXJc11A9rxiFDPV/QdWQbZUp0XY=
|
||||
github.com/KyleBanks/depth v1.2.1 h1:5h8fQADFrWtarTdtDudMmGsC7GPbOAu6RVB3ffsVFHc=
|
||||
github.com/KyleBanks/depth v1.2.1/go.mod h1:jzSb9d0L43HxTQfT+oSA1EEp2q+ne2uh6XgeJcm8brE=
|
||||
github.com/Masterminds/squirrel v1.5.4 h1:uUcX/aBc8O7Fg9kaISIUsHXdKuqehiXAMQTYX8afzqM=
|
||||
@@ -16,12 +16,12 @@ github.com/Microsoft/go-winio v0.6.2 h1:F2VQgta7ecxGYO8k3ZZz3RS8fVIXVxONVUPlNERo
|
||||
github.com/Microsoft/go-winio v0.6.2/go.mod h1:yd8OoFMLzJbo9gZq8j5qaps8bJ9aShtEA8Ipt1oGCvU=
|
||||
github.com/NVIDIA/go-nvml v0.13.0-1 h1:OLX8Jq3dONuPOQPC7rndB6+iDmDakw0XTYgzMxObkEw=
|
||||
github.com/NVIDIA/go-nvml v0.13.0-1/go.mod h1:+KNA7c7gIBH7SKSJ1ntlwkfN80zdx8ovl4hrK3LmPt4=
|
||||
github.com/PuerkitoBio/goquery v1.10.3 h1:pFYcNSqHxBD06Fpj/KsbStFRsgRATgnf3LeXiUkhzPo=
|
||||
github.com/PuerkitoBio/goquery v1.10.3/go.mod h1:tMUX0zDMHXYlAQk6p35XxQMqMweEKB7iK7iLNd4RH4Y=
|
||||
github.com/PuerkitoBio/goquery v1.11.0 h1:jZ7pwMQXIITcUXNH83LLk+txlaEy6NVOfTuP43xxfqw=
|
||||
github.com/PuerkitoBio/goquery v1.11.0/go.mod h1:wQHgxUOU3JGuj3oD/QFfxUdlzW6xPHfqyHre6VMY4DQ=
|
||||
github.com/agnivade/levenshtein v1.2.1 h1:EHBY3UOn1gwdy/VbFwgo4cxecRznFk7fKWN1KOX7eoM=
|
||||
github.com/agnivade/levenshtein v1.2.1/go.mod h1:QVVI16kDrtSuwcpd0p1+xMC6Z/VfhtCyDIjcwga4/DU=
|
||||
github.com/alexbrainman/sspi v0.0.0-20231016080023-1a75b4708caa h1:LHTHcTQiSGT7VVbI0o4wBRNQIgn917usHWOd6VAffYI=
|
||||
github.com/alexbrainman/sspi v0.0.0-20231016080023-1a75b4708caa/go.mod h1:cEWa1LVoE5KvSD9ONXsZrj0z6KqySlCCNKHlLzbqAt4=
|
||||
github.com/alexbrainman/sspi v0.0.0-20250919150558-7d374ff0d59e h1:4dAU9FXIyQktpoUAgOJK3OTFc/xug0PCXYCqU0FgDKI=
|
||||
github.com/alexbrainman/sspi v0.0.0-20250919150558-7d374ff0d59e/go.mod h1:cEWa1LVoE5KvSD9ONXsZrj0z6KqySlCCNKHlLzbqAt4=
|
||||
github.com/andreyvit/diff v0.0.0-20170406064948-c7f18ee00883 h1:bvNMNQO63//z+xNgfBlViaCIJKLlCJ6/fmUseuG0wVQ=
|
||||
github.com/andreyvit/diff v0.0.0-20170406064948-c7f18ee00883/go.mod h1:rCTlJbsFo29Kk6CurOXKm700vrz8f0KW0JNfpkRJY/8=
|
||||
github.com/andybalholm/cascadia v1.3.3 h1:AG2YHrzJIm4BZ19iwJ/DAua6Btl3IwJX+VI4kktS1LM=
|
||||
@@ -30,12 +30,48 @@ github.com/apapsch/go-jsonmerge/v2 v2.0.0 h1:axGnT1gRIfimI7gJifB699GoE/oq+F2MU7D
|
||||
github.com/apapsch/go-jsonmerge/v2 v2.0.0/go.mod h1:lvDnEdqiQrp0O42VQGgmlKpxL1AP2+08jFMw88y4klk=
|
||||
github.com/arbovm/levenshtein v0.0.0-20160628152529-48b4e1c0c4d0 h1:jfIu9sQUG6Ig+0+Ap1h4unLjW6YQJpKZVmUzxsD4E/Q=
|
||||
github.com/arbovm/levenshtein v0.0.0-20160628152529-48b4e1c0c4d0/go.mod h1:t2tdKJDJF9BV14lnkjHmOQgcvEKgtqs5a1N3LNdJhGE=
|
||||
github.com/aws/aws-sdk-go-v2 v1.39.6 h1:2JrPCVgWJm7bm83BDwY5z8ietmeJUbh3O2ACnn+Xsqk=
|
||||
github.com/aws/aws-sdk-go-v2 v1.39.6/go.mod h1:c9pm7VwuW0UPxAEYGyTmyurVcNrbF6Rt/wixFqDhcjE=
|
||||
github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.7.3 h1:DHctwEM8P8iTXFxC/QK0MRjwEpWQeM9yzidCRjldUz0=
|
||||
github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.7.3/go.mod h1:xdCzcZEtnSTKVDOmUZs4l/j3pSV6rpo1WXl5ugNsL8Y=
|
||||
github.com/aws/aws-sdk-go-v2/config v1.31.20 h1:/jWF4Wu90EhKCgjTdy1DGxcbcbNrjfBHvksEL79tfQc=
|
||||
github.com/aws/aws-sdk-go-v2/config v1.31.20/go.mod h1:95Hh1Tc5VYKL9NJ7tAkDcqeKt+MCXQB1hQZaRdJIZE0=
|
||||
github.com/aws/aws-sdk-go-v2/credentials v1.18.24 h1:iJ2FmPT35EaIB0+kMa6TnQ+PwG5A1prEdAw+PsMzfHg=
|
||||
github.com/aws/aws-sdk-go-v2/credentials v1.18.24/go.mod h1:U91+DrfjAiXPDEGYhh/x29o4p0qHX5HDqG7y5VViv64=
|
||||
github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.18.13 h1:T1brd5dR3/fzNFAQch/iBKeX07/ffu/cLu+q+RuzEWk=
|
||||
github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.18.13/go.mod h1:Peg/GBAQ6JDt+RoBf4meB1wylmAipb7Kg2ZFakZTlwk=
|
||||
github.com/aws/aws-sdk-go-v2/internal/configsources v1.4.13 h1:a+8/MLcWlIxo1lF9xaGt3J/u3yOZx+CdSveSNwjhD40=
|
||||
github.com/aws/aws-sdk-go-v2/internal/configsources v1.4.13/go.mod h1:oGnKwIYZ4XttyU2JWxFrwvhF6YKiK/9/wmE3v3Iu9K8=
|
||||
github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.7.13 h1:HBSI2kDkMdWz4ZM7FjwE7e/pWDEZ+nR95x8Ztet1ooY=
|
||||
github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.7.13/go.mod h1:YE94ZoDArI7awZqJzBAZ3PDD2zSfuP7w6P2knOzIn8M=
|
||||
github.com/aws/aws-sdk-go-v2/internal/ini v1.8.4 h1:WKuaxf++XKWlHWu9ECbMlha8WOEGm0OUEZqm4K/Gcfk=
|
||||
github.com/aws/aws-sdk-go-v2/internal/ini v1.8.4/go.mod h1:ZWy7j6v1vWGmPReu0iSGvRiise4YI5SkR3OHKTZ6Wuc=
|
||||
github.com/aws/aws-sdk-go-v2/internal/v4a v1.4.13 h1:eg/WYAa12vqTphzIdWMzqYRVKKnCboVPRlvaybNCqPA=
|
||||
github.com/aws/aws-sdk-go-v2/internal/v4a v1.4.13/go.mod h1:/FDdxWhz1486obGrKKC1HONd7krpk38LBt+dutLcN9k=
|
||||
github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.13.3 h1:x2Ibm/Af8Fi+BH+Hsn9TXGdT+hKbDd5XOTZxTMxDk7o=
|
||||
github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.13.3/go.mod h1:IW1jwyrQgMdhisceG8fQLmQIydcT/jWY21rFhzgaKwo=
|
||||
github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.9.4 h1:NvMjwvv8hpGUILarKw7Z4Q0w1H9anXKsesMxtw++MA4=
|
||||
github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.9.4/go.mod h1:455WPHSwaGj2waRSpQp7TsnpOnBfw8iDfPfbwl7KPJE=
|
||||
github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.13.13 h1:kDqdFvMY4AtKoACfzIGD8A0+hbT41KTKF//gq7jITfM=
|
||||
github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.13.13/go.mod h1:lmKuogqSU3HzQCwZ9ZtcqOc5XGMqtDK7OIc2+DxiUEg=
|
||||
github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.19.13 h1:zhBJXdhWIFZ1acfDYIhu4+LCzdUS2Vbcum7D01dXlHQ=
|
||||
github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.19.13/go.mod h1:JaaOeCE368qn2Hzi3sEzY6FgAZVCIYcC2nwbro2QCh8=
|
||||
github.com/aws/aws-sdk-go-v2/service/s3 v1.90.2 h1:DhdbtDl4FdNlj31+xiRXANxEE+eC7n8JQz+/ilwQ8Uc=
|
||||
github.com/aws/aws-sdk-go-v2/service/s3 v1.90.2/go.mod h1:+wArOOrcHUevqdto9k1tKOF5++YTe9JEcPSc9Tx2ZSw=
|
||||
github.com/aws/aws-sdk-go-v2/service/sso v1.30.3 h1:NjShtS1t8r5LUfFVtFeI8xLAHQNTa7UI0VawXlrBMFQ=
|
||||
github.com/aws/aws-sdk-go-v2/service/sso v1.30.3/go.mod h1:fKvyjJcz63iL/ftA6RaM8sRCtN4r4zl4tjL3qw5ec7k=
|
||||
github.com/aws/aws-sdk-go-v2/service/ssooidc v1.35.7 h1:gTsnx0xXNQ6SBbymoDvcoRHL+q4l/dAFsQuKfDWSaGc=
|
||||
github.com/aws/aws-sdk-go-v2/service/ssooidc v1.35.7/go.mod h1:klO+ejMvYsB4QATfEOIXk8WAEwN4N0aBfJpvC+5SZBo=
|
||||
github.com/aws/aws-sdk-go-v2/service/sts v1.40.2 h1:HK5ON3KmQV2HcAunnx4sKLB9aPf3gKGwVAf7xnx0QT0=
|
||||
github.com/aws/aws-sdk-go-v2/service/sts v1.40.2/go.mod h1:E19xDjpzPZC7LS2knI9E6BaRFDK43Eul7vd6rSq2HWk=
|
||||
github.com/aws/smithy-go v1.23.2 h1:Crv0eatJUQhaManss33hS5r40CG3ZFH+21XSkqMrIUM=
|
||||
github.com/aws/smithy-go v1.23.2/go.mod h1:LEj2LM3rBRQJxPZTB4KuzZkaZYnZPnvgIhb4pu07mx0=
|
||||
github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM=
|
||||
github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw=
|
||||
github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs=
|
||||
github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
|
||||
github.com/coreos/go-oidc/v3 v3.12.0 h1:sJk+8G2qq94rDI6ehZ71Bol3oUHy63qNYmkiSjrc/Jo=
|
||||
github.com/coreos/go-oidc/v3 v3.12.0/go.mod h1:gE3LgjOgFoHi9a4ce4/tJczr0Ai2/BoDhf0r5lltWI0=
|
||||
github.com/coreos/go-oidc/v3 v3.16.0 h1:qRQUCFstKpXwmEjDQTIbyY/5jF00+asXzSkmkoa/mow=
|
||||
github.com/coreos/go-oidc/v3 v3.16.0/go.mod h1:wqPbKFrVnE90vty060SB40FCJ8fTHTxSwyXJqZH+sI8=
|
||||
github.com/cpuguy83/go-md2man/v2 v2.0.7 h1:zbFlGlXEAKlwXpmvle3d8Oe3YnkKIK4xSRTd3sHPnBo=
|
||||
github.com/cpuguy83/go-md2man/v2 v2.0.7/go.mod h1:oOW0eioCTA6cOiMLiUPZOpcVxMig6NIQQ7OS05n1F4g=
|
||||
github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E=
|
||||
@@ -64,32 +100,53 @@ github.com/frankban/quicktest v1.13.0 h1:yNZif1OkDfNoDfb9zZa9aXIpejNR4F23Wely0c+
|
||||
github.com/frankban/quicktest v1.13.0/go.mod h1:qLE0fzW0VuyUAJgPU19zByoIr0HtCHN/r/VLSOOIySU=
|
||||
github.com/fsnotify/fsnotify v1.9.0 h1:2Ml+OJNzbYCTzsxtv8vKSFD9PbJjmhYF14k/jKC7S9k=
|
||||
github.com/fsnotify/fsnotify v1.9.0/go.mod h1:8jBTzvmWwFyi3Pb8djgCCO5IBqzKJ/Jwo8TRcHyHii0=
|
||||
github.com/go-asn1-ber/asn1-ber v1.5.7 h1:DTX+lbVTWaTw1hQ+PbZPlnDZPEIs0SS/GCZAl535dDk=
|
||||
github.com/go-asn1-ber/asn1-ber v1.5.7/go.mod h1:hEBeB/ic+5LoWskz+yKT7vGhhPYkProFKoKdwZRWMe0=
|
||||
github.com/go-co-op/gocron/v2 v2.16.0 h1:uqUF6WFZ4enRU45pWFNcn1xpDLc+jBOTKhPQI16Z1xs=
|
||||
github.com/go-co-op/gocron/v2 v2.16.0/go.mod h1:opexeOFy5BplhsKdA7bzY9zeYih8I8/WNJ4arTIFPVc=
|
||||
github.com/go-jose/go-jose/v4 v4.0.5 h1:M6T8+mKZl/+fNNuFHvGIzDz7BTLQPIounk/b9dw3AaE=
|
||||
github.com/go-jose/go-jose/v4 v4.0.5/go.mod h1:s3P1lRrkT8igV8D9OjyL4WRyHvjB6a4JSllnOrmmBOA=
|
||||
github.com/go-ldap/ldap/v3 v3.4.10 h1:ot/iwPOhfpNVgB1o+AVXljizWZ9JTp7YF5oeyONmcJU=
|
||||
github.com/go-ldap/ldap/v3 v3.4.10/go.mod h1:JXh4Uxgi40P6E9rdsYqpUtbW46D9UTjJ9QSwGRznplY=
|
||||
github.com/go-asn1-ber/asn1-ber v1.5.8-0.20250403174932-29230038a667 h1:BP4M0CvQ4S3TGls2FvczZtj5Re/2ZzkV9VwqPHH/3Bo=
|
||||
github.com/go-asn1-ber/asn1-ber v1.5.8-0.20250403174932-29230038a667/go.mod h1:hEBeB/ic+5LoWskz+yKT7vGhhPYkProFKoKdwZRWMe0=
|
||||
github.com/go-co-op/gocron/v2 v2.17.0 h1:e/oj6fcAM8vOOKZxv2Cgfmjo+s8AXC46po5ZPtaSea4=
|
||||
github.com/go-co-op/gocron/v2 v2.17.0/go.mod h1:Zii6he+Zfgy5W9B+JKk/KwejFOW0kZTFvHtwIpR4aBI=
|
||||
github.com/go-jose/go-jose/v4 v4.1.3 h1:CVLmWDhDVRa6Mi/IgCgaopNosCaHz7zrMeF9MlZRkrs=
|
||||
github.com/go-jose/go-jose/v4 v4.1.3/go.mod h1:x4oUasVrzR7071A4TnHLGSPpNOm2a21K9Kf04k1rs08=
|
||||
github.com/go-ldap/ldap/v3 v3.4.12 h1:1b81mv7MagXZ7+1r7cLTWmyuTqVqdwbtJSjC0DAp9s4=
|
||||
github.com/go-ldap/ldap/v3 v3.4.12/go.mod h1:+SPAGcTtOfmGsCb3h1RFiq4xpp4N636G75OEace8lNo=
|
||||
github.com/go-logr/logr v1.4.2 h1:6pFjapn8bFcIbiKo3XT4j/BhANplGihG6tvd+8rYgrY=
|
||||
github.com/go-logr/logr v1.4.2/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY=
|
||||
github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag=
|
||||
github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE=
|
||||
github.com/go-openapi/jsonpointer v0.21.1 h1:whnzv/pNXtK2FbX/W9yJfRmE2gsmkfahjMKB0fZvcic=
|
||||
github.com/go-openapi/jsonpointer v0.21.1/go.mod h1:50I1STOfbY1ycR8jGz8DaMeLCdXiI6aDteEdRNNzpdk=
|
||||
github.com/go-openapi/jsonreference v0.21.0 h1:Rs+Y7hSXT83Jacb7kFyjn4ijOuVGSvOdF2+tg1TRrwQ=
|
||||
github.com/go-openapi/jsonreference v0.21.0/go.mod h1:LmZmgsrTkVg9LG4EaHeY8cBDslNPMo06cago5JNLkm4=
|
||||
github.com/go-openapi/spec v0.21.0 h1:LTVzPc3p/RzRnkQqLRndbAzjY0d0BCL72A6j3CdL9ZY=
|
||||
github.com/go-openapi/spec v0.21.0/go.mod h1:78u6VdPw81XU44qEWGhtr982gJ5BWg2c0I5XwVMotYk=
|
||||
github.com/go-openapi/swag v0.23.1 h1:lpsStH0n2ittzTnbaSloVZLuB5+fvSY/+hnagBjSNZU=
|
||||
github.com/go-openapi/swag v0.23.1/go.mod h1:STZs8TbRvEQQKUA+JZNAm3EWlgaOBGpyFDqQnDHMef0=
|
||||
github.com/go-openapi/jsonpointer v0.22.3 h1:dKMwfV4fmt6Ah90zloTbUKWMD+0he+12XYAsPotrkn8=
|
||||
github.com/go-openapi/jsonpointer v0.22.3/go.mod h1:0lBbqeRsQ5lIanv3LHZBrmRGHLHcQoOXQnf88fHlGWo=
|
||||
github.com/go-openapi/jsonreference v0.21.3 h1:96Dn+MRPa0nYAR8DR1E03SblB5FJvh7W6krPI0Z7qMc=
|
||||
github.com/go-openapi/jsonreference v0.21.3/go.mod h1:RqkUP0MrLf37HqxZxrIAtTWW4ZJIK1VzduhXYBEeGc4=
|
||||
github.com/go-openapi/spec v0.22.1 h1:beZMa5AVQzRspNjvhe5aG1/XyBSMeX1eEOs7dMoXh/k=
|
||||
github.com/go-openapi/spec v0.22.1/go.mod h1:c7aeIQT175dVowfp7FeCvXXnjN/MrpaONStibD2WtDA=
|
||||
github.com/go-openapi/swag v0.19.15 h1:D2NRCBzS9/pEY3gP9Nl8aDqGUcPFrwG2p+CNFrLyrCM=
|
||||
github.com/go-openapi/swag/conv v0.25.4 h1:/Dd7p0LZXczgUcC/Ikm1+YqVzkEeCc9LnOWjfkpkfe4=
|
||||
github.com/go-openapi/swag/conv v0.25.4/go.mod h1:3LXfie/lwoAv0NHoEuY1hjoFAYkvlqI/Bn5EQDD3PPU=
|
||||
github.com/go-openapi/swag/jsonname v0.25.4 h1:bZH0+MsS03MbnwBXYhuTttMOqk+5KcQ9869Vye1bNHI=
|
||||
github.com/go-openapi/swag/jsonname v0.25.4/go.mod h1:GPVEk9CWVhNvWhZgrnvRA6utbAltopbKwDu8mXNUMag=
|
||||
github.com/go-openapi/swag/jsonutils v0.25.4 h1:VSchfbGhD4UTf4vCdR2F4TLBdLwHyUDTd1/q4i+jGZA=
|
||||
github.com/go-openapi/swag/jsonutils v0.25.4/go.mod h1:7OYGXpvVFPn4PpaSdPHJBtF0iGnbEaTk8AvBkoWnaAY=
|
||||
github.com/go-openapi/swag/jsonutils/fixtures_test v0.25.4 h1:IACsSvBhiNJwlDix7wq39SS2Fh7lUOCJRmx/4SN4sVo=
|
||||
github.com/go-openapi/swag/jsonutils/fixtures_test v0.25.4/go.mod h1:Mt0Ost9l3cUzVv4OEZG+WSeoHwjWLnarzMePNDAOBiM=
|
||||
github.com/go-openapi/swag/loading v0.25.4 h1:jN4MvLj0X6yhCDduRsxDDw1aHe+ZWoLjW+9ZQWIKn2s=
|
||||
github.com/go-openapi/swag/loading v0.25.4/go.mod h1:rpUM1ZiyEP9+mNLIQUdMiD7dCETXvkkC30z53i+ftTE=
|
||||
github.com/go-openapi/swag/stringutils v0.25.4 h1:O6dU1Rd8bej4HPA3/CLPciNBBDwZj9HiEpdVsb8B5A8=
|
||||
github.com/go-openapi/swag/stringutils v0.25.4/go.mod h1:GTsRvhJW5xM5gkgiFe0fV3PUlFm0dr8vki6/VSRaZK0=
|
||||
github.com/go-openapi/swag/typeutils v0.25.4 h1:1/fbZOUN472NTc39zpa+YGHn3jzHWhv42wAJSN91wRw=
|
||||
github.com/go-openapi/swag/typeutils v0.25.4/go.mod h1:Ou7g//Wx8tTLS9vG0UmzfCsjZjKhpjxayRKTHXf2pTE=
|
||||
github.com/go-openapi/swag/yamlutils v0.25.4 h1:6jdaeSItEUb7ioS9lFoCZ65Cne1/RZtPBZ9A56h92Sw=
|
||||
github.com/go-openapi/swag/yamlutils v0.25.4/go.mod h1:MNzq1ulQu+yd8Kl7wPOut/YHAAU/H6hL91fF+E2RFwc=
|
||||
github.com/go-openapi/testify/enable/yaml/v2 v2.0.2 h1:0+Y41Pz1NkbTHz8NngxTuAXxEodtNSI1WG1c/m5Akw4=
|
||||
github.com/go-openapi/testify/enable/yaml/v2 v2.0.2/go.mod h1:kme83333GCtJQHXQ8UKX3IBZu6z8T5Dvy5+CW3NLUUg=
|
||||
github.com/go-openapi/testify/v2 v2.0.2 h1:X999g3jeLcoY8qctY/c/Z8iBHTbwLz7R2WXd6Ub6wls=
|
||||
github.com/go-openapi/testify/v2 v2.0.2/go.mod h1:HCPmvFFnheKK2BuwSA0TbbdxJ3I16pjwMkYkP4Ywn54=
|
||||
github.com/go-sql-driver/mysql v1.4.1/go.mod h1:zAC/RDZ24gD3HViQzih4MyKcchzm+sOG5ZlKdlhCg5w=
|
||||
github.com/go-sql-driver/mysql v1.8.1/go.mod h1:wEBSXgmK//2ZFJyE+qWnIsVGmvmEKlqwuVSjsCm7DZg=
|
||||
github.com/go-sql-driver/mysql v1.9.0 h1:Y0zIbQXhQKmQgTp44Y1dp3wTXcn804QoTptLZT1vtvo=
|
||||
github.com/go-sql-driver/mysql v1.9.0/go.mod h1:pDetrLJeA3oMujJuvXc8RJoasr589B6A9fwzD3QMrqw=
|
||||
github.com/go-sql-driver/mysql v1.9.3 h1:U/N249h2WzJ3Ukj8SowVFjdtZKfu9vlLZxjPXV1aweo=
|
||||
github.com/go-sql-driver/mysql v1.9.3/go.mod h1:qn46aNg1333BRMNU69Lq93t8du/dwxI64Gl8i5p1WMU=
|
||||
github.com/go-viper/mapstructure/v2 v2.4.0 h1:EBsztssimR/CONLSZZ04E8qAkxNYq4Qp9LvH92wZUgs=
|
||||
github.com/go-viper/mapstructure/v2 v2.4.0/go.mod h1:oJDH3BJKyqBA2TXFhDsKDGDTlndYOZ6rGS0BRZIxGhM=
|
||||
github.com/goccy/go-yaml v1.19.0 h1:EmkZ9RIsX+Uq4DYFowegAuJo8+xdX3T/2dwNPXbxEYE=
|
||||
github.com/goccy/go-yaml v1.19.0/go.mod h1:XBurs7gK8ATbW4ZPGKgcbrY1Br56PdM69F7LkFRi1kA=
|
||||
github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q=
|
||||
github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q=
|
||||
github.com/golang-jwt/jwt/v5 v5.2.2 h1:Rl4B7itRWVtYIHFrSNd7vhTiz9UpLdi6gZhZ3wEeDy8=
|
||||
@@ -101,7 +158,6 @@ github.com/golang/snappy v0.0.4 h1:yAGX7huGHXlcLOEtBnF4w7FQwA26wojNCwOYAEhLjQM=
|
||||
github.com/golang/snappy v0.0.4/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q=
|
||||
github.com/google/go-cmp v0.5.2/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
|
||||
github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
|
||||
github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
|
||||
github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8=
|
||||
github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU=
|
||||
github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg=
|
||||
@@ -115,10 +171,8 @@ github.com/gorilla/handlers v1.5.2 h1:cLTUSsNkgcwhgRqvCNmdbRWG0A3N4F+M2nWKdScwyE
|
||||
github.com/gorilla/handlers v1.5.2/go.mod h1:dX+xVpaxdSw+q0Qek8SSsl3dfMk3jNddUkMzo0GtH0w=
|
||||
github.com/gorilla/mux v1.8.1 h1:TuBL49tXwgrFYWhqrNgrUNEY92u81SPhu7sTdzQEiWY=
|
||||
github.com/gorilla/mux v1.8.1/go.mod h1:AKf9I4AEqPTmMytcMc0KkNouC66V3BtZ4qD5fmWSiMQ=
|
||||
github.com/gorilla/securecookie v1.1.1/go.mod h1:ra0sb63/xPlUeL+yeDciTfxMRAA+MP+HVt/4epWDjd4=
|
||||
github.com/gorilla/securecookie v1.1.2 h1:YCIWL56dvtr73r6715mJs5ZvhtnY73hBvEF8kXD8ePA=
|
||||
github.com/gorilla/securecookie v1.1.2/go.mod h1:NfCASbcHqRSY+3a8tlWJwsQap2VX5pwzwo4h3eOamfo=
|
||||
github.com/gorilla/sessions v1.2.1/go.mod h1:dk2InVEVJ0sfLlnXv9EAgkf6ecYs/i80K/zI+bUmuGM=
|
||||
github.com/gorilla/sessions v1.4.0 h1:kpIYOp/oi6MG/p5PgxApU8srsSw9tuFbt46Lt7auzqQ=
|
||||
github.com/gorilla/sessions v1.4.0/go.mod h1:FLWm50oby91+hl7p/wRxDth9bWSuk0qVL2emc7lT5ik=
|
||||
github.com/gorilla/websocket v1.5.3 h1:saDtZ6Pbx/0u+bgYQ3q96pZgCzfhKXGPqt7kZ72aNNg=
|
||||
@@ -128,7 +182,6 @@ github.com/hashicorp/errwrap v1.1.0 h1:OxrOeh75EUXMY8TBjag2fzXGZ40LB6IKw45YeGUDY
|
||||
github.com/hashicorp/errwrap v1.1.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4=
|
||||
github.com/hashicorp/go-multierror v1.1.1 h1:H5DkEtf6CXdFp0N0Em5UCwQpXMWke8IA0+lD48awMYo=
|
||||
github.com/hashicorp/go-multierror v1.1.1/go.mod h1:iw975J/qwKPdAO1clOe2L8331t/9/fmwbPZ6JB6eMoM=
|
||||
github.com/hashicorp/go-uuid v1.0.2/go.mod h1:6SBZvOh/SIDV7/2o3Jml5SYk/TvGqwFJ/bN7x4byOro=
|
||||
github.com/hashicorp/go-uuid v1.0.3 h1:2gKiV6YVmrJ1i2CKKa9obLvRieoRGviZFL26PcT/Co8=
|
||||
github.com/hashicorp/go-uuid v1.0.3/go.mod h1:6SBZvOh/SIDV7/2o3Jml5SYk/TvGqwFJ/bN7x4byOro=
|
||||
github.com/hashicorp/golang-lru/v2 v2.0.7 h1:a+bsQ5rvGLjzHuww6tVxozPZFVghXaHOwFs4luLUK2k=
|
||||
@@ -162,14 +215,12 @@ github.com/joho/godotenv v1.5.1 h1:7eLL/+HRGLY0ldzfGMeQkb7vMd0as4CfYvUVzLqw0N0=
|
||||
github.com/joho/godotenv v1.5.1/go.mod h1:f4LDr5Voq0i2e/R5DDNOoa2zzDfwtkZa6DnEwAbqwq4=
|
||||
github.com/jonboulle/clockwork v0.5.0 h1:Hyh9A8u51kptdkR+cqRpT1EebBwTn1oK9YfGYbdFz6I=
|
||||
github.com/jonboulle/clockwork v0.5.0/go.mod h1:3mZlmanh0g2NDKO5TWZVJAfofYk64M7XN3SzBPjZF60=
|
||||
github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY=
|
||||
github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y=
|
||||
github.com/jpillora/backoff v1.0.0 h1:uvFg412JmmHBHw7iwprIxkPMI+sGQ4kzOWsMeHnm2EA=
|
||||
github.com/jpillora/backoff v1.0.0/go.mod h1:J/6gKK9jxlEcS3zixgDgUAsiuZ7yrSoa/FX5e0EB2j4=
|
||||
github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM=
|
||||
github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo=
|
||||
github.com/klauspost/compress v1.18.0 h1:c/Cqfb0r+Yi+JtIEq73FWXVkRonBlf0CRNYc8Zttxdo=
|
||||
github.com/klauspost/compress v1.18.0/go.mod h1:2Pp+KzxcywXVXMr50+X0Q/Lsb43OQHYWRCY2AiWywWQ=
|
||||
github.com/klauspost/compress v1.18.1 h1:bcSGx7UbpBqMChDtsF28Lw6v/G94LPrrbMbdC3JH2co=
|
||||
github.com/klauspost/compress v1.18.1/go.mod h1:ZQFFVG+MdnR0P+l6wpXgIL4NTtwiKIdBnrBd8Nrxr+0=
|
||||
github.com/kr/pretty v0.2.1/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI=
|
||||
github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE=
|
||||
github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk=
|
||||
@@ -186,12 +237,10 @@ github.com/lib/pq v1.10.9 h1:YXG7RB+JIjhP29X+OtkiDnYaXQwpS4JEWq7dtCCRUEw=
|
||||
github.com/lib/pq v1.10.9/go.mod h1:AlVN5x4E4T544tWzH6hKfbfQvm3HdbOxrmggDNAPY9o=
|
||||
github.com/linkedin/goavro/v2 v2.14.0 h1:aNO/js65U+Mwq4yB5f1h01c3wiM458qtRad1DN0CMUI=
|
||||
github.com/linkedin/goavro/v2 v2.14.0/go.mod h1:KXx+erlq+RPlGSPmLF7xGo6SAbh8sCQ53x064+ioxhk=
|
||||
github.com/mailru/easyjson v0.9.0 h1:PrnmzHw7262yW8sTBwxi1PdJA3Iw/EKBa8psRf7d9a4=
|
||||
github.com/mailru/easyjson v0.9.0/go.mod h1:1+xMtQp2MRNVL/V1bOzuP3aP8VNwRW55fQUto+XFtTU=
|
||||
github.com/mattn/go-sqlite3 v1.10.0/go.mod h1:FPy6KqzDD04eiIsT53CuJW3U88zkxoIYsOqkbpncsNc=
|
||||
github.com/mattn/go-sqlite3 v1.14.22/go.mod h1:Uh1q+B4BYcTPb+yiD3kU8Ct7aC0hY9fxUwlHK0RXw+Y=
|
||||
github.com/mattn/go-sqlite3 v1.14.24 h1:tpSp2G2KyMnnQu99ngJ47EIkWVmliIizyZBfPrBWDRM=
|
||||
github.com/mattn/go-sqlite3 v1.14.24/go.mod h1:Uh1q+B4BYcTPb+yiD3kU8Ct7aC0hY9fxUwlHK0RXw+Y=
|
||||
github.com/mattn/go-sqlite3 v1.14.32 h1:JD12Ag3oLy1zQA+BNn74xRgaBbdhbNIDYvQUEuuErjs=
|
||||
github.com/mattn/go-sqlite3 v1.14.32/go.mod h1:Uh1q+B4BYcTPb+yiD3kU8Ct7aC0hY9fxUwlHK0RXw+Y=
|
||||
github.com/moby/docker-image-spec v1.3.1 h1:jMKff3w6PgbfSa69GfNg+zN/XLhfXJGnEx3Nl2EsFP0=
|
||||
github.com/moby/docker-image-spec v1.3.1/go.mod h1:eKmb5VW8vQEh/BAr2yvVNvuiJuY6UIocYsFu/DxxRpo=
|
||||
github.com/moby/term v0.5.0 h1:xt8Q1nalod/v7BqbG21f8mQPqH+xAaC9C3N3wfWbVP0=
|
||||
@@ -207,8 +256,8 @@ github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq
|
||||
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ=
|
||||
github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f h1:KUppIJq7/+SVif2QVs3tOP0zanoHgBEVAwHxUSIzRqU=
|
||||
github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f/go.mod h1:qRWi+5nqEBWmkhHvq77mSJWrCKwh8bxhgT7d/eI7P4U=
|
||||
github.com/nats-io/nats.go v1.46.1 h1:bqQ2ZcxVd2lpYI97xYASeRTY3I5boe/IVmuUDPitHfo=
|
||||
github.com/nats-io/nats.go v1.46.1/go.mod h1:iRWIPokVIFbVijxuMQq4y9ttaBTMe0SFdlZfMDd+33g=
|
||||
github.com/nats-io/nats.go v1.47.0 h1:YQdADw6J/UfGUd2Oy6tn4Hq6YHxCaJrVKayxxFqYrgM=
|
||||
github.com/nats-io/nats.go v1.47.0/go.mod h1:iRWIPokVIFbVijxuMQq4y9ttaBTMe0SFdlZfMDd+33g=
|
||||
github.com/nats-io/nkeys v0.4.11 h1:q44qGV008kYd9W1b1nEBkNzvnWxtRSQ7A8BoqRrcfa0=
|
||||
github.com/nats-io/nkeys v0.4.11/go.mod h1:szDimtgmfOi9n25JpfIdGw12tZFYXqhGxjhVxsatHVE=
|
||||
github.com/nats-io/nuid v1.0.1 h1:5iA8DT8V7q8WK2EScv2padNa/rTESc1KdnPw4TC2paw=
|
||||
@@ -237,8 +286,8 @@ github.com/qustavo/sqlhooks/v2 v2.1.0 h1:54yBemHnGHp/7xgT+pxwmIlMSDNYKx5JW5dfRAi
|
||||
github.com/qustavo/sqlhooks/v2 v2.1.0/go.mod h1:aMREyKo7fOKTwiLuWPsaHRXEmtqG4yREztO0idF83AU=
|
||||
github.com/robfig/cron/v3 v3.0.1 h1:WdRxkvbJztn8LMz/QEvLN5sBU+xKpSqwwUO1Pjr4qDs=
|
||||
github.com/robfig/cron/v3 v3.0.1/go.mod h1:eQICP3HwyT7UooqI/z+Ov+PtYAWygg1TEWWzGIFLtro=
|
||||
github.com/rogpeppe/go-internal v1.12.0 h1:exVL4IDcn6na9z1rAb56Vxr+CgyK3nn3O+epU5NdKM8=
|
||||
github.com/rogpeppe/go-internal v1.12.0/go.mod h1:E+RYuTGaKKdloAfM02xzb0FW3Paa99yedzYV+kq4uf4=
|
||||
github.com/rogpeppe/go-internal v1.14.1 h1:UQB4HGPB6osV0SQTLymcB4TgvyWu6ZyliaW0tI/otEQ=
|
||||
github.com/rogpeppe/go-internal v1.14.1/go.mod h1:MaRKkUm5W0goXpeCfT7UZI6fk/L7L7so1lCWt35ZSgc=
|
||||
github.com/russross/blackfriday/v2 v2.1.0 h1:JIOH55/0cWyOuilr9/qlrm0BSXldqnqwMsf35Ld67mk=
|
||||
github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
|
||||
github.com/santhosh-tekuri/jsonschema/v5 v5.3.1 h1:lZUw3E0/J3roVtGQ+SCrUrg3ON6NgVqpn3+iol9aGu4=
|
||||
@@ -249,14 +298,13 @@ github.com/sosodev/duration v1.3.1 h1:qtHBDMQ6lvMQsL15g4aopM4HEfOaYuhWBw3NPTtlqq
|
||||
github.com/sosodev/duration v1.3.1/go.mod h1:RQIBBX0+fMLc/D9+Jb/fwvVmo0eZvDDEERAikUR6SDg=
|
||||
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
|
||||
github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw=
|
||||
github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo=
|
||||
github.com/stretchr/objx v0.5.2 h1:xuMeJ0Sdp5ZMRXx/aWO6RZxdr3beISkG5/G/aIRr3pY=
|
||||
github.com/stretchr/objx v0.5.2/go.mod h1:FRsXN1f5AsAjCGJKqEizvkpNtU+EGNCLh3NxZ/8L+MA=
|
||||
github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs=
|
||||
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
|
||||
github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4=
|
||||
github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
|
||||
github.com/stretchr/testify v1.7.5/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU=
|
||||
github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU=
|
||||
github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4=
|
||||
github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U=
|
||||
github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U=
|
||||
github.com/swaggo/files v1.0.1 h1:J1bVJ4XHZNq0I46UU90611i9/YzdrF7x92oX1ig5IdE=
|
||||
@@ -267,8 +315,10 @@ github.com/swaggo/swag v1.16.6 h1:qBNcx53ZaX+M5dxVyTrgQ0PJ/ACK+NzhwcbieTt+9yI=
|
||||
github.com/swaggo/swag v1.16.6/go.mod h1:ngP2etMK5a0P3QBizic5MEwpRmluJZPHjXcMoj4Xesg=
|
||||
github.com/urfave/cli/v2 v2.27.7 h1:bH59vdhbjLv3LAvIu6gd0usJHgoTTPhCFib8qqOwXYU=
|
||||
github.com/urfave/cli/v2 v2.27.7/go.mod h1:CyNAG/xg+iAOg0N4MPGZqVmv2rCoP267496AOXUZjA4=
|
||||
github.com/vektah/gqlparser/v2 v2.5.30 h1:EqLwGAFLIzt1wpx1IPpY67DwUujF1OfzgEyDsLrN6kE=
|
||||
github.com/vektah/gqlparser/v2 v2.5.30/go.mod h1:D1/VCZtV3LPnQrcPBeR/q5jkSQIPti0uYCP/RI0gIeo=
|
||||
github.com/urfave/cli/v3 v3.6.1 h1:j8Qq8NyUawj/7rTYdBGrxcH7A/j7/G8Q5LhWEW4G3Mo=
|
||||
github.com/urfave/cli/v3 v3.6.1/go.mod h1:ysVLtOEmg2tOy6PknnYVhDoouyC/6N42TMeoMzskhso=
|
||||
github.com/vektah/gqlparser/v2 v2.5.31 h1:YhWGA1mfTjID7qJhd1+Vxhpk5HTgydrGU9IgkWBTJ7k=
|
||||
github.com/vektah/gqlparser/v2 v2.5.31/go.mod h1:c1I28gSOVNzlfc4WuDlqU7voQnsqI6OG2amkBAFmgts=
|
||||
github.com/xrash/smetrics v0.0.0-20250705151800-55b8f293f342 h1:FnBeRrxr7OU4VvAzt5X7s6266i6cSVkkFPS0TuXWbIg=
|
||||
github.com/xrash/smetrics v0.0.0-20250705151800-55b8f293f342/go.mod h1:Ohn+xnUBiLI6FVj/9LpzZWtj1/D6lUovWYBkxHVV3aM=
|
||||
github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY=
|
||||
@@ -284,99 +334,59 @@ go.uber.org/atomic v1.11.0 h1:ZvwS0R+56ePWxUNi+Atn9dWONBPp/AUETXlHW0DxSjE=
|
||||
go.uber.org/atomic v1.11.0/go.mod h1:LUxbIzbOniOlMKjJjyPfpl4v+PKK2cNJn91OQbhoJI0=
|
||||
go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto=
|
||||
go.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE=
|
||||
go.yaml.in/yaml/v2 v2.4.2 h1:DzmwEr2rDGHl7lsFgAHxmNz/1NlQ7xLIrlN2h5d1eGI=
|
||||
go.yaml.in/yaml/v2 v2.4.2/go.mod h1:081UH+NErpNdqlCXm3TtEran0rJZGxAYx9hb/ELlsPU=
|
||||
go.yaml.in/yaml/v3 v3.0.3 h1:bXOww4E/J3f66rav3pX3m8w6jDE4knZjGOw8b5Y6iNE=
|
||||
go.yaml.in/yaml/v3 v3.0.3/go.mod h1:tBHosrYAkRZjRAOREWbDnBXUf08JOwYq++0QNwQiWzI=
|
||||
go.yaml.in/yaml/v2 v2.4.3 h1:6gvOSjQoTB3vt1l+CU+tSyi/HOjfOjRLJ4YwYZGwRO0=
|
||||
go.yaml.in/yaml/v2 v2.4.3/go.mod h1:zSxWcmIDjOzPXpjlTTbAsKokqkDNAVtZO0WOMiT90s8=
|
||||
go.yaml.in/yaml/v3 v3.0.4 h1:tfq32ie2Jv2UxXFdLJdh3jXuOzWiL1fo0bu/FbuKpbc=
|
||||
go.yaml.in/yaml/v3 v3.0.4/go.mod h1:DhzuOOF2ATzADvBadXxruRBLzYTpT36CKvDb3+aBEFg=
|
||||
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
|
||||
golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc=
|
||||
golang.org/x/crypto v0.6.0/go.mod h1:OFC/31mSvZgRz0V1QTNCzfAI1aIRzbiufJtkMIlEp58=
|
||||
golang.org/x/crypto v0.13.0/go.mod h1:y6Z2r+Rw4iayiXXAIxJIDAJ1zMW4yaTpebo8fPOliYc=
|
||||
golang.org/x/crypto v0.19.0/go.mod h1:Iy9bg/ha4yyC70EfRS8jz+B6ybOBKMaSxLj6P6oBDfU=
|
||||
golang.org/x/crypto v0.23.0/go.mod h1:CKFgDieR+mRhux2Lsu27y0fO304Db0wZe70UKqHu0v8=
|
||||
golang.org/x/crypto v0.31.0/go.mod h1:kDsLvtWBEx7MV9tJOj9bnXsPbxwJQ6csT/x4KIN4Ssk=
|
||||
golang.org/x/crypto v0.42.0 h1:chiH31gIWm57EkTXpwnqf8qeuMUi0yekh6mT2AvFlqI=
|
||||
golang.org/x/crypto v0.42.0/go.mod h1:4+rDnOTJhQCx2q7/j6rAN5XDw8kPjeaXEUR2eL94ix8=
|
||||
golang.org/x/crypto v0.44.0 h1:A97SsFvM3AIwEEmTBiaxPPTYpDC47w720rdiiUvgoAU=
|
||||
golang.org/x/crypto v0.44.0/go.mod h1:013i+Nw79BMiQiMsOPcVCB5ZIJbYkerPrGnOa00tvmc=
|
||||
golang.org/x/exp v0.0.0-20250620022241-b7579e27df2b h1:M2rDM6z3Fhozi9O7NWsxAkg/yqS/lQJ6PmkyIV3YP+o=
|
||||
golang.org/x/exp v0.0.0-20250620022241-b7579e27df2b/go.mod h1:3//PLf8L/X+8b4vuAfHzxeRUl04Adcb341+IGKfnqS8=
|
||||
golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4=
|
||||
golang.org/x/mod v0.8.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs=
|
||||
golang.org/x/mod v0.12.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs=
|
||||
golang.org/x/mod v0.15.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c=
|
||||
golang.org/x/mod v0.17.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c=
|
||||
golang.org/x/mod v0.27.0 h1:kb+q2PyFnEADO2IEF935ehFUXlWiNjJWtRNgBLSfbxQ=
|
||||
golang.org/x/mod v0.27.0/go.mod h1:rWI627Fq0DEoudcK+MBkNkCe0EetEaDSwJJkCcjpazc=
|
||||
golang.org/x/mod v0.30.0 h1:fDEXFVZ/fmCKProc/yAXXUijritrDzahmwwefnjoPFk=
|
||||
golang.org/x/mod v0.30.0/go.mod h1:lAsf5O2EvJeSFMiBxXDki7sCgAxEUcZHXoXMKT4GJKc=
|
||||
golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
|
||||
golang.org/x/net v0.0.0-20200114155413-6afb5195e5aa/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
|
||||
golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=
|
||||
golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c=
|
||||
golang.org/x/net v0.6.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs=
|
||||
golang.org/x/net v0.7.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs=
|
||||
golang.org/x/net v0.10.0/go.mod h1:0qNGK6F8kojg2nk9dLZ2mShWaEBan6FAoqfSigmmuDg=
|
||||
golang.org/x/net v0.15.0/go.mod h1:idbUs1IY1+zTqbi8yxTbhexhEEk5ur9LInksu6HrEpk=
|
||||
golang.org/x/net v0.21.0/go.mod h1:bIjVDfnllIU7BJ2DNgfnXvpSvtn8VRwhlsaeUTyUS44=
|
||||
golang.org/x/net v0.25.0/go.mod h1:JkAGAh7GEvH74S6FOH42FLoXpXbE/aqXSrIQjXgsiwM=
|
||||
golang.org/x/net v0.33.0/go.mod h1:HXLR5J+9DxmrqMwG9qjGCxZ+zKXxBru04zlTvWlWuN4=
|
||||
golang.org/x/net v0.43.0 h1:lat02VYK2j4aLzMzecihNvTlJNQUq316m2Mr9rnM6YE=
|
||||
golang.org/x/net v0.43.0/go.mod h1:vhO1fvI4dGsIjh73sWfUVjj3N7CA9WkKJNQm2svM6Jg=
|
||||
golang.org/x/oauth2 v0.30.0 h1:dnDm7JmhM45NNpd8FDDeLhK6FwqbOf4MLCM9zb1BOHI=
|
||||
golang.org/x/oauth2 v0.30.0/go.mod h1:B++QgG3ZKulg6sRPGD/mqlHQs5rB3Ml9erfeDY7xKlU=
|
||||
golang.org/x/net v0.47.0 h1:Mx+4dIFzqraBXUugkia1OOvlD6LemFo1ALMHjrXDOhY=
|
||||
golang.org/x/net v0.47.0/go.mod h1:/jNxtkgq5yWUGYkaZGqo27cfGZ1c5Nen03aYrrKpVRU=
|
||||
golang.org/x/oauth2 v0.32.0 h1:jsCblLleRMDrxMN29H3z/k1KliIvpLgCkE6R8FXXNgY=
|
||||
golang.org/x/oauth2 v0.32.0/go.mod h1:lzm5WQJQwKZ3nwavOZ3IS5Aulzxi68dUSgRHujetwEA=
|
||||
golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
|
||||
golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
|
||||
golang.org/x/sync v0.1.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
|
||||
golang.org/x/sync v0.3.0/go.mod h1:FU7BRWz2tNW+3quACPkgCx/L+uEAv1htQ0V83Z9Rj+Y=
|
||||
golang.org/x/sync v0.6.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk=
|
||||
golang.org/x/sync v0.7.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk=
|
||||
golang.org/x/sync v0.10.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk=
|
||||
golang.org/x/sync v0.17.0 h1:l60nONMj9l5drqw6jlhIELNv9I0A4OFgRsG9k2oT9Ug=
|
||||
golang.org/x/sync v0.17.0/go.mod h1:9KTHXmSnoGruLpwFjVSX0lNNA75CykiMECbovNTZqGI=
|
||||
golang.org/x/sync v0.18.0 h1:kr88TuHDroi+UVf+0hZnirlk8o8T+4MrK6mr60WkH/I=
|
||||
golang.org/x/sync v0.18.0/go.mod h1:9KTHXmSnoGruLpwFjVSX0lNNA75CykiMECbovNTZqGI=
|
||||
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
|
||||
golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/sys v0.8.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/sys v0.12.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/sys v0.17.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
|
||||
golang.org/x/sys v0.20.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
|
||||
golang.org/x/sys v0.28.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
|
||||
golang.org/x/sys v0.36.0 h1:KVRy2GtZBrk1cBYA7MKu5bEZFxQk4NIDV6RLVcC8o0k=
|
||||
golang.org/x/sys v0.36.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks=
|
||||
golang.org/x/telemetry v0.0.0-20240228155512-f48c80bd79b2/go.mod h1:TeRTkGYfJXctD9OcfyVLyj2J3IxLnKwHJR8f4D8a3YE=
|
||||
golang.org/x/sys v0.38.0 h1:3yZWxaJjBmCWXqhN1qh02AkOnCQ1poK6oF+a7xWL6Gc=
|
||||
golang.org/x/sys v0.38.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks=
|
||||
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
|
||||
golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
|
||||
golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k=
|
||||
golang.org/x/term v0.8.0/go.mod h1:xPskH00ivmX89bAKVGSKKtLOWNx2+17Eiy94tnKShWo=
|
||||
golang.org/x/term v0.12.0/go.mod h1:owVbMEjm3cBLCHdkQu9b1opXd4ETQWc3BhuQGKgXgvU=
|
||||
golang.org/x/term v0.17.0/go.mod h1:lLRBjIVuehSbZlaOtGMbcMncT+aqLLLmKrsjNrUguwk=
|
||||
golang.org/x/term v0.20.0/go.mod h1:8UkIAJTvZgivsXaD6/pH6U9ecQzZ45awqEOzuCvwpFY=
|
||||
golang.org/x/term v0.27.0/go.mod h1:iMsnZpn0cago0GOrHO2+Y7u7JPn5AylBrcoWkElMTSM=
|
||||
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
|
||||
golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
|
||||
golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ=
|
||||
golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8=
|
||||
golang.org/x/text v0.9.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8=
|
||||
golang.org/x/text v0.13.0/go.mod h1:TvPlkZtksWOMsz7fbANvkp4WM8x/WCo/om8BMLbz+aE=
|
||||
golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU=
|
||||
golang.org/x/text v0.15.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU=
|
||||
golang.org/x/text v0.21.0/go.mod h1:4IBbMaMmOPCJ8SecivzSH54+73PCFmPWxNTLm+vZkEQ=
|
||||
golang.org/x/text v0.29.0 h1:1neNs90w9YzJ9BocxfsQNHKuAT4pkghyXc4nhZ6sJvk=
|
||||
golang.org/x/text v0.29.0/go.mod h1:7MhJOA9CD2qZyOKYazxdYMF85OwPdEr9jTtBpO7ydH4=
|
||||
golang.org/x/time v0.13.0 h1:eUlYslOIt32DgYD6utsuUeHs4d7AsEYLuIAdg7FlYgI=
|
||||
golang.org/x/time v0.13.0/go.mod h1:eL/Oa2bBBK0TkX57Fyni+NgnyQQN4LitPmob2Hjnqw4=
|
||||
golang.org/x/text v0.31.0 h1:aC8ghyu4JhP8VojJ2lEHBnochRno1sgL6nEi9WGFGMM=
|
||||
golang.org/x/text v0.31.0/go.mod h1:tKRAlv61yKIjGGHX/4tP1LTbc13YSec1pxVEWXzfoeM=
|
||||
golang.org/x/time v0.14.0 h1:MRx4UaLrDotUKUdCIqzPC48t1Y9hANFKIRpNx+Te8PI=
|
||||
golang.org/x/time v0.14.0/go.mod h1:eL/Oa2bBBK0TkX57Fyni+NgnyQQN4LitPmob2Hjnqw4=
|
||||
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
|
||||
golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
|
||||
golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc=
|
||||
golang.org/x/tools v0.6.0/go.mod h1:Xwgl3UAJ/d3gWutnCtw505GrjyAbvKui8lOU390QaIU=
|
||||
golang.org/x/tools v0.13.0/go.mod h1:HvlwmtVNQAhOuCjW7xxvovg8wbNq7LwfXh/k7wXUl58=
|
||||
golang.org/x/tools v0.21.1-0.20240508182429-e35e4ccd0d2d/go.mod h1:aiJjzUbINMkxbQROHiO6hDPo2LHcIPhhQsa9DLh0yGk=
|
||||
golang.org/x/tools v0.36.0 h1:kWS0uv/zsvHEle1LbV5LE8QujrxB3wfQyxHfhOk0Qkg=
|
||||
golang.org/x/tools v0.36.0/go.mod h1:WBDiHKJK8YgLHlcQPYQzNCkUxUypCaa5ZegCVutKm+s=
|
||||
golang.org/x/tools v0.39.0 h1:ik4ho21kwuQln40uelmciQPp9SipgNDdrafrYA4TmQQ=
|
||||
golang.org/x/tools v0.39.0/go.mod h1:JnefbkDPyD8UU2kI5fuf8ZX4/yUeh9W877ZeBONxUqQ=
|
||||
golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
|
||||
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
|
||||
google.golang.org/protobuf v1.36.8 h1:xHScyCOEuuwZEc6UtSOvPbAT4zRh0xcNRYekJwfqyMc=
|
||||
google.golang.org/protobuf v1.36.8/go.mod h1:fuxRtAxBytpl4zzqUh6/eyUujkJdNiuEkXntxiD/uRU=
|
||||
google.golang.org/protobuf v1.36.10 h1:AYd7cD/uASjIL6Q9LiTjz8JLcrh/88q5UObnmY3aOOE=
|
||||
google.golang.org/protobuf v1.36.10/go.mod h1:HTf+CrKn2C3g5S8VImy6tdcUvCska2kB7j23XfzDpco=
|
||||
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
|
||||
gopkg.in/check.v1 v1.0.0-20200227125254-8fa46927fb4f/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
|
||||
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk=
|
||||
|
||||
@@ -65,7 +65,7 @@ func setup(t *testing.T) *api.RestApi {
|
||||
}
|
||||
]
|
||||
}`
|
||||
const testclusterJson = `{
|
||||
const testclusterJSON = `{
|
||||
"name": "testcluster",
|
||||
"subClusters": [
|
||||
{
|
||||
@@ -128,7 +128,7 @@ func setup(t *testing.T) *api.RestApi {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if err := os.WriteFile(filepath.Join(jobarchive, "version.txt"), fmt.Appendf(nil, "%d", 2), 0o666); err != nil {
|
||||
if err := os.WriteFile(filepath.Join(jobarchive, "version.txt"), fmt.Appendf(nil, "%d", 3), 0o666); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
@@ -136,7 +136,7 @@ func setup(t *testing.T) *api.RestApi {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if err := os.WriteFile(filepath.Join(jobarchive, "testcluster", "cluster.json"), []byte(testclusterJson), 0o666); err != nil {
|
||||
if err := os.WriteFile(filepath.Join(jobarchive, "testcluster", "cluster.json"), []byte(testclusterJSON), 0o666); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
@@ -175,7 +175,7 @@ func setup(t *testing.T) *api.RestApi {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
archiver.Start(repository.GetJobRepository())
|
||||
archiver.Start(repository.GetJobRepository(), context.Background())
|
||||
|
||||
if cfg := ccconf.GetPackageConfig("auth"); cfg != nil {
|
||||
auth.Init(&cfg)
|
||||
@@ -190,6 +190,10 @@ func setup(t *testing.T) *api.RestApi {
|
||||
}
|
||||
|
||||
func cleanup() {
|
||||
// Gracefully shutdown archiver with timeout
|
||||
if err := archiver.Shutdown(5 * time.Second); err != nil {
|
||||
cclog.Warnf("Archiver shutdown timeout in tests: %v", err)
|
||||
}
|
||||
// TODO: Clear all caches, reset all modules, etc...
|
||||
}
|
||||
|
||||
@@ -333,7 +337,7 @@ func TestRestApi(t *testing.T) {
|
||||
t.Fatal(response.Status, recorder.Body.String())
|
||||
}
|
||||
|
||||
archiver.WaitForArchiving()
|
||||
// Archiving happens asynchronously, will be completed in cleanup
|
||||
job, err := restapi.JobRepository.Find(&TestJobId, &TestClusterName, &TestStartTime)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
@@ -446,7 +450,7 @@ func TestRestApi(t *testing.T) {
|
||||
t.Fatal(response.Status, recorder.Body.String())
|
||||
}
|
||||
|
||||
archiver.WaitForArchiving()
|
||||
// Archiving happens asynchronously, will be completed in cleanup
|
||||
jobid, cluster := int64(12345), "testcluster"
|
||||
job, err := restapi.JobRepository.Find(&jobid, &cluster, nil)
|
||||
if err != nil {
|
||||
|
||||
@@ -2,6 +2,7 @@
|
||||
// All rights reserved. This file is part of cc-backend.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package api
|
||||
|
||||
import (
|
||||
@@ -29,9 +30,15 @@ import (
|
||||
"github.com/gorilla/mux"
|
||||
)
|
||||
|
||||
// StopJobApiRequest model
|
||||
type StopJobApiRequest struct {
|
||||
JobId *int64 `json:"jobId" example:"123000"`
|
||||
const (
|
||||
// secondsPerDay is the number of seconds in 24 hours.
|
||||
// Used for duplicate job detection within a day window.
|
||||
secondsPerDay = 86400
|
||||
)
|
||||
|
||||
// StopJobAPIRequest model
|
||||
type StopJobAPIRequest struct {
|
||||
JobID *int64 `json:"jobId" example:"123000"`
|
||||
Cluster *string `json:"cluster" example:"fritz"`
|
||||
StartTime *int64 `json:"startTime" example:"1649723812"`
|
||||
State schema.JobState `json:"jobState" validate:"required" example:"completed"`
|
||||
@@ -40,7 +47,7 @@ type StopJobApiRequest struct {
|
||||
|
||||
// DeleteJobApiRequest model
|
||||
type DeleteJobApiRequest struct {
|
||||
JobId *int64 `json:"jobId" validate:"required" example:"123000"` // Cluster Job ID of job
|
||||
JobID *int64 `json:"jobId" validate:"required" example:"123000"` // Cluster Job ID of job
|
||||
Cluster *string `json:"cluster" example:"fritz"` // Cluster of job
|
||||
StartTime *int64 `json:"startTime" example:"1649723812"` // Start Time of job as epoch
|
||||
}
|
||||
@@ -113,7 +120,8 @@ func (api *RestApi) getJobs(rw http.ResponseWriter, r *http.Request) {
|
||||
|
||||
for key, vals := range r.URL.Query() {
|
||||
switch key {
|
||||
// TODO: add project filter
|
||||
case "project":
|
||||
filter.Project = &model.StringInput{Eq: &vals[0]}
|
||||
case "state":
|
||||
for _, s := range vals {
|
||||
state := schema.JobState(s)
|
||||
@@ -363,7 +371,7 @@ func (api *RestApi) getJobById(rw http.ResponseWriter, r *http.Request) {
|
||||
|
||||
var metrics GetJobApiRequest
|
||||
if err = decode(r.Body, &metrics); err != nil {
|
||||
http.Error(rw, err.Error(), http.StatusBadRequest)
|
||||
handleError(fmt.Errorf("decoding request failed: %w", err), http.StatusBadRequest, rw)
|
||||
return
|
||||
}
|
||||
|
||||
@@ -434,30 +442,32 @@ func (api *RestApi) getJobById(rw http.ResponseWriter, r *http.Request) {
|
||||
func (api *RestApi) editMeta(rw http.ResponseWriter, r *http.Request) {
|
||||
id, err := strconv.ParseInt(mux.Vars(r)["id"], 10, 64)
|
||||
if err != nil {
|
||||
http.Error(rw, err.Error(), http.StatusBadRequest)
|
||||
handleError(fmt.Errorf("parsing job ID failed: %w", err), http.StatusBadRequest, rw)
|
||||
return
|
||||
}
|
||||
|
||||
job, err := api.JobRepository.FindById(r.Context(), id)
|
||||
if err != nil {
|
||||
http.Error(rw, err.Error(), http.StatusNotFound)
|
||||
handleError(fmt.Errorf("finding job failed: %w", err), http.StatusNotFound, rw)
|
||||
return
|
||||
}
|
||||
|
||||
var req EditMetaRequest
|
||||
if err := decode(r.Body, &req); err != nil {
|
||||
http.Error(rw, err.Error(), http.StatusBadRequest)
|
||||
handleError(fmt.Errorf("decoding request failed: %w", err), http.StatusBadRequest, rw)
|
||||
return
|
||||
}
|
||||
|
||||
if err := api.JobRepository.UpdateMetadata(job, req.Key, req.Value); err != nil {
|
||||
http.Error(rw, err.Error(), http.StatusInternalServerError)
|
||||
handleError(fmt.Errorf("updating metadata failed: %w", err), http.StatusInternalServerError, rw)
|
||||
return
|
||||
}
|
||||
|
||||
rw.Header().Add("Content-Type", "application/json")
|
||||
rw.WriteHeader(http.StatusOK)
|
||||
json.NewEncoder(rw).Encode(job)
|
||||
if err := json.NewEncoder(rw).Encode(job); err != nil {
|
||||
cclog.Errorf("Failed to encode job response: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// tagJob godoc
|
||||
@@ -480,32 +490,32 @@ func (api *RestApi) editMeta(rw http.ResponseWriter, r *http.Request) {
|
||||
func (api *RestApi) tagJob(rw http.ResponseWriter, r *http.Request) {
|
||||
id, err := strconv.ParseInt(mux.Vars(r)["id"], 10, 64)
|
||||
if err != nil {
|
||||
http.Error(rw, err.Error(), http.StatusBadRequest)
|
||||
handleError(fmt.Errorf("parsing job ID failed: %w", err), http.StatusBadRequest, rw)
|
||||
return
|
||||
}
|
||||
|
||||
job, err := api.JobRepository.FindById(r.Context(), id)
|
||||
if err != nil {
|
||||
http.Error(rw, err.Error(), http.StatusNotFound)
|
||||
handleError(fmt.Errorf("finding job failed: %w", err), http.StatusNotFound, rw)
|
||||
return
|
||||
}
|
||||
|
||||
job.Tags, err = api.JobRepository.GetTags(repository.GetUserFromContext(r.Context()), job.ID)
|
||||
if err != nil {
|
||||
http.Error(rw, err.Error(), http.StatusInternalServerError)
|
||||
handleError(fmt.Errorf("getting tags failed: %w", err), http.StatusInternalServerError, rw)
|
||||
return
|
||||
}
|
||||
|
||||
var req TagJobApiRequest
|
||||
if err := decode(r.Body, &req); err != nil {
|
||||
http.Error(rw, err.Error(), http.StatusBadRequest)
|
||||
handleError(fmt.Errorf("decoding request failed: %w", err), http.StatusBadRequest, rw)
|
||||
return
|
||||
}
|
||||
|
||||
for _, tag := range req {
|
||||
tagId, err := api.JobRepository.AddTagOrCreate(repository.GetUserFromContext(r.Context()), *job.ID, tag.Type, tag.Name, tag.Scope)
|
||||
if err != nil {
|
||||
http.Error(rw, err.Error(), http.StatusInternalServerError)
|
||||
handleError(fmt.Errorf("adding tag failed: %w", err), http.StatusInternalServerError, rw)
|
||||
return
|
||||
}
|
||||
|
||||
@@ -519,7 +529,9 @@ func (api *RestApi) tagJob(rw http.ResponseWriter, r *http.Request) {
|
||||
|
||||
rw.Header().Add("Content-Type", "application/json")
|
||||
rw.WriteHeader(http.StatusOK)
|
||||
json.NewEncoder(rw).Encode(job)
|
||||
if err := json.NewEncoder(rw).Encode(job); err != nil {
|
||||
cclog.Errorf("Failed to encode job response: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// removeTagJob godoc
|
||||
@@ -542,25 +554,25 @@ func (api *RestApi) tagJob(rw http.ResponseWriter, r *http.Request) {
|
||||
func (api *RestApi) removeTagJob(rw http.ResponseWriter, r *http.Request) {
|
||||
id, err := strconv.ParseInt(mux.Vars(r)["id"], 10, 64)
|
||||
if err != nil {
|
||||
http.Error(rw, err.Error(), http.StatusBadRequest)
|
||||
handleError(fmt.Errorf("parsing job ID failed: %w", err), http.StatusBadRequest, rw)
|
||||
return
|
||||
}
|
||||
|
||||
job, err := api.JobRepository.FindById(r.Context(), id)
|
||||
if err != nil {
|
||||
http.Error(rw, err.Error(), http.StatusNotFound)
|
||||
handleError(fmt.Errorf("finding job failed: %w", err), http.StatusNotFound, rw)
|
||||
return
|
||||
}
|
||||
|
||||
job.Tags, err = api.JobRepository.GetTags(repository.GetUserFromContext(r.Context()), job.ID)
|
||||
if err != nil {
|
||||
http.Error(rw, err.Error(), http.StatusInternalServerError)
|
||||
handleError(fmt.Errorf("getting tags failed: %w", err), http.StatusInternalServerError, rw)
|
||||
return
|
||||
}
|
||||
|
||||
var req TagJobApiRequest
|
||||
if err := decode(r.Body, &req); err != nil {
|
||||
http.Error(rw, err.Error(), http.StatusBadRequest)
|
||||
handleError(fmt.Errorf("decoding request failed: %w", err), http.StatusBadRequest, rw)
|
||||
return
|
||||
}
|
||||
|
||||
@@ -573,7 +585,7 @@ func (api *RestApi) removeTagJob(rw http.ResponseWriter, r *http.Request) {
|
||||
|
||||
remainingTags, err := api.JobRepository.RemoveJobTagByRequest(repository.GetUserFromContext(r.Context()), *job.ID, rtag.Type, rtag.Name, rtag.Scope)
|
||||
if err != nil {
|
||||
http.Error(rw, err.Error(), http.StatusInternalServerError)
|
||||
handleError(fmt.Errorf("removing tag failed: %w", err), http.StatusInternalServerError, rw)
|
||||
return
|
||||
}
|
||||
|
||||
@@ -582,7 +594,9 @@ func (api *RestApi) removeTagJob(rw http.ResponseWriter, r *http.Request) {
|
||||
|
||||
rw.Header().Add("Content-Type", "application/json")
|
||||
rw.WriteHeader(http.StatusOK)
|
||||
json.NewEncoder(rw).Encode(job)
|
||||
if err := json.NewEncoder(rw).Encode(job); err != nil {
|
||||
cclog.Errorf("Failed to encode job response: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// removeTags godoc
|
||||
@@ -604,7 +618,7 @@ func (api *RestApi) removeTagJob(rw http.ResponseWriter, r *http.Request) {
|
||||
func (api *RestApi) removeTags(rw http.ResponseWriter, r *http.Request) {
|
||||
var req TagJobApiRequest
|
||||
if err := decode(r.Body, &req); err != nil {
|
||||
http.Error(rw, err.Error(), http.StatusBadRequest)
|
||||
handleError(fmt.Errorf("decoding request failed: %w", err), http.StatusBadRequest, rw)
|
||||
return
|
||||
}
|
||||
|
||||
@@ -619,11 +633,10 @@ func (api *RestApi) removeTags(rw http.ResponseWriter, r *http.Request) {
|
||||
|
||||
err := api.JobRepository.RemoveTagByRequest(rtag.Type, rtag.Name, rtag.Scope)
|
||||
if err != nil {
|
||||
http.Error(rw, err.Error(), http.StatusInternalServerError)
|
||||
handleError(fmt.Errorf("removing tag failed: %w", err), http.StatusInternalServerError, rw)
|
||||
return
|
||||
} else {
|
||||
currentCount++
|
||||
}
|
||||
currentCount++
|
||||
}
|
||||
|
||||
rw.WriteHeader(http.StatusOK)
|
||||
@@ -656,7 +669,7 @@ func (api *RestApi) startJob(rw http.ResponseWriter, r *http.Request) {
|
||||
return
|
||||
}
|
||||
|
||||
cclog.Printf("REST: %s\n", req.GoString())
|
||||
cclog.Debugf("REST: %s", req.GoString())
|
||||
req.State = schema.JobStateRunning
|
||||
|
||||
if err := importer.SanityChecks(&req); err != nil {
|
||||
@@ -674,9 +687,11 @@ func (api *RestApi) startJob(rw http.ResponseWriter, r *http.Request) {
|
||||
if err != nil && err != sql.ErrNoRows {
|
||||
handleError(fmt.Errorf("checking for duplicate failed: %w", err), http.StatusInternalServerError, rw)
|
||||
return
|
||||
} else if err == nil {
|
||||
}
|
||||
if err == nil {
|
||||
for _, job := range jobs {
|
||||
if (req.StartTime - job.StartTime) < 86400 {
|
||||
// Check if jobs are within the same day (prevent duplicates)
|
||||
if (req.StartTime - job.StartTime) < secondsPerDay {
|
||||
handleError(fmt.Errorf("a job with that jobId, cluster and startTime already exists: dbid: %d, jobid: %d", job.ID, job.JobID), http.StatusUnprocessableEntity, rw)
|
||||
return
|
||||
}
|
||||
@@ -693,18 +708,19 @@ func (api *RestApi) startJob(rw http.ResponseWriter, r *http.Request) {
|
||||
|
||||
for _, tag := range req.Tags {
|
||||
if _, err := api.JobRepository.AddTagOrCreate(repository.GetUserFromContext(r.Context()), id, tag.Type, tag.Name, tag.Scope); err != nil {
|
||||
http.Error(rw, err.Error(), http.StatusInternalServerError)
|
||||
handleError(fmt.Errorf("adding tag to new job %d failed: %w", id, err), http.StatusInternalServerError, rw)
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
cclog.Printf("new job (id: %d): cluster=%s, jobId=%d, user=%s, startTime=%d", id, req.Cluster, req.JobID, req.User, req.StartTime)
|
||||
cclog.Infof("new job (id: %d): cluster=%s, jobId=%d, user=%s, startTime=%d", id, req.Cluster, req.JobID, req.User, req.StartTime)
|
||||
rw.Header().Add("Content-Type", "application/json")
|
||||
rw.WriteHeader(http.StatusCreated)
|
||||
json.NewEncoder(rw).Encode(DefaultApiResponse{
|
||||
if err := json.NewEncoder(rw).Encode(DefaultApiResponse{
|
||||
Message: "success",
|
||||
})
|
||||
}); err != nil {
|
||||
cclog.Errorf("Failed to encode response: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// stopJobByRequest godoc
|
||||
@@ -725,7 +741,7 @@ func (api *RestApi) startJob(rw http.ResponseWriter, r *http.Request) {
|
||||
// @router /api/jobs/stop_job/ [post]
|
||||
func (api *RestApi) stopJobByRequest(rw http.ResponseWriter, r *http.Request) {
|
||||
// Parse request body
|
||||
req := StopJobApiRequest{}
|
||||
req := StopJobAPIRequest{}
|
||||
if err := decode(r.Body, &req); err != nil {
|
||||
handleError(fmt.Errorf("parsing request body failed: %w", err), http.StatusBadRequest, rw)
|
||||
return
|
||||
@@ -734,20 +750,22 @@ func (api *RestApi) stopJobByRequest(rw http.ResponseWriter, r *http.Request) {
|
||||
// Fetch job (that will be stopped) from db
|
||||
var job *schema.Job
|
||||
var err error
|
||||
if req.JobId == nil {
|
||||
if req.JobID == nil {
|
||||
handleError(errors.New("the field 'jobId' is required"), http.StatusBadRequest, rw)
|
||||
return
|
||||
}
|
||||
|
||||
// cclog.Printf("loading db job for stopJobByRequest... : stopJobApiRequest=%v", req)
|
||||
job, err = api.JobRepository.Find(req.JobId, req.Cluster, req.StartTime)
|
||||
job, err = api.JobRepository.Find(req.JobID, req.Cluster, req.StartTime)
|
||||
if err != nil {
|
||||
job, err = api.JobRepository.FindCached(req.JobId, req.Cluster, req.StartTime)
|
||||
// FIXME: Previous error is hidden
|
||||
if err != nil {
|
||||
handleError(fmt.Errorf("finding job failed: %w", err), http.StatusUnprocessableEntity, rw)
|
||||
// Try cached jobs if not found in main repository
|
||||
cachedJob, cachedErr := api.JobRepository.FindCached(req.JobID, req.Cluster, req.StartTime)
|
||||
if cachedErr != nil {
|
||||
// Combine both errors for better debugging
|
||||
handleError(fmt.Errorf("finding job failed: %w (cached lookup also failed: %v)", err, cachedErr), http.StatusNotFound, rw)
|
||||
return
|
||||
}
|
||||
job = cachedJob
|
||||
}
|
||||
|
||||
api.checkAndHandleStopJob(rw, job, req)
|
||||
@@ -790,9 +808,11 @@ func (api *RestApi) deleteJobById(rw http.ResponseWriter, r *http.Request) {
|
||||
}
|
||||
rw.Header().Add("Content-Type", "application/json")
|
||||
rw.WriteHeader(http.StatusOK)
|
||||
json.NewEncoder(rw).Encode(DefaultApiResponse{
|
||||
if err := json.NewEncoder(rw).Encode(DefaultApiResponse{
|
||||
Message: fmt.Sprintf("Successfully deleted job %s", id),
|
||||
})
|
||||
}); err != nil {
|
||||
cclog.Errorf("Failed to encode response: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// deleteJobByRequest godoc
|
||||
@@ -822,12 +842,12 @@ func (api *RestApi) deleteJobByRequest(rw http.ResponseWriter, r *http.Request)
|
||||
// Fetch job (that will be deleted) from db
|
||||
var job *schema.Job
|
||||
var err error
|
||||
if req.JobId == nil {
|
||||
if req.JobID == nil {
|
||||
handleError(errors.New("the field 'jobId' is required"), http.StatusBadRequest, rw)
|
||||
return
|
||||
}
|
||||
|
||||
job, err = api.JobRepository.Find(req.JobId, req.Cluster, req.StartTime)
|
||||
job, err = api.JobRepository.Find(req.JobID, req.Cluster, req.StartTime)
|
||||
if err != nil {
|
||||
handleError(fmt.Errorf("finding job failed: %w", err), http.StatusUnprocessableEntity, rw)
|
||||
return
|
||||
@@ -841,9 +861,11 @@ func (api *RestApi) deleteJobByRequest(rw http.ResponseWriter, r *http.Request)
|
||||
|
||||
rw.Header().Add("Content-Type", "application/json")
|
||||
rw.WriteHeader(http.StatusOK)
|
||||
json.NewEncoder(rw).Encode(DefaultApiResponse{
|
||||
if err := json.NewEncoder(rw).Encode(DefaultApiResponse{
|
||||
Message: fmt.Sprintf("Successfully deleted job %d", job.ID),
|
||||
})
|
||||
}); err != nil {
|
||||
cclog.Errorf("Failed to encode response: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// deleteJobBefore godoc
|
||||
@@ -885,19 +907,21 @@ func (api *RestApi) deleteJobBefore(rw http.ResponseWriter, r *http.Request) {
|
||||
|
||||
rw.Header().Add("Content-Type", "application/json")
|
||||
rw.WriteHeader(http.StatusOK)
|
||||
json.NewEncoder(rw).Encode(DefaultApiResponse{
|
||||
if err := json.NewEncoder(rw).Encode(DefaultApiResponse{
|
||||
Message: fmt.Sprintf("Successfully deleted %d jobs", cnt),
|
||||
})
|
||||
}); err != nil {
|
||||
cclog.Errorf("Failed to encode response: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func (api *RestApi) checkAndHandleStopJob(rw http.ResponseWriter, job *schema.Job, req StopJobApiRequest) {
|
||||
func (api *RestApi) checkAndHandleStopJob(rw http.ResponseWriter, job *schema.Job, req StopJobAPIRequest) {
|
||||
// Sanity checks
|
||||
if job.State != schema.JobStateRunning {
|
||||
handleError(fmt.Errorf("jobId %d (id %d) on %s : job has already been stopped (state is: %s)", job.JobID, job.ID, job.Cluster, job.State), http.StatusUnprocessableEntity, rw)
|
||||
return
|
||||
}
|
||||
|
||||
if job == nil || job.StartTime > req.StopTime {
|
||||
if job.StartTime > req.StopTime {
|
||||
handleError(fmt.Errorf("jobId %d (id %d) on %s : stopTime %d must be larger/equal than startTime %d", job.JobID, job.ID, job.Cluster, req.StopTime, job.StartTime), http.StatusBadRequest, rw)
|
||||
return
|
||||
}
|
||||
@@ -913,23 +937,25 @@ func (api *RestApi) checkAndHandleStopJob(rw http.ResponseWriter, job *schema.Jo
|
||||
job.Duration = int32(req.StopTime - job.StartTime)
|
||||
job.State = req.State
|
||||
api.JobRepository.Mutex.Lock()
|
||||
defer api.JobRepository.Mutex.Unlock()
|
||||
|
||||
if err := api.JobRepository.Stop(*job.ID, job.Duration, job.State, job.MonitoringStatus); err != nil {
|
||||
if err := api.JobRepository.StopCached(*job.ID, job.Duration, job.State, job.MonitoringStatus); err != nil {
|
||||
api.JobRepository.Mutex.Unlock()
|
||||
handleError(fmt.Errorf("jobId %d (id %d) on %s : marking job as '%s' (duration: %d) in DB failed: %w", job.JobID, job.ID, job.Cluster, job.State, job.Duration, err), http.StatusInternalServerError, rw)
|
||||
return
|
||||
}
|
||||
}
|
||||
api.JobRepository.Mutex.Unlock()
|
||||
|
||||
cclog.Printf("archiving job... (dbid: %d): cluster=%s, jobId=%d, user=%s, startTime=%d, duration=%d, state=%s", job.ID, job.Cluster, job.JobID, job.User, job.StartTime, job.Duration, job.State)
|
||||
cclog.Infof("archiving job... (dbid: %d): cluster=%s, jobId=%d, user=%s, startTime=%d, duration=%d, state=%s", job.ID, job.Cluster, job.JobID, job.User, job.StartTime, job.Duration, job.State)
|
||||
|
||||
// Send a response (with status OK). This means that errors that happen from here on forward
|
||||
// can *NOT* be communicated to the client. If reading from a MetricDataRepository or
|
||||
// writing to the filesystem fails, the client will not know.
|
||||
rw.Header().Add("Content-Type", "application/json")
|
||||
rw.WriteHeader(http.StatusOK)
|
||||
json.NewEncoder(rw).Encode(job)
|
||||
if err := json.NewEncoder(rw).Encode(job); err != nil {
|
||||
cclog.Errorf("Failed to encode job response: %v", err)
|
||||
}
|
||||
|
||||
// Monitoring is disabled...
|
||||
if job.MonitoringStatus == schema.MonitoringStatusDisabled {
|
||||
@@ -947,7 +973,7 @@ func (api *RestApi) getJobMetrics(rw http.ResponseWriter, r *http.Request) {
|
||||
for _, scope := range r.URL.Query()["scope"] {
|
||||
var s schema.MetricScope
|
||||
if err := s.UnmarshalGQL(scope); err != nil {
|
||||
http.Error(rw, err.Error(), http.StatusBadRequest)
|
||||
handleError(fmt.Errorf("unmarshaling scope failed: %w", err), http.StatusBadRequest, rw)
|
||||
return
|
||||
}
|
||||
scopes = append(scopes, s)
|
||||
@@ -956,7 +982,7 @@ func (api *RestApi) getJobMetrics(rw http.ResponseWriter, r *http.Request) {
|
||||
rw.Header().Add("Content-Type", "application/json")
|
||||
rw.WriteHeader(http.StatusOK)
|
||||
|
||||
type Respone struct {
|
||||
type Response struct {
|
||||
Data *struct {
|
||||
JobMetrics []*model.JobMetricWithName `json:"jobMetrics"`
|
||||
} `json:"data"`
|
||||
@@ -968,17 +994,21 @@ func (api *RestApi) getJobMetrics(rw http.ResponseWriter, r *http.Request) {
|
||||
resolver := graph.GetResolverInstance()
|
||||
data, err := resolver.Query().JobMetrics(r.Context(), id, metrics, scopes, nil)
|
||||
if err != nil {
|
||||
json.NewEncoder(rw).Encode(Respone{
|
||||
if err := json.NewEncoder(rw).Encode(Response{
|
||||
Error: &struct {
|
||||
Message string "json:\"message\""
|
||||
Message string `json:"message"`
|
||||
}{Message: err.Error()},
|
||||
})
|
||||
}); err != nil {
|
||||
cclog.Errorf("Failed to encode error response: %v", err)
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
json.NewEncoder(rw).Encode(Respone{
|
||||
if err := json.NewEncoder(rw).Encode(Response{
|
||||
Data: &struct {
|
||||
JobMetrics []*model.JobMetricWithName "json:\"jobMetrics\""
|
||||
JobMetrics []*model.JobMetricWithName `json:"jobMetrics"`
|
||||
}{JobMetrics: data},
|
||||
})
|
||||
}); err != nil {
|
||||
cclog.Errorf("Failed to encode response: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -50,13 +50,6 @@ func freeMetrics(rw http.ResponseWriter, r *http.Request) {
|
||||
return
|
||||
}
|
||||
|
||||
// // TODO: lastCheckpoint might be modified by different go-routines.
|
||||
// // Load it using the sync/atomic package?
|
||||
// freeUpTo := lastCheckpoint.Unix()
|
||||
// if to < freeUpTo {
|
||||
// freeUpTo = to
|
||||
// }
|
||||
|
||||
bodyDec := json.NewDecoder(r.Body)
|
||||
var selectors [][]string
|
||||
err = bodyDec.Decode(&selectors)
|
||||
|
||||
@@ -2,6 +2,11 @@
|
||||
// All rights reserved. This file is part of cc-backend.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// Package api provides the REST API layer for ClusterCockpit.
|
||||
// It handles HTTP requests for job management, user administration,
|
||||
// cluster queries, node state updates, and metrics storage operations.
|
||||
// The API supports both JWT token authentication and session-based authentication.
|
||||
package api
|
||||
|
||||
import (
|
||||
@@ -11,6 +16,7 @@ import (
|
||||
"net/http"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"sync"
|
||||
|
||||
"github.com/ClusterCockpit/cc-backend/internal/auth"
|
||||
@@ -39,10 +45,19 @@ import (
|
||||
// @in header
|
||||
// @name X-Auth-Token
|
||||
|
||||
const (
|
||||
noticeFilePath = "./var/notice.txt"
|
||||
noticeFilePerms = 0o644
|
||||
)
|
||||
|
||||
type RestApi struct {
|
||||
JobRepository *repository.JobRepository
|
||||
Authentication *auth.Authentication
|
||||
MachineStateDir string
|
||||
// RepositoryMutex protects job creation operations from race conditions
|
||||
// when checking for duplicate jobs during startJob API calls.
|
||||
// It prevents concurrent job starts with the same jobId/cluster/startTime
|
||||
// from creating duplicate entries in the database.
|
||||
RepositoryMutex sync.Mutex
|
||||
}
|
||||
|
||||
@@ -66,7 +81,6 @@ func (api *RestApi) MountApiRoutes(r *mux.Router) {
|
||||
// Job Handler
|
||||
r.HandleFunc("/jobs/start_job/", api.startJob).Methods(http.MethodPost, http.MethodPut)
|
||||
r.HandleFunc("/jobs/stop_job/", api.stopJobByRequest).Methods(http.MethodPost, http.MethodPut)
|
||||
// r.HandleFunc("/jobs/import/", api.importJob).Methods(http.MethodPost, http.MethodPut)
|
||||
r.HandleFunc("/jobs/", api.getJobs).Methods(http.MethodGet)
|
||||
r.HandleFunc("/jobs/{id}", api.getJobById).Methods(http.MethodPost)
|
||||
r.HandleFunc("/jobs/{id}", api.getCompleteJobById).Methods(http.MethodGet)
|
||||
@@ -97,6 +111,7 @@ func (api *RestApi) MountUserApiRoutes(r *mux.Router) {
|
||||
|
||||
func (api *RestApi) MountMetricStoreApiRoutes(r *mux.Router) {
|
||||
// REST API Uses TokenAuth
|
||||
// Note: StrictSlash handles trailing slash variations automatically
|
||||
r.HandleFunc("/api/free", freeMetrics).Methods(http.MethodPost)
|
||||
r.HandleFunc("/api/write", writeMetrics).Methods(http.MethodPost)
|
||||
r.HandleFunc("/api/debug", debugMetrics).Methods(http.MethodGet)
|
||||
@@ -146,10 +161,12 @@ func handleError(err error, statusCode int, rw http.ResponseWriter) {
|
||||
cclog.Warnf("REST ERROR : %s", err.Error())
|
||||
rw.Header().Add("Content-Type", "application/json")
|
||||
rw.WriteHeader(statusCode)
|
||||
json.NewEncoder(rw).Encode(ErrorResponse{
|
||||
if err := json.NewEncoder(rw).Encode(ErrorResponse{
|
||||
Status: http.StatusText(statusCode),
|
||||
Error: err.Error(),
|
||||
})
|
||||
}); err != nil {
|
||||
cclog.Errorf("Failed to encode error response: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func decode(r io.Reader, val any) error {
|
||||
@@ -162,41 +179,41 @@ func (api *RestApi) editNotice(rw http.ResponseWriter, r *http.Request) {
|
||||
// SecuredCheck() only worked with TokenAuth: Removed
|
||||
|
||||
if user := repository.GetUserFromContext(r.Context()); !user.HasRole(schema.RoleAdmin) {
|
||||
http.Error(rw, "Only admins are allowed to update the notice.txt file", http.StatusForbidden)
|
||||
handleError(fmt.Errorf("only admins are allowed to update the notice.txt file"), http.StatusForbidden, rw)
|
||||
return
|
||||
}
|
||||
|
||||
// Get Value
|
||||
newContent := r.FormValue("new-content")
|
||||
|
||||
// Check FIle
|
||||
noticeExists := util.CheckFileExists("./var/notice.txt")
|
||||
// Validate content length to prevent DoS
|
||||
if len(newContent) > 10000 {
|
||||
handleError(fmt.Errorf("notice content exceeds maximum length of 10000 characters"), http.StatusBadRequest, rw)
|
||||
return
|
||||
}
|
||||
|
||||
// Check File
|
||||
noticeExists := util.CheckFileExists(noticeFilePath)
|
||||
if !noticeExists {
|
||||
ntxt, err := os.Create("./var/notice.txt")
|
||||
ntxt, err := os.Create(noticeFilePath)
|
||||
if err != nil {
|
||||
cclog.Errorf("Creating ./var/notice.txt failed: %s", err.Error())
|
||||
http.Error(rw, err.Error(), http.StatusUnprocessableEntity)
|
||||
handleError(fmt.Errorf("creating notice file failed: %w", err), http.StatusInternalServerError, rw)
|
||||
return
|
||||
}
|
||||
ntxt.Close()
|
||||
}
|
||||
|
||||
if err := os.WriteFile(noticeFilePath, []byte(newContent), noticeFilePerms); err != nil {
|
||||
handleError(fmt.Errorf("writing to notice file failed: %w", err), http.StatusInternalServerError, rw)
|
||||
return
|
||||
}
|
||||
|
||||
rw.Header().Set("Content-Type", "text/plain")
|
||||
rw.WriteHeader(http.StatusOK)
|
||||
if newContent != "" {
|
||||
if err := os.WriteFile("./var/notice.txt", []byte(newContent), 0o666); err != nil {
|
||||
cclog.Errorf("Writing to ./var/notice.txt failed: %s", err.Error())
|
||||
http.Error(rw, err.Error(), http.StatusUnprocessableEntity)
|
||||
return
|
||||
} else {
|
||||
rw.Write([]byte("Update Notice Content Success"))
|
||||
}
|
||||
rw.Write([]byte("Update Notice Content Success"))
|
||||
} else {
|
||||
if err := os.WriteFile("./var/notice.txt", []byte(""), 0o666); err != nil {
|
||||
cclog.Errorf("Writing to ./var/notice.txt failed: %s", err.Error())
|
||||
http.Error(rw, err.Error(), http.StatusUnprocessableEntity)
|
||||
return
|
||||
} else {
|
||||
rw.Write([]byte("Empty Notice Content Success"))
|
||||
}
|
||||
rw.Write([]byte("Empty Notice Content Success"))
|
||||
}
|
||||
}
|
||||
|
||||
@@ -206,21 +223,20 @@ func (api *RestApi) getJWT(rw http.ResponseWriter, r *http.Request) {
|
||||
me := repository.GetUserFromContext(r.Context())
|
||||
if !me.HasRole(schema.RoleAdmin) {
|
||||
if username != me.Username {
|
||||
http.Error(rw, "Only admins are allowed to sign JWTs not for themselves",
|
||||
http.StatusForbidden)
|
||||
handleError(fmt.Errorf("only admins are allowed to sign JWTs not for themselves"), http.StatusForbidden, rw)
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
user, err := repository.GetUserRepository().GetUser(username)
|
||||
if err != nil {
|
||||
http.Error(rw, err.Error(), http.StatusUnprocessableEntity)
|
||||
handleError(fmt.Errorf("getting user failed: %w", err), http.StatusNotFound, rw)
|
||||
return
|
||||
}
|
||||
|
||||
jwt, err := api.Authentication.JwtAuth.ProvideJWT(user)
|
||||
if err != nil {
|
||||
http.Error(rw, err.Error(), http.StatusUnprocessableEntity)
|
||||
handleError(fmt.Errorf("providing JWT failed: %w", err), http.StatusInternalServerError, rw)
|
||||
return
|
||||
}
|
||||
|
||||
@@ -233,17 +249,20 @@ func (api *RestApi) getRoles(rw http.ResponseWriter, r *http.Request) {
|
||||
|
||||
user := repository.GetUserFromContext(r.Context())
|
||||
if !user.HasRole(schema.RoleAdmin) {
|
||||
http.Error(rw, "only admins are allowed to fetch a list of roles", http.StatusForbidden)
|
||||
handleError(fmt.Errorf("only admins are allowed to fetch a list of roles"), http.StatusForbidden, rw)
|
||||
return
|
||||
}
|
||||
|
||||
roles, err := schema.GetValidRoles(user)
|
||||
if err != nil {
|
||||
http.Error(rw, err.Error(), http.StatusInternalServerError)
|
||||
handleError(fmt.Errorf("getting valid roles failed: %w", err), http.StatusInternalServerError, rw)
|
||||
return
|
||||
}
|
||||
|
||||
json.NewEncoder(rw).Encode(roles)
|
||||
rw.Header().Set("Content-Type", "application/json")
|
||||
if err := json.NewEncoder(rw).Encode(roles); err != nil {
|
||||
cclog.Errorf("Failed to encode roles response: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func (api *RestApi) updateConfiguration(rw http.ResponseWriter, r *http.Request) {
|
||||
@@ -251,38 +270,50 @@ func (api *RestApi) updateConfiguration(rw http.ResponseWriter, r *http.Request)
|
||||
key, value := r.FormValue("key"), r.FormValue("value")
|
||||
|
||||
if err := repository.GetUserCfgRepo().UpdateConfig(key, value, repository.GetUserFromContext(r.Context())); err != nil {
|
||||
http.Error(rw, err.Error(), http.StatusUnprocessableEntity)
|
||||
handleError(fmt.Errorf("updating configuration failed: %w", err), http.StatusInternalServerError, rw)
|
||||
return
|
||||
}
|
||||
|
||||
rw.WriteHeader(http.StatusOK)
|
||||
rw.Write([]byte("success"))
|
||||
}
|
||||
|
||||
func (api *RestApi) putMachineState(rw http.ResponseWriter, r *http.Request) {
|
||||
if api.MachineStateDir == "" {
|
||||
http.Error(rw, "REST > machine state not enabled", http.StatusNotFound)
|
||||
handleError(fmt.Errorf("machine state not enabled"), http.StatusNotFound, rw)
|
||||
return
|
||||
}
|
||||
|
||||
vars := mux.Vars(r)
|
||||
cluster := vars["cluster"]
|
||||
host := vars["host"]
|
||||
|
||||
// Validate cluster and host to prevent path traversal attacks
|
||||
if strings.Contains(cluster, "..") || strings.Contains(cluster, "/") || strings.Contains(cluster, "\\") {
|
||||
handleError(fmt.Errorf("invalid cluster name"), http.StatusBadRequest, rw)
|
||||
return
|
||||
}
|
||||
if strings.Contains(host, "..") || strings.Contains(host, "/") || strings.Contains(host, "\\") {
|
||||
handleError(fmt.Errorf("invalid host name"), http.StatusBadRequest, rw)
|
||||
return
|
||||
}
|
||||
|
||||
dir := filepath.Join(api.MachineStateDir, cluster)
|
||||
if err := os.MkdirAll(dir, 0o755); err != nil {
|
||||
http.Error(rw, err.Error(), http.StatusInternalServerError)
|
||||
handleError(fmt.Errorf("creating directory failed: %w", err), http.StatusInternalServerError, rw)
|
||||
return
|
||||
}
|
||||
|
||||
filename := filepath.Join(dir, fmt.Sprintf("%s.json", host))
|
||||
f, err := os.Create(filename)
|
||||
if err != nil {
|
||||
http.Error(rw, err.Error(), http.StatusInternalServerError)
|
||||
handleError(fmt.Errorf("creating file failed: %w", err), http.StatusInternalServerError, rw)
|
||||
return
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
if _, err := io.Copy(f, r.Body); err != nil {
|
||||
http.Error(rw, err.Error(), http.StatusInternalServerError)
|
||||
handleError(fmt.Errorf("writing file failed: %w", err), http.StatusInternalServerError, rw)
|
||||
return
|
||||
}
|
||||
|
||||
@@ -291,12 +322,25 @@ func (api *RestApi) putMachineState(rw http.ResponseWriter, r *http.Request) {
|
||||
|
||||
func (api *RestApi) getMachineState(rw http.ResponseWriter, r *http.Request) {
|
||||
if api.MachineStateDir == "" {
|
||||
http.Error(rw, "REST > machine state not enabled", http.StatusNotFound)
|
||||
handleError(fmt.Errorf("machine state not enabled"), http.StatusNotFound, rw)
|
||||
return
|
||||
}
|
||||
|
||||
vars := mux.Vars(r)
|
||||
filename := filepath.Join(api.MachineStateDir, vars["cluster"], fmt.Sprintf("%s.json", vars["host"]))
|
||||
cluster := vars["cluster"]
|
||||
host := vars["host"]
|
||||
|
||||
// Validate cluster and host to prevent path traversal attacks
|
||||
if strings.Contains(cluster, "..") || strings.Contains(cluster, "/") || strings.Contains(cluster, "\\") {
|
||||
handleError(fmt.Errorf("invalid cluster name"), http.StatusBadRequest, rw)
|
||||
return
|
||||
}
|
||||
if strings.Contains(host, "..") || strings.Contains(host, "/") || strings.Contains(host, "\\") {
|
||||
handleError(fmt.Errorf("invalid host name"), http.StatusBadRequest, rw)
|
||||
return
|
||||
}
|
||||
|
||||
filename := filepath.Join(api.MachineStateDir, cluster, fmt.Sprintf("%s.json", host))
|
||||
|
||||
// Sets the content-type and 'Last-Modified' Header and so on automatically
|
||||
http.ServeFile(rw, r, filename)
|
||||
|
||||
@@ -2,6 +2,7 @@
|
||||
// All rights reserved. This file is part of cc-backend.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package api
|
||||
|
||||
import (
|
||||
@@ -10,11 +11,12 @@ import (
|
||||
"net/http"
|
||||
|
||||
"github.com/ClusterCockpit/cc-backend/internal/repository"
|
||||
cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
|
||||
"github.com/ClusterCockpit/cc-lib/schema"
|
||||
"github.com/gorilla/mux"
|
||||
)
|
||||
|
||||
type ApiReturnedUser struct {
|
||||
type APIReturnedUser struct {
|
||||
Username string `json:"username"`
|
||||
Name string `json:"name"`
|
||||
Roles []string `json:"roles"`
|
||||
@@ -40,24 +42,42 @@ func (api *RestApi) getUsers(rw http.ResponseWriter, r *http.Request) {
|
||||
// SecuredCheck() only worked with TokenAuth: Removed
|
||||
|
||||
if user := repository.GetUserFromContext(r.Context()); !user.HasRole(schema.RoleAdmin) {
|
||||
http.Error(rw, "Only admins are allowed to fetch a list of users", http.StatusForbidden)
|
||||
handleError(fmt.Errorf("only admins are allowed to fetch a list of users"), http.StatusForbidden, rw)
|
||||
return
|
||||
}
|
||||
|
||||
users, err := repository.GetUserRepository().ListUsers(r.URL.Query().Get("not-just-user") == "true")
|
||||
if err != nil {
|
||||
http.Error(rw, err.Error(), http.StatusInternalServerError)
|
||||
handleError(fmt.Errorf("listing users failed: %w", err), http.StatusInternalServerError, rw)
|
||||
return
|
||||
}
|
||||
|
||||
json.NewEncoder(rw).Encode(users)
|
||||
rw.Header().Set("Content-Type", "application/json")
|
||||
if err := json.NewEncoder(rw).Encode(users); err != nil {
|
||||
cclog.Errorf("Failed to encode users response: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// updateUser godoc
|
||||
// @summary Update user roles and projects
|
||||
// @tags User
|
||||
// @description Allows admins to add/remove roles and projects for a user
|
||||
// @produce plain
|
||||
// @param id path string true "Username"
|
||||
// @param add-role formData string false "Role to add"
|
||||
// @param remove-role formData string false "Role to remove"
|
||||
// @param add-project formData string false "Project to add"
|
||||
// @param remove-project formData string false "Project to remove"
|
||||
// @success 200 {string} string "Success message"
|
||||
// @failure 403 {object} api.ErrorResponse "Forbidden"
|
||||
// @failure 422 {object} api.ErrorResponse "Unprocessable Entity"
|
||||
// @security ApiKeyAuth
|
||||
// @router /api/user/{id} [post]
|
||||
func (api *RestApi) updateUser(rw http.ResponseWriter, r *http.Request) {
|
||||
// SecuredCheck() only worked with TokenAuth: Removed
|
||||
|
||||
if user := repository.GetUserFromContext(r.Context()); !user.HasRole(schema.RoleAdmin) {
|
||||
http.Error(rw, "Only admins are allowed to update a user", http.StatusForbidden)
|
||||
handleError(fmt.Errorf("only admins are allowed to update a user"), http.StatusForbidden, rw)
|
||||
return
|
||||
}
|
||||
|
||||
@@ -67,43 +87,70 @@ func (api *RestApi) updateUser(rw http.ResponseWriter, r *http.Request) {
|
||||
newproj := r.FormValue("add-project")
|
||||
delproj := r.FormValue("remove-project")
|
||||
|
||||
// TODO: Handle anything but roles...
|
||||
rw.Header().Set("Content-Type", "application/json")
|
||||
|
||||
// Handle role updates
|
||||
if newrole != "" {
|
||||
if err := repository.GetUserRepository().AddRole(r.Context(), mux.Vars(r)["id"], newrole); err != nil {
|
||||
http.Error(rw, err.Error(), http.StatusUnprocessableEntity)
|
||||
handleError(fmt.Errorf("adding role failed: %w", err), http.StatusUnprocessableEntity, rw)
|
||||
return
|
||||
}
|
||||
rw.Write([]byte("Add Role Success"))
|
||||
if err := json.NewEncoder(rw).Encode(DefaultApiResponse{Message: "Add Role Success"}); err != nil {
|
||||
cclog.Errorf("Failed to encode response: %v", err)
|
||||
}
|
||||
} else if delrole != "" {
|
||||
if err := repository.GetUserRepository().RemoveRole(r.Context(), mux.Vars(r)["id"], delrole); err != nil {
|
||||
http.Error(rw, err.Error(), http.StatusUnprocessableEntity)
|
||||
handleError(fmt.Errorf("removing role failed: %w", err), http.StatusUnprocessableEntity, rw)
|
||||
return
|
||||
}
|
||||
rw.Write([]byte("Remove Role Success"))
|
||||
if err := json.NewEncoder(rw).Encode(DefaultApiResponse{Message: "Remove Role Success"}); err != nil {
|
||||
cclog.Errorf("Failed to encode response: %v", err)
|
||||
}
|
||||
} else if newproj != "" {
|
||||
if err := repository.GetUserRepository().AddProject(r.Context(), mux.Vars(r)["id"], newproj); err != nil {
|
||||
http.Error(rw, err.Error(), http.StatusUnprocessableEntity)
|
||||
handleError(fmt.Errorf("adding project failed: %w", err), http.StatusUnprocessableEntity, rw)
|
||||
return
|
||||
}
|
||||
rw.Write([]byte("Add Project Success"))
|
||||
if err := json.NewEncoder(rw).Encode(DefaultApiResponse{Message: "Add Project Success"}); err != nil {
|
||||
cclog.Errorf("Failed to encode response: %v", err)
|
||||
}
|
||||
} else if delproj != "" {
|
||||
if err := repository.GetUserRepository().RemoveProject(r.Context(), mux.Vars(r)["id"], delproj); err != nil {
|
||||
http.Error(rw, err.Error(), http.StatusUnprocessableEntity)
|
||||
handleError(fmt.Errorf("removing project failed: %w", err), http.StatusUnprocessableEntity, rw)
|
||||
return
|
||||
}
|
||||
rw.Write([]byte("Remove Project Success"))
|
||||
if err := json.NewEncoder(rw).Encode(DefaultApiResponse{Message: "Remove Project Success"}); err != nil {
|
||||
cclog.Errorf("Failed to encode response: %v", err)
|
||||
}
|
||||
} else {
|
||||
http.Error(rw, "Not Add or Del [role|project]?", http.StatusInternalServerError)
|
||||
handleError(fmt.Errorf("no operation specified: must provide add-role, remove-role, add-project, or remove-project"), http.StatusBadRequest, rw)
|
||||
}
|
||||
}
|
||||
|
||||
// createUser godoc
|
||||
// @summary Create a new user
|
||||
// @tags User
|
||||
// @description Creates a new user with specified credentials and role
|
||||
// @produce plain
|
||||
// @param username formData string true "Username"
|
||||
// @param password formData string false "Password (not required for API users)"
|
||||
// @param role formData string true "User role"
|
||||
// @param name formData string false "Full name"
|
||||
// @param email formData string false "Email address"
|
||||
// @param project formData string false "Project (required for managers)"
|
||||
// @success 200 {string} string "Success message"
|
||||
// @failure 400 {object} api.ErrorResponse "Bad Request"
|
||||
// @failure 403 {object} api.ErrorResponse "Forbidden"
|
||||
// @failure 422 {object} api.ErrorResponse "Unprocessable Entity"
|
||||
// @security ApiKeyAuth
|
||||
// @router /api/users/ [post]
|
||||
func (api *RestApi) createUser(rw http.ResponseWriter, r *http.Request) {
|
||||
// SecuredCheck() only worked with TokenAuth: Removed
|
||||
|
||||
rw.Header().Set("Content-Type", "text/plain")
|
||||
me := repository.GetUserFromContext(r.Context())
|
||||
if !me.HasRole(schema.RoleAdmin) {
|
||||
http.Error(rw, "Only admins are allowed to create new users", http.StatusForbidden)
|
||||
handleError(fmt.Errorf("only admins are allowed to create new users"), http.StatusForbidden, rw)
|
||||
return
|
||||
}
|
||||
|
||||
@@ -111,18 +158,22 @@ func (api *RestApi) createUser(rw http.ResponseWriter, r *http.Request) {
|
||||
r.FormValue("password"), r.FormValue("role"), r.FormValue("name"),
|
||||
r.FormValue("email"), r.FormValue("project")
|
||||
|
||||
// Validate username length
|
||||
if len(username) == 0 || len(username) > 100 {
|
||||
handleError(fmt.Errorf("username must be between 1 and 100 characters"), http.StatusBadRequest, rw)
|
||||
return
|
||||
}
|
||||
|
||||
if len(password) == 0 && role != schema.GetRoleString(schema.RoleApi) {
|
||||
http.Error(rw, "Only API users are allowed to have a blank password (login will be impossible)", http.StatusBadRequest)
|
||||
handleError(fmt.Errorf("only API users are allowed to have a blank password (login will be impossible)"), http.StatusBadRequest, rw)
|
||||
return
|
||||
}
|
||||
|
||||
if len(project) != 0 && role != schema.GetRoleString(schema.RoleManager) {
|
||||
http.Error(rw, "only managers require a project (can be changed later)",
|
||||
http.StatusBadRequest)
|
||||
handleError(fmt.Errorf("only managers require a project (can be changed later)"), http.StatusBadRequest, rw)
|
||||
return
|
||||
} else if len(project) == 0 && role == schema.GetRoleString(schema.RoleManager) {
|
||||
http.Error(rw, "managers require a project to manage (can be changed later)",
|
||||
http.StatusBadRequest)
|
||||
handleError(fmt.Errorf("managers require a project to manage (can be changed later)"), http.StatusBadRequest, rw)
|
||||
return
|
||||
}
|
||||
|
||||
@@ -134,24 +185,35 @@ func (api *RestApi) createUser(rw http.ResponseWriter, r *http.Request) {
|
||||
Projects: []string{project},
|
||||
Roles: []string{role},
|
||||
}); err != nil {
|
||||
http.Error(rw, err.Error(), http.StatusUnprocessableEntity)
|
||||
handleError(fmt.Errorf("adding user failed: %w", err), http.StatusUnprocessableEntity, rw)
|
||||
return
|
||||
}
|
||||
|
||||
fmt.Fprintf(rw, "User %v successfully created!\n", username)
|
||||
}
|
||||
|
||||
// deleteUser godoc
|
||||
// @summary Delete a user
|
||||
// @tags User
|
||||
// @description Deletes a user from the system
|
||||
// @produce plain
|
||||
// @param username formData string true "Username to delete"
|
||||
// @success 200 {string} string "Success"
|
||||
// @failure 403 {object} api.ErrorResponse "Forbidden"
|
||||
// @failure 422 {object} api.ErrorResponse "Unprocessable Entity"
|
||||
// @security ApiKeyAuth
|
||||
// @router /api/users/ [delete]
|
||||
func (api *RestApi) deleteUser(rw http.ResponseWriter, r *http.Request) {
|
||||
// SecuredCheck() only worked with TokenAuth: Removed
|
||||
|
||||
if user := repository.GetUserFromContext(r.Context()); !user.HasRole(schema.RoleAdmin) {
|
||||
http.Error(rw, "Only admins are allowed to delete a user", http.StatusForbidden)
|
||||
handleError(fmt.Errorf("only admins are allowed to delete a user"), http.StatusForbidden, rw)
|
||||
return
|
||||
}
|
||||
|
||||
username := r.FormValue("username")
|
||||
if err := repository.GetUserRepository().DelUser(username); err != nil {
|
||||
http.Error(rw, err.Error(), http.StatusUnprocessableEntity)
|
||||
handleError(fmt.Errorf("deleting user failed: %w", err), http.StatusUnprocessableEntity, rw)
|
||||
return
|
||||
}
|
||||
|
||||
|
||||
190
internal/archiver/README.md
Normal file
190
internal/archiver/README.md
Normal file
@@ -0,0 +1,190 @@
|
||||
# Archiver Package
|
||||
|
||||
The `archiver` package provides asynchronous job archiving functionality for ClusterCockpit. When jobs complete, their metric data is archived from the metric store to a persistent archive backend (filesystem, S3, SQLite, etc.).
|
||||
|
||||
## Architecture
|
||||
|
||||
### Producer-Consumer Pattern
|
||||
|
||||
```
|
||||
┌──────────────┐ TriggerArchiving() ┌───────────────┐
|
||||
│ API Handler │ ───────────────────────▶ │ archiveChannel│
|
||||
│ (Job Stop) │ │ (buffer: 128)│
|
||||
└──────────────┘ └───────┬───────┘
|
||||
│
|
||||
┌─────────────────────────────────┘
|
||||
│
|
||||
▼
|
||||
┌──────────────────────┐
|
||||
│ archivingWorker() │
|
||||
│ (goroutine) │
|
||||
└──────────┬───────────┘
|
||||
│
|
||||
▼
|
||||
1. Fetch job metadata
|
||||
2. Load metric data
|
||||
3. Calculate statistics
|
||||
4. Archive to backend
|
||||
5. Update database
|
||||
6. Call hooks
|
||||
```
|
||||
|
||||
### Components
|
||||
|
||||
- **archiveChannel**: Buffered channel (128 jobs) for async communication
|
||||
- **archivePending**: WaitGroup tracking in-flight archiving operations
|
||||
- **archivingWorker**: Background goroutine processing archiving requests
|
||||
- **shutdownCtx**: Context for graceful cancellation during shutdown
|
||||
|
||||
## Usage
|
||||
|
||||
### Initialization
|
||||
|
||||
```go
|
||||
// Start archiver with context for shutdown control
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
defer cancel()
|
||||
|
||||
archiver.Start(jobRepository, ctx)
|
||||
```
|
||||
|
||||
### Archiving a Job
|
||||
|
||||
```go
|
||||
// Called automatically when a job completes
|
||||
archiver.TriggerArchiving(job)
|
||||
```
|
||||
|
||||
The function returns immediately. Actual archiving happens in the background.
|
||||
|
||||
### Graceful Shutdown
|
||||
|
||||
```go
|
||||
// Shutdown with 10 second timeout
|
||||
if err := archiver.Shutdown(10 * time.Second); err != nil {
|
||||
log.Printf("Archiver shutdown timeout: %v", err)
|
||||
}
|
||||
```
|
||||
|
||||
**Shutdown process:**
|
||||
1. Closes channel (no new jobs can be queued afterwards)
|
||||
2. Waits for pending jobs (up to timeout)
|
||||
3. Cancels context if timeout exceeded
|
||||
4. Waits for worker to exit cleanly
|
||||
|
||||
## Configuration
|
||||
|
||||
### Channel Buffer Size
|
||||
|
||||
The archiving channel has a buffer of 128 jobs. If more than 128 jobs are queued simultaneously, `TriggerArchiving()` will block until space is available.
|
||||
|
||||
To adjust:
|
||||
```go
|
||||
// In archiveWorker.go Start() function
|
||||
archiveChannel = make(chan *schema.Job, 256) // Increase buffer
|
||||
```
|
||||
|
||||
### Scope Selection
|
||||
|
||||
Archive data scopes are automatically selected based on job size:
|
||||
|
||||
- **Node scope**: Always included
|
||||
- **Core scope**: Included for jobs with ≤8 nodes (reduces data volume for large jobs)
|
||||
- **Accelerator scope**: Included if job used accelerators (`NumAcc > 0`)
|
||||
|
||||
To adjust the node threshold:
|
||||
```go
|
||||
// In archiver.go ArchiveJob() function
|
||||
if job.NumNodes <= 16 { // Change from 8 to 16
|
||||
scopes = append(scopes, schema.MetricScopeCore)
|
||||
}
|
||||
```
|
||||
|
||||
### Resolution
|
||||
|
||||
Data is archived at the highest available resolution (typically 60s intervals). To archive at a coarser resolution, change the last argument of `LoadData`:
|
||||
|
||||
```go
|
||||
// In archiver.go ArchiveJob() function
|
||||
jobData, err := metricDataDispatcher.LoadData(job, allMetrics, scopes, ctx, 300)
|
||||
// 0 = highest resolution
|
||||
// 300 = 5-minute resolution
|
||||
```
|
||||
|
||||
## Error Handling
|
||||
|
||||
### Automatic Retry
|
||||
|
||||
The archiver does **not** automatically retry failed archiving operations. If archiving fails:
|
||||
|
||||
1. Error is logged
|
||||
2. Job is marked as `MonitoringStatusArchivingFailed` in database
|
||||
3. Worker continues processing other jobs
|
||||
|
||||
### Manual Retry
|
||||
|
||||
To re-archive failed jobs, query for jobs with `MonitoringStatusArchivingFailed` and call `TriggerArchiving()` again.
|
||||
|
||||
## Performance Considerations
|
||||
|
||||
### Single Worker Thread
|
||||
|
||||
The archiver uses a single worker goroutine. For high-throughput systems:
|
||||
|
||||
- Large channel buffer (128) keeps `TriggerArchiving()` from blocking unless more than 128 jobs are queued
|
||||
- Archiving is typically I/O bound (writing to storage)
|
||||
- Single worker prevents overwhelming storage backend
|
||||
|
||||
### Shutdown Timeout
|
||||
|
||||
Recommended timeout values:
|
||||
- **Development**: 5-10 seconds
|
||||
- **Production**: 10-30 seconds
|
||||
- **High-load**: 30-60 seconds
|
||||
|
||||
Choose based on:
|
||||
- Average archiving time per job
|
||||
- Storage backend latency
|
||||
- Acceptable shutdown delay
|
||||
|
||||
## Monitoring
|
||||
|
||||
### Logging
|
||||
|
||||
The archiver logs:
|
||||
- **Info**: Startup, shutdown, successful completions
|
||||
- **Debug**: Individual job archiving times
|
||||
- **Error**: Archiving failures with job ID and reason
|
||||
- **Warn**: Shutdown timeout exceeded
|
||||
|
||||
### Metrics
|
||||
|
||||
Monitor these signals for archiver health:
|
||||
- Jobs with `MonitoringStatusArchivingFailed`
|
||||
- Time from job stop to successful archive
|
||||
- Shutdown timeout occurrences
|
||||
|
||||
## Thread Safety
|
||||
|
||||
Concurrency guarantees of the exported functions:
|
||||
- `Start()` - Safe to call once
|
||||
- `TriggerArchiving()` - Safe from multiple goroutines
|
||||
- `Shutdown()` - Safe to call once
|
||||
- `WaitForArchiving()` - Deprecated, but safe
|
||||
|
||||
Internal state is protected by:
|
||||
- Channel synchronization (`archiveChannel`)
|
||||
- WaitGroup for pending count (`archivePending`)
|
||||
- Context for cancellation (`shutdownCtx`)
|
||||
|
||||
## Files
|
||||
|
||||
- **archiveWorker.go**: Worker lifecycle, channel management, shutdown logic
|
||||
- **archiver.go**: Core archiving logic, metric loading, statistics calculation
|
||||
|
||||
## Dependencies
|
||||
|
||||
- `internal/repository`: Database operations for job metadata
|
||||
- `internal/metricDataDispatcher`: Loading metric data from various backends
|
||||
- `pkg/archive`: Archive backend abstraction (filesystem, S3, SQLite)
|
||||
- `cc-lib/schema`: Job and metric data structures
|
||||
@@ -2,10 +2,54 @@
|
||||
// All rights reserved. This file is part of cc-backend.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// Package archiver provides asynchronous job archiving functionality for ClusterCockpit.
|
||||
//
|
||||
// The archiver runs a background worker goroutine that processes job archiving requests
|
||||
// from a buffered channel. When jobs complete, their metric data is archived from the
|
||||
// metric store to the configured archive backend (filesystem, S3, etc.).
|
||||
//
|
||||
// # Architecture
|
||||
//
|
||||
// The archiver uses a producer-consumer pattern:
|
||||
// - Producer: TriggerArchiving() sends jobs to archiveChannel
|
||||
// - Consumer: archivingWorker() processes jobs from the channel
|
||||
// - Coordination: sync.WaitGroup tracks pending archive operations
|
||||
//
|
||||
// # Lifecycle
|
||||
//
|
||||
// 1. Start(repo, ctx) - Initialize worker with context for cancellation
|
||||
// 2. TriggerArchiving(job) - Queue job for archiving (called when job stops)
|
||||
// 3. archivingWorker() - Background goroutine processes jobs
|
||||
// 4. Shutdown(timeout) - Graceful shutdown with timeout
|
||||
//
|
||||
// # Graceful Shutdown
|
||||
//
|
||||
// The archiver supports graceful shutdown with configurable timeout:
|
||||
// - Closes channel to reject new jobs
|
||||
// - Waits for pending jobs to complete (up to timeout)
|
||||
// - Cancels context if timeout exceeded
|
||||
// - Ensures worker goroutine exits cleanly
|
||||
//
|
||||
// # Example Usage
|
||||
//
|
||||
// // Initialize archiver
|
||||
// ctx, cancel := context.WithCancel(context.Background())
|
||||
// defer cancel()
|
||||
// archiver.Start(jobRepository, ctx)
|
||||
//
|
||||
// // Trigger archiving when job completes
|
||||
// archiver.TriggerArchiving(job)
|
||||
//
|
||||
// // Graceful shutdown with 10 second timeout
|
||||
// if err := archiver.Shutdown(10 * time.Second); err != nil {
|
||||
// log.Printf("Archiver shutdown timeout: %v", err)
|
||||
// }
|
||||
package archiver
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
@@ -19,38 +63,82 @@ var (
|
||||
archivePending sync.WaitGroup
|
||||
archiveChannel chan *schema.Job
|
||||
jobRepo *repository.JobRepository
|
||||
shutdownCtx context.Context
|
||||
shutdownCancel context.CancelFunc
|
||||
workerDone chan struct{}
|
||||
)
|
||||
|
||||
func Start(r *repository.JobRepository) {
|
||||
// Start initializes the archiver and starts the background worker goroutine.
|
||||
//
|
||||
// The archiver processes job archiving requests asynchronously via a buffered channel.
|
||||
// Jobs are sent to the channel using TriggerArchiving() and processed by the worker.
|
||||
//
|
||||
// Parameters:
|
||||
// - r: JobRepository instance for database operations
|
||||
// - ctx: Context for cancellation (shutdown signal propagation)
|
||||
//
|
||||
// The worker goroutine will run until:
|
||||
// - ctx is cancelled (via parent shutdown)
|
||||
// - archiveChannel is closed (via Shutdown())
|
||||
//
|
||||
// Must be called before TriggerArchiving(). Safe to call only once.
|
||||
func Start(r *repository.JobRepository, ctx context.Context) {
|
||||
shutdownCtx, shutdownCancel = context.WithCancel(ctx)
|
||||
archiveChannel = make(chan *schema.Job, 128)
|
||||
workerDone = make(chan struct{})
|
||||
jobRepo = r
|
||||
|
||||
go archivingWorker()
|
||||
}
|
||||
|
||||
// Archiving worker thread
|
||||
// archivingWorker is the background goroutine that processes job archiving requests.
|
||||
//
|
||||
// The worker loop:
|
||||
// 1. Blocks waiting for jobs on archiveChannel or shutdown signal
|
||||
// 2. Fetches job metadata from repository
|
||||
// 3. Archives job data to configured backend (calls ArchiveJob)
|
||||
// 4. Updates job footprint and energy metrics in database
|
||||
// 5. Marks job as successfully archived
|
||||
// 6. Calls job stop hooks
|
||||
//
|
||||
// The worker exits when:
|
||||
// - shutdownCtx is cancelled (timeout during shutdown)
|
||||
// - archiveChannel is closed (normal shutdown)
|
||||
//
|
||||
// Errors during archiving are logged and the job is marked as failed,
|
||||
// but the worker continues processing other jobs.
|
||||
func archivingWorker() {
|
||||
defer close(workerDone)
|
||||
|
||||
for {
|
||||
select {
|
||||
case <-shutdownCtx.Done():
|
||||
cclog.Info("Archive worker received shutdown signal")
|
||||
return
|
||||
|
||||
case job, ok := <-archiveChannel:
|
||||
if !ok {
|
||||
break
|
||||
cclog.Info("Archive channel closed, worker exiting")
|
||||
return
|
||||
}
|
||||
|
||||
start := time.Now()
|
||||
// not using meta data, called to load JobMeta into Cache?
|
||||
// will fail if job meta not in repository
|
||||
if _, err := jobRepo.FetchMetadata(job); err != nil {
|
||||
cclog.Errorf("archiving job (dbid: %d) failed at check metadata step: %s", job.ID, err.Error())
|
||||
jobRepo.UpdateMonitoringStatus(*job.ID, schema.MonitoringStatusArchivingFailed)
|
||||
archivePending.Done()
|
||||
continue
|
||||
}
|
||||
|
||||
// ArchiveJob will fetch all the data from a MetricDataRepository and push into configured archive backend
|
||||
// TODO: Maybe use context with cancel/timeout here
|
||||
jobMeta, err := ArchiveJob(job, context.Background())
|
||||
// Use shutdown context to allow cancellation
|
||||
jobMeta, err := ArchiveJob(job, shutdownCtx)
|
||||
if err != nil {
|
||||
cclog.Errorf("archiving job (dbid: %d) failed at archiving job step: %s", job.ID, err.Error())
|
||||
jobRepo.UpdateMonitoringStatus(*job.ID, schema.MonitoringStatusArchivingFailed)
|
||||
archivePending.Done()
|
||||
continue
|
||||
}
|
||||
|
||||
@@ -58,30 +146,44 @@ func archivingWorker() {
|
||||
|
||||
if stmt, err = jobRepo.UpdateFootprint(stmt, jobMeta); err != nil {
|
||||
cclog.Errorf("archiving job (dbid: %d) failed at update Footprint step: %s", job.ID, err.Error())
|
||||
archivePending.Done()
|
||||
continue
|
||||
}
|
||||
if stmt, err = jobRepo.UpdateEnergy(stmt, jobMeta); err != nil {
|
||||
cclog.Errorf("archiving job (dbid: %d) failed at update Energy step: %s", job.ID, err.Error())
|
||||
archivePending.Done()
|
||||
continue
|
||||
}
|
||||
// Update the jobs database entry one last time:
|
||||
stmt = jobRepo.MarkArchived(stmt, schema.MonitoringStatusArchivingSuccessful)
|
||||
if err := jobRepo.Execute(stmt); err != nil {
|
||||
cclog.Errorf("archiving job (dbid: %d) failed at db execute: %s", job.ID, err.Error())
|
||||
archivePending.Done()
|
||||
continue
|
||||
}
|
||||
cclog.Debugf("archiving job %d took %s", job.JobID, time.Since(start))
|
||||
cclog.Printf("archiving job (dbid: %d) successful", job.ID)
|
||||
cclog.Infof("archiving job (dbid: %d) successful", job.ID)
|
||||
|
||||
repository.CallJobStopHooks(job)
|
||||
archivePending.Done()
|
||||
default:
|
||||
continue
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Trigger async archiving
|
||||
// TriggerArchiving queues a job for asynchronous archiving.
|
||||
//
|
||||
// This function should be called when a job completes (stops) to archive its
|
||||
// metric data from the metric store to the configured archive backend.
|
||||
//
|
||||
// The function:
|
||||
// 1. Increments the pending job counter (WaitGroup)
|
||||
// 2. Sends the job to the archiving channel (buffered, capacity 128)
|
||||
// 3. Returns immediately (non-blocking unless channel is full)
|
||||
//
|
||||
// The actual archiving is performed asynchronously by the worker goroutine.
|
||||
// Upon completion, the worker will decrement the pending counter.
|
||||
//
|
||||
// Calls cclog.Fatal if Start() has not been called first.
|
||||
func TriggerArchiving(job *schema.Job) {
|
||||
if archiveChannel == nil {
|
||||
cclog.Fatal("Cannot archive without archiving channel. Did you Start the archiver?")
|
||||
@@ -91,8 +193,58 @@ func TriggerArchiving(job *schema.Job) {
|
||||
archiveChannel <- job
|
||||
}
|
||||
|
||||
// Wait for background thread to finish pending archiving operations
|
||||
func WaitForArchiving() {
|
||||
// close channel and wait for worker to process remaining jobs
|
||||
archivePending.Wait()
|
||||
// Shutdown performs a graceful shutdown of the archiver with a configurable timeout.
|
||||
//
|
||||
// The shutdown process:
|
||||
// 1. Closes archiveChannel - no new jobs will be accepted
|
||||
// 2. Waits for pending jobs to complete (up to timeout duration)
|
||||
// 3. If timeout is exceeded:
|
||||
// - Cancels shutdownCtx to interrupt ongoing ArchiveJob operations
|
||||
// - Returns error indicating timeout
|
||||
// 4. Waits for worker goroutine to exit cleanly
|
||||
//
|
||||
// Parameters:
|
||||
// - timeout: Maximum duration to wait for pending jobs to complete
|
||||
// (recommended: 10-30 seconds for production)
|
||||
//
|
||||
// Returns:
|
||||
// - nil if all jobs completed within timeout
|
||||
// - error if timeout was exceeded (some jobs may not have been archived)
|
||||
//
|
||||
// Jobs that don't complete within the timeout will be marked as failed.
|
||||
// The function always ensures the worker goroutine exits before returning.
|
||||
//
|
||||
// Example:
|
||||
//
|
||||
// if err := archiver.Shutdown(10 * time.Second); err != nil {
|
||||
// log.Printf("Some jobs did not complete: %v", err)
|
||||
// }
|
||||
func Shutdown(timeout time.Duration) error {
|
||||
cclog.Info("Initiating archiver shutdown...")
|
||||
|
||||
// Close channel to signal no more jobs will be accepted
|
||||
close(archiveChannel)
|
||||
|
||||
// Create a channel to signal when all jobs are done
|
||||
done := make(chan struct{})
|
||||
go func() {
|
||||
archivePending.Wait()
|
||||
close(done)
|
||||
}()
|
||||
|
||||
// Wait for jobs to complete or timeout
|
||||
select {
|
||||
case <-done:
|
||||
cclog.Info("All archive jobs completed successfully")
|
||||
// Wait for worker to exit
|
||||
<-workerDone
|
||||
return nil
|
||||
case <-time.After(timeout):
|
||||
cclog.Warn("Archiver shutdown timeout exceeded, cancelling remaining operations")
|
||||
// Cancel any ongoing operations
|
||||
shutdownCancel()
|
||||
// Wait for worker to exit
|
||||
<-workerDone
|
||||
return fmt.Errorf("archiver shutdown timeout after %v", timeout)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2,6 +2,7 @@
|
||||
// All rights reserved. This file is part of cc-backend.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package archiver
|
||||
|
||||
import (
|
||||
@@ -15,7 +16,32 @@ import (
|
||||
"github.com/ClusterCockpit/cc-lib/schema"
|
||||
)
|
||||
|
||||
// Writes a running job to the job-archive
|
||||
// ArchiveJob archives a completed job's metric data to the configured archive backend.
|
||||
//
|
||||
// This function performs the following operations:
|
||||
// 1. Loads all metric data for the job from the metric data repository
|
||||
// 2. Calculates job-level statistics (avg, min, max) for each metric
|
||||
// 3. Stores the job metadata and metric data to the archive backend
|
||||
//
|
||||
// Metric data is retrieved at the highest available resolution (typically 60s)
|
||||
// for the following scopes:
|
||||
// - Node scope (always)
|
||||
// - Core scope (for jobs with ≤8 nodes, to reduce data volume)
|
||||
// - Accelerator scope (if job used accelerators)
|
||||
//
|
||||
// The function respects context cancellation. If ctx is cancelled (e.g., during
|
||||
// shutdown timeout), the operation will be interrupted and return an error.
|
||||
//
|
||||
// Parameters:
|
||||
// - job: The job to archive (must be a completed job)
|
||||
// - ctx: Context for cancellation and timeout control
|
||||
//
|
||||
// Returns:
|
||||
// - *schema.Job with populated Statistics field
|
||||
// - error if data loading or archiving fails
|
||||
//
|
||||
// If config.Keys.DisableArchive is true, only job statistics are calculated
|
||||
// and returned (no data is written to archive backend).
|
||||
func ArchiveJob(job *schema.Job, ctx context.Context) (*schema.Job, error) {
|
||||
allMetrics := make([]string, 0)
|
||||
metricConfigs := archive.GetCluster(job.Cluster).MetricConfig
|
||||
|
||||
@@ -18,7 +18,6 @@ import (
|
||||
"net"
|
||||
"net/http"
|
||||
"os"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
@@ -32,8 +31,19 @@ import (
|
||||
"github.com/gorilla/sessions"
|
||||
)
|
||||
|
||||
// Authenticator is the interface for all authentication methods.
|
||||
// Each authenticator determines if it can handle a login request (CanLogin)
|
||||
// and performs the actual authentication (Login).
|
||||
type Authenticator interface {
|
||||
// CanLogin determines if this authenticator can handle the login request.
|
||||
// It returns the user object if available and a boolean indicating if this
|
||||
// authenticator should attempt the login. This method should not perform
|
||||
// expensive operations or actual authentication.
|
||||
CanLogin(user *schema.User, username string, rw http.ResponseWriter, r *http.Request) (*schema.User, bool)
|
||||
|
||||
// Login performs the actual authentication for the user.
|
||||
// It returns the authenticated user or an error if authentication fails.
|
||||
// The user parameter may be nil if the user doesn't exist in the database yet.
|
||||
Login(user *schema.User, rw http.ResponseWriter, r *http.Request) (*schema.User, error)
|
||||
}
|
||||
|
||||
@@ -42,27 +52,70 @@ var (
|
||||
authInstance *Authentication
|
||||
)
|
||||
|
||||
var ipUserLimiters sync.Map
|
||||
|
||||
func getIPUserLimiter(ip, username string) *rate.Limiter {
|
||||
key := ip + ":" + username
|
||||
limiter, ok := ipUserLimiters.Load(key)
|
||||
if !ok {
|
||||
newLimiter := rate.NewLimiter(rate.Every(time.Hour/10), 10)
|
||||
ipUserLimiters.Store(key, newLimiter)
|
||||
return newLimiter
|
||||
}
|
||||
return limiter.(*rate.Limiter)
|
||||
// rateLimiterEntry tracks a rate limiter and its last use time for cleanup
|
||||
type rateLimiterEntry struct {
|
||||
limiter *rate.Limiter
|
||||
lastUsed time.Time
|
||||
}
|
||||
|
||||
var ipUserLimiters sync.Map
|
||||
|
||||
// getIPUserLimiter returns a rate limiter for the given IP and username combination.
|
||||
// Rate limiters are created on demand and track 5 attempts per 15 minutes.
|
||||
func getIPUserLimiter(ip, username string) *rate.Limiter {
|
||||
key := ip + ":" + username
|
||||
now := time.Now()
|
||||
|
||||
if entry, ok := ipUserLimiters.Load(key); ok {
|
||||
rle := entry.(*rateLimiterEntry)
|
||||
rle.lastUsed = now
|
||||
return rle.limiter
|
||||
}
|
||||
|
||||
// More aggressive rate limiting: 5 attempts per 15 minutes
|
||||
newLimiter := rate.NewLimiter(rate.Every(15*time.Minute/5), 5)
|
||||
ipUserLimiters.Store(key, &rateLimiterEntry{
|
||||
limiter: newLimiter,
|
||||
lastUsed: now,
|
||||
})
|
||||
return newLimiter
|
||||
}
|
||||
|
||||
// cleanupOldRateLimiters removes rate limiters that haven't been used recently
|
||||
func cleanupOldRateLimiters(olderThan time.Time) {
|
||||
ipUserLimiters.Range(func(key, value any) bool {
|
||||
entry := value.(*rateLimiterEntry)
|
||||
if entry.lastUsed.Before(olderThan) {
|
||||
ipUserLimiters.Delete(key)
|
||||
cclog.Debugf("Cleaned up rate limiter for %v", key)
|
||||
}
|
||||
return true
|
||||
})
|
||||
}
|
||||
|
||||
// startRateLimiterCleanup starts a background goroutine to clean up old rate limiters
|
||||
func startRateLimiterCleanup() {
|
||||
go func() {
|
||||
ticker := time.NewTicker(1 * time.Hour)
|
||||
defer ticker.Stop()
|
||||
for range ticker.C {
|
||||
// Clean up limiters not used in the last 24 hours
|
||||
cleanupOldRateLimiters(time.Now().Add(-24 * time.Hour))
|
||||
}
|
||||
}()
|
||||
}
|
||||
|
||||
// AuthConfig contains configuration for all authentication methods
|
||||
type AuthConfig struct {
|
||||
LdapConfig *LdapConfig `json:"ldap"`
|
||||
JwtConfig *JWTAuthConfig `json:"jwts"`
|
||||
OpenIDConfig *OpenIDConfig `json:"oidc"`
|
||||
}
|
||||
|
||||
// Keys holds the global authentication configuration
|
||||
var Keys AuthConfig
|
||||
|
||||
// Authentication manages all authentication methods and session handling
|
||||
type Authentication struct {
|
||||
sessionStore *sessions.CookieStore
|
||||
LdapAuth *LdapAuthenticator
|
||||
@@ -86,10 +139,31 @@ func (auth *Authentication) AuthViaSession(
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
// TODO: Check if session keys exist
|
||||
username, _ := session.Values["username"].(string)
|
||||
projects, _ := session.Values["projects"].([]string)
|
||||
roles, _ := session.Values["roles"].([]string)
|
||||
// Validate session data with proper type checking
|
||||
username, ok := session.Values["username"].(string)
|
||||
if !ok || username == "" {
|
||||
cclog.Warn("Invalid session: missing or invalid username")
|
||||
// Invalidate the corrupted session
|
||||
session.Options.MaxAge = -1
|
||||
_ = auth.sessionStore.Save(r, rw, session)
|
||||
return nil, errors.New("invalid session data")
|
||||
}
|
||||
|
||||
projects, ok := session.Values["projects"].([]string)
|
||||
if !ok {
|
||||
cclog.Warn("Invalid session: projects not found or invalid type, using empty list")
|
||||
projects = []string{}
|
||||
}
|
||||
|
||||
roles, ok := session.Values["roles"].([]string)
|
||||
if !ok || len(roles) == 0 {
|
||||
cclog.Warn("Invalid session: missing or invalid roles")
|
||||
// Invalidate the corrupted session
|
||||
session.Options.MaxAge = -1
|
||||
_ = auth.sessionStore.Save(r, rw, session)
|
||||
return nil, errors.New("invalid session data")
|
||||
}
|
||||
|
||||
return &schema.User{
|
||||
Username: username,
|
||||
Projects: projects,
|
||||
@@ -102,6 +176,9 @@ func (auth *Authentication) AuthViaSession(
|
||||
func Init(authCfg *json.RawMessage) {
|
||||
initOnce.Do(func() {
|
||||
authInstance = &Authentication{}
|
||||
|
||||
// Start background cleanup of rate limiters
|
||||
startRateLimiterCleanup()
|
||||
|
||||
sessKey := os.Getenv("SESSION_KEY")
|
||||
if sessKey == "" {
|
||||
@@ -185,38 +262,36 @@ func GetAuthInstance() *Authentication {
|
||||
return authInstance
|
||||
}
|
||||
|
||||
func handleTokenUser(tokenUser *schema.User) {
|
||||
// handleUserSync syncs or updates a user in the database based on configuration.
|
||||
// This is used for both JWT and OIDC authentication when syncUserOnLogin or updateUserOnLogin is enabled.
|
||||
func handleUserSync(user *schema.User, syncUserOnLogin, updateUserOnLogin bool) {
|
||||
r := repository.GetUserRepository()
|
||||
dbUser, err := r.GetUser(tokenUser.Username)
|
||||
dbUser, err := r.GetUser(user.Username)
|
||||
|
||||
if err != nil && err != sql.ErrNoRows {
|
||||
cclog.Errorf("Error while loading user '%s': %v", tokenUser.Username, err)
|
||||
} else if err == sql.ErrNoRows && Keys.JwtConfig.SyncUserOnLogin { // Adds New User
|
||||
if err := r.AddUser(tokenUser); err != nil {
|
||||
cclog.Errorf("Error while adding user '%s' to DB: %v", tokenUser.Username, err)
|
||||
cclog.Errorf("Error while loading user '%s': %v", user.Username, err)
|
||||
return
|
||||
}
|
||||
|
||||
if err == sql.ErrNoRows && syncUserOnLogin { // Add new user
|
||||
if err := r.AddUser(user); err != nil {
|
||||
cclog.Errorf("Error while adding user '%s' to DB: %v", user.Username, err)
|
||||
}
|
||||
} else if err == nil && Keys.JwtConfig.UpdateUserOnLogin { // Update Existing User
|
||||
if err := r.UpdateUser(dbUser, tokenUser); err != nil {
|
||||
cclog.Errorf("Error while updating user '%s' to DB: %v", dbUser.Username, err)
|
||||
} else if err == nil && updateUserOnLogin { // Update existing user
|
||||
if err := r.UpdateUser(dbUser, user); err != nil {
|
||||
cclog.Errorf("Error while updating user '%s' in DB: %v", dbUser.Username, err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func handleOIDCUser(OIDCUser *schema.User) {
|
||||
r := repository.GetUserRepository()
|
||||
dbUser, err := r.GetUser(OIDCUser.Username)
|
||||
// handleTokenUser syncs JWT token user with database
|
||||
func handleTokenUser(tokenUser *schema.User) {
|
||||
handleUserSync(tokenUser, Keys.JwtConfig.SyncUserOnLogin, Keys.JwtConfig.UpdateUserOnLogin)
|
||||
}
|
||||
|
||||
if err != nil && err != sql.ErrNoRows {
|
||||
cclog.Errorf("Error while loading user '%s': %v", OIDCUser.Username, err)
|
||||
} else if err == sql.ErrNoRows && Keys.OpenIDConfig.SyncUserOnLogin { // Adds New User
|
||||
if err := r.AddUser(OIDCUser); err != nil {
|
||||
cclog.Errorf("Error while adding user '%s' to DB: %v", OIDCUser.Username, err)
|
||||
}
|
||||
} else if err == nil && Keys.OpenIDConfig.UpdateUserOnLogin { // Update Existing User
|
||||
if err := r.UpdateUser(dbUser, OIDCUser); err != nil {
|
||||
cclog.Errorf("Error while updating user '%s' to DB: %v", dbUser.Username, err)
|
||||
}
|
||||
}
|
||||
// handleOIDCUser syncs OIDC user with database
|
||||
func handleOIDCUser(OIDCUser *schema.User) {
|
||||
handleUserSync(OIDCUser, Keys.OpenIDConfig.SyncUserOnLogin, Keys.OpenIDConfig.UpdateUserOnLogin)
|
||||
}
|
||||
|
||||
func (auth *Authentication) SaveSession(rw http.ResponseWriter, r *http.Request, user *schema.User) error {
|
||||
@@ -231,6 +306,7 @@ func (auth *Authentication) SaveSession(rw http.ResponseWriter, r *http.Request,
|
||||
session.Options.MaxAge = int(auth.SessionMaxAge.Seconds())
|
||||
}
|
||||
if config.Keys.HTTPSCertFile == "" && config.Keys.HTTPSKeyFile == "" {
|
||||
cclog.Warn("HTTPS not configured - session cookies will not have Secure flag set (insecure for production)")
|
||||
session.Options.Secure = false
|
||||
}
|
||||
session.Options.SameSite = http.SameSiteStrictMode
|
||||
@@ -532,10 +608,13 @@ func securedCheck(user *schema.User, r *http.Request) error {
|
||||
IPAddress = r.RemoteAddr
|
||||
}
|
||||
|
||||
// FIXME: IPV6 not handled
|
||||
if strings.Contains(IPAddress, ":") {
|
||||
IPAddress = strings.Split(IPAddress, ":")[0]
|
||||
// Handle both IPv4 and IPv6 addresses properly
|
||||
// For IPv6, this will strip the port and brackets
|
||||
// For IPv4, this will strip the port
|
||||
if host, _, err := net.SplitHostPort(IPAddress); err == nil {
|
||||
IPAddress = host
|
||||
}
|
||||
// If SplitHostPort fails, IPAddress is already just a host (no port)
|
||||
|
||||
// If nothing declared in config: deny all request to this api endpoint
|
||||
if len(config.Keys.APIAllowedIPs) == 0 {
|
||||
|
||||
176
internal/auth/auth_test.go
Normal file
176
internal/auth/auth_test.go
Normal file
@@ -0,0 +1,176 @@
|
||||
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
// All rights reserved. This file is part of cc-backend.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package auth
|
||||
|
||||
import (
|
||||
"net"
|
||||
"testing"
|
||||
"time"
|
||||
)
|
||||
|
||||
// TestGetIPUserLimiter tests the rate limiter creation and retrieval
|
||||
func TestGetIPUserLimiter(t *testing.T) {
|
||||
ip := "192.168.1.1"
|
||||
username := "testuser"
|
||||
|
||||
// Get limiter for the first time
|
||||
limiter1 := getIPUserLimiter(ip, username)
|
||||
if limiter1 == nil {
|
||||
t.Fatal("Expected limiter to be created")
|
||||
}
|
||||
|
||||
// Get the same limiter again
|
||||
limiter2 := getIPUserLimiter(ip, username)
|
||||
if limiter1 != limiter2 {
|
||||
t.Error("Expected to get the same limiter instance")
|
||||
}
|
||||
|
||||
// Get a different limiter for different user
|
||||
limiter3 := getIPUserLimiter(ip, "otheruser")
|
||||
if limiter1 == limiter3 {
|
||||
t.Error("Expected different limiter for different user")
|
||||
}
|
||||
|
||||
// Get a different limiter for different IP
|
||||
limiter4 := getIPUserLimiter("192.168.1.2", username)
|
||||
if limiter1 == limiter4 {
|
||||
t.Error("Expected different limiter for different IP")
|
||||
}
|
||||
}
|
||||
|
||||
// TestRateLimiterBehavior tests that rate limiting works correctly
|
||||
func TestRateLimiterBehavior(t *testing.T) {
|
||||
ip := "10.0.0.1"
|
||||
username := "ratelimituser"
|
||||
|
||||
limiter := getIPUserLimiter(ip, username)
|
||||
|
||||
// Should allow first 5 attempts
|
||||
for i := 0; i < 5; i++ {
|
||||
if !limiter.Allow() {
|
||||
t.Errorf("Request %d should be allowed within rate limit", i+1)
|
||||
}
|
||||
}
|
||||
|
||||
// 6th attempt should be blocked
|
||||
if limiter.Allow() {
|
||||
t.Error("Request 6 should be blocked by rate limiter")
|
||||
}
|
||||
}
|
||||
|
||||
// TestCleanupOldRateLimiters tests the cleanup function
|
||||
func TestCleanupOldRateLimiters(t *testing.T) {
|
||||
// Clear all existing limiters first to avoid interference from other tests
|
||||
cleanupOldRateLimiters(time.Now().Add(24 * time.Hour))
|
||||
|
||||
// Create some new rate limiters
|
||||
limiter1 := getIPUserLimiter("1.1.1.1", "user1")
|
||||
limiter2 := getIPUserLimiter("2.2.2.2", "user2")
|
||||
|
||||
if limiter1 == nil || limiter2 == nil {
|
||||
t.Fatal("Failed to create test limiters")
|
||||
}
|
||||
|
||||
// Cleanup limiters older than 1 second from now (should keep both)
|
||||
time.Sleep(10 * time.Millisecond) // Small delay to ensure timestamp difference
|
||||
cleanupOldRateLimiters(time.Now().Add(-1 * time.Second))
|
||||
|
||||
// Verify they still exist (should get same instance)
|
||||
if getIPUserLimiter("1.1.1.1", "user1") != limiter1 {
|
||||
t.Error("Limiter 1 was incorrectly cleaned up")
|
||||
}
|
||||
if getIPUserLimiter("2.2.2.2", "user2") != limiter2 {
|
||||
t.Error("Limiter 2 was incorrectly cleaned up")
|
||||
}
|
||||
|
||||
// Cleanup limiters older than 1 hour from now (should remove both)
|
||||
cleanupOldRateLimiters(time.Now().Add(2 * time.Hour))
|
||||
|
||||
// Getting them again should create new instances
|
||||
newLimiter1 := getIPUserLimiter("1.1.1.1", "user1")
|
||||
if newLimiter1 == limiter1 {
|
||||
t.Error("Old limiter should have been cleaned up")
|
||||
}
|
||||
}
|
||||
|
||||
// TestIPv4Extraction checks that net.SplitHostPort strips the port from IPv4
// addresses and that inputs without a port are left untouched.
func TestIPv4Extraction(t *testing.T) {
	cases := []struct {
		name     string
		input    string
		expected string
	}{
		{name: "IPv4 with port", input: "192.168.1.1:8080", expected: "192.168.1.1"},
		{name: "IPv4 without port", input: "192.168.1.1", expected: "192.168.1.1"},
		{name: "Localhost with port", input: "127.0.0.1:3000", expected: "127.0.0.1"},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			// Fall back to the raw input whenever it cannot be split.
			got := tc.input
			host, _, err := net.SplitHostPort(tc.input)
			if err == nil {
				got = host
			}

			if got == tc.expected {
				return
			}
			t.Errorf("Expected %s, got %s", tc.expected, got)
		})
	}
}
|
||||
|
||||
// TestIPv6Extraction checks that net.SplitHostPort strips brackets and port
// from IPv6 addresses and that bare IPv6 addresses are left untouched.
func TestIPv6Extraction(t *testing.T) {
	cases := []struct {
		name     string
		input    string
		expected string
	}{
		{name: "IPv6 with port", input: "[2001:db8::1]:8080", expected: "2001:db8::1"},
		{name: "IPv6 localhost with port", input: "[::1]:3000", expected: "::1"},
		{name: "IPv6 without port", input: "2001:db8::1", expected: "2001:db8::1"},
		{name: "IPv6 localhost", input: "::1", expected: "::1"},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			// A bare IPv6 address makes SplitHostPort fail; keep the input then.
			got := tc.input
			host, _, err := net.SplitHostPort(tc.input)
			if err == nil {
				got = host
			}

			if got == tc.expected {
				return
			}
			t.Errorf("Expected %s, got %s", tc.expected, got)
		})
	}
}
|
||||
|
||||
// TestIPExtractionEdgeCases covers inputs that are not plain IP:port pairs:
// a hostname, the empty string and a port without a host.
func TestIPExtractionEdgeCases(t *testing.T) {
	cases := []struct {
		name     string
		input    string
		expected string
	}{
		{name: "Hostname without port", input: "example.com", expected: "example.com"},
		{name: "Empty string", input: "", expected: ""},
		{name: "Just port", input: ":8080", expected: ""},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			// Only replace the input when splitting succeeded.
			got := tc.input
			host, _, err := net.SplitHostPort(tc.input)
			if err == nil {
				got = host
			}

			if got == tc.expected {
				return
			}
			t.Errorf("Expected %s, got %s", tc.expected, got)
		})
	}
}
|
||||
@@ -14,7 +14,6 @@ import (
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/ClusterCockpit/cc-backend/internal/repository"
|
||||
cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
|
||||
"github.com/ClusterCockpit/cc-lib/schema"
|
||||
"github.com/golang-jwt/jwt/v5"
|
||||
@@ -102,38 +101,21 @@ func (ja *JWTAuthenticator) AuthViaJWT(
|
||||
|
||||
// Token is valid, extract payload
|
||||
claims := token.Claims.(jwt.MapClaims)
|
||||
sub, _ := claims["sub"].(string)
|
||||
|
||||
var roles []string
|
||||
|
||||
// Validate user + roles from JWT against database?
|
||||
if Keys.JwtConfig.ValidateUser {
|
||||
ur := repository.GetUserRepository()
|
||||
user, err := ur.GetUser(sub)
|
||||
// Deny any logins for unknown usernames
|
||||
if err != nil {
|
||||
cclog.Warn("Could not find user from JWT in internal database.")
|
||||
return nil, errors.New("unknown user")
|
||||
}
|
||||
// Take user roles from database instead of trusting the JWT
|
||||
roles = user.Roles
|
||||
} else {
|
||||
// Extract roles from JWT (if present)
|
||||
if rawroles, ok := claims["roles"].([]any); ok {
|
||||
for _, rr := range rawroles {
|
||||
if r, ok := rr.(string); ok {
|
||||
roles = append(roles, r)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Use shared helper to get user from JWT claims
|
||||
var user *schema.User
|
||||
user, err = getUserFromJWT(claims, Keys.JwtConfig.ValidateUser, schema.AuthToken, -1)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return &schema.User{
|
||||
Username: sub,
|
||||
Roles: roles,
|
||||
AuthType: schema.AuthToken,
|
||||
AuthSource: -1,
|
||||
}, nil
|
||||
|
||||
// If not validating user, we only get roles from JWT (no projects for this auth method)
|
||||
if !Keys.JwtConfig.ValidateUser {
|
||||
user.Roles = extractRolesFromClaims(claims, false)
|
||||
user.Projects = nil // Standard JWT auth doesn't include projects
|
||||
}
|
||||
|
||||
return user, nil
|
||||
}
|
||||
|
||||
// ProvideJWT generates a new JWT that can be used for authentication
|
||||
|
||||
@@ -7,14 +7,11 @@ package auth
|
||||
|
||||
import (
|
||||
"crypto/ed25519"
|
||||
"database/sql"
|
||||
"encoding/base64"
|
||||
"errors"
|
||||
"fmt"
|
||||
"net/http"
|
||||
"os"
|
||||
|
||||
"github.com/ClusterCockpit/cc-backend/internal/repository"
|
||||
cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
|
||||
"github.com/ClusterCockpit/cc-lib/schema"
|
||||
"github.com/golang-jwt/jwt/v5"
|
||||
@@ -149,57 +146,16 @@ func (ja *JWTCookieSessionAuthenticator) Login(
|
||||
}
|
||||
|
||||
claims := token.Claims.(jwt.MapClaims)
|
||||
sub, _ := claims["sub"].(string)
|
||||
|
||||
var roles []string
|
||||
projects := make([]string, 0)
|
||||
|
||||
if jc.ValidateUser {
|
||||
var err error
|
||||
user, err = repository.GetUserRepository().GetUser(sub)
|
||||
if err != nil && err != sql.ErrNoRows {
|
||||
cclog.Errorf("Error while loading user '%v'", sub)
|
||||
}
|
||||
|
||||
// Deny any logins for unknown usernames
|
||||
if user == nil {
|
||||
cclog.Warn("Could not find user from JWT in internal database.")
|
||||
return nil, errors.New("unknown user")
|
||||
}
|
||||
} else {
|
||||
var name string
|
||||
if wrap, ok := claims["name"].(map[string]any); ok {
|
||||
if vals, ok := wrap["values"].([]any); ok {
|
||||
if len(vals) != 0 {
|
||||
name = fmt.Sprintf("%v", vals[0])
|
||||
|
||||
for i := 1; i < len(vals); i++ {
|
||||
name += fmt.Sprintf(" %v", vals[i])
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Extract roles from JWT (if present)
|
||||
if rawroles, ok := claims["roles"].([]any); ok {
|
||||
for _, rr := range rawroles {
|
||||
if r, ok := rr.(string); ok {
|
||||
roles = append(roles, r)
|
||||
}
|
||||
}
|
||||
}
|
||||
user = &schema.User{
|
||||
Username: sub,
|
||||
Name: name,
|
||||
Roles: roles,
|
||||
Projects: projects,
|
||||
AuthType: schema.AuthSession,
|
||||
AuthSource: schema.AuthViaToken,
|
||||
}
|
||||
|
||||
if jc.SyncUserOnLogin || jc.UpdateUserOnLogin {
|
||||
handleTokenUser(user)
|
||||
}
|
||||
|
||||
// Use shared helper to get user from JWT claims
|
||||
user, err = getUserFromJWT(claims, jc.ValidateUser, schema.AuthSession, schema.AuthViaToken)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// Sync or update user if configured
|
||||
if !jc.ValidateUser && (jc.SyncUserOnLogin || jc.UpdateUserOnLogin) {
|
||||
handleTokenUser(user)
|
||||
}
|
||||
|
||||
// (Ask browser to) Delete JWT cookie
|
||||
|
||||
136
internal/auth/jwtHelpers.go
Normal file
136
internal/auth/jwtHelpers.go
Normal file
@@ -0,0 +1,136 @@
|
||||
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
// All rights reserved. This file is part of cc-backend.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package auth
|
||||
|
||||
import (
|
||||
"database/sql"
|
||||
"errors"
|
||||
"fmt"
|
||||
|
||||
"github.com/ClusterCockpit/cc-backend/internal/repository"
|
||||
cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
|
||||
"github.com/ClusterCockpit/cc-lib/schema"
|
||||
"github.com/golang-jwt/jwt/v5"
|
||||
)
|
||||
|
||||
// extractStringFromClaims extracts a string value from JWT claims
|
||||
func extractStringFromClaims(claims jwt.MapClaims, key string) string {
|
||||
if val, ok := claims[key].(string); ok {
|
||||
return val
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
// extractRolesFromClaims extracts roles from JWT claims
|
||||
// If validateRoles is true, only valid roles are returned
|
||||
func extractRolesFromClaims(claims jwt.MapClaims, validateRoles bool) []string {
|
||||
var roles []string
|
||||
|
||||
if rawroles, ok := claims["roles"].([]any); ok {
|
||||
for _, rr := range rawroles {
|
||||
if r, ok := rr.(string); ok {
|
||||
if validateRoles {
|
||||
if schema.IsValidRole(r) {
|
||||
roles = append(roles, r)
|
||||
}
|
||||
} else {
|
||||
roles = append(roles, r)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return roles
|
||||
}
|
||||
|
||||
// extractProjectsFromClaims extracts projects from JWT claims
|
||||
func extractProjectsFromClaims(claims jwt.MapClaims) []string {
|
||||
projects := make([]string, 0)
|
||||
|
||||
if rawprojs, ok := claims["projects"].([]any); ok {
|
||||
for _, pp := range rawprojs {
|
||||
if p, ok := pp.(string); ok {
|
||||
projects = append(projects, p)
|
||||
}
|
||||
}
|
||||
} else if rawprojs, ok := claims["projects"]; ok {
|
||||
if projSlice, ok := rawprojs.([]string); ok {
|
||||
projects = append(projects, projSlice...)
|
||||
}
|
||||
}
|
||||
|
||||
return projects
|
||||
}
|
||||
|
||||
// extractNameFromClaims extracts name from JWT claims
|
||||
// Handles both simple string and complex nested structure
|
||||
func extractNameFromClaims(claims jwt.MapClaims) string {
|
||||
// Try simple string first
|
||||
if name, ok := claims["name"].(string); ok {
|
||||
return name
|
||||
}
|
||||
|
||||
// Try nested structure: {name: {values: [...]}}
|
||||
if wrap, ok := claims["name"].(map[string]any); ok {
|
||||
if vals, ok := wrap["values"].([]any); ok {
|
||||
if len(vals) == 0 {
|
||||
return ""
|
||||
}
|
||||
|
||||
name := fmt.Sprintf("%v", vals[0])
|
||||
for i := 1; i < len(vals); i++ {
|
||||
name += fmt.Sprintf(" %v", vals[i])
|
||||
}
|
||||
return name
|
||||
}
|
||||
}
|
||||
|
||||
return ""
|
||||
}
|
||||
|
||||
// getUserFromJWT creates or retrieves a user based on JWT claims
|
||||
// If validateUser is true, the user must exist in the database
|
||||
// Otherwise, a new user object is created from claims
|
||||
// authSource should be a schema.AuthSource constant (like schema.AuthViaToken)
|
||||
func getUserFromJWT(claims jwt.MapClaims, validateUser bool, authType schema.AuthType, authSource schema.AuthSource) (*schema.User, error) {
|
||||
sub := extractStringFromClaims(claims, "sub")
|
||||
if sub == "" {
|
||||
return nil, errors.New("missing 'sub' claim in JWT")
|
||||
}
|
||||
|
||||
if validateUser {
|
||||
// Validate user against database
|
||||
ur := repository.GetUserRepository()
|
||||
user, err := ur.GetUser(sub)
|
||||
if err != nil && err != sql.ErrNoRows {
|
||||
cclog.Errorf("Error while loading user '%v': %v", sub, err)
|
||||
return nil, fmt.Errorf("database error: %w", err)
|
||||
}
|
||||
|
||||
// Deny any logins for unknown usernames
|
||||
if user == nil || err == sql.ErrNoRows {
|
||||
cclog.Warn("Could not find user from JWT in internal database.")
|
||||
return nil, errors.New("unknown user")
|
||||
}
|
||||
|
||||
// Return database user (with database roles)
|
||||
return user, nil
|
||||
}
|
||||
|
||||
// Create user from JWT claims
|
||||
name := extractNameFromClaims(claims)
|
||||
roles := extractRolesFromClaims(claims, true) // Validate roles
|
||||
projects := extractProjectsFromClaims(claims)
|
||||
|
||||
return &schema.User{
|
||||
Username: sub,
|
||||
Name: name,
|
||||
Roles: roles,
|
||||
Projects: projects,
|
||||
AuthType: authType,
|
||||
AuthSource: authSource,
|
||||
}, nil
|
||||
}
|
||||
281
internal/auth/jwtHelpers_test.go
Normal file
281
internal/auth/jwtHelpers_test.go
Normal file
@@ -0,0 +1,281 @@
|
||||
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
// All rights reserved. This file is part of cc-backend.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package auth
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/ClusterCockpit/cc-lib/schema"
|
||||
"github.com/golang-jwt/jwt/v5"
|
||||
)
|
||||
|
||||
// TestExtractStringFromClaims tests extracting string values from JWT claims
|
||||
func TestExtractStringFromClaims(t *testing.T) {
|
||||
claims := jwt.MapClaims{
|
||||
"sub": "testuser",
|
||||
"email": "test@example.com",
|
||||
"age": 25, // not a string
|
||||
}
|
||||
|
||||
tests := []struct {
|
||||
name string
|
||||
key string
|
||||
expected string
|
||||
}{
|
||||
{"Existing string", "sub", "testuser"},
|
||||
{"Another string", "email", "test@example.com"},
|
||||
{"Non-existent key", "missing", ""},
|
||||
{"Non-string value", "age", ""},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
result := extractStringFromClaims(claims, tt.key)
|
||||
if result != tt.expected {
|
||||
t.Errorf("Expected %s, got %s", tt.expected, result)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// TestExtractRolesFromClaims tests role extraction and validation
|
||||
func TestExtractRolesFromClaims(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
claims jwt.MapClaims
|
||||
validateRoles bool
|
||||
expected []string
|
||||
}{
|
||||
{
|
||||
name: "Valid roles without validation",
|
||||
claims: jwt.MapClaims{
|
||||
"roles": []any{"admin", "user", "invalid_role"},
|
||||
},
|
||||
validateRoles: false,
|
||||
expected: []string{"admin", "user", "invalid_role"},
|
||||
},
|
||||
{
|
||||
name: "Valid roles with validation",
|
||||
claims: jwt.MapClaims{
|
||||
"roles": []any{"admin", "user", "api"},
|
||||
},
|
||||
validateRoles: true,
|
||||
expected: []string{"admin", "user", "api"},
|
||||
},
|
||||
{
|
||||
name: "Invalid roles with validation",
|
||||
claims: jwt.MapClaims{
|
||||
"roles": []any{"invalid_role", "fake_role"},
|
||||
},
|
||||
validateRoles: true,
|
||||
expected: []string{}, // Should filter out invalid roles
|
||||
},
|
||||
{
|
||||
name: "No roles claim",
|
||||
claims: jwt.MapClaims{},
|
||||
validateRoles: false,
|
||||
expected: []string{},
|
||||
},
|
||||
{
|
||||
name: "Non-array roles",
|
||||
claims: jwt.MapClaims{
|
||||
"roles": "admin",
|
||||
},
|
||||
validateRoles: false,
|
||||
expected: []string{},
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
result := extractRolesFromClaims(tt.claims, tt.validateRoles)
|
||||
|
||||
if len(result) != len(tt.expected) {
|
||||
t.Errorf("Expected %d roles, got %d", len(tt.expected), len(result))
|
||||
return
|
||||
}
|
||||
|
||||
for i, role := range result {
|
||||
if i >= len(tt.expected) || role != tt.expected[i] {
|
||||
t.Errorf("Expected role %s at position %d, got %s", tt.expected[i], i, role)
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// TestExtractProjectsFromClaims tests project extraction from claims
|
||||
func TestExtractProjectsFromClaims(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
claims jwt.MapClaims
|
||||
expected []string
|
||||
}{
|
||||
{
|
||||
name: "Projects as array of interfaces",
|
||||
claims: jwt.MapClaims{
|
||||
"projects": []any{"project1", "project2", "project3"},
|
||||
},
|
||||
expected: []string{"project1", "project2", "project3"},
|
||||
},
|
||||
{
|
||||
name: "Projects as string array",
|
||||
claims: jwt.MapClaims{
|
||||
"projects": []string{"projectA", "projectB"},
|
||||
},
|
||||
expected: []string{"projectA", "projectB"},
|
||||
},
|
||||
{
|
||||
name: "No projects claim",
|
||||
claims: jwt.MapClaims{},
|
||||
expected: []string{},
|
||||
},
|
||||
{
|
||||
name: "Mixed types in projects array",
|
||||
claims: jwt.MapClaims{
|
||||
"projects": []any{"project1", 123, "project2"},
|
||||
},
|
||||
expected: []string{"project1", "project2"}, // Should skip non-strings
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
result := extractProjectsFromClaims(tt.claims)
|
||||
|
||||
if len(result) != len(tt.expected) {
|
||||
t.Errorf("Expected %d projects, got %d", len(tt.expected), len(result))
|
||||
return
|
||||
}
|
||||
|
||||
for i, project := range result {
|
||||
if i >= len(tt.expected) || project != tt.expected[i] {
|
||||
t.Errorf("Expected project %s at position %d, got %s", tt.expected[i], i, project)
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// TestExtractNameFromClaims tests name extraction from various formats
|
||||
func TestExtractNameFromClaims(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
claims jwt.MapClaims
|
||||
expected string
|
||||
}{
|
||||
{
|
||||
name: "Simple string name",
|
||||
claims: jwt.MapClaims{
|
||||
"name": "John Doe",
|
||||
},
|
||||
expected: "John Doe",
|
||||
},
|
||||
{
|
||||
name: "Nested name structure",
|
||||
claims: jwt.MapClaims{
|
||||
"name": map[string]any{
|
||||
"values": []any{"John", "Doe"},
|
||||
},
|
||||
},
|
||||
expected: "John Doe",
|
||||
},
|
||||
{
|
||||
name: "Nested name with single value",
|
||||
claims: jwt.MapClaims{
|
||||
"name": map[string]any{
|
||||
"values": []any{"Alice"},
|
||||
},
|
||||
},
|
||||
expected: "Alice",
|
||||
},
|
||||
{
|
||||
name: "No name claim",
|
||||
claims: jwt.MapClaims{},
|
||||
expected: "",
|
||||
},
|
||||
{
|
||||
name: "Empty nested values",
|
||||
claims: jwt.MapClaims{
|
||||
"name": map[string]any{
|
||||
"values": []any{},
|
||||
},
|
||||
},
|
||||
expected: "",
|
||||
},
|
||||
{
|
||||
name: "Nested with non-string values",
|
||||
claims: jwt.MapClaims{
|
||||
"name": map[string]any{
|
||||
"values": []any{123, "Smith"},
|
||||
},
|
||||
},
|
||||
expected: "123 Smith", // Should convert to string
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
result := extractNameFromClaims(tt.claims)
|
||||
if result != tt.expected {
|
||||
t.Errorf("Expected '%s', got '%s'", tt.expected, result)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// TestGetUserFromJWT_NoValidation tests getUserFromJWT without database validation
|
||||
func TestGetUserFromJWT_NoValidation(t *testing.T) {
|
||||
claims := jwt.MapClaims{
|
||||
"sub": "testuser",
|
||||
"name": "Test User",
|
||||
"roles": []any{"user", "admin"},
|
||||
"projects": []any{"project1", "project2"},
|
||||
}
|
||||
|
||||
user, err := getUserFromJWT(claims, false, schema.AuthToken, -1)
|
||||
|
||||
if err != nil {
|
||||
t.Fatalf("Unexpected error: %v", err)
|
||||
}
|
||||
|
||||
if user.Username != "testuser" {
|
||||
t.Errorf("Expected username 'testuser', got '%s'", user.Username)
|
||||
}
|
||||
|
||||
if user.Name != "Test User" {
|
||||
t.Errorf("Expected name 'Test User', got '%s'", user.Name)
|
||||
}
|
||||
|
||||
if len(user.Roles) != 2 {
|
||||
t.Errorf("Expected 2 roles, got %d", len(user.Roles))
|
||||
}
|
||||
|
||||
if len(user.Projects) != 2 {
|
||||
t.Errorf("Expected 2 projects, got %d", len(user.Projects))
|
||||
}
|
||||
|
||||
if user.AuthType != schema.AuthToken {
|
||||
t.Errorf("Expected AuthType %v, got %v", schema.AuthToken, user.AuthType)
|
||||
}
|
||||
}
|
||||
|
||||
// TestGetUserFromJWT_MissingSub tests error when sub claim is missing
|
||||
func TestGetUserFromJWT_MissingSub(t *testing.T) {
|
||||
claims := jwt.MapClaims{
|
||||
"name": "Test User",
|
||||
}
|
||||
|
||||
_, err := getUserFromJWT(claims, false, schema.AuthToken, -1)
|
||||
|
||||
if err == nil {
|
||||
t.Error("Expected error for missing sub claim")
|
||||
}
|
||||
|
||||
if err.Error() != "missing 'sub' claim in JWT" {
|
||||
t.Errorf("Expected specific error message, got: %v", err)
|
||||
}
|
||||
}
|
||||
@@ -6,7 +6,6 @@
|
||||
package auth
|
||||
|
||||
import (
|
||||
"database/sql"
|
||||
"encoding/base64"
|
||||
"errors"
|
||||
"fmt"
|
||||
@@ -14,7 +13,6 @@ import (
|
||||
"os"
|
||||
"strings"
|
||||
|
||||
"github.com/ClusterCockpit/cc-backend/internal/repository"
|
||||
cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
|
||||
"github.com/ClusterCockpit/cc-lib/schema"
|
||||
"github.com/golang-jwt/jwt/v5"
|
||||
@@ -77,70 +75,16 @@ func (ja *JWTSessionAuthenticator) Login(
|
||||
}
|
||||
|
||||
claims := token.Claims.(jwt.MapClaims)
|
||||
sub, _ := claims["sub"].(string)
|
||||
|
||||
var roles []string
|
||||
projects := make([]string, 0)
|
||||
|
||||
if Keys.JwtConfig.ValidateUser {
|
||||
var err error
|
||||
user, err = repository.GetUserRepository().GetUser(sub)
|
||||
if err != nil && err != sql.ErrNoRows {
|
||||
cclog.Errorf("Error while loading user '%v'", sub)
|
||||
}
|
||||
|
||||
// Deny any logins for unknown usernames
|
||||
if user == nil {
|
||||
cclog.Warn("Could not find user from JWT in internal database.")
|
||||
return nil, errors.New("unknown user")
|
||||
}
|
||||
} else {
|
||||
var name string
|
||||
if wrap, ok := claims["name"].(map[string]any); ok {
|
||||
if vals, ok := wrap["values"].([]any); ok {
|
||||
if len(vals) != 0 {
|
||||
name = fmt.Sprintf("%v", vals[0])
|
||||
|
||||
for i := 1; i < len(vals); i++ {
|
||||
name += fmt.Sprintf(" %v", vals[i])
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Extract roles from JWT (if present)
|
||||
if rawroles, ok := claims["roles"].([]any); ok {
|
||||
for _, rr := range rawroles {
|
||||
if r, ok := rr.(string); ok {
|
||||
if schema.IsValidRole(r) {
|
||||
roles = append(roles, r)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if rawprojs, ok := claims["projects"].([]any); ok {
|
||||
for _, pp := range rawprojs {
|
||||
if p, ok := pp.(string); ok {
|
||||
projects = append(projects, p)
|
||||
}
|
||||
}
|
||||
} else if rawprojs, ok := claims["projects"]; ok {
|
||||
projects = append(projects, rawprojs.([]string)...)
|
||||
}
|
||||
|
||||
user = &schema.User{
|
||||
Username: sub,
|
||||
Name: name,
|
||||
Roles: roles,
|
||||
Projects: projects,
|
||||
AuthType: schema.AuthSession,
|
||||
AuthSource: schema.AuthViaToken,
|
||||
}
|
||||
|
||||
if Keys.JwtConfig.SyncUserOnLogin || Keys.JwtConfig.UpdateUserOnLogin {
|
||||
handleTokenUser(user)
|
||||
}
|
||||
|
||||
// Use shared helper to get user from JWT claims
|
||||
user, err = getUserFromJWT(claims, Keys.JwtConfig.ValidateUser, schema.AuthSession, schema.AuthViaToken)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// Sync or update user if configured
|
||||
if !Keys.JwtConfig.ValidateUser && (Keys.JwtConfig.SyncUserOnLogin || Keys.JwtConfig.UpdateUserOnLogin) {
|
||||
handleTokenUser(user)
|
||||
}
|
||||
|
||||
return user, nil
|
||||
|
||||
@@ -71,6 +71,7 @@ func (la *LdapAuthenticator) CanLogin(
|
||||
l, err := la.getLdapConnection(true)
|
||||
if err != nil {
|
||||
cclog.Error("LDAP connection error")
|
||||
return nil, false
|
||||
}
|
||||
defer l.Close()
|
||||
|
||||
|
||||
@@ -54,8 +54,13 @@ func setCallbackCookie(w http.ResponseWriter, r *http.Request, name, value strin
|
||||
http.SetCookie(w, c)
|
||||
}
|
||||
|
||||
// NewOIDC creates a new OIDC authenticator with the configured provider
|
||||
func NewOIDC(a *Authentication) *OIDC {
|
||||
provider, err := oidc.NewProvider(context.Background(), Keys.OpenIDConfig.Provider)
|
||||
// Use context with timeout for provider initialization
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
|
||||
defer cancel()
|
||||
|
||||
provider, err := oidc.NewProvider(ctx, Keys.OpenIDConfig.Provider)
|
||||
if err != nil {
|
||||
cclog.Fatal(err)
|
||||
}
|
||||
@@ -111,13 +116,18 @@ func (oa *OIDC) OAuth2Callback(rw http.ResponseWriter, r *http.Request) {
|
||||
http.Error(rw, "Code not found", http.StatusBadRequest)
|
||||
return
|
||||
}
|
||||
token, err := oa.client.Exchange(context.Background(), code, oauth2.VerifierOption(codeVerifier))
|
||||
// Exchange authorization code for token with timeout
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
|
||||
defer cancel()
|
||||
|
||||
token, err := oa.client.Exchange(ctx, code, oauth2.VerifierOption(codeVerifier))
|
||||
if err != nil {
|
||||
http.Error(rw, "Failed to exchange token: "+err.Error(), http.StatusInternalServerError)
|
||||
return
|
||||
}
|
||||
|
||||
userInfo, err := oa.provider.UserInfo(context.Background(), oauth2.StaticTokenSource(token))
|
||||
// Get user info from OIDC provider with same timeout
|
||||
userInfo, err := oa.provider.UserInfo(ctx, oauth2.StaticTokenSource(token))
|
||||
if err != nil {
|
||||
http.Error(rw, "Failed to get userinfo: "+err.Error(), http.StatusInternalServerError)
|
||||
return
|
||||
@@ -180,8 +190,8 @@ func (oa *OIDC) OAuth2Callback(rw http.ResponseWriter, r *http.Request) {
|
||||
|
||||
oa.authentication.SaveSession(rw, r, user)
|
||||
cclog.Infof("login successfull: user: %#v (roles: %v, projects: %v)", user.Username, user.Roles, user.Projects)
|
||||
ctx := context.WithValue(r.Context(), repository.ContextUserKey, user)
|
||||
http.RedirectHandler("/", http.StatusTemporaryRedirect).ServeHTTP(rw, r.WithContext(ctx))
|
||||
userCtx := context.WithValue(r.Context(), repository.ContextUserKey, user)
|
||||
http.RedirectHandler("/", http.StatusTemporaryRedirect).ServeHTTP(rw, r.WithContext(userCtx))
|
||||
}
|
||||
|
||||
func (oa *OIDC) OAuth2Login(rw http.ResponseWriter, r *http.Request) {
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,8 +1,9 @@
|
||||
package graph
|
||||
|
||||
// This file will be automatically regenerated based on the schema, any resolver implementations
|
||||
// This file will be automatically regenerated based on the schema, any resolver
|
||||
// implementations
|
||||
// will be copied through when generating and any unknown code will be moved to the end.
|
||||
// Code generated by github.com/99designs/gqlgen version v0.17.78
|
||||
// Code generated by github.com/99designs/gqlgen version v0.17.84
|
||||
|
||||
import (
|
||||
"context"
|
||||
@@ -976,7 +977,6 @@ func (r *queryResolver) NodeMetricsList(ctx context.Context, cluster string, sub
|
||||
|
||||
// ClusterMetrics is the resolver for the clusterMetrics field.
|
||||
func (r *queryResolver) ClusterMetrics(ctx context.Context, cluster string, metrics []string, from time.Time, to time.Time) (*model.ClusterMetrics, error) {
|
||||
|
||||
user := repository.GetUserFromContext(ctx)
|
||||
if user != nil && !user.HasAnyRole([]schema.Role{schema.RoleAdmin, schema.RoleSupport}) {
|
||||
return nil, errors.New("you need to be administrator or support staff for this query")
|
||||
|
||||
132
internal/importer/README.md
Normal file
132
internal/importer/README.md
Normal file
@@ -0,0 +1,132 @@
|
||||
# Importer Package
|
||||
|
||||
The `importer` package provides functionality for importing job data into the ClusterCockpit database from archived job files.
|
||||
|
||||
## Overview
|
||||
|
||||
This package supports two primary import workflows:
|
||||
|
||||
1. **Bulk Database Initialization** - Reinitialize the entire job database from archived jobs
|
||||
2. **Individual Job Import** - Import specific jobs from metadata/data file pairs
|
||||
|
||||
Both workflows enrich job metadata by calculating performance footprints and energy consumption metrics before persisting to the database.
|
||||
|
||||
## Main Entry Points
|
||||
|
||||
### InitDB()
|
||||
|
||||
Reinitializes the job database from all archived jobs.
|
||||
|
||||
```go
|
||||
if err := importer.InitDB(); err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
```
|
||||
|
||||
This function:
|
||||
- Flushes existing job, tag, and jobtag tables
|
||||
- Iterates through all jobs in the configured archive
|
||||
- Enriches each job with calculated metrics
|
||||
- Inserts jobs into the database in batched transactions (100 jobs per batch)
|
||||
- Continues on individual job failures, logging errors
|
||||
|
||||
**Use Case**: Initial database setup or complete database rebuild from archive.
|
||||
|
||||
### HandleImportFlag(flag string)
|
||||
|
||||
Imports jobs from specified file pairs.
|
||||
|
||||
```go
|
||||
// Format: "<meta.json>:<data.json>[,<meta2.json>:<data2.json>,...]"
|
||||
flag := "/path/to/meta.json:/path/to/data.json"
|
||||
if err := importer.HandleImportFlag(flag); err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
```
|
||||
|
||||
This function:
|
||||
- Parses the comma-separated file pairs
|
||||
- Validates metadata and job data against schemas (if validation enabled)
|
||||
- Enriches each job with footprints and energy metrics
|
||||
- Imports jobs into both the archive and database
|
||||
- Fails fast on the first error
|
||||
|
||||
**Use Case**: Importing specific jobs from external sources or manual job additions.
|
||||
|
||||
## Job Enrichment
|
||||
|
||||
Both import workflows use `enrichJobMetadata()` to calculate:
|
||||
|
||||
### Performance Footprints
|
||||
|
||||
Performance footprints are calculated from metric averages based on the subcluster configuration:
|
||||
|
||||
```go
|
||||
job.Footprint["mem_used_avg"] = 45.2 // GB
|
||||
job.Footprint["cpu_load_avg"] = 0.87 // percentage
|
||||
```
|
||||
|
||||
### Energy Metrics
|
||||
|
||||
Energy consumption is calculated from power metrics using the formula:
|
||||
|
||||
```
|
||||
Energy (kWh) = (Average node power (W) × Number of nodes × Duration (s) / 3600) / 1000
|
||||
```
|
||||
|
||||
For each energy metric:
|
||||
```go
|
||||
job.EnergyFootprint["acc_power"] = 12.5 // kWh
|
||||
job.Energy = 150.2 // Total energy in kWh
|
||||
```
|
||||
|
||||
**Note**: Energy calculations for metrics with unit "energy" (Joules) are not yet implemented.
|
||||
|
||||
## Data Validation
|
||||
|
||||
### SanityChecks(job *schema.Job)
|
||||
|
||||
Validates job metadata before database insertion:
|
||||
|
||||
- Cluster exists in configuration
|
||||
- Subcluster is valid (assigns if needed)
|
||||
- Job state is valid
|
||||
- Resources and user fields are populated
|
||||
- Node counts and hardware thread counts are positive
|
||||
- Resource count matches declared node count
|
||||
|
||||
## Normalization Utilities
|
||||
|
||||
The package includes utilities for normalizing metric values to appropriate SI prefixes:
|
||||
|
||||
### Normalize(avg float64, prefix string)
|
||||
|
||||
Adjusts values and SI prefixes for readability:
|
||||
|
||||
```go
|
||||
factor, newPrefix := importer.Normalize(2048.0, "M")
|
||||
// Converts 2048 MB → ~2.0 GB
|
||||
// Returns: factor for conversion, "G"
|
||||
```
|
||||
|
||||
This is useful for automatically scaling metrics (e.g., memory, storage) to human-readable units.
|
||||
|
||||
## Dependencies
|
||||
|
||||
- `github.com/ClusterCockpit/cc-backend/internal/repository` - Database operations
|
||||
- `github.com/ClusterCockpit/cc-backend/pkg/archive` - Job archive access
|
||||
- `github.com/ClusterCockpit/cc-lib/schema` - Job schema definitions
|
||||
- `github.com/ClusterCockpit/cc-lib/ccLogger` - Logging
|
||||
- `github.com/ClusterCockpit/cc-lib/ccUnits` - SI unit handling
|
||||
|
||||
## Error Handling
|
||||
|
||||
- **InitDB**: Continues processing on individual job failures, logs each error, and logs a summary when finished
|
||||
- **HandleImportFlag**: Fails fast on first error, returns immediately
|
||||
- Both functions log detailed error context for debugging
|
||||
|
||||
## Performance
|
||||
|
||||
- **Transaction Batching**: InitDB processes jobs in batches of 100 for optimal database performance
|
||||
- **Tag Caching**: Tag IDs are cached during import to minimize database queries
|
||||
- **Progress Reporting**: InitDB prints progress updates during bulk operations
|
||||
@@ -8,7 +8,6 @@ import (
|
||||
"bytes"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"math"
|
||||
"os"
|
||||
"strings"
|
||||
|
||||
@@ -19,7 +18,22 @@ import (
|
||||
"github.com/ClusterCockpit/cc-lib/schema"
|
||||
)
|
||||
|
||||
// Import all jobs specified as `<path-to-meta.json>:<path-to-data.json>,...`
|
||||
// HandleImportFlag imports jobs from file pairs specified in a comma-separated flag string.
|
||||
//
|
||||
// The flag format is: "<path-to-meta.json>:<path-to-data.json>[,<path-to-meta2.json>:<path-to-data2.json>,...]"
|
||||
//
|
||||
// For each job pair, this function:
|
||||
// 1. Reads and validates the metadata JSON file (schema.Job)
|
||||
// 2. Reads and validates the job data JSON file (schema.JobData)
|
||||
// 3. Enriches the job with calculated footprints and energy metrics
|
||||
// 4. Validates the job using SanityChecks()
|
||||
// 5. Imports the job into the archive
|
||||
// 6. Inserts the job into the database with associated tags
|
||||
//
|
||||
// Schema validation is performed if config.Keys.Validate is true.
|
||||
//
|
||||
// Returns an error if file reading, validation, enrichment, or database operations fail.
|
||||
// The function stops processing on the first error encountered.
|
||||
func HandleImportFlag(flag string) error {
|
||||
r := repository.GetJobRepository()
|
||||
|
||||
@@ -72,75 +86,8 @@ func HandleImportFlag(flag string) error {
|
||||
|
||||
job.MonitoringStatus = schema.MonitoringStatusArchivingSuccessful
|
||||
|
||||
sc, err := archive.GetSubCluster(job.Cluster, job.SubCluster)
|
||||
if err != nil {
|
||||
cclog.Errorf("cannot get subcluster: %s", err.Error())
|
||||
return err
|
||||
}
|
||||
|
||||
job.Footprint = make(map[string]float64)
|
||||
|
||||
for _, fp := range sc.Footprint {
|
||||
statType := "avg"
|
||||
|
||||
if i, err := archive.MetricIndex(sc.MetricConfig, fp); err != nil {
|
||||
statType = sc.MetricConfig[i].Footprint
|
||||
}
|
||||
|
||||
name := fmt.Sprintf("%s_%s", fp, statType)
|
||||
|
||||
job.Footprint[name] = repository.LoadJobStat(&job, fp, statType)
|
||||
}
|
||||
|
||||
job.RawFootprint, err = json.Marshal(job.Footprint)
|
||||
if err != nil {
|
||||
cclog.Warn("Error while marshaling job footprint")
|
||||
return err
|
||||
}
|
||||
|
||||
job.EnergyFootprint = make(map[string]float64)
|
||||
|
||||
// Total Job Energy Outside Loop
|
||||
totalEnergy := 0.0
|
||||
for _, fp := range sc.EnergyFootprint {
|
||||
// Always Init Metric Energy Inside Loop
|
||||
metricEnergy := 0.0
|
||||
if i, err := archive.MetricIndex(sc.MetricConfig, fp); err == nil {
|
||||
// Note: For DB data, calculate and save as kWh
|
||||
if sc.MetricConfig[i].Energy == "energy" { // this metric has energy as unit (Joules)
|
||||
cclog.Warnf("Update EnergyFootprint for Job %d and Metric %s on cluster %s: Set to 'energy' in cluster.json: Not implemented, will return 0.0", job.JobID, job.Cluster, fp)
|
||||
// FIXME: Needs sum as stats type
|
||||
} else if sc.MetricConfig[i].Energy == "power" { // this metric has power as unit (Watt)
|
||||
// Energy: Power (in Watts) * Time (in Seconds)
|
||||
// Unit: (W * (s / 3600)) / 1000 = kWh
|
||||
// Round 2 Digits: round(Energy * 100) / 100
|
||||
// Here: (All-Node Metric Average * Number of Nodes) * (Job Duration in Seconds / 3600) / 1000
|
||||
// Note: Shared Jobs handled correctly since "Node Average" is based on partial resources, while "numNodes" factor is 1
|
||||
rawEnergy := ((repository.LoadJobStat(&job, fp, "avg") * float64(job.NumNodes)) * (float64(job.Duration) / 3600.0)) / 1000.0
|
||||
metricEnergy = math.Round(rawEnergy*100.0) / 100.0
|
||||
}
|
||||
} else {
|
||||
cclog.Warnf("Error while collecting energy metric %s for job, DB ID '%v', return '0.0'", fp, job.ID)
|
||||
}
|
||||
|
||||
job.EnergyFootprint[fp] = metricEnergy
|
||||
totalEnergy += metricEnergy
|
||||
}
|
||||
|
||||
job.Energy = (math.Round(totalEnergy*100.0) / 100.0)
|
||||
if job.RawEnergyFootprint, err = json.Marshal(job.EnergyFootprint); err != nil {
|
||||
cclog.Warnf("Error while marshaling energy footprint for job INTO BYTES, DB ID '%v'", job.ID)
|
||||
return err
|
||||
}
|
||||
|
||||
job.RawResources, err = json.Marshal(job.Resources)
|
||||
if err != nil {
|
||||
cclog.Warn("Error while marshaling job resources")
|
||||
return err
|
||||
}
|
||||
job.RawMetaData, err = json.Marshal(job.MetaData)
|
||||
if err != nil {
|
||||
cclog.Warn("Error while marshaling job metadata")
|
||||
if err = enrichJobMetadata(&job); err != nil {
|
||||
cclog.Errorf("Error enriching job metadata: %v", err)
|
||||
return err
|
||||
}
|
||||
|
||||
|
||||
@@ -20,6 +20,8 @@ import (
|
||||
cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
|
||||
)
|
||||
|
||||
// copyFile copies a file from source path to destination path.
|
||||
// Used by tests to set up test fixtures.
|
||||
func copyFile(s string, d string) error {
|
||||
r, err := os.Open(s)
|
||||
if err != nil {
|
||||
@@ -35,6 +37,14 @@ func copyFile(s string, d string) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
// setup initializes a test environment for importer tests.
|
||||
//
|
||||
// Creates a temporary directory with:
|
||||
// - A test job archive with cluster configuration
|
||||
// - A SQLite database initialized with schema
|
||||
// - Configuration files loaded
|
||||
//
|
||||
// Returns a JobRepository instance for test assertions.
|
||||
func setup(t *testing.T) *repository.JobRepository {
|
||||
const testconfig = `{
|
||||
"main": {
|
||||
@@ -81,14 +91,14 @@ func setup(t *testing.T) *repository.JobRepository {
|
||||
tmpdir := t.TempDir()
|
||||
|
||||
jobarchive := filepath.Join(tmpdir, "job-archive")
|
||||
if err := os.Mkdir(jobarchive, 0777); err != nil {
|
||||
if err := os.Mkdir(jobarchive, 0o777); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if err := os.WriteFile(filepath.Join(jobarchive, "version.txt"), []byte(fmt.Sprintf("%d", 2)), 0666); err != nil {
|
||||
if err := os.WriteFile(filepath.Join(jobarchive, "version.txt"), fmt.Appendf(nil, "%d", 3), 0o666); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
fritzArchive := filepath.Join(tmpdir, "job-archive", "fritz")
|
||||
if err := os.Mkdir(fritzArchive, 0777); err != nil {
|
||||
if err := os.Mkdir(fritzArchive, 0o777); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if err := copyFile(filepath.Join("testdata", "cluster-fritz.json"),
|
||||
@@ -103,7 +113,7 @@ func setup(t *testing.T) *repository.JobRepository {
|
||||
}
|
||||
|
||||
cfgFilePath := filepath.Join(tmpdir, "config.json")
|
||||
if err := os.WriteFile(cfgFilePath, []byte(testconfig), 0666); err != nil {
|
||||
if err := os.WriteFile(cfgFilePath, []byte(testconfig), 0o666); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
@@ -130,6 +140,7 @@ func setup(t *testing.T) *repository.JobRepository {
|
||||
return repository.GetJobRepository()
|
||||
}
|
||||
|
||||
// Result represents the expected test result for job import verification.
|
||||
type Result struct {
|
||||
JobId int64
|
||||
Cluster string
|
||||
@@ -137,6 +148,8 @@ type Result struct {
|
||||
Duration int32
|
||||
}
|
||||
|
||||
// readResult reads the expected test result from a golden file.
|
||||
// Golden files contain the expected job attributes after import.
|
||||
func readResult(t *testing.T, testname string) Result {
|
||||
var r Result
|
||||
|
||||
@@ -154,6 +167,13 @@ func readResult(t *testing.T, testname string) Result {
|
||||
return r
|
||||
}
|
||||
|
||||
// TestHandleImportFlag tests the HandleImportFlag function with various job import scenarios.
|
||||
//
|
||||
// The test uses golden files in testdata/ to verify that jobs are correctly:
|
||||
// - Parsed from metadata and data JSON files
|
||||
// - Enriched with footprints and energy metrics
|
||||
// - Inserted into the database
|
||||
// - Retrievable with correct attributes
|
||||
func TestHandleImportFlag(t *testing.T) {
|
||||
r := setup(t)
|
||||
|
||||
|
||||
@@ -2,6 +2,15 @@
|
||||
// All rights reserved. This file is part of cc-backend.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// Package importer provides functionality for importing job data into the ClusterCockpit database.
|
||||
//
|
||||
// The package supports two primary use cases:
|
||||
// 1. Bulk database initialization from archived jobs via InitDB()
|
||||
// 2. Individual job import from file pairs via HandleImportFlag()
|
||||
//
|
||||
// Both operations enrich job metadata by calculating footprints and energy metrics
|
||||
// before persisting to the database.
|
||||
package importer
|
||||
|
||||
import (
|
||||
@@ -22,8 +31,21 @@ const (
|
||||
setTagQuery = "INSERT INTO jobtag (job_id, tag_id) VALUES (?, ?)"
|
||||
)
|
||||
|
||||
// Delete the tables "job", "tag" and "jobtag" from the database and
|
||||
// repopulate them using the jobs found in `archive`.
|
||||
// InitDB reinitializes the job database from archived job data.
|
||||
//
|
||||
// This function performs the following operations:
|
||||
// 1. Flushes existing job, tag, and jobtag tables
|
||||
// 2. Iterates through all jobs in the archive
|
||||
// 3. Enriches each job with calculated footprints and energy metrics
|
||||
// 4. Inserts jobs and tags into the database in batched transactions
|
||||
//
|
||||
// Jobs are processed in batches of 100 for optimal performance. The function
|
||||
// continues processing even if individual jobs fail, logging each error and
|
||||
// a summary at the end.
|
||||
//
|
||||
// Returns an error if database initialization, transaction management, or
|
||||
// critical operations fail. Individual job failures are logged but do not
|
||||
// stop the overall import process.
|
||||
func InitDB() error {
|
||||
r := repository.GetJobRepository()
|
||||
if err := r.Flush(); err != nil {
|
||||
@@ -52,85 +74,32 @@ func InitDB() error {
|
||||
for jobContainer := range ar.Iter(false) {
|
||||
|
||||
jobMeta := jobContainer.Meta
|
||||
if jobMeta == nil {
|
||||
cclog.Warn("skipping job with nil metadata")
|
||||
errorOccured++
|
||||
continue
|
||||
}
|
||||
|
||||
// Bundle 100 inserts into one transaction for better performance
|
||||
if i%100 == 0 {
|
||||
r.TransactionCommit(t)
|
||||
if i > 0 {
|
||||
if err := t.Commit(); err != nil {
|
||||
cclog.Errorf("transaction commit error: %v", err)
|
||||
return err
|
||||
}
|
||||
// Start a new transaction for the next batch
|
||||
t, err = r.TransactionInit()
|
||||
if err != nil {
|
||||
cclog.Errorf("transaction init error: %v", err)
|
||||
return err
|
||||
}
|
||||
}
|
||||
fmt.Printf("%d jobs inserted...\r", i)
|
||||
}
|
||||
|
||||
jobMeta.MonitoringStatus = schema.MonitoringStatusArchivingSuccessful
|
||||
|
||||
sc, err := archive.GetSubCluster(jobMeta.Cluster, jobMeta.SubCluster)
|
||||
if err != nil {
|
||||
cclog.Errorf("cannot get subcluster: %s", err.Error())
|
||||
return err
|
||||
}
|
||||
|
||||
jobMeta.Footprint = make(map[string]float64)
|
||||
|
||||
for _, fp := range sc.Footprint {
|
||||
statType := "avg"
|
||||
|
||||
if i, err := archive.MetricIndex(sc.MetricConfig, fp); err != nil {
|
||||
statType = sc.MetricConfig[i].Footprint
|
||||
}
|
||||
|
||||
name := fmt.Sprintf("%s_%s", fp, statType)
|
||||
|
||||
jobMeta.Footprint[name] = repository.LoadJobStat(jobMeta, fp, statType)
|
||||
}
|
||||
|
||||
jobMeta.RawFootprint, err = json.Marshal(jobMeta.Footprint)
|
||||
if err != nil {
|
||||
cclog.Warn("Error while marshaling job footprint")
|
||||
return err
|
||||
}
|
||||
|
||||
jobMeta.EnergyFootprint = make(map[string]float64)
|
||||
|
||||
// Total Job Energy Outside Loop
|
||||
totalEnergy := 0.0
|
||||
for _, fp := range sc.EnergyFootprint {
|
||||
// Always Init Metric Energy Inside Loop
|
||||
metricEnergy := 0.0
|
||||
if i, err := archive.MetricIndex(sc.MetricConfig, fp); err == nil {
|
||||
// Note: For DB data, calculate and save as kWh
|
||||
if sc.MetricConfig[i].Energy == "energy" { // this metric has energy as unit (Joules)
|
||||
cclog.Warnf("Update EnergyFootprint for Job %d and Metric %s on cluster %s: Set to 'energy' in cluster.json: Not implemented, will return 0.0", jobMeta.JobID, jobMeta.Cluster, fp)
|
||||
// FIXME: Needs sum as stats type
|
||||
} else if sc.MetricConfig[i].Energy == "power" { // this metric has power as unit (Watt)
|
||||
// Energy: Power (in Watts) * Time (in Seconds)
|
||||
// Unit: (W * (s / 3600)) / 1000 = kWh
|
||||
// Round 2 Digits: round(Energy * 100) / 100
|
||||
// Here: (All-Node Metric Average * Number of Nodes) * (Job Duration in Seconds / 3600) / 1000
|
||||
// Note: Shared Jobs handled correctly since "Node Average" is based on partial resources, while "numNodes" factor is 1
|
||||
rawEnergy := ((repository.LoadJobStat(jobMeta, fp, "avg") * float64(jobMeta.NumNodes)) * (float64(jobMeta.Duration) / 3600.0)) / 1000.0
|
||||
metricEnergy = math.Round(rawEnergy*100.0) / 100.0
|
||||
}
|
||||
} else {
|
||||
cclog.Warnf("Error while collecting energy metric %s for job, DB ID '%v', return '0.0'", fp, jobMeta.ID)
|
||||
}
|
||||
|
||||
jobMeta.EnergyFootprint[fp] = metricEnergy
|
||||
totalEnergy += metricEnergy
|
||||
}
|
||||
|
||||
jobMeta.Energy = (math.Round(totalEnergy*100.0) / 100.0)
|
||||
if jobMeta.RawEnergyFootprint, err = json.Marshal(jobMeta.EnergyFootprint); err != nil {
|
||||
cclog.Warnf("Error while marshaling energy footprint for job INTO BYTES, DB ID '%v'", jobMeta.ID)
|
||||
return err
|
||||
}
|
||||
|
||||
jobMeta.RawResources, err = json.Marshal(jobMeta.Resources)
|
||||
if err != nil {
|
||||
cclog.Errorf("repository initDB(): %v", err)
|
||||
errorOccured++
|
||||
continue
|
||||
}
|
||||
|
||||
jobMeta.RawMetaData, err = json.Marshal(jobMeta.MetaData)
|
||||
if err != nil {
|
||||
if err := enrichJobMetadata(jobMeta); err != nil {
|
||||
cclog.Errorf("repository initDB(): %v", err)
|
||||
errorOccured++
|
||||
continue
|
||||
@@ -152,9 +121,9 @@ func InitDB() error {
|
||||
|
||||
for _, tag := range jobMeta.Tags {
|
||||
tagstr := tag.Name + ":" + tag.Type
|
||||
tagId, ok := tags[tagstr]
|
||||
tagID, ok := tags[tagstr]
|
||||
if !ok {
|
||||
tagId, err = r.TransactionAdd(t,
|
||||
tagID, err = r.TransactionAdd(t,
|
||||
addTagQuery,
|
||||
tag.Name, tag.Type)
|
||||
if err != nil {
|
||||
@@ -162,12 +131,12 @@ func InitDB() error {
|
||||
errorOccured++
|
||||
continue
|
||||
}
|
||||
tags[tagstr] = tagId
|
||||
tags[tagstr] = tagID
|
||||
}
|
||||
|
||||
r.TransactionAdd(t,
|
||||
setTagQuery,
|
||||
id, tagId)
|
||||
id, tagID)
|
||||
}
|
||||
|
||||
if err == nil {
|
||||
@@ -180,11 +149,114 @@ func InitDB() error {
|
||||
}
|
||||
|
||||
r.TransactionEnd(t)
|
||||
cclog.Printf("A total of %d jobs have been registered in %.3f seconds.\n", i, time.Since(starttime).Seconds())
|
||||
cclog.Infof("A total of %d jobs have been registered in %.3f seconds.", i, time.Since(starttime).Seconds())
|
||||
return nil
|
||||
}
|
||||
|
||||
// This function also sets the subcluster if necessary!
|
||||
// enrichJobMetadata calculates and populates job footprints, energy metrics, and serialized fields.
|
||||
//
|
||||
// This function performs the following enrichment operations:
|
||||
// 1. Calculates job footprint metrics based on the subcluster configuration
|
||||
// 2. Computes energy footprint and total energy consumption in kWh
|
||||
// 3. Marshals footprints, resources, and metadata into JSON for database storage
|
||||
//
|
||||
// The function expects the job's MonitoringStatus and SubCluster to be already set.
|
||||
// Energy calculations convert power metrics (Watts) to energy (kWh) using the formula:
|
||||
//
|
||||
// Energy (kWh) = (Power (W) * Duration (s) / 3600) / 1000
|
||||
//
|
||||
// Returns an error if subcluster retrieval, metric indexing, or JSON marshaling fails.
|
||||
func enrichJobMetadata(job *schema.Job) error {
|
||||
sc, err := archive.GetSubCluster(job.Cluster, job.SubCluster)
|
||||
if err != nil {
|
||||
cclog.Errorf("cannot get subcluster: %s", err.Error())
|
||||
return err
|
||||
}
|
||||
|
||||
job.Footprint = make(map[string]float64)
|
||||
|
||||
for _, fp := range sc.Footprint {
|
||||
statType := "avg"
|
||||
|
||||
if i, err := archive.MetricIndex(sc.MetricConfig, fp); err != nil {
|
||||
statType = sc.MetricConfig[i].Footprint
|
||||
}
|
||||
|
||||
name := fmt.Sprintf("%s_%s", fp, statType)
|
||||
|
||||
job.Footprint[name] = repository.LoadJobStat(job, fp, statType)
|
||||
}
|
||||
|
||||
job.RawFootprint, err = json.Marshal(job.Footprint)
|
||||
if err != nil {
|
||||
cclog.Warn("Error while marshaling job footprint")
|
||||
return err
|
||||
}
|
||||
|
||||
job.EnergyFootprint = make(map[string]float64)
|
||||
|
||||
// Total Job Energy Outside Loop
|
||||
totalEnergy := 0.0
|
||||
for _, fp := range sc.EnergyFootprint {
|
||||
// Always Init Metric Energy Inside Loop
|
||||
metricEnergy := 0.0
|
||||
if i, err := archive.MetricIndex(sc.MetricConfig, fp); err == nil {
|
||||
// Note: For DB data, calculate and save as kWh
|
||||
switch sc.MetricConfig[i].Energy {
|
||||
case "energy": // this metric has energy as unit (Joules)
|
||||
cclog.Warnf("Update EnergyFootprint for Job %d and Metric %s on cluster %s: Set to 'energy' in cluster.json: Not implemented, will return 0.0", job.JobID, job.Cluster, fp)
|
||||
// FIXME: Needs sum as stats type
|
||||
case "power": // this metric has power as unit (Watt)
|
||||
// Energy: Power (in Watts) * Time (in Seconds)
|
||||
// Unit: (W * (s / 3600)) / 1000 = kWh
|
||||
// Round 2 Digits: round(Energy * 100) / 100
|
||||
// Here: (All-Node Metric Average * Number of Nodes) * (Job Duration in Seconds / 3600) / 1000
|
||||
// Note: Shared Jobs handled correctly since "Node Average" is based on partial resources, while "numNodes" factor is 1
|
||||
rawEnergy := ((repository.LoadJobStat(job, fp, "avg") * float64(job.NumNodes)) * (float64(job.Duration) / 3600.0)) / 1000.0
|
||||
metricEnergy = math.Round(rawEnergy*100.0) / 100.0
|
||||
}
|
||||
} else {
|
||||
cclog.Warnf("Error while collecting energy metric %s for job, DB ID '%v', return '0.0'", fp, job.ID)
|
||||
}
|
||||
|
||||
job.EnergyFootprint[fp] = metricEnergy
|
||||
totalEnergy += metricEnergy
|
||||
}
|
||||
|
||||
job.Energy = (math.Round(totalEnergy*100.0) / 100.0)
|
||||
if job.RawEnergyFootprint, err = json.Marshal(job.EnergyFootprint); err != nil {
|
||||
cclog.Warnf("Error while marshaling energy footprint for job INTO BYTES, DB ID '%v'", job.ID)
|
||||
return err
|
||||
}
|
||||
|
||||
job.RawResources, err = json.Marshal(job.Resources)
|
||||
if err != nil {
|
||||
cclog.Warn("Error while marshaling job resources")
|
||||
return err
|
||||
}
|
||||
|
||||
job.RawMetaData, err = json.Marshal(job.MetaData)
|
||||
if err != nil {
|
||||
cclog.Warn("Error while marshaling job metadata")
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// SanityChecks validates job metadata and ensures cluster/subcluster configuration is valid.
|
||||
//
|
||||
// This function performs the following validations:
|
||||
// 1. Verifies the cluster exists in the archive configuration
|
||||
// 2. Assigns and validates the subcluster (may modify job.SubCluster)
|
||||
// 3. Validates job state is a recognized value
|
||||
// 4. Ensures resources and user fields are populated
|
||||
// 5. Validates node counts and hardware thread counts are positive
|
||||
// 6. Verifies the number of resources matches the declared node count
|
||||
//
|
||||
// The function may modify the job's SubCluster field if it needs to be assigned.
|
||||
//
|
||||
// Returns an error if any validation check fails.
|
||||
func SanityChecks(job *schema.Job) error {
|
||||
if c := archive.GetCluster(job.Cluster); c == nil {
|
||||
return fmt.Errorf("no such cluster: %v", job.Cluster)
|
||||
@@ -209,6 +281,14 @@ func SanityChecks(job *schema.Job) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
// checkJobData normalizes metric units in job data based on average values.
|
||||
//
|
||||
// NOTE: This function is currently unused and contains incomplete implementation.
|
||||
// It was intended to normalize byte and file-related metrics to appropriate SI prefixes,
|
||||
// but the normalization logic is commented out. Consider removing or completing this
|
||||
// function based on project requirements.
|
||||
//
|
||||
// TODO: Either implement the metric normalization or remove this dead code.
|
||||
func checkJobData(d *schema.JobData) error {
|
||||
for _, scopes := range *d {
|
||||
// var newUnit schema.Unit
|
||||
|
||||
@@ -10,10 +10,24 @@ import (
|
||||
ccunits "github.com/ClusterCockpit/cc-lib/ccUnits"
|
||||
)
|
||||
|
||||
// getNormalizationFactor calculates the scaling factor needed to normalize a value
|
||||
// to a more readable range (typically between 1.0 and 1000.0).
|
||||
//
|
||||
// For values greater than 1000, the function scales down by factors of 1000 (returns negative exponent).
|
||||
// For values less than 1.0, the function scales up by factors of 1000 (returns positive exponent).
|
||||
//
|
||||
// Returns:
|
||||
// - factor: The multiplicative factor to apply (10^(count*scale))
|
||||
// - exponent: The power of 10 representing the adjustment (multiple of 3 for SI prefixes)
|
||||
func getNormalizationFactor(v float64) (float64, int) {
|
||||
count := 0
|
||||
scale := -3
|
||||
|
||||
// Prevent infinite loop for zero or negative values
|
||||
if v <= 0.0 {
|
||||
return 1.0, 0
|
||||
}
|
||||
|
||||
if v > 1000.0 {
|
||||
for v > 1000.0 {
|
||||
v *= 1e-3
|
||||
@@ -29,9 +43,22 @@ func getNormalizationFactor(v float64) (float64, int) {
|
||||
return math.Pow10(count * scale), count * scale
|
||||
}
|
||||
|
||||
// getExponent calculates the SI prefix exponent from a numeric prefix value.
|
||||
//
|
||||
// For example:
|
||||
// - Input: 1000.0 (kilo) returns 3
|
||||
// - Input: 1000000.0 (mega) returns 6
|
||||
// - Input: 1000000000.0 (giga) returns 9
|
||||
//
|
||||
// Returns the exponent representing the power of 10 for the SI prefix.
|
||||
func getExponent(p float64) int {
|
||||
count := 0
|
||||
|
||||
// Prevent infinite loop for infinity or NaN values
|
||||
if math.IsInf(p, 0) || math.IsNaN(p) || p <= 0.0 {
|
||||
return 0
|
||||
}
|
||||
|
||||
for p > 1.0 {
|
||||
p = p / 1000.0
|
||||
count++
|
||||
@@ -40,12 +67,42 @@ func getExponent(p float64) int {
|
||||
return count * 3
|
||||
}
|
||||
|
||||
// newPrefixFromFactor computes a new SI unit prefix after applying a normalization factor.
|
||||
//
|
||||
// Given an original prefix and an exponent adjustment, this function calculates
|
||||
// the resulting SI prefix. For example, if normalizing from bytes (no prefix) by
|
||||
// a factor of 10^9, the result would be the "G" (giga) prefix.
|
||||
//
|
||||
// Parameters:
|
||||
// - op: The original SI prefix value
|
||||
// - e: The exponent adjustment to apply
|
||||
//
|
||||
// Returns the new SI prefix after adjustment.
|
||||
func newPrefixFromFactor(op ccunits.Prefix, e int) ccunits.Prefix {
|
||||
f := float64(op)
|
||||
exp := math.Pow10(getExponent(f) - e)
|
||||
return ccunits.Prefix(exp)
|
||||
}
|
||||
|
||||
// Normalize adjusts a metric value and its SI unit prefix to a more readable range.
|
||||
//
|
||||
// This function is useful for automatically scaling metrics to appropriate units.
|
||||
// For example, normalizing 2048 MB results in roughly 2.0 GB (prefixes are scaled in steps of 1000).
|
||||
//
|
||||
// The function analyzes the average value and determines if a different SI prefix
|
||||
// would make the number more human-readable (typically keeping values between 1 and 1000).
|
||||
//
|
||||
// Parameters:
|
||||
// - avg: The metric value to normalize
|
||||
// - p: The current SI prefix as a string (e.g., "K", "M", "G")
|
||||
//
|
||||
// Returns:
|
||||
// - factor: The multiplicative factor to apply to convert the value
|
||||
// - newPrefix: The new SI prefix string to use
|
||||
//
|
||||
// Example:
|
||||
//
|
||||
// factor, newPrefix := Normalize(2048.0, "M") // returns factor for MB->GB conversion, "G"
|
||||
func Normalize(avg float64, p string) (float64, string) {
|
||||
f, e := getNormalizationFactor(avg)
|
||||
|
||||
|
||||
@@ -11,6 +11,8 @@ import (
|
||||
ccunits "github.com/ClusterCockpit/cc-lib/ccUnits"
|
||||
)
|
||||
|
||||
// TestNormalizeFactor tests the normalization of large byte values to gigabyte prefix.
|
||||
// Verifies that values in the billions are correctly scaled to the "G" (giga) prefix.
|
||||
func TestNormalizeFactor(t *testing.T) {
|
||||
// var us string
|
||||
s := []float64{2890031237, 23998994567, 389734042344, 390349424345}
|
||||
@@ -38,6 +40,8 @@ func TestNormalizeFactor(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
// TestNormalizeKeep tests that values already in an appropriate range maintain their prefix.
|
||||
// Verifies that when values don't require rescaling, the original "G" prefix is preserved.
|
||||
func TestNormalizeKeep(t *testing.T) {
|
||||
s := []float64{3.0, 24.0, 390.0, 391.0}
|
||||
|
||||
|
||||
@@ -44,14 +44,14 @@ func Archiving(wg *sync.WaitGroup, ctx context.Context) {
|
||||
return
|
||||
case <-ticks:
|
||||
t := time.Now().Add(-d)
|
||||
cclog.Printf("[METRICSTORE]> start archiving checkpoints (older than %s)...\n", t.Format(time.RFC3339))
|
||||
cclog.Infof("[METRICSTORE]> start archiving checkpoints (older than %s)...", t.Format(time.RFC3339))
|
||||
n, err := ArchiveCheckpoints(Keys.Checkpoints.RootDir,
|
||||
Keys.Archive.RootDir, t.Unix(), Keys.Archive.DeleteInstead)
|
||||
|
||||
if err != nil {
|
||||
cclog.Printf("[METRICSTORE]> archiving failed: %s\n", err.Error())
|
||||
cclog.Errorf("[METRICSTORE]> archiving failed: %s", err.Error())
|
||||
} else {
|
||||
cclog.Printf("[METRICSTORE]> done: %d files zipped and moved to archive\n", n)
|
||||
cclog.Infof("[METRICSTORE]> done: %d files zipped and moved to archive", n)
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -75,10 +75,10 @@ func ArchiveCheckpoints(checkpointsDir, archiveDir string, from int64, deleteIns
|
||||
|
||||
var wg sync.WaitGroup
|
||||
n, errs := int32(0), int32(0)
|
||||
work := make(chan workItem, NumWorkers)
|
||||
work := make(chan workItem, Keys.NumWorkers)
|
||||
|
||||
wg.Add(NumWorkers)
|
||||
for worker := 0; worker < NumWorkers; worker++ {
|
||||
wg.Add(Keys.NumWorkers)
|
||||
for worker := 0; worker < Keys.NumWorkers; worker++ {
|
||||
go func() {
|
||||
defer wg.Done()
|
||||
for workItem := range work {
|
||||
@@ -116,7 +116,7 @@ func ArchiveCheckpoints(checkpointsDir, archiveDir string, from int64, deleteIns
|
||||
}
|
||||
|
||||
if errs > 0 {
|
||||
return int(n), fmt.Errorf("%d errors happend while archiving (%d successes)", errs, n)
|
||||
return int(n), fmt.Errorf("%d errors happened while archiving (%d successes)", errs, n)
|
||||
}
|
||||
return int(n), nil
|
||||
}
|
||||
@@ -147,11 +147,11 @@ func archiveCheckpoints(dir string, archiveDir string, from int64, deleteInstead
|
||||
}
|
||||
|
||||
filename := filepath.Join(archiveDir, fmt.Sprintf("%d.zip", from))
|
||||
f, err := os.OpenFile(filename, os.O_CREATE|os.O_WRONLY, 0o644)
|
||||
f, err := os.OpenFile(filename, os.O_CREATE|os.O_WRONLY, CheckpointFilePerms)
|
||||
if err != nil && os.IsNotExist(err) {
|
||||
err = os.MkdirAll(archiveDir, 0o755)
|
||||
err = os.MkdirAll(archiveDir, CheckpointDirPerms)
|
||||
if err == nil {
|
||||
f, err = os.OpenFile(filename, os.O_CREATE|os.O_WRONLY, 0o644)
|
||||
f, err = os.OpenFile(filename, os.O_CREATE|os.O_WRONLY, CheckpointFilePerms)
|
||||
}
|
||||
}
|
||||
if err != nil {
|
||||
|
||||
@@ -105,46 +105,6 @@ func (b *buffer) firstWrite() int64 {
|
||||
|
||||
// close is currently a no-op: the method body is empty.
func (b *buffer) close() {}
|
||||
|
||||
/*
|
||||
func (b *buffer) close() {
|
||||
if b.closed {
|
||||
return
|
||||
}
|
||||
|
||||
b.closed = true
|
||||
n, sum, min, max := 0, 0., math.MaxFloat64, -math.MaxFloat64
|
||||
for _, x := range b.data {
|
||||
if x.IsNaN() {
|
||||
continue
|
||||
}
|
||||
|
||||
n += 1
|
||||
f := float64(x)
|
||||
sum += f
|
||||
min = math.Min(min, f)
|
||||
max = math.Max(max, f)
|
||||
}
|
||||
|
||||
b.statisticts.samples = n
|
||||
if n > 0 {
|
||||
b.statisticts.avg = Float(sum / float64(n))
|
||||
b.statisticts.min = Float(min)
|
||||
b.statisticts.max = Float(max)
|
||||
} else {
|
||||
b.statisticts.avg = NaN
|
||||
b.statisticts.min = NaN
|
||||
b.statisticts.max = NaN
|
||||
}
|
||||
}
|
||||
*/
|
||||
|
||||
// func interpolate(idx int, data []Float) Float {
|
||||
// if idx == 0 || idx+1 == len(data) {
|
||||
// return NaN
|
||||
// }
|
||||
// return (data[idx-1] + data[idx+1]) / 2.0
|
||||
// }
|
||||
|
||||
// Return all known values from `from` to `to`. Gaps of information are represented as NaN.
|
||||
// Simple linear interpolation is done between the two neighboring cells if possible.
|
||||
// If values at the start or end are missing, instead of NaN values, the second and third
|
||||
|
||||
@@ -28,6 +28,17 @@ import (
|
||||
"github.com/linkedin/goavro/v2"
|
||||
)
|
||||
|
||||
// File operation constants
|
||||
const (
|
||||
// CheckpointFilePerms defines default permissions for checkpoint files
|
||||
CheckpointFilePerms = 0o644
|
||||
// CheckpointDirPerms defines default permissions for checkpoint directories
|
||||
CheckpointDirPerms = 0o755
|
||||
// GCTriggerInterval is the host-count threshold above which GC is forced during checkpoint loading
|
||||
// GC is forced whenever the host counter exceeds GCTriggerInterval and is a multiple of Keys.NumWorkers
|
||||
GCTriggerInterval = 100
|
||||
)
|
||||
|
||||
// Whenever changed, update MarshalJSON as well!
|
||||
type CheckpointMetrics struct {
|
||||
Data []schema.Float `json:"data"`
|
||||
@@ -71,14 +82,14 @@ func Checkpointing(wg *sync.WaitGroup, ctx context.Context) {
|
||||
case <-ctx.Done():
|
||||
return
|
||||
case <-ticks:
|
||||
cclog.Printf("[METRICSTORE]> start checkpointing (starting at %s)...\n", lastCheckpoint.Format(time.RFC3339))
|
||||
cclog.Infof("[METRICSTORE]> start checkpointing (starting at %s)...", lastCheckpoint.Format(time.RFC3339))
|
||||
now := time.Now()
|
||||
n, err := ms.ToCheckpoint(Keys.Checkpoints.RootDir,
|
||||
lastCheckpoint.Unix(), now.Unix())
|
||||
if err != nil {
|
||||
cclog.Printf("[METRICSTORE]> checkpointing failed: %s\n", err.Error())
|
||||
cclog.Errorf("[METRICSTORE]> checkpointing failed: %s", err.Error())
|
||||
} else {
|
||||
cclog.Printf("[METRICSTORE]> done: %d checkpoint files created\n", n)
|
||||
cclog.Infof("[METRICSTORE]> done: %d checkpoint files created", n)
|
||||
lastCheckpoint = now
|
||||
}
|
||||
}
|
||||
@@ -171,9 +182,9 @@ func (m *MemoryStore) ToCheckpoint(dir string, from, to int64) (int, error) {
|
||||
n, errs := int32(0), int32(0)
|
||||
|
||||
var wg sync.WaitGroup
|
||||
wg.Add(NumWorkers)
|
||||
work := make(chan workItem, NumWorkers*2)
|
||||
for worker := 0; worker < NumWorkers; worker++ {
|
||||
wg.Add(Keys.NumWorkers)
|
||||
work := make(chan workItem, Keys.NumWorkers*2)
|
||||
for worker := 0; worker < Keys.NumWorkers; worker++ {
|
||||
go func() {
|
||||
defer wg.Done()
|
||||
|
||||
@@ -183,7 +194,7 @@ func (m *MemoryStore) ToCheckpoint(dir string, from, to int64) (int, error) {
|
||||
continue
|
||||
}
|
||||
|
||||
cclog.Printf("[METRICSTORE]> error while checkpointing %#v: %s", workItem.selector, err.Error())
|
||||
cclog.Errorf("[METRICSTORE]> error while checkpointing %#v: %s", workItem.selector, err.Error())
|
||||
atomic.AddInt32(&errs, 1)
|
||||
} else {
|
||||
atomic.AddInt32(&n, 1)
|
||||
@@ -205,7 +216,7 @@ func (m *MemoryStore) ToCheckpoint(dir string, from, to int64) (int, error) {
|
||||
wg.Wait()
|
||||
|
||||
if errs > 0 {
|
||||
return int(n), fmt.Errorf("[METRICSTORE]> %d errors happend while creating checkpoints (%d successes)", errs, n)
|
||||
return int(n), fmt.Errorf("[METRICSTORE]> %d errors happened while creating checkpoints (%d successes)", errs, n)
|
||||
}
|
||||
return int(n), nil
|
||||
}
|
||||
@@ -285,11 +296,11 @@ func (l *Level) toCheckpoint(dir string, from, to int64, m *MemoryStore) error {
|
||||
}
|
||||
|
||||
filepath := path.Join(dir, fmt.Sprintf("%d.json", from))
|
||||
f, err := os.OpenFile(filepath, os.O_CREATE|os.O_WRONLY, 0o644)
|
||||
f, err := os.OpenFile(filepath, os.O_CREATE|os.O_WRONLY, CheckpointFilePerms)
|
||||
if err != nil && os.IsNotExist(err) {
|
||||
err = os.MkdirAll(dir, 0o755)
|
||||
err = os.MkdirAll(dir, CheckpointDirPerms)
|
||||
if err == nil {
|
||||
f, err = os.OpenFile(filepath, os.O_CREATE|os.O_WRONLY, 0o644)
|
||||
f, err = os.OpenFile(filepath, os.O_CREATE|os.O_WRONLY, CheckpointFilePerms)
|
||||
}
|
||||
}
|
||||
if err != nil {
|
||||
@@ -307,11 +318,11 @@ func (l *Level) toCheckpoint(dir string, from, to int64, m *MemoryStore) error {
|
||||
|
||||
func (m *MemoryStore) FromCheckpoint(dir string, from int64, extension string) (int, error) {
|
||||
var wg sync.WaitGroup
|
||||
work := make(chan [2]string, NumWorkers)
|
||||
work := make(chan [2]string, Keys.NumWorkers)
|
||||
n, errs := int32(0), int32(0)
|
||||
|
||||
wg.Add(NumWorkers)
|
||||
for worker := 0; worker < NumWorkers; worker++ {
|
||||
wg.Add(Keys.NumWorkers)
|
||||
for worker := 0; worker < Keys.NumWorkers; worker++ {
|
||||
go func() {
|
||||
defer wg.Done()
|
||||
for host := range work {
|
||||
@@ -347,7 +358,7 @@ func (m *MemoryStore) FromCheckpoint(dir string, from int64, extension string) (
|
||||
}
|
||||
|
||||
i++
|
||||
if i%NumWorkers == 0 && i > 100 {
|
||||
if i%Keys.NumWorkers == 0 && i > GCTriggerInterval {
|
||||
// Forcing garbage collection runs here regularly during the loading of checkpoints
|
||||
// will decrease the total heap size after loading everything back to memory is done.
|
||||
// While loading data, the heap will grow fast, so the GC target size will double
|
||||
@@ -368,7 +379,7 @@ done:
|
||||
}
|
||||
|
||||
if errs > 0 {
|
||||
return int(n), fmt.Errorf("[METRICSTORE]> %d errors happend while creating checkpoints (%d successes)", errs, n)
|
||||
return int(n), fmt.Errorf("[METRICSTORE]> %d errors happened while creating checkpoints (%d successes)", errs, n)
|
||||
}
|
||||
return int(n), nil
|
||||
}
|
||||
@@ -379,11 +390,11 @@ done:
|
||||
func (m *MemoryStore) FromCheckpointFiles(dir string, from int64) (int, error) {
|
||||
if _, err := os.Stat(dir); os.IsNotExist(err) {
|
||||
// The directory does not exist, so create it using os.MkdirAll()
|
||||
err := os.MkdirAll(dir, 0o755) // 0755 sets the permissions for the directory
|
||||
err := os.MkdirAll(dir, CheckpointDirPerms) // CheckpointDirPerms sets the permissions for the directory
|
||||
if err != nil {
|
||||
cclog.Fatalf("[METRICSTORE]> Error creating directory: %#v\n", err)
|
||||
}
|
||||
cclog.Printf("[METRICSTORE]> %#v Directory created successfully.\n", dir)
|
||||
cclog.Debugf("[METRICSTORE]> %#v Directory created successfully", dir)
|
||||
}
|
||||
|
||||
// Config read (replace with your actual config read)
|
||||
@@ -402,7 +413,7 @@ func (m *MemoryStore) FromCheckpointFiles(dir string, from int64) (int, error) {
|
||||
if found, err := checkFilesWithExtension(dir, fileFormat); err != nil {
|
||||
return 0, fmt.Errorf("[METRICSTORE]> error checking files with extension: %v", err)
|
||||
} else if found {
|
||||
cclog.Printf("[METRICSTORE]> Loading %s files because fileformat is %s\n", fileFormat, fileFormat)
|
||||
cclog.Infof("[METRICSTORE]> Loading %s files because fileformat is %s", fileFormat, fileFormat)
|
||||
return m.FromCheckpoint(dir, from, fileFormat)
|
||||
}
|
||||
|
||||
@@ -411,7 +422,7 @@ func (m *MemoryStore) FromCheckpointFiles(dir string, from int64) (int, error) {
|
||||
if found, err := checkFilesWithExtension(dir, altFormat); err != nil {
|
||||
return 0, fmt.Errorf("[METRICSTORE]> error checking files with extension: %v", err)
|
||||
} else if found {
|
||||
cclog.Printf("[METRICSTORE]> Loading %s files but fileformat is %s\n", altFormat, fileFormat)
|
||||
cclog.Infof("[METRICSTORE]> Loading %s files but fileformat is %s", altFormat, fileFormat)
|
||||
return m.FromCheckpoint(dir, from, altFormat)
|
||||
}
|
||||
|
||||
@@ -464,7 +475,7 @@ func (l *Level) loadAvroFile(m *MemoryStore, f *os.File, from int64) error {
|
||||
// Create a new OCF reader from the buffered reader
|
||||
ocfReader, err := goavro.NewOCFReader(br)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
return fmt.Errorf("[METRICSTORE]> error creating OCF reader: %w", err)
|
||||
}
|
||||
|
||||
metricsData := make(map[string]schema.FloatArray)
|
||||
@@ -477,7 +488,7 @@ func (l *Level) loadAvroFile(m *MemoryStore, f *os.File, from int64) error {
|
||||
|
||||
record, ok := datum.(map[string]any)
|
||||
if !ok {
|
||||
panic("[METRICSTORE]> failed to assert datum as map[string]interface{}")
|
||||
return fmt.Errorf("[METRICSTORE]> failed to assert datum as map[string]interface{}")
|
||||
}
|
||||
|
||||
for key, value := range record {
|
||||
@@ -559,7 +570,7 @@ func (l *Level) createBuffer(m *MemoryStore, metricName string, floatArray schem
|
||||
l.metrics[minfo.offset] = b
|
||||
} else {
|
||||
if prev.start > b.start {
|
||||
return errors.New("wooops")
|
||||
return fmt.Errorf("[METRICSTORE]> buffer start time %d is before previous buffer start %d", b.start, prev.start)
|
||||
}
|
||||
|
||||
b.prev = prev
|
||||
@@ -623,7 +634,7 @@ func (l *Level) loadFile(cf *CheckpointFile, m *MemoryStore) error {
|
||||
l.metrics[minfo.offset] = b
|
||||
} else {
|
||||
if prev.start > b.start {
|
||||
return errors.New("wooops")
|
||||
return fmt.Errorf("[METRICSTORE]> buffer start time %d is before previous buffer start %d", b.start, prev.start)
|
||||
}
|
||||
|
||||
b.prev = prev
|
||||
@@ -700,13 +711,17 @@ func (l *Level) fromCheckpoint(m *MemoryStore, dir string, from int64, extension
|
||||
loader := loaders[extension]
|
||||
|
||||
for _, filename := range files {
|
||||
f, err := os.Open(path.Join(dir, filename))
|
||||
if err != nil {
|
||||
return filesLoaded, err
|
||||
}
|
||||
defer f.Close()
|
||||
// Use a closure to ensure file is closed immediately after use
|
||||
err := func() error {
|
||||
f, err := os.Open(path.Join(dir, filename))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
if err = loader(m, f, from); err != nil {
|
||||
return loader(m, f, from)
|
||||
}()
|
||||
if err != nil {
|
||||
return filesLoaded, err
|
||||
}
|
||||
|
||||
|
||||
@@ -12,6 +12,9 @@ import (
|
||||
var InternalCCMSFlag bool = false
|
||||
|
||||
type MetricStoreConfig struct {
|
||||
// Number of concurrent workers for checkpoint and archive operations.
|
||||
// If not set, 0, or negative, defaults to min(runtime.NumCPU()/2+1, 10)
|
||||
NumWorkers int `json:"num-workers"`
|
||||
Checkpoints struct {
|
||||
FileFormat string `json:"file-format"`
|
||||
Interval string `json:"interval"`
|
||||
@@ -62,7 +65,7 @@ const (
|
||||
AvgAggregation
|
||||
)
|
||||
|
||||
func AssignAggregationStratergy(str string) (AggregationStrategy, error) {
|
||||
func AssignAggregationStrategy(str string) (AggregationStrategy, error) {
|
||||
switch str {
|
||||
case "":
|
||||
return NoAggregation, nil
|
||||
|
||||
@@ -39,7 +39,7 @@ func (l *Level) findLevelOrCreate(selector []string, nMetrics int) *Level {
|
||||
// Children map needs to be created...
|
||||
l.lock.RUnlock()
|
||||
} else {
|
||||
child, ok := l.children[selector[0]]
|
||||
child, ok = l.children[selector[0]]
|
||||
l.lock.RUnlock()
|
||||
if ok {
|
||||
return child.findLevelOrCreate(selector[1:], nMetrics)
|
||||
|
||||
@@ -119,7 +119,7 @@ func ReceiveNats(conf *(NatsConfig),
|
||||
for m := range msgs {
|
||||
dec := lineprotocol.NewDecoderWithBytes(m.Data)
|
||||
if err := DecodeLine(dec, ms, clusterTag); err != nil {
|
||||
cclog.Printf("error: %s\n", err.Error())
|
||||
cclog.Errorf("error: %s", err.Error())
|
||||
}
|
||||
}
|
||||
|
||||
@@ -134,7 +134,7 @@ func ReceiveNats(conf *(NatsConfig),
|
||||
sub, err = nc.Subscribe(sc.SubscribeTo, func(m *nats.Msg) {
|
||||
dec := lineprotocol.NewDecoderWithBytes(m.Data)
|
||||
if err := DecodeLine(dec, ms, clusterTag); err != nil {
|
||||
cclog.Printf("error: %s\n", err.Error())
|
||||
cclog.Errorf("error: %s", err.Error())
|
||||
}
|
||||
})
|
||||
}
|
||||
@@ -142,7 +142,7 @@ func ReceiveNats(conf *(NatsConfig),
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
cclog.Printf("NATS subscription to '%s' on '%s' established\n", sc.SubscribeTo, conf.Address)
|
||||
cclog.Infof("NATS subscription to '%s' on '%s' established", sc.SubscribeTo, conf.Address)
|
||||
subs = append(subs, sub)
|
||||
}
|
||||
|
||||
@@ -150,7 +150,7 @@ func ReceiveNats(conf *(NatsConfig),
|
||||
for _, sub := range subs {
|
||||
err = sub.Unsubscribe()
|
||||
if err != nil {
|
||||
cclog.Printf("NATS unsubscribe failed: %s", err.Error())
|
||||
cclog.Errorf("NATS unsubscribe failed: %s", err.Error())
|
||||
}
|
||||
}
|
||||
close(msgs)
|
||||
|
||||
@@ -3,6 +3,20 @@
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// Package memorystore provides an efficient in-memory time-series metric storage system
|
||||
// with support for hierarchical data organization, checkpointing, and archiving.
|
||||
//
|
||||
// The package organizes metrics in a tree structure (cluster → host → component) and
|
||||
// provides concurrent read/write access to metric data with configurable aggregation strategies.
|
||||
// Background goroutines handle periodic checkpointing (JSON or Avro format), archiving old data,
|
||||
// and enforcing retention policies.
|
||||
//
|
||||
// Key features:
|
||||
// - In-memory metric storage with configurable retention
|
||||
// - Hierarchical data organization (selectors)
|
||||
// - Concurrent checkpoint/archive workers
|
||||
// - Support for sum and average aggregation
|
||||
// - NATS integration for metric ingestion
|
||||
package memorystore
|
||||
|
||||
import (
|
||||
@@ -10,18 +24,14 @@ import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"os"
|
||||
"os/signal"
|
||||
"runtime"
|
||||
"sync"
|
||||
"syscall"
|
||||
"time"
|
||||
|
||||
"github.com/ClusterCockpit/cc-backend/internal/config"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/archive"
|
||||
cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
|
||||
"github.com/ClusterCockpit/cc-lib/resampler"
|
||||
"github.com/ClusterCockpit/cc-lib/runtimeEnv"
|
||||
"github.com/ClusterCockpit/cc-lib/schema"
|
||||
"github.com/ClusterCockpit/cc-lib/util"
|
||||
)
|
||||
@@ -29,14 +39,12 @@ import (
|
||||
var (
|
||||
singleton sync.Once
|
||||
msInstance *MemoryStore
|
||||
// shutdownFunc stores the context cancellation function created in Init
|
||||
// and is called during Shutdown to cancel all background goroutines
|
||||
shutdownFunc context.CancelFunc
|
||||
)
|
||||
|
||||
var NumWorkers int = 4
|
||||
|
||||
func init() {
|
||||
maxWorkers := 10
|
||||
NumWorkers = min(runtime.NumCPU()/2+1, maxWorkers)
|
||||
}
|
||||
|
||||
type Metric struct {
|
||||
Name string
|
||||
@@ -61,30 +69,34 @@ func Init(rawConfig json.RawMessage, wg *sync.WaitGroup) {
|
||||
}
|
||||
}
|
||||
|
||||
// Set NumWorkers from config or use default
|
||||
if Keys.NumWorkers <= 0 {
|
||||
maxWorkers := 10
|
||||
Keys.NumWorkers = min(runtime.NumCPU()/2+1, maxWorkers)
|
||||
}
|
||||
cclog.Debugf("[METRICSTORE]> Using %d workers for checkpoint/archive operations\n", Keys.NumWorkers)
|
||||
|
||||
// Helper function to add metric configuration
|
||||
addMetricConfig := func(mc schema.MetricConfig) {
|
||||
agg, err := AssignAggregationStrategy(mc.Aggregation)
|
||||
if err != nil {
|
||||
cclog.Warnf("Could not find aggregation strategy for metric config '%s': %s", mc.Name, err.Error())
|
||||
}
|
||||
|
||||
AddMetric(mc.Name, MetricConfig{
|
||||
Frequency: int64(mc.Timestep),
|
||||
Aggregation: agg,
|
||||
})
|
||||
}
|
||||
|
||||
for _, c := range archive.Clusters {
|
||||
for _, mc := range c.MetricConfig {
|
||||
agg, err := AssignAggregationStratergy(mc.Aggregation)
|
||||
if err != nil {
|
||||
cclog.Warnf("Could not find aggregation stratergy for metric config '%s': %s", mc.Name, err.Error())
|
||||
}
|
||||
|
||||
AddMetric(mc.Name, MetricConfig{
|
||||
Frequency: int64(mc.Timestep),
|
||||
Aggregation: agg,
|
||||
})
|
||||
addMetricConfig(*mc)
|
||||
}
|
||||
|
||||
for _, sc := range c.SubClusters {
|
||||
for _, mc := range sc.MetricConfig {
|
||||
agg, err := AssignAggregationStratergy(mc.Aggregation)
|
||||
if err != nil {
|
||||
cclog.Warnf("Could not find aggregation stratergy for metric config '%s': %s", mc.Name, err.Error())
|
||||
}
|
||||
|
||||
AddMetric(mc.Name, MetricConfig{
|
||||
Frequency: int64(mc.Timestep),
|
||||
Aggregation: agg,
|
||||
})
|
||||
addMetricConfig(mc)
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -126,15 +138,11 @@ func Init(rawConfig json.RawMessage, wg *sync.WaitGroup) {
|
||||
Archiving(wg, ctx)
|
||||
DataStaging(wg, ctx)
|
||||
|
||||
wg.Add(1)
|
||||
sigs := make(chan os.Signal, 1)
|
||||
signal.Notify(sigs, syscall.SIGINT, syscall.SIGTERM)
|
||||
go func() {
|
||||
defer wg.Done()
|
||||
<-sigs
|
||||
runtimeEnv.SystemdNotifiy(false, "[METRICSTORE]> Shutting down ...")
|
||||
shutdown()
|
||||
}()
|
||||
// Note: Signal handling has been removed from this function.
|
||||
// The caller is responsible for handling shutdown signals and calling
|
||||
// the exported Shutdown() function when appropriate.
|
||||
// Store the shutdown function for later use by Shutdown()
|
||||
shutdownFunc = shutdown
|
||||
|
||||
if Keys.Nats != nil {
|
||||
for _, natsConf := range Keys.Nats {
|
||||
@@ -190,6 +198,11 @@ func GetMemoryStore() *MemoryStore {
|
||||
}
|
||||
|
||||
func Shutdown() {
|
||||
// Cancel the context to signal all background goroutines to stop
|
||||
if shutdownFunc != nil {
|
||||
shutdownFunc()
|
||||
}
|
||||
|
||||
cclog.Infof("[METRICSTORE]> Writing to '%s'...\n", Keys.Checkpoints.RootDir)
|
||||
var files int
|
||||
var err error
|
||||
@@ -207,70 +220,8 @@ func Shutdown() {
|
||||
cclog.Errorf("[METRICSTORE]> Writing checkpoint failed: %s\n", err.Error())
|
||||
}
|
||||
cclog.Infof("[METRICSTORE]> Done! (%d files written)\n", files)
|
||||
|
||||
// ms.PrintHeirarchy()
|
||||
}
|
||||
|
||||
// func (m *MemoryStore) PrintHeirarchy() {
|
||||
// m.root.lock.Lock()
|
||||
// defer m.root.lock.Unlock()
|
||||
|
||||
// fmt.Printf("Root : \n")
|
||||
|
||||
// for lvl1, sel1 := range m.root.children {
|
||||
// fmt.Printf("\t%s\n", lvl1)
|
||||
// for lvl2, sel2 := range sel1.children {
|
||||
// fmt.Printf("\t\t%s\n", lvl2)
|
||||
// if lvl1 == "fritz" && lvl2 == "f0201" {
|
||||
|
||||
// for name, met := range m.Metrics {
|
||||
// mt := sel2.metrics[met.Offset]
|
||||
|
||||
// fmt.Printf("\t\t\t\t%s\n", name)
|
||||
// fmt.Printf("\t\t\t\t")
|
||||
|
||||
// for mt != nil {
|
||||
// // if name == "cpu_load" {
|
||||
// fmt.Printf("%d(%d) -> %#v", mt.start, len(mt.data), mt.data)
|
||||
// // }
|
||||
// mt = mt.prev
|
||||
// }
|
||||
// fmt.Printf("\n")
|
||||
|
||||
// }
|
||||
// }
|
||||
// for lvl3, sel3 := range sel2.children {
|
||||
// if lvl1 == "fritz" && lvl2 == "f0201" && lvl3 == "hwthread70" {
|
||||
|
||||
// fmt.Printf("\t\t\t\t\t%s\n", lvl3)
|
||||
|
||||
// for name, met := range m.Metrics {
|
||||
// mt := sel3.metrics[met.Offset]
|
||||
|
||||
// fmt.Printf("\t\t\t\t\t\t%s\n", name)
|
||||
|
||||
// fmt.Printf("\t\t\t\t\t\t")
|
||||
|
||||
// for mt != nil {
|
||||
// // if name == "clock" {
|
||||
// fmt.Printf("%d(%d) -> %#v", mt.start, len(mt.data), mt.data)
|
||||
|
||||
// mt = mt.prev
|
||||
// }
|
||||
// fmt.Printf("\n")
|
||||
|
||||
// }
|
||||
|
||||
// // for i, _ := range sel3.metrics {
|
||||
// // fmt.Printf("\t\t\t\t\t%s\n", getName(configmetrics, i))
|
||||
// // }
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
|
||||
// }
|
||||
|
||||
func getName(m *MemoryStore, i int) string {
|
||||
for key, val := range m.Metrics {
|
||||
if val.offset == i {
|
||||
|
||||
156
internal/memorystore/memorystore_test.go
Normal file
156
internal/memorystore/memorystore_test.go
Normal file
@@ -0,0 +1,156 @@
|
||||
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
// All rights reserved. This file is part of cc-backend.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package memorystore
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/ClusterCockpit/cc-lib/schema"
|
||||
)
|
||||
|
||||
func TestAssignAggregationStrategy(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
input string
|
||||
expected AggregationStrategy
|
||||
wantErr bool
|
||||
}{
|
||||
{"empty string", "", NoAggregation, false},
|
||||
{"sum", "sum", SumAggregation, false},
|
||||
{"avg", "avg", AvgAggregation, false},
|
||||
{"invalid", "invalid", NoAggregation, true},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
result, err := AssignAggregationStrategy(tt.input)
|
||||
if (err != nil) != tt.wantErr {
|
||||
t.Errorf("AssignAggregationStrategy(%q) error = %v, wantErr %v", tt.input, err, tt.wantErr)
|
||||
return
|
||||
}
|
||||
if result != tt.expected {
|
||||
t.Errorf("AssignAggregationStrategy(%q) = %v, want %v", tt.input, result, tt.expected)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestAddMetric(t *testing.T) {
|
||||
// Reset Metrics before test
|
||||
Metrics = make(map[string]MetricConfig)
|
||||
|
||||
err := AddMetric("test_metric", MetricConfig{
|
||||
Frequency: 60,
|
||||
Aggregation: SumAggregation,
|
||||
})
|
||||
if err != nil {
|
||||
t.Errorf("AddMetric() error = %v", err)
|
||||
}
|
||||
|
||||
if _, ok := Metrics["test_metric"]; !ok {
|
||||
t.Error("AddMetric() did not add metric to Metrics map")
|
||||
}
|
||||
|
||||
// Test updating with higher frequency
|
||||
err = AddMetric("test_metric", MetricConfig{
|
||||
Frequency: 120,
|
||||
Aggregation: SumAggregation,
|
||||
})
|
||||
if err != nil {
|
||||
t.Errorf("AddMetric() error = %v", err)
|
||||
}
|
||||
|
||||
if Metrics["test_metric"].Frequency != 120 {
|
||||
t.Errorf("AddMetric() frequency = %d, want 120", Metrics["test_metric"].Frequency)
|
||||
}
|
||||
|
||||
// Test updating with lower frequency (should not update)
|
||||
err = AddMetric("test_metric", MetricConfig{
|
||||
Frequency: 30,
|
||||
Aggregation: SumAggregation,
|
||||
})
|
||||
if err != nil {
|
||||
t.Errorf("AddMetric() error = %v", err)
|
||||
}
|
||||
|
||||
if Metrics["test_metric"].Frequency != 120 {
|
||||
t.Errorf("AddMetric() frequency = %d, want 120 (should not downgrade)", Metrics["test_metric"].Frequency)
|
||||
}
|
||||
}
|
||||
|
||||
func TestGetMetricFrequency(t *testing.T) {
|
||||
// Reset Metrics before test
|
||||
Metrics = map[string]MetricConfig{
|
||||
"test_metric": {
|
||||
Frequency: 60,
|
||||
Aggregation: SumAggregation,
|
||||
},
|
||||
}
|
||||
|
||||
freq, err := GetMetricFrequency("test_metric")
|
||||
if err != nil {
|
||||
t.Errorf("GetMetricFrequency() error = %v", err)
|
||||
}
|
||||
if freq != 60 {
|
||||
t.Errorf("GetMetricFrequency() = %d, want 60", freq)
|
||||
}
|
||||
|
||||
_, err = GetMetricFrequency("nonexistent")
|
||||
if err == nil {
|
||||
t.Error("GetMetricFrequency() expected error for nonexistent metric")
|
||||
}
|
||||
}
|
||||
|
||||
func TestBufferWrite(t *testing.T) {
|
||||
b := newBuffer(100, 10)
|
||||
|
||||
// Test writing value
|
||||
nb, err := b.write(100, schema.Float(42.0))
|
||||
if err != nil {
|
||||
t.Errorf("buffer.write() error = %v", err)
|
||||
}
|
||||
if nb != b {
|
||||
t.Error("buffer.write() created new buffer unexpectedly")
|
||||
}
|
||||
if len(b.data) != 1 {
|
||||
t.Errorf("buffer.write() len(data) = %d, want 1", len(b.data))
|
||||
}
|
||||
if b.data[0] != schema.Float(42.0) {
|
||||
t.Errorf("buffer.write() data[0] = %v, want 42.0", b.data[0])
|
||||
}
|
||||
|
||||
// Test writing value from past (should error)
|
||||
_, err = b.write(50, schema.Float(10.0))
|
||||
if err == nil {
|
||||
t.Error("buffer.write() expected error for past timestamp")
|
||||
}
|
||||
}
|
||||
|
||||
func TestBufferRead(t *testing.T) {
|
||||
b := newBuffer(100, 10)
|
||||
|
||||
// Write some test data
|
||||
b.write(100, schema.Float(1.0))
|
||||
b.write(110, schema.Float(2.0))
|
||||
b.write(120, schema.Float(3.0))
|
||||
|
||||
// Read data
|
||||
data := make([]schema.Float, 3)
|
||||
result, from, to, err := b.read(100, 130, data)
|
||||
if err != nil {
|
||||
t.Errorf("buffer.read() error = %v", err)
|
||||
}
|
||||
// buffer.read should report `from` as the buffer's first write time (start + freq/2)
|
||||
if from != 100 {
|
||||
t.Errorf("buffer.read() from = %d, want 100", from)
|
||||
}
|
||||
if to != 130 {
|
||||
t.Errorf("buffer.read() to = %d, want 130", to)
|
||||
}
|
||||
if len(result) != 3 {
|
||||
t.Errorf("buffer.read() len(result) = %d, want 3", len(result))
|
||||
}
|
||||
}
|
||||
68
internal/repository/config.go
Normal file
68
internal/repository/config.go
Normal file
@@ -0,0 +1,68 @@
|
||||
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
// All rights reserved. This file is part of cc-backend.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package repository
|
||||
|
||||
import "time"
|
||||
|
||||
// RepositoryConfig holds configuration for repository operations.
|
||||
// All fields have sensible defaults, so this configuration is optional.
|
||||
type RepositoryConfig struct {
|
||||
// CacheSize is the LRU cache size in bytes for job metadata and energy footprints.
|
||||
// Default: 1MB (1024 * 1024 bytes)
|
||||
CacheSize int
|
||||
|
||||
// MaxOpenConnections is the maximum number of open database connections.
|
||||
// Default: 4
|
||||
MaxOpenConnections int
|
||||
|
||||
// MaxIdleConnections is the maximum number of idle database connections.
|
||||
// Default: 4
|
||||
MaxIdleConnections int
|
||||
|
||||
// ConnectionMaxLifetime is the maximum amount of time a connection may be reused.
|
||||
// Default: 1 hour
|
||||
ConnectionMaxLifetime time.Duration
|
||||
|
||||
// ConnectionMaxIdleTime is the maximum amount of time a connection may be idle.
|
||||
// Default: 1 hour
|
||||
ConnectionMaxIdleTime time.Duration
|
||||
|
||||
// MinRunningJobDuration is the minimum duration in seconds for a job to be
|
||||
// considered in "running jobs" queries. This filters out very short jobs.
|
||||
// Default: 600 seconds (10 minutes)
|
||||
MinRunningJobDuration int
|
||||
}
|
||||
|
||||
// DefaultConfig returns the default repository configuration.
|
||||
// These values are optimized for typical deployments.
|
||||
func DefaultConfig() *RepositoryConfig {
|
||||
return &RepositoryConfig{
|
||||
CacheSize: 1 * 1024 * 1024, // 1MB
|
||||
MaxOpenConnections: 4,
|
||||
MaxIdleConnections: 4,
|
||||
ConnectionMaxLifetime: time.Hour,
|
||||
ConnectionMaxIdleTime: time.Hour,
|
||||
MinRunningJobDuration: 600, // 10 minutes
|
||||
}
|
||||
}
|
||||
|
||||
// repoConfig is the package-level configuration instance.
|
||||
// It is initialized with defaults and can be overridden via SetConfig.
|
||||
var repoConfig *RepositoryConfig = DefaultConfig()
|
||||
|
||||
// SetConfig sets the repository configuration.
|
||||
// This must be called before any repository initialization (Connect, GetJobRepository, etc.).
|
||||
// If not called, default values from DefaultConfig() are used.
|
||||
func SetConfig(cfg *RepositoryConfig) {
|
||||
if cfg != nil {
|
||||
repoConfig = cfg
|
||||
}
|
||||
}
|
||||
|
||||
// GetConfig returns the current repository configuration.
|
||||
func GetConfig() *RepositoryConfig {
|
||||
return repoConfig
|
||||
}
|
||||
@@ -2,6 +2,7 @@
|
||||
// All rights reserved. This file is part of cc-backend.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package repository
|
||||
|
||||
import (
|
||||
@@ -35,21 +36,15 @@ type DatabaseOptions struct {
|
||||
ConnectionMaxIdleTime time.Duration
|
||||
}
|
||||
|
||||
func setupSqlite(db *sql.DB) (err error) {
|
||||
func setupSqlite(db *sql.DB) error {
|
||||
pragmas := []string{
|
||||
// "journal_mode = WAL",
|
||||
// "busy_timeout = 5000",
|
||||
// "synchronous = NORMAL",
|
||||
// "cache_size = 1000000000", // 1GB
|
||||
// "foreign_keys = true",
|
||||
"temp_store = memory",
|
||||
// "mmap_size = 3000000000",
|
||||
}
|
||||
|
||||
for _, pragma := range pragmas {
|
||||
_, err = db.Exec("PRAGMA " + pragma)
|
||||
_, err := db.Exec("PRAGMA " + pragma)
|
||||
if err != nil {
|
||||
return
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
@@ -63,24 +58,24 @@ func Connect(driver string, db string) {
|
||||
dbConnOnce.Do(func() {
|
||||
opts := DatabaseOptions{
|
||||
URL: db,
|
||||
MaxOpenConnections: 4,
|
||||
MaxIdleConnections: 4,
|
||||
ConnectionMaxLifetime: time.Hour,
|
||||
ConnectionMaxIdleTime: time.Hour,
|
||||
MaxOpenConnections: repoConfig.MaxOpenConnections,
|
||||
MaxIdleConnections: repoConfig.MaxIdleConnections,
|
||||
ConnectionMaxLifetime: repoConfig.ConnectionMaxLifetime,
|
||||
ConnectionMaxIdleTime: repoConfig.ConnectionMaxIdleTime,
|
||||
}
|
||||
|
||||
switch driver {
|
||||
case "sqlite3":
|
||||
// TODO: Have separate DB handles for Writes and Reads
|
||||
// Optimize SQLite connection: https://kerkour.com/sqlite-for-servers
|
||||
connectionUrlParams := make(url.Values)
|
||||
connectionUrlParams.Add("_txlock", "immediate")
|
||||
connectionUrlParams.Add("_journal_mode", "WAL")
|
||||
connectionUrlParams.Add("_busy_timeout", "5000")
|
||||
connectionUrlParams.Add("_synchronous", "NORMAL")
|
||||
connectionUrlParams.Add("_cache_size", "1000000000")
|
||||
connectionUrlParams.Add("_foreign_keys", "true")
|
||||
opts.URL = fmt.Sprintf("file:%s?%s", opts.URL, connectionUrlParams.Encode())
|
||||
connectionURLParams := make(url.Values)
|
||||
connectionURLParams.Add("_txlock", "immediate")
|
||||
connectionURLParams.Add("_journal_mode", "WAL")
|
||||
connectionURLParams.Add("_busy_timeout", "5000")
|
||||
connectionURLParams.Add("_synchronous", "NORMAL")
|
||||
connectionURLParams.Add("_cache_size", "1000000000")
|
||||
connectionURLParams.Add("_foreign_keys", "true")
|
||||
opts.URL = fmt.Sprintf("file:%s?%s", opts.URL, connectionURLParams.Encode())
|
||||
|
||||
if cclog.Loglevel() == "debug" {
|
||||
sql.Register("sqlite3WithHooks", sqlhooks.Wrap(&sqlite3.SQLiteDriver{}, &Hooks{}))
|
||||
@@ -89,7 +84,10 @@ func Connect(driver string, db string) {
|
||||
dbHandle, err = sqlx.Open("sqlite3", opts.URL)
|
||||
}
|
||||
|
||||
setupSqlite(dbHandle.DB)
|
||||
err = setupSqlite(dbHandle.DB)
|
||||
if err != nil {
|
||||
cclog.Abortf("Failed sqlite db setup.\nError: %s\n", err.Error())
|
||||
}
|
||||
case "mysql":
|
||||
opts.URL += "?multiStatements=true"
|
||||
dbHandle, err = sqlx.Open("mysql", opts.URL)
|
||||
|
||||
@@ -2,6 +2,63 @@
|
||||
// All rights reserved. This file is part of cc-backend.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// Package repository provides the data access layer for cc-backend using the repository pattern.
|
||||
//
|
||||
// The repository pattern abstracts database operations and provides a clean interface for
|
||||
// data access. Each major entity (Job, User, Node, Tag) has its own repository with CRUD
|
||||
// operations and specialized queries.
|
||||
//
|
||||
// # Database Connection
|
||||
//
|
||||
// Initialize the database connection before using any repository:
|
||||
//
|
||||
// repository.Connect("sqlite3", "./var/job.db")
|
||||
// // or for MySQL:
|
||||
// repository.Connect("mysql", "user:password@tcp(localhost:3306)/dbname")
|
||||
//
|
||||
// # Configuration
|
||||
//
|
||||
// Optional: Configure repository settings before initialization:
|
||||
//
|
||||
// repository.SetConfig(&repository.RepositoryConfig{
|
||||
// CacheSize: 2 * 1024 * 1024, // 2MB cache
|
||||
// MaxOpenConnections: 8, // Connection pool size
|
||||
// MinRunningJobDuration: 300, // Filter threshold
|
||||
// })
|
||||
//
|
||||
// If not configured, sensible defaults are used automatically.
|
||||
//
|
||||
// # Repositories
|
||||
//
|
||||
// - JobRepository: Job lifecycle management and querying
|
||||
// - UserRepository: User management and authentication
|
||||
// - NodeRepository: Cluster node state tracking
|
||||
// - Tags: Job tagging and categorization
|
||||
//
|
||||
// # Caching
|
||||
//
|
||||
// Repositories use LRU caching to improve performance. Cache keys are constructed
|
||||
// as "type:id" (e.g., "metadata:123"). Cache is automatically invalidated on
|
||||
// mutations to maintain consistency.
|
||||
//
|
||||
// # Transaction Support
|
||||
//
|
||||
// For batch operations, use transactions:
|
||||
//
|
||||
// t, err := jobRepo.TransactionInit()
|
||||
// if err != nil {
|
||||
// return err
|
||||
// }
|
||||
// defer t.Rollback() // Rollback if not committed
|
||||
//
|
||||
// // Perform operations...
|
||||
// jobRepo.TransactionAdd(t, query, args...)
|
||||
//
|
||||
// // Commit when done
|
||||
// if err := t.Commit(); err != nil {
|
||||
// return err
|
||||
// }
|
||||
package repository
|
||||
|
||||
import (
|
||||
@@ -45,7 +102,7 @@ func GetJobRepository() *JobRepository {
|
||||
driver: db.Driver,
|
||||
|
||||
stmtCache: sq.NewStmtCache(db.DB),
|
||||
cache: lrucache.New(1024 * 1024),
|
||||
cache: lrucache.New(repoConfig.CacheSize),
|
||||
}
|
||||
})
|
||||
return jobRepoInstance
|
||||
@@ -267,7 +324,31 @@ func (r *JobRepository) FetchEnergyFootprint(job *schema.Job) (map[string]float6
|
||||
func (r *JobRepository) DeleteJobsBefore(startTime int64) (int, error) {
|
||||
var cnt int
|
||||
q := sq.Select("count(*)").From("job").Where("job.start_time < ?", startTime)
|
||||
q.RunWith(r.DB).QueryRow().Scan(cnt)
|
||||
if err := q.RunWith(r.DB).QueryRow().Scan(&cnt); err != nil {
|
||||
cclog.Errorf("Error counting jobs before %d: %v", startTime, err)
|
||||
return 0, err
|
||||
}
|
||||
|
||||
// Invalidate cache for jobs being deleted (get job IDs first)
|
||||
if cnt > 0 {
|
||||
var jobIds []int64
|
||||
rows, err := sq.Select("id").From("job").Where("job.start_time < ?", startTime).RunWith(r.DB).Query()
|
||||
if err == nil {
|
||||
defer rows.Close()
|
||||
for rows.Next() {
|
||||
var id int64
|
||||
if err := rows.Scan(&id); err == nil {
|
||||
jobIds = append(jobIds, id)
|
||||
}
|
||||
}
|
||||
// Invalidate cache entries
|
||||
for _, id := range jobIds {
|
||||
r.cache.Del(fmt.Sprintf("metadata:%d", id))
|
||||
r.cache.Del(fmt.Sprintf("energyFootprint:%d", id))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
qd := sq.Delete("job").Where("job.start_time < ?", startTime)
|
||||
_, err := qd.RunWith(r.DB).Exec()
|
||||
|
||||
@@ -281,6 +362,10 @@ func (r *JobRepository) DeleteJobsBefore(startTime int64) (int, error) {
|
||||
}
|
||||
|
||||
func (r *JobRepository) DeleteJobById(id int64) error {
|
||||
// Invalidate cache entries before deletion
|
||||
r.cache.Del(fmt.Sprintf("metadata:%d", id))
|
||||
r.cache.Del(fmt.Sprintf("energyFootprint:%d", id))
|
||||
|
||||
qd := sq.Delete("job").Where("job.id = ?", id)
|
||||
_, err := qd.RunWith(r.DB).Exec()
|
||||
|
||||
@@ -450,13 +535,14 @@ func (r *JobRepository) AllocatedNodes(cluster string) (map[string]map[string]in
|
||||
// FIXME: Set duration to requested walltime?
|
||||
func (r *JobRepository) StopJobsExceedingWalltimeBy(seconds int) error {
|
||||
start := time.Now()
|
||||
currentTime := time.Now().Unix()
|
||||
res, err := sq.Update("job").
|
||||
Set("monitoring_status", schema.MonitoringStatusArchivingFailed).
|
||||
Set("duration", 0).
|
||||
Set("job_state", schema.JobStateFailed).
|
||||
Where("job.job_state = 'running'").
|
||||
Where("job.walltime > 0").
|
||||
Where(fmt.Sprintf("(%d - job.start_time) > (job.walltime + %d)", time.Now().Unix(), seconds)).
|
||||
Where("(? - job.start_time) > (job.walltime + ?)", currentTime, seconds).
|
||||
RunWith(r.DB).Exec()
|
||||
if err != nil {
|
||||
cclog.Warn("Error while stopping jobs exceeding walltime")
|
||||
@@ -505,21 +591,21 @@ func (r *JobRepository) FindJobIdsByTag(tagId int64) ([]int64, error) {
|
||||
// FIXME: Reconsider filtering short jobs with hardcoded threshold
|
||||
func (r *JobRepository) FindRunningJobs(cluster string) ([]*schema.Job, error) {
|
||||
query := sq.Select(jobColumns...).From("job").
|
||||
Where(fmt.Sprintf("job.cluster = '%s'", cluster)).
|
||||
Where("job.cluster = ?", cluster).
|
||||
Where("job.job_state = 'running'").
|
||||
Where("job.duration > 600")
|
||||
Where("job.duration > ?", repoConfig.MinRunningJobDuration)
|
||||
|
||||
rows, err := query.RunWith(r.stmtCache).Query()
|
||||
if err != nil {
|
||||
cclog.Error("Error while running query")
|
||||
return nil, err
|
||||
}
|
||||
defer rows.Close()
|
||||
|
||||
jobs := make([]*schema.Job, 0, 50)
|
||||
for rows.Next() {
|
||||
job, err := scanJob(rows)
|
||||
if err != nil {
|
||||
rows.Close()
|
||||
cclog.Warn("Error while scanning rows")
|
||||
return nil, err
|
||||
}
|
||||
@@ -552,12 +638,10 @@ func (r *JobRepository) FindJobsBetween(startTimeBegin int64, startTimeEnd int64
|
||||
|
||||
if startTimeBegin == 0 {
|
||||
cclog.Infof("Find jobs before %d", startTimeEnd)
|
||||
query = sq.Select(jobColumns...).From("job").Where(fmt.Sprintf(
|
||||
"job.start_time < %d", startTimeEnd))
|
||||
query = sq.Select(jobColumns...).From("job").Where("job.start_time < ?", startTimeEnd)
|
||||
} else {
|
||||
cclog.Infof("Find jobs between %d and %d", startTimeBegin, startTimeEnd)
|
||||
query = sq.Select(jobColumns...).From("job").Where(fmt.Sprintf(
|
||||
"job.start_time BETWEEN %d AND %d", startTimeBegin, startTimeEnd))
|
||||
query = sq.Select(jobColumns...).From("job").Where("job.start_time BETWEEN ? AND ?", startTimeBegin, startTimeEnd)
|
||||
}
|
||||
|
||||
rows, err := query.RunWith(r.stmtCache).Query()
|
||||
@@ -565,12 +649,12 @@ func (r *JobRepository) FindJobsBetween(startTimeBegin int64, startTimeEnd int64
|
||||
cclog.Error("Error while running query")
|
||||
return nil, err
|
||||
}
|
||||
defer rows.Close()
|
||||
|
||||
jobs := make([]*schema.Job, 0, 50)
|
||||
for rows.Next() {
|
||||
job, err := scanJob(rows)
|
||||
if err != nil {
|
||||
rows.Close()
|
||||
cclog.Warn("Error while scanning rows")
|
||||
return nil, err
|
||||
}
|
||||
@@ -582,6 +666,10 @@ func (r *JobRepository) FindJobsBetween(startTimeBegin int64, startTimeEnd int64
|
||||
}
|
||||
|
||||
func (r *JobRepository) UpdateMonitoringStatus(job int64, monitoringStatus int32) (err error) {
|
||||
// Invalidate cache entries as monitoring status affects job state
|
||||
r.cache.Del(fmt.Sprintf("metadata:%d", job))
|
||||
r.cache.Del(fmt.Sprintf("energyFootprint:%d", job))
|
||||
|
||||
stmt := sq.Update("job").
|
||||
Set("monitoring_status", monitoringStatus).
|
||||
Where("job.id = ?", job)
|
||||
|
||||
@@ -31,8 +31,9 @@ const NamedJobInsert string = `INSERT INTO job (
|
||||
|
||||
func (r *JobRepository) InsertJob(job *schema.Job) (int64, error) {
|
||||
r.Mutex.Lock()
|
||||
defer r.Mutex.Unlock()
|
||||
|
||||
res, err := r.DB.NamedExec(NamedJobCacheInsert, job)
|
||||
r.Mutex.Unlock()
|
||||
if err != nil {
|
||||
cclog.Warn("Error while NamedJobInsert")
|
||||
return 0, err
|
||||
@@ -57,12 +58,12 @@ func (r *JobRepository) SyncJobs() ([]*schema.Job, error) {
|
||||
cclog.Errorf("Error while running query %v", err)
|
||||
return nil, err
|
||||
}
|
||||
defer rows.Close()
|
||||
|
||||
jobs := make([]*schema.Job, 0, 50)
|
||||
for rows.Next() {
|
||||
job, err := scanJob(rows)
|
||||
if err != nil {
|
||||
rows.Close()
|
||||
cclog.Warn("Error while scanning rows")
|
||||
return nil, err
|
||||
}
|
||||
@@ -113,6 +114,10 @@ func (r *JobRepository) Stop(
|
||||
state schema.JobState,
|
||||
monitoringStatus int32,
|
||||
) (err error) {
|
||||
// Invalidate cache entries as job state is changing
|
||||
r.cache.Del(fmt.Sprintf("metadata:%d", jobId))
|
||||
r.cache.Del(fmt.Sprintf("energyFootprint:%d", jobId))
|
||||
|
||||
stmt := sq.Update("job").
|
||||
Set("job_state", state).
|
||||
Set("duration", duration).
|
||||
@@ -129,11 +134,13 @@ func (r *JobRepository) StopCached(
|
||||
state schema.JobState,
|
||||
monitoringStatus int32,
|
||||
) (err error) {
|
||||
// Note: StopCached updates job_cache table, not the main job table
|
||||
// Cache invalidation happens when job is synced to main table
|
||||
stmt := sq.Update("job_cache").
|
||||
Set("job_state", state).
|
||||
Set("duration", duration).
|
||||
Set("monitoring_status", monitoringStatus).
|
||||
Where("job.id = ?", jobId)
|
||||
Where("job_cache.id = ?", jobId)
|
||||
|
||||
_, err = stmt.RunWith(r.stmtCache).Exec()
|
||||
return err
|
||||
|
||||
@@ -89,6 +89,7 @@ func (r *JobRepository) FindAll(
|
||||
cclog.Error("Error while running query")
|
||||
return nil, err
|
||||
}
|
||||
defer rows.Close()
|
||||
|
||||
jobs := make([]*schema.Job, 0, 10)
|
||||
for rows.Next() {
|
||||
@@ -103,25 +104,31 @@ func (r *JobRepository) FindAll(
|
||||
return jobs, nil
|
||||
}
|
||||
|
||||
// Get complete joblist only consisting of db ids.
|
||||
// GetJobList returns job IDs for non-running jobs.
|
||||
// This is useful to process large job counts and intended to be used
|
||||
// together with FindById to process jobs one by one
|
||||
func (r *JobRepository) GetJobList() ([]int64, error) {
|
||||
// together with FindById to process jobs one by one.
|
||||
// Use limit and offset for pagination. Use limit=0 to get all results (not recommended for large datasets).
|
||||
func (r *JobRepository) GetJobList(limit int, offset int) ([]int64, error) {
|
||||
query := sq.Select("id").From("job").
|
||||
Where("job.job_state != 'running'")
|
||||
|
||||
// Add pagination if limit is specified
|
||||
if limit > 0 {
|
||||
query = query.Limit(uint64(limit)).Offset(uint64(offset))
|
||||
}
|
||||
|
||||
rows, err := query.RunWith(r.stmtCache).Query()
|
||||
if err != nil {
|
||||
cclog.Error("Error while running query")
|
||||
return nil, err
|
||||
}
|
||||
defer rows.Close()
|
||||
|
||||
jl := make([]int64, 0, 1000)
|
||||
for rows.Next() {
|
||||
var id int64
|
||||
err := rows.Scan(&id)
|
||||
if err != nil {
|
||||
rows.Close()
|
||||
cclog.Warn("Error while scanning rows")
|
||||
return nil, err
|
||||
}
|
||||
@@ -256,6 +263,7 @@ func (r *JobRepository) FindConcurrentJobs(
|
||||
cclog.Errorf("Error while running query: %v", err)
|
||||
return nil, err
|
||||
}
|
||||
defer rows.Close()
|
||||
|
||||
items := make([]*model.JobLink, 0, 10)
|
||||
queryString := fmt.Sprintf("cluster=%s", job.Cluster)
|
||||
@@ -283,6 +291,7 @@ func (r *JobRepository) FindConcurrentJobs(
|
||||
cclog.Errorf("Error while running query: %v", err)
|
||||
return nil, err
|
||||
}
|
||||
defer rows.Close()
|
||||
|
||||
for rows.Next() {
|
||||
var id, jobId, startTime sql.NullInt64
|
||||
|
||||
@@ -43,7 +43,7 @@ func GetNodeRepository() *NodeRepository {
|
||||
driver: db.Driver,
|
||||
|
||||
stmtCache: sq.NewStmtCache(db.DB),
|
||||
cache: lrucache.New(1024 * 1024),
|
||||
cache: lrucache.New(repoConfig.CacheSize),
|
||||
}
|
||||
})
|
||||
return nodeRepoInstance
|
||||
@@ -77,43 +77,6 @@ func (r *NodeRepository) FetchMetadata(hostname string, cluster string) (map[str
|
||||
return MetaData, nil
|
||||
}
|
||||
|
||||
//
|
||||
// func (r *NodeRepository) UpdateMetadata(node *schema.Node, key, val string) (err error) {
|
||||
// cachekey := fmt.Sprintf("metadata:%d", node.ID)
|
||||
// r.cache.Del(cachekey)
|
||||
// if node.MetaData == nil {
|
||||
// if _, err = r.FetchMetadata(node); err != nil {
|
||||
// cclog.Warnf("Error while fetching metadata for node, DB ID '%v'", node.ID)
|
||||
// return err
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// if node.MetaData != nil {
|
||||
// cpy := make(map[string]string, len(node.MetaData)+1)
|
||||
// maps.Copy(cpy, node.MetaData)
|
||||
// cpy[key] = val
|
||||
// node.MetaData = cpy
|
||||
// } else {
|
||||
// node.MetaData = map[string]string{key: val}
|
||||
// }
|
||||
//
|
||||
// if node.RawMetaData, err = json.Marshal(node.MetaData); err != nil {
|
||||
// cclog.Warnf("Error while marshaling metadata for node, DB ID '%v'", node.ID)
|
||||
// return err
|
||||
// }
|
||||
//
|
||||
// if _, err = sq.Update("node").
|
||||
// Set("meta_data", node.RawMetaData).
|
||||
// Where("node.id = ?", node.ID).
|
||||
// RunWith(r.stmtCache).Exec(); err != nil {
|
||||
// cclog.Warnf("Error while updating metadata for node, DB ID '%v'", node.ID)
|
||||
// return err
|
||||
// }
|
||||
//
|
||||
// r.cache.Put(cachekey, node.MetaData, len(node.RawMetaData), 24*time.Hour)
|
||||
// return nil
|
||||
// }
|
||||
|
||||
func (r *NodeRepository) GetNode(hostname string, cluster string, withMeta bool) (*schema.Node, error) {
|
||||
node := &schema.Node{}
|
||||
var timestamp int
|
||||
|
||||
@@ -115,7 +115,7 @@ func nodeTestSetup(t *testing.T) {
|
||||
}
|
||||
|
||||
if err := os.WriteFile(filepath.Join(jobarchive, "version.txt"),
|
||||
fmt.Appendf(nil, "%d", 2), 0o666); err != nil {
|
||||
fmt.Appendf(nil, "%d", 3), 0o666); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
|
||||
@@ -114,16 +114,6 @@ func (r *JobRepository) buildStatsQuery(
|
||||
return query
|
||||
}
|
||||
|
||||
// func (r *JobRepository) getUserName(ctx context.Context, id string) string {
|
||||
// user := GetUserFromContext(ctx)
|
||||
// name, _ := r.FindColumnValue(user, id, "hpc_user", "name", "username", false)
|
||||
// if name != "" {
|
||||
// return name
|
||||
// } else {
|
||||
// return "-"
|
||||
// }
|
||||
// }
|
||||
|
||||
func (r *JobRepository) getCastType() string {
|
||||
var castType string
|
||||
|
||||
|
||||
@@ -5,6 +5,7 @@
|
||||
package repository
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"strings"
|
||||
|
||||
@@ -14,65 +15,32 @@ import (
|
||||
sq "github.com/Masterminds/squirrel"
|
||||
)
|
||||
|
||||
// Add the tag with id `tagId` to the job with the database id `jobId`.
|
||||
// AddTag adds the tag with id `tagId` to the job with the database id `jobId`.
|
||||
// Requires user authentication for security checks.
|
||||
func (r *JobRepository) AddTag(user *schema.User, job int64, tag int64) ([]*schema.Tag, error) {
|
||||
j, err := r.FindByIdWithUser(user, job)
|
||||
if err != nil {
|
||||
cclog.Warn("Error while finding job by id")
|
||||
cclog.Warnf("Error finding job %d for user %s: %v", job, user.Username, err)
|
||||
return nil, err
|
||||
}
|
||||
|
||||
q := sq.Insert("jobtag").Columns("job_id", "tag_id").Values(job, tag)
|
||||
|
||||
if _, err := q.RunWith(r.stmtCache).Exec(); err != nil {
|
||||
s, _, _ := q.ToSql()
|
||||
cclog.Errorf("Error adding tag with %s: %v", s, err)
|
||||
return nil, err
|
||||
}
|
||||
|
||||
tags, err := r.GetTags(user, &job)
|
||||
if err != nil {
|
||||
cclog.Warn("Error while getting tags for job")
|
||||
return nil, err
|
||||
}
|
||||
|
||||
archiveTags, err := r.getArchiveTags(&job)
|
||||
if err != nil {
|
||||
cclog.Warn("Error while getting tags for job")
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return tags, archive.UpdateTags(j, archiveTags)
|
||||
return r.addJobTag(job, tag, j, func() ([]*schema.Tag, error) {
|
||||
return r.GetTags(user, &job)
|
||||
})
|
||||
}
|
||||
|
||||
// AddTagDirect adds a tag without user security checks.
|
||||
// Use only for internal/admin operations.
|
||||
func (r *JobRepository) AddTagDirect(job int64, tag int64) ([]*schema.Tag, error) {
|
||||
j, err := r.FindByIdDirect(job)
|
||||
if err != nil {
|
||||
cclog.Warn("Error while finding job by id")
|
||||
cclog.Warnf("Error finding job %d: %v", job, err)
|
||||
return nil, err
|
||||
}
|
||||
|
||||
q := sq.Insert("jobtag").Columns("job_id", "tag_id").Values(job, tag)
|
||||
|
||||
if _, err := q.RunWith(r.stmtCache).Exec(); err != nil {
|
||||
s, _, _ := q.ToSql()
|
||||
cclog.Errorf("Error adding tag with %s: %v", s, err)
|
||||
return nil, err
|
||||
}
|
||||
|
||||
tags, err := r.GetTagsDirect(&job)
|
||||
if err != nil {
|
||||
cclog.Warn("Error while getting tags for job")
|
||||
return nil, err
|
||||
}
|
||||
|
||||
archiveTags, err := r.getArchiveTags(&job)
|
||||
if err != nil {
|
||||
cclog.Warn("Error while getting tags for job")
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return tags, archive.UpdateTags(j, archiveTags)
|
||||
return r.addJobTag(job, tag, j, func() ([]*schema.Tag, error) {
|
||||
return r.GetTagsDirect(&job)
|
||||
})
|
||||
}
|
||||
|
||||
// Removes a tag from a job by tag id.
|
||||
@@ -260,15 +228,18 @@ func (r *JobRepository) CountTags(user *schema.User) (tags []schema.Tag, counts
|
||||
LeftJoin("jobtag jt ON t.id = jt.tag_id").
|
||||
GroupBy("t.tag_name")
|
||||
|
||||
// Handle Scope Filtering
|
||||
scopeList := "\"global\""
|
||||
// Build scope list for filtering
|
||||
var scopeBuilder strings.Builder
|
||||
scopeBuilder.WriteString(`"global"`)
|
||||
if user != nil {
|
||||
scopeList += ",\"" + user.Username + "\""
|
||||
scopeBuilder.WriteString(`,"`)
|
||||
scopeBuilder.WriteString(user.Username)
|
||||
scopeBuilder.WriteString(`"`)
|
||||
if user.HasAnyRole([]schema.Role{schema.RoleAdmin, schema.RoleSupport}) {
|
||||
scopeBuilder.WriteString(`,"admin"`)
|
||||
}
|
||||
}
|
||||
if user.HasAnyRole([]schema.Role{schema.RoleAdmin, schema.RoleSupport}) {
|
||||
scopeList += ",\"admin\""
|
||||
}
|
||||
q = q.Where("t.tag_scope IN (" + scopeList + ")")
|
||||
q = q.Where("t.tag_scope IN (" + scopeBuilder.String() + ")")
|
||||
|
||||
// Handle Job Ownership
|
||||
if user != nil && user.HasAnyRole([]schema.Role{schema.RoleAdmin, schema.RoleSupport}) { // ADMIN || SUPPORT: Count all jobs
|
||||
@@ -302,6 +273,41 @@ func (r *JobRepository) CountTags(user *schema.User) (tags []schema.Tag, counts
|
||||
return tags, counts, err
|
||||
}
|
||||
|
||||
var (
|
||||
ErrTagNotFound = errors.New("the tag does not exist")
|
||||
ErrJobNotOwned = errors.New("user is not owner of job")
|
||||
ErrTagNoAccess = errors.New("user not permitted to use that tag")
|
||||
ErrTagPrivateScope = errors.New("tag is private to another user")
|
||||
ErrTagAdminScope = errors.New("tag requires admin privileges")
|
||||
ErrTagsIncompatScopes = errors.New("combining admin and non-admin scoped tags not allowed")
|
||||
)
|
||||
|
||||
// addJobTag is a helper function that inserts a job-tag association and updates the archive.
|
||||
// Returns the updated tag list for the job.
|
||||
func (r *JobRepository) addJobTag(jobId int64, tagId int64, job *schema.Job, getTags func() ([]*schema.Tag, error)) ([]*schema.Tag, error) {
|
||||
q := sq.Insert("jobtag").Columns("job_id", "tag_id").Values(jobId, tagId)
|
||||
|
||||
if _, err := q.RunWith(r.stmtCache).Exec(); err != nil {
|
||||
s, _, _ := q.ToSql()
|
||||
cclog.Errorf("Error adding tag with %s: %v", s, err)
|
||||
return nil, err
|
||||
}
|
||||
|
||||
tags, err := getTags()
|
||||
if err != nil {
|
||||
cclog.Warnf("Error getting tags for job %d: %v", jobId, err)
|
||||
return nil, err
|
||||
}
|
||||
|
||||
archiveTags, err := r.getArchiveTags(&jobId)
|
||||
if err != nil {
|
||||
cclog.Warnf("Error getting archive tags for job %d: %v", jobId, err)
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return tags, archive.UpdateTags(job, archiveTags)
|
||||
}
|
||||
|
||||
// AddTagOrCreate adds the tag with the specified type and name to the job with the database id `jobId`.
|
||||
// If such a tag does not yet exist, it is created.
|
||||
func (r *JobRepository) AddTagOrCreate(user *schema.User, jobId int64, tagType string, tagName string, tagScope string) (tagId int64, err error) {
|
||||
|
||||
@@ -5,84 +5,96 @@
|
||||
package repository
|
||||
|
||||
import (
|
||||
cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
|
||||
"fmt"
|
||||
|
||||
"github.com/jmoiron/sqlx"
|
||||
)
|
||||
|
||||
// Transaction wraps a database transaction for job-related operations.
|
||||
type Transaction struct {
|
||||
tx *sqlx.Tx
|
||||
stmt *sqlx.NamedStmt
|
||||
tx *sqlx.Tx
|
||||
}
|
||||
|
||||
// TransactionInit begins a new transaction.
|
||||
func (r *JobRepository) TransactionInit() (*Transaction, error) {
|
||||
var err error
|
||||
t := new(Transaction)
|
||||
|
||||
t.tx, err = r.DB.Beginx()
|
||||
tx, err := r.DB.Beginx()
|
||||
if err != nil {
|
||||
cclog.Warn("Error while bundling transactions")
|
||||
return nil, err
|
||||
return nil, fmt.Errorf("beginning transaction: %w", err)
|
||||
}
|
||||
return t, nil
|
||||
return &Transaction{tx: tx}, nil
|
||||
}
|
||||
|
||||
func (r *JobRepository) TransactionCommit(t *Transaction) error {
|
||||
var err error
|
||||
if t.tx != nil {
|
||||
if err = t.tx.Commit(); err != nil {
|
||||
cclog.Warn("Error while committing transactions")
|
||||
return err
|
||||
}
|
||||
// Commit commits the transaction.
|
||||
// After calling Commit, the transaction should not be used again.
|
||||
func (t *Transaction) Commit() error {
|
||||
if t.tx == nil {
|
||||
return fmt.Errorf("transaction already committed or rolled back")
|
||||
}
|
||||
|
||||
t.tx, err = r.DB.Beginx()
|
||||
err := t.tx.Commit()
|
||||
t.tx = nil // Mark as completed
|
||||
if err != nil {
|
||||
cclog.Warn("Error while bundling transactions")
|
||||
return err
|
||||
return fmt.Errorf("committing transaction: %w", err)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// Rollback rolls back the transaction.
|
||||
// It's safe to call Rollback on an already committed or rolled back transaction.
|
||||
func (t *Transaction) Rollback() error {
|
||||
if t.tx == nil {
|
||||
return nil // Already committed/rolled back
|
||||
}
|
||||
err := t.tx.Rollback()
|
||||
t.tx = nil // Mark as completed
|
||||
if err != nil {
|
||||
return fmt.Errorf("rolling back transaction: %w", err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// TransactionEnd commits the transaction.
|
||||
// Deprecated: Use Commit() instead.
|
||||
func (r *JobRepository) TransactionEnd(t *Transaction) error {
|
||||
if err := t.tx.Commit(); err != nil {
|
||||
cclog.Warn("Error while committing SQL transactions")
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
return t.Commit()
|
||||
}
|
||||
|
||||
// TransactionAddNamed executes a named query within the transaction.
|
||||
func (r *JobRepository) TransactionAddNamed(
|
||||
t *Transaction,
|
||||
query string,
|
||||
args ...interface{},
|
||||
) (int64, error) {
|
||||
if t.tx == nil {
|
||||
return 0, fmt.Errorf("transaction is nil or already completed")
|
||||
}
|
||||
|
||||
res, err := t.tx.NamedExec(query, args)
|
||||
if err != nil {
|
||||
cclog.Errorf("Named Exec failed: %v", err)
|
||||
return 0, err
|
||||
return 0, fmt.Errorf("named exec: %w", err)
|
||||
}
|
||||
|
||||
id, err := res.LastInsertId()
|
||||
if err != nil {
|
||||
cclog.Errorf("repository initDB(): %v", err)
|
||||
return 0, err
|
||||
return 0, fmt.Errorf("getting last insert id: %w", err)
|
||||
}
|
||||
|
||||
return id, nil
|
||||
}
|
||||
|
||||
// TransactionAdd executes a query within the transaction.
|
||||
func (r *JobRepository) TransactionAdd(t *Transaction, query string, args ...interface{}) (int64, error) {
|
||||
if t.tx == nil {
|
||||
return 0, fmt.Errorf("transaction is nil or already completed")
|
||||
}
|
||||
|
||||
res, err := t.tx.Exec(query, args...)
|
||||
if err != nil {
|
||||
cclog.Errorf("TransactionAdd(), Exec() Error: %v", err)
|
||||
return 0, err
|
||||
return 0, fmt.Errorf("exec: %w", err)
|
||||
}
|
||||
|
||||
id, err := res.LastInsertId()
|
||||
if err != nil {
|
||||
cclog.Errorf("TransactionAdd(), LastInsertId() Error: %v", err)
|
||||
return 0, err
|
||||
return 0, fmt.Errorf("getting last insert id: %w", err)
|
||||
}
|
||||
|
||||
return id, nil
|
||||
|
||||
@@ -24,10 +24,14 @@ import (
|
||||
)
|
||||
|
||||
//go:embed jobclasses/*
|
||||
var jobclassFiles embed.FS
|
||||
var jobClassFiles embed.FS
|
||||
|
||||
// Variable defines a named expression that can be computed and reused in rules.
|
||||
// Variables are evaluated before the main rule and their results are added to the environment.
|
||||
type Variable struct {
|
||||
// Name is the variable identifier used in rule expressions
|
||||
Name string `json:"name"`
|
||||
// Expr is the expression to evaluate (must return a numeric value)
|
||||
Expr string `json:"expr"`
|
||||
}
|
||||
|
||||
@@ -36,14 +40,25 @@ type ruleVariable struct {
|
||||
expr *vm.Program
|
||||
}
|
||||
|
||||
// RuleFormat defines the JSON structure for job classification rules.
|
||||
// Each rule specifies requirements, metrics to analyze, variables to compute,
|
||||
// and the final rule expression that determines if the job matches the classification.
|
||||
type RuleFormat struct {
|
||||
// Name is a human-readable description of the rule
|
||||
Name string `json:"name"`
|
||||
// Tag is the classification tag to apply if the rule matches
|
||||
Tag string `json:"tag"`
|
||||
// Parameters are shared values referenced in the rule (e.g., thresholds)
|
||||
Parameters []string `json:"parameters"`
|
||||
// Metrics are the job metrics required for this rule (e.g., "cpu_load", "mem_used")
|
||||
Metrics []string `json:"metrics"`
|
||||
// Requirements are boolean expressions that must be true for the rule to apply
|
||||
Requirements []string `json:"requirements"`
|
||||
// Variables are computed values used in the rule expression
|
||||
Variables []Variable `json:"variables"`
|
||||
// Rule is the boolean expression that determines if the job matches
|
||||
Rule string `json:"rule"`
|
||||
// Hint is a template string that generates a message when the rule matches
|
||||
Hint string `json:"hint"`
|
||||
}
|
||||
|
||||
@@ -56,11 +71,35 @@ type ruleInfo struct {
|
||||
hint *template.Template
|
||||
}
|
||||
|
||||
// JobRepository defines the interface for job database operations needed by the tagger.
|
||||
// This interface allows for easier testing and decoupling from the concrete repository implementation.
|
||||
type JobRepository interface {
|
||||
// HasTag checks if a job already has a specific tag
|
||||
HasTag(jobId int64, tagType string, tagName string) bool
|
||||
// AddTagOrCreateDirect adds a tag to a job or creates it if it doesn't exist
|
||||
AddTagOrCreateDirect(jobId int64, tagType string, tagName string) (tagId int64, err error)
|
||||
// UpdateMetadata updates job metadata with a key-value pair
|
||||
UpdateMetadata(job *schema.Job, key, val string) (err error)
|
||||
}
|
||||
|
||||
// JobClassTagger classifies jobs based on configurable rules that evaluate job metrics and properties.
|
||||
// Rules are loaded from embedded JSON files and can be dynamically reloaded from a watched directory.
|
||||
// When a job matches a rule, it is tagged with the corresponding classification and an optional hint message.
|
||||
type JobClassTagger struct {
|
||||
rules map[string]ruleInfo
|
||||
parameters map[string]any
|
||||
tagType string
|
||||
cfgPath string
|
||||
// rules maps classification tags to their compiled rule information
|
||||
rules map[string]ruleInfo
|
||||
// parameters are shared values (e.g., thresholds) used across multiple rules
|
||||
parameters map[string]any
|
||||
// tagType is the type of tag ("jobClass")
|
||||
tagType string
|
||||
// cfgPath is the path to watch for configuration changes
|
||||
cfgPath string
|
||||
// repo provides access to job database operations
|
||||
repo JobRepository
|
||||
// getStatistics retrieves job statistics for analysis
|
||||
getStatistics func(job *schema.Job) (map[string]schema.JobStatistics, error)
|
||||
// getMetricConfig retrieves metric configuration (limits) for a cluster
|
||||
getMetricConfig func(cluster, subCluster string) map[string]*schema.Metric
|
||||
}
|
||||
|
||||
func (t *JobClassTagger) prepareRule(b []byte, fns string) {
|
||||
@@ -127,10 +166,14 @@ func (t *JobClassTagger) prepareRule(b []byte, fns string) {
|
||||
t.rules[rule.Tag] = ri
|
||||
}
|
||||
|
||||
// EventMatch checks if a filesystem event should trigger configuration reload.
|
||||
// It returns true if the event path contains "jobclasses".
|
||||
func (t *JobClassTagger) EventMatch(s string) bool {
|
||||
return strings.Contains(s, "jobclasses")
|
||||
}
|
||||
|
||||
// EventCallback is triggered when the configuration directory changes.
|
||||
// It reloads parameters and all rule files from the watched directory.
|
||||
// FIXME: Only process the file that caused the event
|
||||
func (t *JobClassTagger) EventCallback() {
|
||||
files, err := os.ReadDir(t.cfgPath)
|
||||
@@ -170,7 +213,7 @@ func (t *JobClassTagger) EventCallback() {
|
||||
|
||||
func (t *JobClassTagger) initParameters() error {
|
||||
cclog.Info("Initialize parameters")
|
||||
b, err := jobclassFiles.ReadFile("jobclasses/parameters.json")
|
||||
b, err := jobClassFiles.ReadFile("jobclasses/parameters.json")
|
||||
if err != nil {
|
||||
cclog.Warnf("prepareRule() > open file error: %v", err)
|
||||
return err
|
||||
@@ -184,6 +227,10 @@ func (t *JobClassTagger) initParameters() error {
|
||||
return nil
|
||||
}
|
||||
|
||||
// Register initializes the JobClassTagger by loading parameters and classification rules.
|
||||
// It loads embedded configuration files and sets up a file watch on ./var/tagger/jobclasses
|
||||
// if it exists, allowing for dynamic configuration updates without restarting the application.
|
||||
// Returns an error if the embedded configuration files cannot be read or parsed.
|
||||
func (t *JobClassTagger) Register() error {
|
||||
t.cfgPath = "./var/tagger/jobclasses"
|
||||
t.tagType = "jobClass"
|
||||
@@ -194,18 +241,18 @@ func (t *JobClassTagger) Register() error {
|
||||
return err
|
||||
}
|
||||
|
||||
files, err := jobclassFiles.ReadDir("jobclasses")
|
||||
files, err := jobClassFiles.ReadDir("jobclasses")
|
||||
if err != nil {
|
||||
return fmt.Errorf("error reading app folder: %#v", err)
|
||||
}
|
||||
t.rules = make(map[string]ruleInfo, 0)
|
||||
t.rules = make(map[string]ruleInfo)
|
||||
for _, fn := range files {
|
||||
fns := fn.Name()
|
||||
if fns != "parameters.json" {
|
||||
filename := fmt.Sprintf("jobclasses/%s", fns)
|
||||
cclog.Infof("Process: %s", fns)
|
||||
|
||||
b, err := jobclassFiles.ReadFile(filename)
|
||||
b, err := jobClassFiles.ReadFile(filename)
|
||||
if err != nil {
|
||||
cclog.Warnf("prepareRule() > open file error: %v", err)
|
||||
return err
|
||||
@@ -220,13 +267,30 @@ func (t *JobClassTagger) Register() error {
|
||||
util.AddListener(t.cfgPath, t)
|
||||
}
|
||||
|
||||
t.repo = repository.GetJobRepository()
|
||||
t.getStatistics = archive.GetStatistics
|
||||
t.getMetricConfig = archive.GetMetricConfigSubCluster
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// Match evaluates all classification rules against a job and applies matching tags.
|
||||
// It retrieves job statistics and metric configurations, then tests each rule's requirements
|
||||
// and main expression. For each matching rule, it:
|
||||
// - Applies the classification tag to the job
|
||||
// - Generates and stores a hint message based on the rule's template
|
||||
//
|
||||
// The function constructs an evaluation environment containing:
|
||||
// - Job properties (duration, cores, nodes, state, etc.)
|
||||
// - Metric statistics (min, max, avg) and their configured limits
|
||||
// - Shared parameters defined in parameters.json
|
||||
// - Computed variables from the rule definition
|
||||
//
|
||||
// Rules are evaluated in arbitrary order. If multiple rules match, only the first
|
||||
// encountered match is applied (FIXME: this should handle multiple matches).
|
||||
func (t *JobClassTagger) Match(job *schema.Job) {
|
||||
r := repository.GetJobRepository()
|
||||
jobstats, err := archive.GetStatistics(job)
|
||||
metricsList := archive.GetMetricConfigSubCluster(job.Cluster, job.SubCluster)
|
||||
jobStats, err := t.getStatistics(job)
|
||||
metricsList := t.getMetricConfig(job.Cluster, job.SubCluster)
|
||||
cclog.Infof("Enter match rule with %d rules for job %d", len(t.rules), job.JobID)
|
||||
if err != nil {
|
||||
cclog.Errorf("job classification failed for job %d: %#v", job.JobID, err)
|
||||
@@ -251,7 +315,7 @@ func (t *JobClassTagger) Match(job *schema.Job) {
|
||||
|
||||
// add metrics to env
|
||||
for _, m := range ri.metrics {
|
||||
stats, ok := jobstats[m]
|
||||
stats, ok := jobStats[m]
|
||||
if !ok {
|
||||
cclog.Errorf("job classification failed for job %d: missing metric '%s'", job.JobID, m)
|
||||
return
|
||||
@@ -302,8 +366,11 @@ func (t *JobClassTagger) Match(job *schema.Job) {
|
||||
if match.(bool) {
|
||||
cclog.Info("Rule matches!")
|
||||
id := *job.ID
|
||||
if !r.HasTag(id, t.tagType, tag) {
|
||||
r.AddTagOrCreateDirect(id, t.tagType, tag)
|
||||
if !t.repo.HasTag(id, t.tagType, tag) {
|
||||
_, err := t.repo.AddTagOrCreateDirect(id, t.tagType, tag)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
// process hint template
|
||||
@@ -314,7 +381,11 @@ func (t *JobClassTagger) Match(job *schema.Job) {
|
||||
}
|
||||
|
||||
// FIXME: Handle case where multiple tags apply
|
||||
r.UpdateMetadata(job, "message", msg.String())
|
||||
// FIXME: Handle case where multiple tags apply
|
||||
err = t.repo.UpdateMetadata(job, "message", msg.String())
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
} else {
|
||||
cclog.Info("Rule does not match!")
|
||||
}
|
||||
|
||||
162
internal/tagger/classifyJob_test.go
Normal file
162
internal/tagger/classifyJob_test.go
Normal file
@@ -0,0 +1,162 @@
|
||||
package tagger
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/ClusterCockpit/cc-lib/schema"
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/mock"
|
||||
)
|
||||
|
||||
// MockJobRepository is a testify-based test double for the job repository
// dependency of JobClassTagger (the tests below assign it to the tagger's
// repo field). It embeds mock.Mock, so expectations are registered with On()
// and checked with AssertExpectations().
|
||||
type MockJobRepository struct {
|
||||
mock.Mock
|
||||
}
|
||||
|
||||
func (m *MockJobRepository) HasTag(jobId int64, tagType string, tagName string) bool {
|
||||
args := m.Called(jobId, tagType, tagName)
|
||||
return args.Bool(0)
|
||||
}
|
||||
|
||||
func (m *MockJobRepository) AddTagOrCreateDirect(jobId int64, tagType string, tagName string) (tagId int64, err error) {
|
||||
args := m.Called(jobId, tagType, tagName)
|
||||
return args.Get(0).(int64), args.Error(1)
|
||||
}
|
||||
|
||||
func (m *MockJobRepository) UpdateMetadata(job *schema.Job, key, val string) (err error) {
|
||||
args := m.Called(job, key, val)
|
||||
return args.Error(0)
|
||||
}
|
||||
|
||||
func TestPrepareRule(t *testing.T) {
|
||||
tagger := &JobClassTagger{
|
||||
rules: make(map[string]ruleInfo),
|
||||
parameters: make(map[string]any),
|
||||
}
|
||||
|
||||
// Valid rule JSON
|
||||
validRule := []byte(`{
|
||||
"name": "Test Rule",
|
||||
"tag": "test_tag",
|
||||
"parameters": [],
|
||||
"metrics": ["flops_any"],
|
||||
"requirements": ["job.numNodes > 1"],
|
||||
"variables": [{"name": "avg_flops", "expr": "flops_any.avg"}],
|
||||
"rule": "avg_flops > 100",
|
||||
"hint": "High FLOPS"
|
||||
}`)
|
||||
|
||||
tagger.prepareRule(validRule, "test_rule.json")
|
||||
|
||||
assert.Contains(t, tagger.rules, "test_tag")
|
||||
rule := tagger.rules["test_tag"]
|
||||
assert.Equal(t, 1, len(rule.metrics))
|
||||
assert.Equal(t, 1, len(rule.requirements))
|
||||
assert.Equal(t, 1, len(rule.variables))
|
||||
assert.NotNil(t, rule.rule)
|
||||
assert.NotNil(t, rule.hint)
|
||||
}
|
||||
|
||||
func TestClassifyJobMatch(t *testing.T) {
|
||||
mockRepo := new(MockJobRepository)
|
||||
tagger := &JobClassTagger{
|
||||
rules: make(map[string]ruleInfo),
|
||||
parameters: make(map[string]any),
|
||||
tagType: "jobClass",
|
||||
repo: mockRepo,
|
||||
getStatistics: func(job *schema.Job) (map[string]schema.JobStatistics, error) {
|
||||
return map[string]schema.JobStatistics{
|
||||
"flops_any": {Min: 0, Max: 200, Avg: 150},
|
||||
}, nil
|
||||
},
|
||||
getMetricConfig: func(cluster, subCluster string) map[string]*schema.Metric {
|
||||
return map[string]*schema.Metric{
|
||||
"flops_any": {Peak: 1000, Normal: 100, Caution: 50, Alert: 10},
|
||||
}
|
||||
},
|
||||
}
|
||||
|
||||
// Add a rule manually or via prepareRule
|
||||
validRule := []byte(`{
|
||||
"name": "Test Rule",
|
||||
"tag": "high_flops",
|
||||
"parameters": [],
|
||||
"metrics": ["flops_any"],
|
||||
"requirements": [],
|
||||
"variables": [{"name": "avg_flops", "expr": "flops_any.avg"}],
|
||||
"rule": "avg_flops > 100",
|
||||
"hint": "High FLOPS: {{.avg_flops}}"
|
||||
}`)
|
||||
tagger.prepareRule(validRule, "test_rule.json")
|
||||
|
||||
jobID := int64(123)
|
||||
job := &schema.Job{
|
||||
ID: &jobID,
|
||||
JobID: 123,
|
||||
Cluster: "test_cluster",
|
||||
SubCluster: "test_subcluster",
|
||||
NumNodes: 2,
|
||||
NumHWThreads: 4,
|
||||
State: schema.JobStateCompleted,
|
||||
}
|
||||
|
||||
// Expectation: Rule matches
|
||||
// 1. Check if tag exists (return false)
|
||||
mockRepo.On("HasTag", jobID, "jobClass", "high_flops").Return(false)
|
||||
// 2. Add tag
|
||||
mockRepo.On("AddTagOrCreateDirect", jobID, "jobClass", "high_flops").Return(int64(1), nil)
|
||||
// 3. Update metadata
|
||||
mockRepo.On("UpdateMetadata", job, "message", mock.Anything).Return(nil)
|
||||
|
||||
tagger.Match(job)
|
||||
|
||||
mockRepo.AssertExpectations(t)
|
||||
}
|
||||
|
||||
func TestMatch_NoMatch(t *testing.T) {
|
||||
mockRepo := new(MockJobRepository)
|
||||
tagger := &JobClassTagger{
|
||||
rules: make(map[string]ruleInfo),
|
||||
parameters: make(map[string]any),
|
||||
tagType: "jobClass",
|
||||
repo: mockRepo,
|
||||
getStatistics: func(job *schema.Job) (map[string]schema.JobStatistics, error) {
|
||||
return map[string]schema.JobStatistics{
|
||||
"flops_any": {Min: 0, Max: 50, Avg: 20}, // Avg 20 < 100
|
||||
}, nil
|
||||
},
|
||||
getMetricConfig: func(cluster, subCluster string) map[string]*schema.Metric {
|
||||
return map[string]*schema.Metric{
|
||||
"flops_any": {Peak: 1000, Normal: 100, Caution: 50, Alert: 10},
|
||||
}
|
||||
},
|
||||
}
|
||||
|
||||
validRule := []byte(`{
|
||||
"name": "Test Rule",
|
||||
"tag": "high_flops",
|
||||
"parameters": [],
|
||||
"metrics": ["flops_any"],
|
||||
"requirements": [],
|
||||
"variables": [{"name": "avg_flops", "expr": "flops_any.avg"}],
|
||||
"rule": "avg_flops > 100",
|
||||
"hint": "High FLOPS"
|
||||
}`)
|
||||
tagger.prepareRule(validRule, "test_rule.json")
|
||||
|
||||
jobID := int64(123)
|
||||
job := &schema.Job{
|
||||
ID: &jobID,
|
||||
JobID: 123,
|
||||
Cluster: "test_cluster",
|
||||
SubCluster: "test_subcluster",
|
||||
NumNodes: 2,
|
||||
NumHWThreads: 4,
|
||||
State: schema.JobStateCompleted,
|
||||
}
|
||||
|
||||
// Expectation: Rule does NOT match, so no repo calls
|
||||
tagger.Match(job)
|
||||
|
||||
mockRepo.AssertExpectations(t)
|
||||
}
|
||||
@@ -2,6 +2,7 @@
|
||||
// All rights reserved. This file is part of cc-backend.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package tagger
|
||||
|
||||
import (
|
||||
@@ -28,9 +29,16 @@ type appInfo struct {
|
||||
strings []string
|
||||
}
|
||||
|
||||
// AppTagger detects applications by matching patterns in job scripts.
|
||||
// It loads application patterns from embedded files and can dynamically reload
|
||||
// configuration from a watched directory. When a job script matches a pattern,
|
||||
// the corresponding application tag is automatically applied.
|
||||
type AppTagger struct {
|
||||
// apps maps application tags to their matching patterns
|
||||
apps map[string]appInfo
|
||||
// tagType is the type of tag ("app")
|
||||
tagType string
|
||||
// cfgPath is the path to watch for configuration changes
|
||||
cfgPath string
|
||||
}
|
||||
|
||||
@@ -45,10 +53,14 @@ func (t *AppTagger) scanApp(f fs.File, fns string) {
|
||||
t.apps[ai.tag] = ai
|
||||
}
|
||||
|
||||
// EventMatch checks if a filesystem event should trigger configuration reload.
|
||||
// It returns true if the event path contains "apps".
|
||||
func (t *AppTagger) EventMatch(s string) bool {
|
||||
return strings.Contains(s, "apps")
|
||||
}
|
||||
|
||||
// EventCallback is triggered when the configuration directory changes.
|
||||
// It reloads all application pattern files from the watched directory.
|
||||
// FIXME: Only process the file that caused the event
|
||||
func (t *AppTagger) EventCallback() {
|
||||
files, err := os.ReadDir(t.cfgPath)
|
||||
@@ -67,6 +79,10 @@ func (t *AppTagger) EventCallback() {
|
||||
}
|
||||
}
|
||||
|
||||
// Register initializes the AppTagger by loading application patterns from embedded files.
|
||||
// It also sets up a file watch on ./var/tagger/apps if it exists, allowing for
|
||||
// dynamic configuration updates without restarting the application.
|
||||
// Returns an error if the embedded application files cannot be read.
|
||||
func (t *AppTagger) Register() error {
|
||||
t.cfgPath = "./var/tagger/apps"
|
||||
t.tagType = "app"
|
||||
@@ -96,6 +112,11 @@ func (t *AppTagger) Register() error {
|
||||
return nil
|
||||
}
|
||||
|
||||
// Match attempts to detect the application used by a job by analyzing its job script.
|
||||
// It fetches the job metadata, extracts the job script, and matches it against
|
||||
// all configured application patterns using regular expressions.
|
||||
// If a match is found, the corresponding application tag is added to the job.
|
||||
// Only the first matching application is tagged.
|
||||
func (t *AppTagger) Match(job *schema.Job) {
|
||||
r := repository.GetJobRepository()
|
||||
metadata, err := r.FetchMetadata(job)
|
||||
|
||||
@@ -2,6 +2,11 @@
|
||||
// All rights reserved. This file is part of cc-backend.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// Package tagger provides automatic job tagging functionality for cc-backend.
|
||||
// It supports detecting applications and classifying jobs based on configurable rules.
|
||||
// Tags are automatically applied when jobs start or stop, or can be applied retroactively
|
||||
// to existing jobs using RunTaggers.
|
||||
package tagger
|
||||
|
||||
import (
|
||||
@@ -12,8 +17,15 @@ import (
|
||||
"github.com/ClusterCockpit/cc-lib/schema"
|
||||
)
|
||||
|
||||
// Tagger is the interface that must be implemented by all tagging components.
|
||||
// Taggers can be registered at job start or stop events to automatically apply tags.
|
||||
type Tagger interface {
|
||||
// Register initializes the tagger and loads any required configuration.
|
||||
// It should be called once before the tagger is used.
|
||||
Register() error
|
||||
|
||||
// Match evaluates the tagger's rules against a job and applies appropriate tags.
|
||||
// It is called for each job that needs to be evaluated.
|
||||
Match(job *schema.Job)
|
||||
}
|
||||
|
||||
@@ -22,8 +34,12 @@ var (
|
||||
jobTagger *JobTagger
|
||||
)
|
||||
|
||||
// JobTagger coordinates multiple taggers that run at different job lifecycle events.
|
||||
// It maintains separate lists of taggers that run when jobs start and when they stop.
|
||||
type JobTagger struct {
|
||||
// startTaggers are applied when a job starts (e.g., application detection)
|
||||
startTaggers []Tagger
|
||||
// stopTaggers are applied when a job completes (e.g., job classification)
|
||||
stopTaggers []Tagger
|
||||
}
|
||||
|
||||
@@ -42,6 +58,9 @@ func newTagger() {
|
||||
}
|
||||
}
|
||||
|
||||
// Init initializes the job tagger system and registers it with the job repository.
|
||||
// This function is safe to call multiple times; initialization only occurs once.
|
||||
// It should be called during application startup.
|
||||
func Init() {
|
||||
initOnce.Do(func() {
|
||||
newTagger()
|
||||
@@ -49,22 +68,30 @@ func Init() {
|
||||
})
|
||||
}
|
||||
|
||||
// JobStartCallback is called when a job starts.
|
||||
// It runs all registered start taggers (e.g., application detection) on the job.
|
||||
func (jt *JobTagger) JobStartCallback(job *schema.Job) {
|
||||
for _, tagger := range jt.startTaggers {
|
||||
tagger.Match(job)
|
||||
}
|
||||
}
|
||||
|
||||
// JobStopCallback is called when a job completes.
|
||||
// It runs all registered stop taggers (e.g., job classification) on the job.
|
||||
func (jt *JobTagger) JobStopCallback(job *schema.Job) {
|
||||
for _, tagger := range jt.stopTaggers {
|
||||
tagger.Match(job)
|
||||
}
|
||||
}
|
||||
|
||||
// RunTaggers applies all configured taggers to all existing jobs in the repository.
|
||||
// This is useful for retroactively applying tags to jobs that were created before
|
||||
// the tagger system was initialized or when new tagging rules are added.
|
||||
// It fetches all jobs and runs both start and stop taggers on each one.
|
||||
func RunTaggers() error {
|
||||
newTagger()
|
||||
r := repository.GetJobRepository()
|
||||
jl, err := r.GetJobList()
|
||||
jl, err := r.GetJobList(0, 0) // 0 limit means get all jobs (no pagination)
|
||||
if err != nil {
|
||||
cclog.Errorf("Error while getting job list %s", err)
|
||||
return err
|
||||
|
||||
@@ -27,7 +27,7 @@ func RegisterLdapSyncService(ds string) {
|
||||
gocron.NewTask(
|
||||
func() {
|
||||
t := time.Now()
|
||||
cclog.Printf("ldap sync started at %s", t.Format(time.RFC3339))
|
||||
cclog.Infof("ldap sync started at %s", t.Format(time.RFC3339))
|
||||
if err := auth.LdapAuth.Sync(); err != nil {
|
||||
cclog.Errorf("ldap sync failed: %s", err.Error())
|
||||
}
|
||||
|
||||
@@ -25,8 +25,8 @@ func RegisterUpdateDurationWorker() {
|
||||
gocron.NewTask(
|
||||
func() {
|
||||
start := time.Now()
|
||||
cclog.Printf("Update duration started at %s\n", start.Format(time.RFC3339))
|
||||
cclog.Infof("Update duration started at %s", start.Format(time.RFC3339))
|
||||
jobRepo.UpdateDuration()
|
||||
cclog.Printf("Update duration is done and took %s\n", time.Since(start))
|
||||
cclog.Infof("Update duration is done and took %s", time.Since(start))
|
||||
}))
|
||||
}
|
||||
|
||||
@@ -34,7 +34,7 @@ func RegisterFootprintWorker() {
|
||||
c := 0
|
||||
ce := 0
|
||||
cl := 0
|
||||
cclog.Printf("Update Footprints started at %s\n", s.Format(time.RFC3339))
|
||||
cclog.Infof("Update Footprints started at %s", s.Format(time.RFC3339))
|
||||
|
||||
for _, cluster := range archive.Clusters {
|
||||
s_cluster := time.Now()
|
||||
@@ -136,6 +136,6 @@ func RegisterFootprintWorker() {
|
||||
}
|
||||
cclog.Debugf("Finish Cluster %s, took %s\n", cluster.Name, time.Since(s_cluster))
|
||||
}
|
||||
cclog.Printf("Updating %d (of %d; Skipped %d) Footprints is done and took %s\n", c, cl, ce, time.Since(s))
|
||||
cclog.Infof("Updating %d (of %d; Skipped %d) Footprints is done and took %s", c, cl, ce, time.Since(s))
|
||||
}))
|
||||
}
|
||||
|
||||
@@ -12,12 +12,40 @@ var configSchema = `
|
||||
"kind": {
|
||||
"description": "Backend type for job-archive",
|
||||
"type": "string",
|
||||
"enum": ["file", "s3"]
|
||||
"enum": ["file", "s3", "sqlite"]
|
||||
},
|
||||
"path": {
|
||||
"description": "Path to job archive for file backend",
|
||||
"type": "string"
|
||||
},
|
||||
"dbPath": {
|
||||
"description": "Path to SQLite database file for sqlite backend",
|
||||
"type": "string"
|
||||
},
|
||||
"endpoint": {
|
||||
"description": "S3 endpoint URL (for S3-compatible services like MinIO)",
|
||||
"type": "string"
|
||||
},
|
||||
"accessKey": {
|
||||
"description": "S3 access key ID",
|
||||
"type": "string"
|
||||
},
|
||||
"secretKey": {
|
||||
"description": "S3 secret access key",
|
||||
"type": "string"
|
||||
},
|
||||
"bucket": {
|
||||
"description": "S3 bucket name for job archive",
|
||||
"type": "string"
|
||||
},
|
||||
"region": {
|
||||
"description": "AWS region for S3 bucket",
|
||||
"type": "string"
|
||||
},
|
||||
"usePathStyle": {
|
||||
"description": "Use path-style S3 URLs (required for MinIO and some S3-compatible services)",
|
||||
"type": "boolean"
|
||||
},
|
||||
"compression": {
|
||||
"description": "Setup automatic compression for jobs older than number of days",
|
||||
"type": "integer"
|
||||
|
||||
@@ -3,7 +3,79 @@
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// Package archive implements the job archive interface and various backend implementations
|
||||
// Package archive implements the job archive interface and various backend implementations.
|
||||
//
|
||||
// The archive package provides a pluggable storage backend system for job metadata and performance data.
|
||||
// It supports three backend types:
|
||||
//
|
||||
// - file: Filesystem-based storage with hierarchical directory structure
|
||||
// - s3: AWS S3 and S3-compatible object storage (MinIO, localstack)
|
||||
// - sqlite: Single-file SQLite database with BLOB storage
|
||||
//
|
||||
// # Backend Selection
|
||||
//
|
||||
// Choose a backend based on your deployment requirements:
|
||||
//
|
||||
// - File: Best for single-server deployments with local fast storage
|
||||
// - S3: Best for distributed deployments requiring redundancy and multi-instance access
|
||||
// - SQLite: Best for portable archives with SQL query capability and transactional integrity
|
||||
//
|
||||
// # Configuration
|
||||
//
|
||||
// The archive backend is configured via JSON in the application config file:
|
||||
//
|
||||
// {
|
||||
// "archive": {
|
||||
// "kind": "file", // or "s3" or "sqlite"
|
||||
// "path": "/var/lib/archive" // for file backend
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// For S3 backend:
|
||||
//
|
||||
// {
|
||||
// "archive": {
|
||||
// "kind": "s3",
|
||||
// "bucket": "my-job-archive",
|
||||
// "region": "us-east-1",
|
||||
// "accessKey": "...",
|
||||
// "secretKey": "..."
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// For SQLite backend:
|
||||
//
|
||||
// {
|
||||
// "archive": {
|
||||
// "kind": "sqlite",
|
||||
// "dbPath": "/var/lib/archive.db"
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// # Usage
|
||||
//
|
||||
// The package is initialized once at application startup:
|
||||
//
|
||||
// err := archive.Init(rawConfig, false)
|
||||
// if err != nil {
|
||||
// log.Fatal(err)
|
||||
// }
|
||||
//
|
||||
// After initialization, use the global functions to interact with the archive:
|
||||
//
|
||||
// // Check if a job exists
|
||||
// exists := archive.GetHandle().Exists(job)
|
||||
//
|
||||
// // Load job metadata
|
||||
// jobMeta, err := archive.GetHandle().LoadJobMeta(job)
|
||||
//
|
||||
// // Store job metadata
|
||||
// err = archive.GetHandle().StoreJobMeta(job)
|
||||
//
|
||||
// # Thread Safety
|
||||
//
|
||||
// All backend implementations are safe for concurrent use. The package uses
|
||||
// internal locking for operations that modify shared state.
|
||||
package archive
|
||||
|
||||
import (
|
||||
@@ -18,45 +90,88 @@ import (
|
||||
"github.com/ClusterCockpit/cc-lib/schema"
|
||||
)
|
||||
|
||||
const Version uint64 = 2
|
||||
// Version is the current archive schema version.
|
||||
// The archive backend must match this version for compatibility.
|
||||
const Version uint64 = 3
|
||||
|
||||
// ArchiveBackend defines the interface that all archive storage backends must implement.
|
||||
// Implementations include FsArchive (filesystem), S3Archive (object storage), and SqliteArchive (database).
|
||||
//
|
||||
// All methods are safe for concurrent use unless otherwise noted.
|
||||
type ArchiveBackend interface {
|
||||
// Init initializes the archive backend with the provided configuration.
|
||||
// Returns the archive version found in the backend storage.
|
||||
// Returns an error if the version is incompatible or initialization fails.
|
||||
Init(rawConfig json.RawMessage) (uint64, error)
|
||||
|
||||
// Info prints archive statistics to stdout, including job counts,
|
||||
// date ranges, and storage sizes per cluster.
|
||||
Info()
|
||||
|
||||
// Exists checks if a job with the given ID, cluster, and start time
|
||||
// exists in the archive.
|
||||
Exists(job *schema.Job) bool
|
||||
|
||||
// LoadJobMeta loads job metadata from the archive.
|
||||
// Returns the complete Job structure including resources, tags, and statistics.
|
||||
LoadJobMeta(job *schema.Job) (*schema.Job, error)
|
||||
|
||||
// LoadJobData loads the complete time-series performance data for a job.
|
||||
// Returns a map of metric names to their scoped data (node, socket, core, etc.).
|
||||
LoadJobData(job *schema.Job) (schema.JobData, error)
|
||||
|
||||
// LoadJobStats loads pre-computed statistics from the job data.
|
||||
// Returns scoped statistics (min, max, avg) for all metrics.
|
||||
LoadJobStats(job *schema.Job) (schema.ScopedJobStats, error)
|
||||
|
||||
// LoadClusterCfg loads the cluster configuration.
|
||||
// Returns the cluster topology, metrics, and hardware specifications.
|
||||
LoadClusterCfg(name string) (*schema.Cluster, error)
|
||||
|
||||
// StoreJobMeta stores job metadata to the archive.
|
||||
// Overwrites existing metadata for the same job ID, cluster, and start time.
|
||||
StoreJobMeta(jobMeta *schema.Job) error
|
||||
|
||||
// ImportJob stores both job metadata and performance data to the archive.
|
||||
// This is typically used during initial job archiving.
|
||||
ImportJob(jobMeta *schema.Job, jobData *schema.JobData) error
|
||||
|
||||
// GetClusters returns a list of all cluster names found in the archive.
|
||||
GetClusters() []string
|
||||
|
||||
// CleanUp removes the specified jobs from the archive.
|
||||
// Used by retention policies to delete old jobs.
|
||||
CleanUp(jobs []*schema.Job)
|
||||
|
||||
// Move relocates jobs to a different path within the archive.
|
||||
// The implementation depends on the backend type.
|
||||
Move(jobs []*schema.Job, path string)
|
||||
|
||||
// Clean removes jobs outside the specified time range.
|
||||
// Jobs with start_time < before OR start_time > after are deleted.
|
||||
// Set after=0 to only use the before parameter.
|
||||
Clean(before int64, after int64)
|
||||
|
||||
// Compress compresses job data files to save storage space.
|
||||
// For filesystem and SQLite backends, this applies gzip compression.
|
||||
// For S3, this compresses and replaces objects.
|
||||
Compress(jobs []*schema.Job)
|
||||
|
||||
// CompressLast returns the timestamp of the last compression run
|
||||
// and updates it to the provided starttime.
|
||||
CompressLast(starttime int64) int64
|
||||
|
||||
// Iter returns a channel that yields all jobs in the archive.
|
||||
// If loadMetricData is true, includes performance data; otherwise only metadata.
|
||||
// The channel is closed when iteration completes.
|
||||
Iter(loadMetricData bool) <-chan JobContainer
|
||||
}
|
||||
|
||||
// JobContainer combines job metadata and optional performance data.
|
||||
// Used by Iter() to yield jobs during archive iteration.
|
||||
type JobContainer struct {
|
||||
Meta *schema.Job
|
||||
Data *schema.JobData
|
||||
Meta *schema.Job // Job metadata (always present)
|
||||
Data *schema.JobData // Performance data (nil if not loaded)
|
||||
}
|
||||
|
||||
var (
|
||||
@@ -67,6 +182,15 @@ var (
|
||||
mutex sync.Mutex
|
||||
)
|
||||
|
||||
// Init initializes the archive backend with the provided configuration.
|
||||
// Must be called once at application startup before using any archive functions.
|
||||
//
|
||||
// Parameters:
|
||||
// - rawConfig: JSON configuration for the archive backend
|
||||
// - disableArchive: if true, disables archive functionality
|
||||
//
|
||||
// The configuration determines which backend is used (file, s3, or sqlite).
|
||||
// Returns an error if initialization fails or version is incompatible.
|
||||
func Init(rawConfig json.RawMessage, disableArchive bool) error {
|
||||
var err error
|
||||
|
||||
@@ -86,8 +210,10 @@ func Init(rawConfig json.RawMessage, disableArchive bool) error {
|
||||
switch cfg.Kind {
|
||||
case "file":
|
||||
ar = &FsArchive{}
|
||||
// case "s3":
|
||||
// ar = &S3Archive{}
|
||||
case "s3":
|
||||
ar = &S3Archive{}
|
||||
case "sqlite":
|
||||
ar = &SqliteArchive{}
|
||||
default:
|
||||
err = fmt.Errorf("ARCHIVE/ARCHIVE > unkown archive backend '%s''", cfg.Kind)
|
||||
}
|
||||
@@ -106,10 +232,59 @@ func Init(rawConfig json.RawMessage, disableArchive bool) error {
|
||||
return err
|
||||
}
|
||||
|
||||
// GetHandle returns the initialized archive backend instance.
|
||||
// Must be called after Init().
|
||||
func GetHandle() ArchiveBackend {
|
||||
return ar
|
||||
}
|
||||
|
||||
// InitBackend creates and initializes a new archive backend instance
|
||||
// without affecting the global singleton. This is useful for archive migration
|
||||
// tools that need to work with multiple archive backends simultaneously.
|
||||
//
|
||||
// Parameters:
|
||||
// - rawConfig: JSON configuration for the archive backend
|
||||
//
|
||||
// Returns the initialized backend instance or an error if initialization fails.
|
||||
// Does not validate the configuration against the schema.
|
||||
func InitBackend(rawConfig json.RawMessage) (ArchiveBackend, error) {
|
||||
var cfg struct {
|
||||
Kind string `json:"kind"`
|
||||
}
|
||||
|
||||
if err := json.Unmarshal(rawConfig, &cfg); err != nil {
|
||||
cclog.Warn("Error while unmarshaling raw config json")
|
||||
return nil, err
|
||||
}
|
||||
|
||||
var backend ArchiveBackend
|
||||
switch cfg.Kind {
|
||||
case "file":
|
||||
backend = &FsArchive{}
|
||||
case "s3":
|
||||
backend = &S3Archive{}
|
||||
case "sqlite":
|
||||
backend = &SqliteArchive{}
|
||||
default:
|
||||
return nil, fmt.Errorf("ARCHIVE/ARCHIVE > unknown archive backend '%s'", cfg.Kind)
|
||||
}
|
||||
|
||||
_, err := backend.Init(rawConfig)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error while initializing archive backend: %w", err)
|
||||
}
|
||||
|
||||
return backend, nil
|
||||
}
|
||||
|
||||
|
||||
// LoadAveragesFromArchive loads average metric values for a job from the archive.
|
||||
// This is a helper function that extracts average values from job statistics.
|
||||
//
|
||||
// Parameters:
|
||||
// - job: Job to load averages for
|
||||
// - metrics: List of metric names to retrieve
|
||||
// - data: 2D slice where averages will be appended (one row per metric)
|
||||
func LoadAveragesFromArchive(
|
||||
job *schema.Job,
|
||||
metrics []string,
|
||||
@@ -132,6 +307,8 @@ func LoadAveragesFromArchive(
|
||||
return nil
|
||||
}
|
||||
|
||||
// LoadStatsFromArchive loads metric statistics for a job from the archive.
|
||||
// Returns a map of metric names to their statistics (min, max, avg).
|
||||
func LoadStatsFromArchive(
|
||||
job *schema.Job,
|
||||
metrics []string,
|
||||
@@ -160,6 +337,8 @@ func LoadStatsFromArchive(
|
||||
return data, nil
|
||||
}
|
||||
|
||||
// LoadScopedStatsFromArchive loads scoped statistics for a job from the archive.
|
||||
// Returns statistics organized by metric scope (node, socket, core, etc.).
|
||||
func LoadScopedStatsFromArchive(
|
||||
job *schema.Job,
|
||||
metrics []string,
|
||||
@@ -174,6 +353,8 @@ func LoadScopedStatsFromArchive(
|
||||
return data, nil
|
||||
}
|
||||
|
||||
// GetStatistics returns all metric statistics for a job.
|
||||
// Returns a map of metric names to their job-level statistics.
|
||||
func GetStatistics(job *schema.Job) (map[string]schema.JobStatistics, error) {
|
||||
metaFile, err := ar.LoadJobMeta(job)
|
||||
if err != nil {
|
||||
@@ -184,8 +365,10 @@ func GetStatistics(job *schema.Job) (map[string]schema.JobStatistics, error) {
|
||||
return metaFile.Statistics, nil
|
||||
}
|
||||
|
||||
// UpdateMetadata checks if the job is archived, find its `meta.json` file and override the Metadata
|
||||
// in that JSON file. If the job is not archived, nothing is done.
|
||||
// UpdateMetadata updates the metadata map for an archived job.
|
||||
// If the job is still running or archiving is disabled, this is a no-op.
|
||||
//
|
||||
// This function is safe for concurrent use (protected by mutex).
|
||||
func UpdateMetadata(job *schema.Job, metadata map[string]string) error {
|
||||
mutex.Lock()
|
||||
defer mutex.Unlock()
|
||||
@@ -205,8 +388,10 @@ func UpdateMetadata(job *schema.Job, metadata map[string]string) error {
|
||||
return ar.StoreJobMeta(jobMeta)
|
||||
}
|
||||
|
||||
// UpdateTags checks if the job is archived, find its `meta.json` file and override the tags list
|
||||
// in that JSON file. If the job is not archived, nothing is done.
|
||||
// UpdateTags updates the tag list for an archived job.
|
||||
// If the job is still running or archiving is disabled, this is a no-op.
|
||||
//
|
||||
// This function is safe for concurrent use (protected by mutex).
|
||||
func UpdateTags(job *schema.Job, tags []*schema.Tag) error {
|
||||
mutex.Lock()
|
||||
defer mutex.Unlock()
|
||||
|
||||
@@ -28,20 +28,26 @@ import (
|
||||
"github.com/santhosh-tekuri/jsonschema/v5"
|
||||
)
|
||||
|
||||
// FsArchiveConfig holds the configuration for the filesystem archive backend.
|
||||
type FsArchiveConfig struct {
|
||||
Path string `json:"path"`
|
||||
Path string `json:"path"` // Root directory path for the archive
|
||||
}
|
||||
|
||||
// FsArchive implements ArchiveBackend using a hierarchical filesystem structure.
|
||||
// Jobs are stored in directories organized by cluster, job ID, and start time.
|
||||
//
|
||||
// Directory structure: <path>/<cluster>/<jobid/1000>/<jobid%1000>/<starttime>/
|
||||
type FsArchive struct {
|
||||
path string
|
||||
clusters []string
|
||||
path string // Root path of the archive
|
||||
clusters []string // List of discovered cluster names
|
||||
}
|
||||
|
||||
// clusterInfo holds statistics about jobs in a cluster.
|
||||
type clusterInfo struct {
|
||||
numJobs int
|
||||
dateFirst int64
|
||||
dateLast int64
|
||||
diskSize float64
|
||||
numJobs int // Total number of jobs
|
||||
dateFirst int64 // Unix timestamp of oldest job
|
||||
dateLast int64 // Unix timestamp of newest job
|
||||
diskSize float64 // Total disk usage in MB
|
||||
}
|
||||
|
||||
func getDirectory(
|
||||
@@ -509,7 +515,6 @@ func (fsa *FsArchive) Iter(loadMetricData bool) <-chan JobContainer {
|
||||
cclog.Errorf("in %s: %s", filepath.Join(dirpath, startTimeDir.Name()), err.Error())
|
||||
}
|
||||
ch <- JobContainer{Meta: job, Data: &data}
|
||||
cclog.Errorf("in %s: %s", filepath.Join(dirpath, startTimeDir.Name()), err.Error())
|
||||
} else {
|
||||
ch <- JobContainer{Meta: job, Data: nil}
|
||||
}
|
||||
|
||||
@@ -47,7 +47,7 @@ func TestInit(t *testing.T) {
|
||||
if fsa.path != "testdata/archive" {
|
||||
t.Fail()
|
||||
}
|
||||
if version != 2 {
|
||||
if version != 3 {
|
||||
t.Fail()
|
||||
}
|
||||
if len(fsa.clusters) != 3 || fsa.clusters[1] != "emmy" {
|
||||
|
||||
835
pkg/archive/s3Backend.go
Normal file
835
pkg/archive/s3Backend.go
Normal file
@@ -0,0 +1,835 @@
|
||||
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
// All rights reserved. This file is part of cc-backend.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package archive
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"compress/gzip"
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"math"
|
||||
"os"
|
||||
"strconv"
|
||||
"strings"
|
||||
"text/tabwriter"
|
||||
"time"
|
||||
|
||||
"github.com/ClusterCockpit/cc-backend/internal/config"
|
||||
cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
|
||||
"github.com/ClusterCockpit/cc-lib/schema"
|
||||
"github.com/ClusterCockpit/cc-lib/util"
|
||||
"github.com/aws/aws-sdk-go-v2/aws"
|
||||
awsconfig "github.com/aws/aws-sdk-go-v2/config"
|
||||
"github.com/aws/aws-sdk-go-v2/credentials"
|
||||
"github.com/aws/aws-sdk-go-v2/service/s3"
|
||||
)
|
||||
|
||||
// S3ArchiveConfig is the JSON configuration of the S3 archive backend.
type S3ArchiveConfig struct {
	// Endpoint is the S3 endpoint URL (optional, for MinIO/localstack).
	Endpoint string `json:"endpoint"`
	// AccessKey is the AWS access key ID.
	AccessKey string `json:"accessKey"`
	// SecretKey is the AWS secret access key.
	SecretKey string `json:"secretKey"`
	// Bucket is the S3 bucket name.
	Bucket string `json:"bucket"`
	// Region is the AWS region.
	Region string `json:"region"`
	// UsePathStyle enables path-style URLs (required for MinIO).
	UsePathStyle bool `json:"usePathStyle"`
}
|
||||
|
||||
// S3Archive implements ArchiveBackend using AWS S3 or S3-compatible object storage.
|
||||
// Jobs are stored as objects with keys mirroring the filesystem structure.
|
||||
//
|
||||
// Object key structure: <cluster>/<jobid/1000>/<jobid%1000>/<starttime>/meta.json
|
||||
type S3Archive struct {
|
||||
client *s3.Client // AWS S3 client
|
||||
bucket string // S3 bucket name
|
||||
clusters []string // List of discovered cluster names
|
||||
}
|
||||
|
||||
// getS3Key generates the S3 object key for a job file
|
||||
func getS3Key(job *schema.Job, file string) string {
|
||||
lvl1 := fmt.Sprintf("%d", job.JobID/1000)
|
||||
lvl2 := fmt.Sprintf("%03d", job.JobID%1000)
|
||||
startTime := strconv.FormatInt(job.StartTime, 10)
|
||||
return fmt.Sprintf("%s/%s/%s/%s/%s", job.Cluster, lvl1, lvl2, startTime, file)
|
||||
}
|
||||
|
||||
// getS3Directory generates the S3 key prefix for a job directory
|
||||
func getS3Directory(job *schema.Job) string {
|
||||
lvl1 := fmt.Sprintf("%d", job.JobID/1000)
|
||||
lvl2 := fmt.Sprintf("%03d", job.JobID%1000)
|
||||
startTime := strconv.FormatInt(job.StartTime, 10)
|
||||
return fmt.Sprintf("%s/%s/%s/%s/", job.Cluster, lvl1, lvl2, startTime)
|
||||
}
|
||||
|
||||
func (s3a *S3Archive) Init(rawConfig json.RawMessage) (uint64, error) {
|
||||
var cfg S3ArchiveConfig
|
||||
if err := json.Unmarshal(rawConfig, &cfg); err != nil {
|
||||
cclog.Warnf("S3Archive Init() > Unmarshal error: %#v", err)
|
||||
return 0, err
|
||||
}
|
||||
|
||||
if cfg.Bucket == "" {
|
||||
err := fmt.Errorf("S3Archive Init(): empty bucket name")
|
||||
cclog.Errorf("S3Archive Init() > config error: %v", err)
|
||||
return 0, err
|
||||
}
|
||||
|
||||
if cfg.Region == "" {
|
||||
cfg.Region = "us-east-1" // Default region
|
||||
}
|
||||
|
||||
ctx := context.Background()
|
||||
|
||||
// Create custom AWS config
|
||||
var awsCfg aws.Config
|
||||
var err error
|
||||
|
||||
if cfg.AccessKey != "" && cfg.SecretKey != "" {
|
||||
// Use static credentials
|
||||
awsCfg, err = awsconfig.LoadDefaultConfig(ctx,
|
||||
awsconfig.WithRegion(cfg.Region),
|
||||
awsconfig.WithCredentialsProvider(credentials.NewStaticCredentialsProvider(
|
||||
cfg.AccessKey,
|
||||
cfg.SecretKey,
|
||||
"",
|
||||
)),
|
||||
)
|
||||
} else {
|
||||
// Use default credential chain
|
||||
awsCfg, err = awsconfig.LoadDefaultConfig(ctx,
|
||||
awsconfig.WithRegion(cfg.Region),
|
||||
)
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
cclog.Errorf("S3Archive Init() > failed to load AWS config: %v", err)
|
||||
return 0, err
|
||||
}
|
||||
|
||||
// Create S3 client with path-style option and custom endpoint if specified
|
||||
s3a.client = s3.NewFromConfig(awsCfg, func(o *s3.Options) {
|
||||
o.UsePathStyle = cfg.UsePathStyle
|
||||
if cfg.Endpoint != "" {
|
||||
o.BaseEndpoint = aws.String(cfg.Endpoint)
|
||||
}
|
||||
})
|
||||
s3a.bucket = cfg.Bucket
|
||||
|
||||
// Check if bucket exists and is accessible
|
||||
_, err = s3a.client.HeadBucket(ctx, &s3.HeadBucketInput{
|
||||
Bucket: aws.String(s3a.bucket),
|
||||
})
|
||||
if err != nil {
|
||||
cclog.Errorf("S3Archive Init() > bucket access error: %v", err)
|
||||
return 0, fmt.Errorf("cannot access S3 bucket '%s': %w", s3a.bucket, err)
|
||||
}
|
||||
|
||||
// Read version.txt from S3
|
||||
versionKey := "version.txt"
|
||||
result, err := s3a.client.GetObject(ctx, &s3.GetObjectInput{
|
||||
Bucket: aws.String(s3a.bucket),
|
||||
Key: aws.String(versionKey),
|
||||
})
|
||||
if err != nil {
|
||||
cclog.Warnf("S3Archive Init() > cannot read version.txt: %v", err)
|
||||
return 0, err
|
||||
}
|
||||
defer result.Body.Close()
|
||||
|
||||
versionBytes, err := io.ReadAll(result.Body)
|
||||
if err != nil {
|
||||
cclog.Errorf("S3Archive Init() > failed to read version.txt: %v", err)
|
||||
return 0, err
|
||||
}
|
||||
|
||||
version, err := strconv.ParseUint(strings.TrimSuffix(string(versionBytes), "\n"), 10, 64)
|
||||
if err != nil {
|
||||
cclog.Errorf("S3Archive Init() > version parse error: %v", err)
|
||||
return 0, err
|
||||
}
|
||||
|
||||
if version != Version {
|
||||
return version, fmt.Errorf("unsupported version %d, need %d", version, Version)
|
||||
}
|
||||
|
||||
// Discover clusters by listing top-level prefixes
|
||||
s3a.clusters = []string{}
|
||||
paginator := s3.NewListObjectsV2Paginator(s3a.client, &s3.ListObjectsV2Input{
|
||||
Bucket: aws.String(s3a.bucket),
|
||||
Delimiter: aws.String("/"),
|
||||
})
|
||||
|
||||
for paginator.HasMorePages() {
|
||||
page, err := paginator.NextPage(ctx)
|
||||
if err != nil {
|
||||
cclog.Errorf("S3Archive Init() > failed to list clusters: %v", err)
|
||||
return 0, err
|
||||
}
|
||||
|
||||
for _, prefix := range page.CommonPrefixes {
|
||||
if prefix.Prefix != nil {
|
||||
clusterName := strings.TrimSuffix(*prefix.Prefix, "/")
|
||||
// Filter out non-cluster entries
|
||||
if clusterName != "" && clusterName != "version.txt" {
|
||||
s3a.clusters = append(s3a.clusters, clusterName)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
cclog.Infof("S3Archive initialized with bucket '%s', found %d clusters", s3a.bucket, len(s3a.clusters))
|
||||
return version, nil
|
||||
}
|
||||
|
||||
func (s3a *S3Archive) Info() {
|
||||
ctx := context.Background()
|
||||
fmt.Printf("S3 Job archive bucket: %s\n", s3a.bucket)
|
||||
|
||||
ci := make(map[string]*clusterInfo)
|
||||
|
||||
for _, cluster := range s3a.clusters {
|
||||
ci[cluster] = &clusterInfo{dateFirst: time.Now().Unix()}
|
||||
|
||||
// List all jobs for this cluster
|
||||
prefix := cluster + "/"
|
||||
paginator := s3.NewListObjectsV2Paginator(s3a.client, &s3.ListObjectsV2Input{
|
||||
Bucket: aws.String(s3a.bucket),
|
||||
Prefix: aws.String(prefix),
|
||||
})
|
||||
|
||||
for paginator.HasMorePages() {
|
||||
page, err := paginator.NextPage(ctx)
|
||||
if err != nil {
|
||||
cclog.Fatalf("S3Archive Info() > failed to list objects: %s", err.Error())
|
||||
}
|
||||
|
||||
for _, obj := range page.Contents {
|
||||
if obj.Key != nil && strings.HasSuffix(*obj.Key, "/meta.json") {
|
||||
ci[cluster].numJobs++
|
||||
// Extract starttime from key: cluster/lvl1/lvl2/starttime/meta.json
|
||||
parts := strings.Split(*obj.Key, "/")
|
||||
if len(parts) >= 4 {
|
||||
startTime, err := strconv.ParseInt(parts[3], 10, 64)
|
||||
if err == nil {
|
||||
ci[cluster].dateFirst = util.Min(ci[cluster].dateFirst, startTime)
|
||||
ci[cluster].dateLast = util.Max(ci[cluster].dateLast, startTime)
|
||||
}
|
||||
}
|
||||
if obj.Size != nil {
|
||||
ci[cluster].diskSize += float64(*obj.Size) / (1024 * 1024) // Convert to MB
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
cit := clusterInfo{dateFirst: time.Now().Unix()}
|
||||
w := tabwriter.NewWriter(os.Stdout, 0, 0, 2, ' ', tabwriter.Debug)
|
||||
fmt.Fprintln(w, "cluster\t#jobs\tfrom\tto\tsize (MB)")
|
||||
for cluster, clusterInfo := range ci {
|
||||
fmt.Fprintf(w, "%s\t%d\t%s\t%s\t%.2f\n", cluster,
|
||||
clusterInfo.numJobs,
|
||||
time.Unix(clusterInfo.dateFirst, 0),
|
||||
time.Unix(clusterInfo.dateLast, 0),
|
||||
clusterInfo.diskSize)
|
||||
|
||||
cit.numJobs += clusterInfo.numJobs
|
||||
cit.dateFirst = util.Min(cit.dateFirst, clusterInfo.dateFirst)
|
||||
cit.dateLast = util.Max(cit.dateLast, clusterInfo.dateLast)
|
||||
cit.diskSize += clusterInfo.diskSize
|
||||
}
|
||||
|
||||
fmt.Fprintf(w, "TOTAL\t%d\t%s\t%s\t%.2f\n",
|
||||
cit.numJobs, time.Unix(cit.dateFirst, 0), time.Unix(cit.dateLast, 0), cit.diskSize)
|
||||
w.Flush()
|
||||
}
|
||||
|
||||
func (s3a *S3Archive) Exists(job *schema.Job) bool {
|
||||
ctx := context.Background()
|
||||
key := getS3Key(job, "meta.json")
|
||||
|
||||
_, err := s3a.client.HeadObject(ctx, &s3.HeadObjectInput{
|
||||
Bucket: aws.String(s3a.bucket),
|
||||
Key: aws.String(key),
|
||||
})
|
||||
|
||||
return err == nil
|
||||
}
|
||||
|
||||
func (s3a *S3Archive) LoadJobMeta(job *schema.Job) (*schema.Job, error) {
|
||||
ctx := context.Background()
|
||||
key := getS3Key(job, "meta.json")
|
||||
|
||||
result, err := s3a.client.GetObject(ctx, &s3.GetObjectInput{
|
||||
Bucket: aws.String(s3a.bucket),
|
||||
Key: aws.String(key),
|
||||
})
|
||||
if err != nil {
|
||||
cclog.Errorf("S3Archive LoadJobMeta() > GetObject error: %v", err)
|
||||
return nil, err
|
||||
}
|
||||
defer result.Body.Close()
|
||||
|
||||
b, err := io.ReadAll(result.Body)
|
||||
if err != nil {
|
||||
cclog.Errorf("S3Archive LoadJobMeta() > read error: %v", err)
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if config.Keys.Validate {
|
||||
if err := schema.Validate(schema.Meta, bytes.NewReader(b)); err != nil {
|
||||
return nil, fmt.Errorf("validate job meta: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
return DecodeJobMeta(bytes.NewReader(b))
|
||||
}
|
||||
|
||||
func (s3a *S3Archive) LoadJobData(job *schema.Job) (schema.JobData, error) {
|
||||
ctx := context.Background()
|
||||
|
||||
// Try compressed file first
|
||||
keyGz := getS3Key(job, "data.json.gz")
|
||||
result, err := s3a.client.GetObject(ctx, &s3.GetObjectInput{
|
||||
Bucket: aws.String(s3a.bucket),
|
||||
Key: aws.String(keyGz),
|
||||
})
|
||||
if err != nil {
|
||||
// Try uncompressed file
|
||||
key := getS3Key(job, "data.json")
|
||||
result, err = s3a.client.GetObject(ctx, &s3.GetObjectInput{
|
||||
Bucket: aws.String(s3a.bucket),
|
||||
Key: aws.String(key),
|
||||
})
|
||||
if err != nil {
|
||||
cclog.Errorf("S3Archive LoadJobData() > GetObject error: %v", err)
|
||||
return nil, err
|
||||
}
|
||||
defer result.Body.Close()
|
||||
|
||||
if config.Keys.Validate {
|
||||
b, _ := io.ReadAll(result.Body)
|
||||
if err := schema.Validate(schema.Data, bytes.NewReader(b)); err != nil {
|
||||
return schema.JobData{}, fmt.Errorf("validate job data: %v", err)
|
||||
}
|
||||
return DecodeJobData(bytes.NewReader(b), key)
|
||||
}
|
||||
return DecodeJobData(result.Body, key)
|
||||
}
|
||||
defer result.Body.Close()
|
||||
|
||||
// Decompress
|
||||
r, err := gzip.NewReader(result.Body)
|
||||
if err != nil {
|
||||
cclog.Errorf("S3Archive LoadJobData() > gzip error: %v", err)
|
||||
return nil, err
|
||||
}
|
||||
defer r.Close()
|
||||
|
||||
if config.Keys.Validate {
|
||||
b, _ := io.ReadAll(r)
|
||||
if err := schema.Validate(schema.Data, bytes.NewReader(b)); err != nil {
|
||||
return schema.JobData{}, fmt.Errorf("validate job data: %v", err)
|
||||
}
|
||||
return DecodeJobData(bytes.NewReader(b), keyGz)
|
||||
}
|
||||
return DecodeJobData(r, keyGz)
|
||||
}
|
||||
|
||||
func (s3a *S3Archive) LoadJobStats(job *schema.Job) (schema.ScopedJobStats, error) {
|
||||
ctx := context.Background()
|
||||
|
||||
// Try compressed file first
|
||||
keyGz := getS3Key(job, "data.json.gz")
|
||||
result, err := s3a.client.GetObject(ctx, &s3.GetObjectInput{
|
||||
Bucket: aws.String(s3a.bucket),
|
||||
Key: aws.String(keyGz),
|
||||
})
|
||||
if err != nil {
|
||||
// Try uncompressed file
|
||||
key := getS3Key(job, "data.json")
|
||||
result, err = s3a.client.GetObject(ctx, &s3.GetObjectInput{
|
||||
Bucket: aws.String(s3a.bucket),
|
||||
Key: aws.String(key),
|
||||
})
|
||||
if err != nil {
|
||||
cclog.Errorf("S3Archive LoadJobStats() > GetObject error: %v", err)
|
||||
return nil, err
|
||||
}
|
||||
defer result.Body.Close()
|
||||
|
||||
if config.Keys.Validate {
|
||||
b, _ := io.ReadAll(result.Body)
|
||||
if err := schema.Validate(schema.Data, bytes.NewReader(b)); err != nil {
|
||||
return nil, fmt.Errorf("validate job data: %v", err)
|
||||
}
|
||||
return DecodeJobStats(bytes.NewReader(b), key)
|
||||
}
|
||||
return DecodeJobStats(result.Body, key)
|
||||
}
|
||||
defer result.Body.Close()
|
||||
|
||||
// Decompress
|
||||
r, err := gzip.NewReader(result.Body)
|
||||
if err != nil {
|
||||
cclog.Errorf("S3Archive LoadJobStats() > gzip error: %v", err)
|
||||
return nil, err
|
||||
}
|
||||
defer r.Close()
|
||||
|
||||
if config.Keys.Validate {
|
||||
b, _ := io.ReadAll(r)
|
||||
if err := schema.Validate(schema.Data, bytes.NewReader(b)); err != nil {
|
||||
return nil, fmt.Errorf("validate job data: %v", err)
|
||||
}
|
||||
return DecodeJobStats(bytes.NewReader(b), keyGz)
|
||||
}
|
||||
return DecodeJobStats(r, keyGz)
|
||||
}
|
||||
|
||||
func (s3a *S3Archive) LoadClusterCfg(name string) (*schema.Cluster, error) {
|
||||
ctx := context.Background()
|
||||
key := fmt.Sprintf("%s/cluster.json", name)
|
||||
|
||||
result, err := s3a.client.GetObject(ctx, &s3.GetObjectInput{
|
||||
Bucket: aws.String(s3a.bucket),
|
||||
Key: aws.String(key),
|
||||
})
|
||||
if err != nil {
|
||||
cclog.Errorf("S3Archive LoadClusterCfg() > GetObject error: %v", err)
|
||||
return nil, err
|
||||
}
|
||||
defer result.Body.Close()
|
||||
|
||||
b, err := io.ReadAll(result.Body)
|
||||
if err != nil {
|
||||
cclog.Errorf("S3Archive LoadClusterCfg() > read error: %v", err)
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if err := schema.Validate(schema.ClusterCfg, bytes.NewReader(b)); err != nil {
|
||||
cclog.Warnf("Validate cluster config: %v\n", err)
|
||||
return &schema.Cluster{}, fmt.Errorf("validate cluster config: %v", err)
|
||||
}
|
||||
|
||||
return DecodeCluster(bytes.NewReader(b))
|
||||
}
|
||||
|
||||
func (s3a *S3Archive) StoreJobMeta(job *schema.Job) error {
|
||||
ctx := context.Background()
|
||||
key := getS3Key(job, "meta.json")
|
||||
|
||||
var buf bytes.Buffer
|
||||
if err := EncodeJobMeta(&buf, job); err != nil {
|
||||
cclog.Error("S3Archive StoreJobMeta() > encoding error")
|
||||
return err
|
||||
}
|
||||
|
||||
_, err := s3a.client.PutObject(ctx, &s3.PutObjectInput{
|
||||
Bucket: aws.String(s3a.bucket),
|
||||
Key: aws.String(key),
|
||||
Body: bytes.NewReader(buf.Bytes()),
|
||||
})
|
||||
if err != nil {
|
||||
cclog.Errorf("S3Archive StoreJobMeta() > PutObject error: %v", err)
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s3a *S3Archive) ImportJob(jobMeta *schema.Job, jobData *schema.JobData) error {
|
||||
ctx := context.Background()
|
||||
|
||||
// Upload meta.json
|
||||
metaKey := getS3Key(jobMeta, "meta.json")
|
||||
var metaBuf bytes.Buffer
|
||||
if err := EncodeJobMeta(&metaBuf, jobMeta); err != nil {
|
||||
cclog.Error("S3Archive ImportJob() > encoding meta error")
|
||||
return err
|
||||
}
|
||||
|
||||
_, err := s3a.client.PutObject(ctx, &s3.PutObjectInput{
|
||||
Bucket: aws.String(s3a.bucket),
|
||||
Key: aws.String(metaKey),
|
||||
Body: bytes.NewReader(metaBuf.Bytes()),
|
||||
})
|
||||
if err != nil {
|
||||
cclog.Errorf("S3Archive ImportJob() > PutObject meta error: %v", err)
|
||||
return err
|
||||
}
|
||||
|
||||
// Upload data.json
|
||||
dataKey := getS3Key(jobMeta, "data.json")
|
||||
var dataBuf bytes.Buffer
|
||||
if err := EncodeJobData(&dataBuf, jobData); err != nil {
|
||||
cclog.Error("S3Archive ImportJob() > encoding data error")
|
||||
return err
|
||||
}
|
||||
|
||||
_, err = s3a.client.PutObject(ctx, &s3.PutObjectInput{
|
||||
Bucket: aws.String(s3a.bucket),
|
||||
Key: aws.String(dataKey),
|
||||
Body: bytes.NewReader(dataBuf.Bytes()),
|
||||
})
|
||||
if err != nil {
|
||||
cclog.Errorf("S3Archive ImportJob() > PutObject data error: %v", err)
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s3a *S3Archive) GetClusters() []string {
|
||||
return s3a.clusters
|
||||
}
|
||||
|
||||
func (s3a *S3Archive) CleanUp(jobs []*schema.Job) {
|
||||
ctx := context.Background()
|
||||
start := time.Now()
|
||||
|
||||
for _, job := range jobs {
|
||||
if job == nil {
|
||||
cclog.Errorf("S3Archive CleanUp() error: job is nil")
|
||||
continue
|
||||
}
|
||||
|
||||
// Delete all files in the job directory
|
||||
prefix := getS3Directory(job)
|
||||
|
||||
paginator := s3.NewListObjectsV2Paginator(s3a.client, &s3.ListObjectsV2Input{
|
||||
Bucket: aws.String(s3a.bucket),
|
||||
Prefix: aws.String(prefix),
|
||||
})
|
||||
|
||||
for paginator.HasMorePages() {
|
||||
page, err := paginator.NextPage(ctx)
|
||||
if err != nil {
|
||||
cclog.Errorf("S3Archive CleanUp() > list error: %v", err)
|
||||
continue
|
||||
}
|
||||
|
||||
for _, obj := range page.Contents {
|
||||
if obj.Key != nil {
|
||||
_, err := s3a.client.DeleteObject(ctx, &s3.DeleteObjectInput{
|
||||
Bucket: aws.String(s3a.bucket),
|
||||
Key: obj.Key,
|
||||
})
|
||||
if err != nil {
|
||||
cclog.Errorf("S3Archive CleanUp() > delete error: %v", err)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
cclog.Infof("Retention Service - Remove %d jobs from S3 in %s", len(jobs), time.Since(start))
|
||||
}
|
||||
|
||||
func (s3a *S3Archive) Move(jobs []*schema.Job, targetPath string) {
|
||||
ctx := context.Background()
|
||||
|
||||
for _, job := range jobs {
|
||||
sourcePrefix := getS3Directory(job)
|
||||
|
||||
// List all objects in source
|
||||
paginator := s3.NewListObjectsV2Paginator(s3a.client, &s3.ListObjectsV2Input{
|
||||
Bucket: aws.String(s3a.bucket),
|
||||
Prefix: aws.String(sourcePrefix),
|
||||
})
|
||||
|
||||
for paginator.HasMorePages() {
|
||||
page, err := paginator.NextPage(ctx)
|
||||
if err != nil {
|
||||
cclog.Errorf("S3Archive Move() > list error: %v", err)
|
||||
continue
|
||||
}
|
||||
|
||||
for _, obj := range page.Contents {
|
||||
if obj.Key == nil {
|
||||
continue
|
||||
}
|
||||
|
||||
// Compute target key by replacing prefix
|
||||
targetKey := strings.Replace(*obj.Key, sourcePrefix, targetPath+"/", 1)
|
||||
|
||||
// Copy object
|
||||
_, err := s3a.client.CopyObject(ctx, &s3.CopyObjectInput{
|
||||
Bucket: aws.String(s3a.bucket),
|
||||
CopySource: aws.String(fmt.Sprintf("%s/%s", s3a.bucket, *obj.Key)),
|
||||
Key: aws.String(targetKey),
|
||||
})
|
||||
if err != nil {
|
||||
cclog.Errorf("S3Archive Move() > copy error: %v", err)
|
||||
continue
|
||||
}
|
||||
|
||||
// Delete source object
|
||||
_, err = s3a.client.DeleteObject(ctx, &s3.DeleteObjectInput{
|
||||
Bucket: aws.String(s3a.bucket),
|
||||
Key: obj.Key,
|
||||
})
|
||||
if err != nil {
|
||||
cclog.Errorf("S3Archive Move() > delete error: %v", err)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (s3a *S3Archive) Clean(before int64, after int64) {
|
||||
ctx := context.Background()
|
||||
|
||||
if after == 0 {
|
||||
after = math.MaxInt64
|
||||
}
|
||||
|
||||
for _, cluster := range s3a.clusters {
|
||||
prefix := cluster + "/"
|
||||
|
||||
paginator := s3.NewListObjectsV2Paginator(s3a.client, &s3.ListObjectsV2Input{
|
||||
Bucket: aws.String(s3a.bucket),
|
||||
Prefix: aws.String(prefix),
|
||||
})
|
||||
|
||||
for paginator.HasMorePages() {
|
||||
page, err := paginator.NextPage(ctx)
|
||||
if err != nil {
|
||||
cclog.Fatalf("S3Archive Clean() > list error: %s", err.Error())
|
||||
}
|
||||
|
||||
for _, obj := range page.Contents {
|
||||
if obj.Key == nil || !strings.HasSuffix(*obj.Key, "/meta.json") {
|
||||
continue
|
||||
}
|
||||
|
||||
// Extract starttime from key: cluster/lvl1/lvl2/starttime/meta.json
|
||||
parts := strings.Split(*obj.Key, "/")
|
||||
if len(parts) < 4 {
|
||||
continue
|
||||
}
|
||||
|
||||
startTime, err := strconv.ParseInt(parts[3], 10, 64)
|
||||
if err != nil {
|
||||
cclog.Fatalf("S3Archive Clean() > cannot parse starttime: %s", err.Error())
|
||||
}
|
||||
|
||||
if startTime < before || startTime > after {
|
||||
// Delete entire job directory
|
||||
jobPrefix := strings.Join(parts[:4], "/") + "/"
|
||||
|
||||
jobPaginator := s3.NewListObjectsV2Paginator(s3a.client, &s3.ListObjectsV2Input{
|
||||
Bucket: aws.String(s3a.bucket),
|
||||
Prefix: aws.String(jobPrefix),
|
||||
})
|
||||
|
||||
for jobPaginator.HasMorePages() {
|
||||
jobPage, err := jobPaginator.NextPage(ctx)
|
||||
if err != nil {
|
||||
cclog.Errorf("S3Archive Clean() > list job error: %v", err)
|
||||
continue
|
||||
}
|
||||
|
||||
for _, jobObj := range jobPage.Contents {
|
||||
if jobObj.Key != nil {
|
||||
_, err := s3a.client.DeleteObject(ctx, &s3.DeleteObjectInput{
|
||||
Bucket: aws.String(s3a.bucket),
|
||||
Key: jobObj.Key,
|
||||
})
|
||||
if err != nil {
|
||||
cclog.Errorf("S3Archive Clean() > delete error: %v", err)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (s3a *S3Archive) Compress(jobs []*schema.Job) {
|
||||
ctx := context.Background()
|
||||
var cnt int
|
||||
start := time.Now()
|
||||
|
||||
for _, job := range jobs {
|
||||
dataKey := getS3Key(job, "data.json")
|
||||
|
||||
// Check if uncompressed file exists and get its size
|
||||
headResult, err := s3a.client.HeadObject(ctx, &s3.HeadObjectInput{
|
||||
Bucket: aws.String(s3a.bucket),
|
||||
Key: aws.String(dataKey),
|
||||
})
|
||||
if err != nil {
|
||||
continue // File doesn't exist or error
|
||||
}
|
||||
|
||||
if headResult.ContentLength == nil || *headResult.ContentLength < 2000 {
|
||||
continue // Too small to compress
|
||||
}
|
||||
|
||||
// Download the file
|
||||
result, err := s3a.client.GetObject(ctx, &s3.GetObjectInput{
|
||||
Bucket: aws.String(s3a.bucket),
|
||||
Key: aws.String(dataKey),
|
||||
})
|
||||
if err != nil {
|
||||
cclog.Errorf("S3Archive Compress() > GetObject error: %v", err)
|
||||
continue
|
||||
}
|
||||
|
||||
data, err := io.ReadAll(result.Body)
|
||||
result.Body.Close()
|
||||
if err != nil {
|
||||
cclog.Errorf("S3Archive Compress() > read error: %v", err)
|
||||
continue
|
||||
}
|
||||
|
||||
// Compress the data
|
||||
var compressedBuf bytes.Buffer
|
||||
gzipWriter := gzip.NewWriter(&compressedBuf)
|
||||
if _, err := gzipWriter.Write(data); err != nil {
|
||||
cclog.Errorf("S3Archive Compress() > gzip write error: %v", err)
|
||||
gzipWriter.Close()
|
||||
continue
|
||||
}
|
||||
gzipWriter.Close()
|
||||
|
||||
// Upload compressed file
|
||||
compressedKey := getS3Key(job, "data.json.gz")
|
||||
_, err = s3a.client.PutObject(ctx, &s3.PutObjectInput{
|
||||
Bucket: aws.String(s3a.bucket),
|
||||
Key: aws.String(compressedKey),
|
||||
Body: bytes.NewReader(compressedBuf.Bytes()),
|
||||
})
|
||||
if err != nil {
|
||||
cclog.Errorf("S3Archive Compress() > PutObject error: %v", err)
|
||||
continue
|
||||
}
|
||||
|
||||
// Delete uncompressed file
|
||||
_, err = s3a.client.DeleteObject(ctx, &s3.DeleteObjectInput{
|
||||
Bucket: aws.String(s3a.bucket),
|
||||
Key: aws.String(dataKey),
|
||||
})
|
||||
if err != nil {
|
||||
cclog.Errorf("S3Archive Compress() > delete error: %v", err)
|
||||
}
|
||||
|
||||
cnt++
|
||||
}
|
||||
|
||||
cclog.Infof("Compression Service - %d files in S3 took %s", cnt, time.Since(start))
|
||||
}
|
||||
|
||||
func (s3a *S3Archive) CompressLast(starttime int64) int64 {
|
||||
ctx := context.Background()
|
||||
compressKey := "compress.txt"
|
||||
|
||||
// Try to read existing compress.txt
|
||||
result, err := s3a.client.GetObject(ctx, &s3.GetObjectInput{
|
||||
Bucket: aws.String(s3a.bucket),
|
||||
Key: aws.String(compressKey),
|
||||
})
|
||||
|
||||
var last int64
|
||||
if err == nil {
|
||||
b, _ := io.ReadAll(result.Body)
|
||||
result.Body.Close()
|
||||
last, err = strconv.ParseInt(strings.TrimSuffix(string(b), "\n"), 10, 64)
|
||||
if err != nil {
|
||||
cclog.Errorf("S3Archive CompressLast() > parse error: %v", err)
|
||||
last = starttime
|
||||
}
|
||||
} else {
|
||||
last = starttime
|
||||
}
|
||||
|
||||
cclog.Infof("S3Archive CompressLast() - start %d last %d", starttime, last)
|
||||
|
||||
// Write new timestamp
|
||||
newValue := fmt.Sprintf("%d", starttime)
|
||||
_, err = s3a.client.PutObject(ctx, &s3.PutObjectInput{
|
||||
Bucket: aws.String(s3a.bucket),
|
||||
Key: aws.String(compressKey),
|
||||
Body: strings.NewReader(newValue),
|
||||
})
|
||||
if err != nil {
|
||||
cclog.Errorf("S3Archive CompressLast() > PutObject error: %v", err)
|
||||
}
|
||||
|
||||
return last
|
||||
}
|
||||
|
||||
func (s3a *S3Archive) Iter(loadMetricData bool) <-chan JobContainer {
|
||||
ch := make(chan JobContainer)
|
||||
|
||||
go func() {
|
||||
ctx := context.Background()
|
||||
defer close(ch)
|
||||
|
||||
for _, cluster := range s3a.clusters {
|
||||
prefix := cluster + "/"
|
||||
|
||||
paginator := s3.NewListObjectsV2Paginator(s3a.client, &s3.ListObjectsV2Input{
|
||||
Bucket: aws.String(s3a.bucket),
|
||||
Prefix: aws.String(prefix),
|
||||
})
|
||||
|
||||
for paginator.HasMorePages() {
|
||||
page, err := paginator.NextPage(ctx)
|
||||
if err != nil {
|
||||
cclog.Fatalf("S3Archive Iter() > list error: %s", err.Error())
|
||||
}
|
||||
|
||||
for _, obj := range page.Contents {
|
||||
if obj.Key == nil || !strings.HasSuffix(*obj.Key, "/meta.json") {
|
||||
continue
|
||||
}
|
||||
|
||||
// Load job metadata
|
||||
result, err := s3a.client.GetObject(ctx, &s3.GetObjectInput{
|
||||
Bucket: aws.String(s3a.bucket),
|
||||
Key: obj.Key,
|
||||
})
|
||||
if err != nil {
|
||||
cclog.Errorf("S3Archive Iter() > GetObject meta error: %v", err)
|
||||
continue
|
||||
}
|
||||
|
||||
b, err := io.ReadAll(result.Body)
|
||||
result.Body.Close()
|
||||
if err != nil {
|
||||
cclog.Errorf("S3Archive Iter() > read meta error: %v", err)
|
||||
continue
|
||||
}
|
||||
|
||||
job, err := DecodeJobMeta(bytes.NewReader(b))
|
||||
if err != nil {
|
||||
cclog.Errorf("S3Archive Iter() > decode meta error: %v", err)
|
||||
continue
|
||||
}
|
||||
|
||||
if loadMetricData {
|
||||
jobData, err := s3a.LoadJobData(job)
|
||||
if err != nil {
|
||||
cclog.Errorf("S3Archive Iter() > load data error: %v", err)
|
||||
ch <- JobContainer{Meta: job, Data: nil}
|
||||
} else {
|
||||
ch <- JobContainer{Meta: job, Data: &jobData}
|
||||
}
|
||||
} else {
|
||||
ch <- JobContainer{Meta: job, Data: nil}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}()
|
||||
|
||||
return ch
|
||||
}
|
||||
293
pkg/archive/s3Backend_test.go
Normal file
293
pkg/archive/s3Backend_test.go
Normal file
@@ -0,0 +1,293 @@
|
||||
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
// All rights reserved.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
package archive
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"github.com/ClusterCockpit/cc-lib/schema"
|
||||
"github.com/aws/aws-sdk-go-v2/aws"
|
||||
"github.com/aws/aws-sdk-go-v2/service/s3"
|
||||
"github.com/aws/aws-sdk-go-v2/service/s3/types"
|
||||
)
|
||||
|
||||
// MockS3Client is an in-memory stand-in for the S3 client used in tests.
type MockS3Client struct {
	// objects maps an object key to the bytes stored under it.
	objects map[string][]byte
}
|
||||
|
||||
func NewMockS3Client() *MockS3Client {
|
||||
return &MockS3Client{
|
||||
objects: make(map[string][]byte),
|
||||
}
|
||||
}
|
||||
|
||||
func (m *MockS3Client) HeadBucket(ctx context.Context, params *s3.HeadBucketInput, optFns ...func(*s3.Options)) (*s3.HeadBucketOutput, error) {
|
||||
// Always succeed for mock
|
||||
return &s3.HeadBucketOutput{}, nil
|
||||
}
|
||||
|
||||
func (m *MockS3Client) GetObject(ctx context.Context, params *s3.GetObjectInput, optFns ...func(*s3.Options)) (*s3.GetObjectOutput, error) {
|
||||
key := aws.ToString(params.Key)
|
||||
data, exists := m.objects[key]
|
||||
if !exists {
|
||||
return nil, fmt.Errorf("NoSuchKey: object not found")
|
||||
}
|
||||
|
||||
contentLength := int64(len(data))
|
||||
return &s3.GetObjectOutput{
|
||||
Body: io.NopCloser(bytes.NewReader(data)),
|
||||
ContentLength: &contentLength,
|
||||
}, nil
|
||||
}
|
||||
|
||||
func (m *MockS3Client) PutObject(ctx context.Context, params *s3.PutObjectInput, optFns ...func(*s3.Options)) (*s3.PutObjectOutput, error) {
|
||||
key := aws.ToString(params.Key)
|
||||
data, err := io.ReadAll(params.Body)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
m.objects[key] = data
|
||||
return &s3.PutObjectOutput{}, nil
|
||||
}
|
||||
|
||||
func (m *MockS3Client) HeadObject(ctx context.Context, params *s3.HeadObjectInput, optFns ...func(*s3.Options)) (*s3.HeadObjectOutput, error) {
|
||||
key := aws.ToString(params.Key)
|
||||
data, exists := m.objects[key]
|
||||
if !exists {
|
||||
return nil, fmt.Errorf("NotFound")
|
||||
}
|
||||
|
||||
contentLength := int64(len(data))
|
||||
return &s3.HeadObjectOutput{
|
||||
ContentLength: &contentLength,
|
||||
}, nil
|
||||
}
|
||||
|
||||
func (m *MockS3Client) DeleteObject(ctx context.Context, params *s3.DeleteObjectInput, optFns ...func(*s3.Options)) (*s3.DeleteObjectOutput, error) {
|
||||
key := aws.ToString(params.Key)
|
||||
delete(m.objects, key)
|
||||
return &s3.DeleteObjectOutput{}, nil
|
||||
}
|
||||
|
||||
func (m *MockS3Client) CopyObject(ctx context.Context, params *s3.CopyObjectInput, optFns ...func(*s3.Options)) (*s3.CopyObjectOutput, error) {
|
||||
// Parse source bucket/key from CopySource
|
||||
source := aws.ToString(params.CopySource)
|
||||
parts := strings.SplitN(source, "/", 2)
|
||||
if len(parts) < 2 {
|
||||
return nil, fmt.Errorf("invalid CopySource")
|
||||
}
|
||||
sourceKey := parts[1]
|
||||
|
||||
data, exists := m.objects[sourceKey]
|
||||
if !exists {
|
||||
return nil, fmt.Errorf("source not found")
|
||||
}
|
||||
|
||||
destKey := aws.ToString(params.Key)
|
||||
m.objects[destKey] = data
|
||||
return &s3.CopyObjectOutput{}, nil
|
||||
}
|
||||
|
||||
func (m *MockS3Client) ListObjectsV2(ctx context.Context, params *s3.ListObjectsV2Input, optFns ...func(*s3.Options)) (*s3.ListObjectsV2Output, error) {
|
||||
prefix := aws.ToString(params.Prefix)
|
||||
delimiter := aws.ToString(params.Delimiter)
|
||||
|
||||
var contents []types.Object
|
||||
commonPrefixes := make(map[string]bool)
|
||||
|
||||
for key, data := range m.objects {
|
||||
if !strings.HasPrefix(key, prefix) {
|
||||
continue
|
||||
}
|
||||
|
||||
if delimiter != "" {
|
||||
// Check if there's a delimiter after the prefix
|
||||
remainder := strings.TrimPrefix(key, prefix)
|
||||
delimIdx := strings.Index(remainder, delimiter)
|
||||
if delimIdx >= 0 {
|
||||
// This is a "directory" - add to common prefixes
|
||||
commonPrefix := prefix + remainder[:delimIdx+1]
|
||||
commonPrefixes[commonPrefix] = true
|
||||
continue
|
||||
}
|
||||
}
|
||||
|
||||
size := int64(len(data))
|
||||
contents = append(contents, types.Object{
|
||||
Key: aws.String(key),
|
||||
Size: &size,
|
||||
})
|
||||
}
|
||||
|
||||
var prefixList []types.CommonPrefix
|
||||
for p := range commonPrefixes {
|
||||
prefixList = append(prefixList, types.CommonPrefix{
|
||||
Prefix: aws.String(p),
|
||||
})
|
||||
}
|
||||
|
||||
return &s3.ListObjectsV2Output{
|
||||
Contents: contents,
|
||||
CommonPrefixes: prefixList,
|
||||
}, nil
|
||||
}
|
||||
|
||||
// Test helper to create a mock S3 archive with test data
|
||||
func setupMockS3Archive(t *testing.T) *MockS3Client {
|
||||
mock := NewMockS3Client()
|
||||
|
||||
// Add version.txt
|
||||
mock.objects["version.txt"] = []byte("2\n")
|
||||
|
||||
// Add a test cluster directory
|
||||
mock.objects["emmy/cluster.json"] = []byte(`{
|
||||
"name": "emmy",
|
||||
"metricConfig": [],
|
||||
"subClusters": [
|
||||
{
|
||||
"name": "main",
|
||||
"processorType": "Intel Xeon",
|
||||
"socketsPerNode": 2,
|
||||
"coresPerSocket": 4,
|
||||
"threadsPerCore": 2,
|
||||
"flopRateScalar": 16,
|
||||
"flopRateSimd": 32,
|
||||
"memoryBandwidth": 100
|
||||
}
|
||||
]
|
||||
}`)
|
||||
|
||||
// Add a test job
|
||||
mock.objects["emmy/1403/244/1608923076/meta.json"] = []byte(`{
|
||||
"jobId": 1403244,
|
||||
"cluster": "emmy",
|
||||
"startTime": 1608923076,
|
||||
"numNodes": 1,
|
||||
"resources": [{"hostname": "node001"}]
|
||||
}`)
|
||||
|
||||
mock.objects["emmy/1403/244/1608923076/data.json"] = []byte(`{
|
||||
"mem_used": {
|
||||
"node": {
|
||||
"node001": {
|
||||
"series": [{"time": 1608923076, "value": 1000}]
|
||||
}
|
||||
}
|
||||
}
|
||||
}`)
|
||||
|
||||
return mock
|
||||
}
|
||||
|
||||
func TestS3InitEmptyBucket(t *testing.T) {
|
||||
var s3a S3Archive
|
||||
_, err := s3a.Init(json.RawMessage(`{"kind":"s3"}`))
|
||||
if err == nil {
|
||||
t.Fatal("expected error for empty bucket")
|
||||
}
|
||||
}
|
||||
|
||||
func TestS3InitInvalidConfig(t *testing.T) {
|
||||
var s3a S3Archive
|
||||
_, err := s3a.Init(json.RawMessage(`"bucket":"test-bucket"`))
|
||||
if err == nil {
|
||||
t.Fatal("expected error for invalid config")
|
||||
}
|
||||
}
|
||||
|
||||
// Note: TestS3Init would require actual S3 connection or more complex mocking
|
||||
// For now, we document that Init() should be tested manually with MinIO
|
||||
|
||||
func TestGetS3Key(t *testing.T) {
|
||||
job := &schema.Job{
|
||||
JobID: 1403244,
|
||||
Cluster: "emmy",
|
||||
StartTime: 1608923076,
|
||||
}
|
||||
|
||||
key := getS3Key(job, "meta.json")
|
||||
expected := "emmy/1403/244/1608923076/meta.json"
|
||||
if key != expected {
|
||||
t.Errorf("expected key %s, got %s", expected, key)
|
||||
}
|
||||
}
|
||||
|
||||
func TestGetS3Directory(t *testing.T) {
|
||||
job := &schema.Job{
|
||||
JobID: 1403244,
|
||||
Cluster: "emmy",
|
||||
StartTime: 1608923076,
|
||||
}
|
||||
|
||||
dir := getS3Directory(job)
|
||||
expected := "emmy/1403/244/1608923076/"
|
||||
if dir != expected {
|
||||
t.Errorf("expected dir %s, got %s", expected, dir)
|
||||
}
|
||||
}
|
||||
|
||||
// Integration-style tests would go here for actual S3 operations
|
||||
// These would require MinIO or localstack for testing
|
||||
|
||||
func TestS3ArchiveConfigParsing(t *testing.T) {
|
||||
rawConfig := json.RawMessage(`{
|
||||
"endpoint": "http://localhost:9000",
|
||||
"accessKey": "minioadmin",
|
||||
"secretKey": "minioadmin",
|
||||
"bucket": "test-bucket",
|
||||
"region": "us-east-1",
|
||||
"usePathStyle": true
|
||||
}`)
|
||||
|
||||
var cfg S3ArchiveConfig
|
||||
err := json.Unmarshal(rawConfig, &cfg)
|
||||
if err != nil {
|
||||
t.Fatalf("failed to parse config: %v", err)
|
||||
}
|
||||
|
||||
if cfg.Bucket != "test-bucket" {
|
||||
t.Errorf("expected bucket 'test-bucket', got '%s'", cfg.Bucket)
|
||||
}
|
||||
if cfg.Region != "us-east-1" {
|
||||
t.Errorf("expected region 'us-east-1', got '%s'", cfg.Region)
|
||||
}
|
||||
if !cfg.UsePathStyle {
|
||||
t.Error("expected usePathStyle to be true")
|
||||
}
|
||||
}
|
||||
|
||||
func TestS3KeyGeneration(t *testing.T) {
|
||||
tests := []struct {
|
||||
jobID int64
|
||||
cluster string
|
||||
startTime int64
|
||||
file string
|
||||
expected string
|
||||
}{
|
||||
{1403244, "emmy", 1608923076, "meta.json", "emmy/1403/244/1608923076/meta.json"},
|
||||
{1404397, "emmy", 1609300556, "data.json.gz", "emmy/1404/397/1609300556/data.json.gz"},
|
||||
{42, "fritz", 1234567890, "meta.json", "fritz/0/042/1234567890/meta.json"},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
job := &schema.Job{
|
||||
JobID: tt.jobID,
|
||||
Cluster: tt.cluster,
|
||||
StartTime: tt.startTime,
|
||||
}
|
||||
|
||||
key := getS3Key(job, tt.file)
|
||||
if key != tt.expected {
|
||||
t.Errorf("for job %d: expected %s, got %s", tt.jobID, tt.expected, key)
|
||||
}
|
||||
}
|
||||
}
|
||||
584
pkg/archive/sqliteBackend.go
Normal file
584
pkg/archive/sqliteBackend.go
Normal file
@@ -0,0 +1,584 @@
|
||||
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
// All rights reserved. This file is part of cc-backend.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package archive
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"compress/gzip"
|
||||
"database/sql"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"math"
|
||||
"os"
|
||||
"strconv"
|
||||
"text/tabwriter"
|
||||
"time"
|
||||
|
||||
"github.com/ClusterCockpit/cc-backend/internal/config"
|
||||
cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
|
||||
"github.com/ClusterCockpit/cc-lib/schema"
|
||||
"github.com/ClusterCockpit/cc-lib/util"
|
||||
_ "github.com/mattn/go-sqlite3"
|
||||
)
|
||||
|
||||
// SqliteArchiveConfig holds the configuration for the SQLite archive backend.
// It is populated by unmarshalling the raw JSON configuration handed to
// SqliteArchive.Init.
type SqliteArchiveConfig struct {
	DBPath string `json:"dbPath"` // Path to SQLite database file; Init rejects an empty value
}
|
||||
|
||||
// SqliteArchive implements ArchiveBackend using a SQLite database with BLOB storage.
// Job metadata and data are stored as JSON BLOBs with indexes for fast queries.
//
// Init requests WAL (Write-Ahead Logging) journal mode and a 64MB page cache
// via PRAGMA statements; a failing PRAGMA is only logged as a warning.
type SqliteArchive struct {
	db       *sql.DB  // SQLite database handle, set by Init
	clusters []string // Cluster names found in the jobs table by Init; not refreshed by later writes
}
|
||||
|
||||
// sqliteSchema defines the database schema for SQLite archive backend.
// All statements use IF NOT EXISTS, so executing the schema repeatedly on an
// existing database is harmless.
//
// Jobs table: stores job metadata and (optional) job data as BLOBs; the
// data_compressed flag tells whether data_json holds a gzip stream. A job is
// uniquely identified by (job_id, cluster, start_time).
// Clusters table: stores cluster configurations keyed by cluster name.
// Metadata table: stores version and other key-value pairs.
const sqliteSchema = `
CREATE TABLE IF NOT EXISTS jobs (
	id INTEGER PRIMARY KEY AUTOINCREMENT,
	job_id INTEGER NOT NULL,
	cluster TEXT NOT NULL,
	start_time INTEGER NOT NULL,
	meta_json BLOB NOT NULL,
	data_json BLOB,
	data_compressed BOOLEAN DEFAULT 0,
	created_at INTEGER NOT NULL,
	updated_at INTEGER NOT NULL,
	UNIQUE(job_id, cluster, start_time)
);

CREATE INDEX IF NOT EXISTS idx_jobs_cluster ON jobs(cluster);
CREATE INDEX IF NOT EXISTS idx_jobs_start_time ON jobs(start_time);
CREATE INDEX IF NOT EXISTS idx_jobs_lookup ON jobs(cluster, job_id, start_time);

CREATE TABLE IF NOT EXISTS clusters (
	name TEXT PRIMARY KEY,
	config_json BLOB NOT NULL,
	updated_at INTEGER NOT NULL
);

CREATE TABLE IF NOT EXISTS metadata (
	key TEXT PRIMARY KEY,
	value TEXT NOT NULL
);
`
|
||||
|
||||
func (sa *SqliteArchive) Init(rawConfig json.RawMessage) (uint64, error) {
|
||||
var cfg SqliteArchiveConfig
|
||||
if err := json.Unmarshal(rawConfig, &cfg); err != nil {
|
||||
cclog.Warnf("SqliteArchive Init() > Unmarshal error: %#v", err)
|
||||
return 0, err
|
||||
}
|
||||
|
||||
if cfg.DBPath == "" {
|
||||
err := fmt.Errorf("SqliteArchive Init(): empty database path")
|
||||
cclog.Errorf("SqliteArchive Init() > config error: %v", err)
|
||||
return 0, err
|
||||
}
|
||||
|
||||
// Open SQLite database
|
||||
db, err := sql.Open("sqlite3", cfg.DBPath)
|
||||
if err != nil {
|
||||
cclog.Errorf("SqliteArchive Init() > failed to open database: %v", err)
|
||||
return 0, err
|
||||
}
|
||||
sa.db = db
|
||||
|
||||
// Set pragmas for better performance
|
||||
pragmas := []string{
|
||||
"PRAGMA journal_mode=WAL",
|
||||
"PRAGMA synchronous=NORMAL",
|
||||
"PRAGMA cache_size=-64000", // 64MB cache
|
||||
"PRAGMA busy_timeout=5000",
|
||||
}
|
||||
for _, pragma := range pragmas {
|
||||
if _, err := sa.db.Exec(pragma); err != nil {
|
||||
cclog.Warnf("SqliteArchive Init() > pragma failed: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// Create schema
|
||||
if _, err := sa.db.Exec(sqliteSchema); err != nil {
|
||||
cclog.Errorf("SqliteArchive Init() > schema creation failed: %v", err)
|
||||
return 0, err
|
||||
}
|
||||
|
||||
// Check/set version
|
||||
var versionStr string
|
||||
err = sa.db.QueryRow("SELECT value FROM metadata WHERE key = 'version'").Scan(&versionStr)
|
||||
if err == sql.ErrNoRows {
|
||||
// First time initialization, set version
|
||||
_, err = sa.db.Exec("INSERT INTO metadata (key, value) VALUES ('version', ?)", fmt.Sprintf("%d", Version))
|
||||
if err != nil {
|
||||
cclog.Errorf("SqliteArchive Init() > failed to set version: %v", err)
|
||||
return 0, err
|
||||
}
|
||||
versionStr = fmt.Sprintf("%d", Version)
|
||||
} else if err != nil {
|
||||
cclog.Errorf("SqliteArchive Init() > failed to read version: %v", err)
|
||||
return 0, err
|
||||
}
|
||||
|
||||
version, err := strconv.ParseUint(versionStr, 10, 64)
|
||||
if err != nil {
|
||||
cclog.Errorf("SqliteArchive Init() > version parse error: %v", err)
|
||||
return 0, err
|
||||
}
|
||||
|
||||
if version != Version {
|
||||
return version, fmt.Errorf("unsupported version %d, need %d", version, Version)
|
||||
}
|
||||
|
||||
// Discover clusters
|
||||
sa.clusters = []string{}
|
||||
rows, err := sa.db.Query("SELECT DISTINCT cluster FROM jobs ORDER BY cluster")
|
||||
if err != nil {
|
||||
cclog.Errorf("SqliteArchive Init() > failed to query clusters: %v", err)
|
||||
return 0, err
|
||||
}
|
||||
defer rows.Close()
|
||||
|
||||
for rows.Next() {
|
||||
var cluster string
|
||||
if err := rows.Scan(&cluster); err != nil {
|
||||
cclog.Errorf("SqliteArchive Init() > failed to scan cluster: %v", err)
|
||||
continue
|
||||
}
|
||||
sa.clusters = append(sa.clusters, cluster)
|
||||
}
|
||||
|
||||
cclog.Infof("SqliteArchive initialized with database '%s', found %d clusters", cfg.DBPath, len(sa.clusters))
|
||||
return version, nil
|
||||
}
|
||||
|
||||
func (sa *SqliteArchive) Info() {
|
||||
fmt.Printf("SQLite Job archive database\n")
|
||||
|
||||
ci := make(map[string]*clusterInfo)
|
||||
|
||||
rows, err := sa.db.Query(`
|
||||
SELECT cluster, COUNT(*), MIN(start_time), MAX(start_time),
|
||||
SUM(LENGTH(meta_json) + COALESCE(LENGTH(data_json), 0))
|
||||
FROM jobs
|
||||
GROUP BY cluster
|
||||
`)
|
||||
if err != nil {
|
||||
cclog.Fatalf("SqliteArchive Info() > query failed: %s", err.Error())
|
||||
}
|
||||
defer rows.Close()
|
||||
|
||||
for rows.Next() {
|
||||
var cluster string
|
||||
var numJobs int
|
||||
var dateFirst, dateLast int64
|
||||
var diskSize int64
|
||||
|
||||
if err := rows.Scan(&cluster, &numJobs, &dateFirst, &dateLast, &diskSize); err != nil {
|
||||
cclog.Errorf("SqliteArchive Info() > scan failed: %v", err)
|
||||
continue
|
||||
}
|
||||
|
||||
ci[cluster] = &clusterInfo{
|
||||
numJobs: numJobs,
|
||||
dateFirst: dateFirst,
|
||||
dateLast: dateLast,
|
||||
diskSize: float64(diskSize) / (1024 * 1024), // Convert to MB
|
||||
}
|
||||
}
|
||||
|
||||
cit := clusterInfo{dateFirst: time.Now().Unix()}
|
||||
w := tabwriter.NewWriter(os.Stdout, 0, 0, 2, ' ', tabwriter.Debug)
|
||||
fmt.Fprintln(w, "cluster\t#jobs\tfrom\tto\tsize (MB)")
|
||||
for cluster, clusterInfo := range ci {
|
||||
fmt.Fprintf(w, "%s\t%d\t%s\t%s\t%.2f\n", cluster,
|
||||
clusterInfo.numJobs,
|
||||
time.Unix(clusterInfo.dateFirst, 0),
|
||||
time.Unix(clusterInfo.dateLast, 0),
|
||||
clusterInfo.diskSize)
|
||||
|
||||
cit.numJobs += clusterInfo.numJobs
|
||||
cit.dateFirst = util.Min(cit.dateFirst, clusterInfo.dateFirst)
|
||||
cit.dateLast = util.Max(cit.dateLast, clusterInfo.dateLast)
|
||||
cit.diskSize += clusterInfo.diskSize
|
||||
}
|
||||
|
||||
fmt.Fprintf(w, "TOTAL\t%d\t%s\t%s\t%.2f\n",
|
||||
cit.numJobs, time.Unix(cit.dateFirst, 0), time.Unix(cit.dateLast, 0), cit.diskSize)
|
||||
w.Flush()
|
||||
}
|
||||
|
||||
func (sa *SqliteArchive) Exists(job *schema.Job) bool {
|
||||
var count int
|
||||
err := sa.db.QueryRow("SELECT COUNT(*) FROM jobs WHERE job_id = ? AND cluster = ? AND start_time = ?",
|
||||
job.JobID, job.Cluster, job.StartTime).Scan(&count)
|
||||
return err == nil && count > 0
|
||||
}
|
||||
|
||||
func (sa *SqliteArchive) LoadJobMeta(job *schema.Job) (*schema.Job, error) {
|
||||
var metaBlob []byte
|
||||
err := sa.db.QueryRow("SELECT meta_json FROM jobs WHERE job_id = ? AND cluster = ? AND start_time = ?",
|
||||
job.JobID, job.Cluster, job.StartTime).Scan(&metaBlob)
|
||||
if err != nil {
|
||||
cclog.Errorf("SqliteArchive LoadJobMeta() > query error: %v", err)
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if config.Keys.Validate {
|
||||
if err := schema.Validate(schema.Meta, bytes.NewReader(metaBlob)); err != nil {
|
||||
return nil, fmt.Errorf("validate job meta: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
return DecodeJobMeta(bytes.NewReader(metaBlob))
|
||||
}
|
||||
|
||||
func (sa *SqliteArchive) LoadJobData(job *schema.Job) (schema.JobData, error) {
|
||||
var dataBlob []byte
|
||||
var compressed bool
|
||||
err := sa.db.QueryRow("SELECT data_json, data_compressed FROM jobs WHERE job_id = ? AND cluster = ? AND start_time = ?",
|
||||
job.JobID, job.Cluster, job.StartTime).Scan(&dataBlob, &compressed)
|
||||
if err != nil {
|
||||
cclog.Errorf("SqliteArchive LoadJobData() > query error: %v", err)
|
||||
return nil, err
|
||||
}
|
||||
|
||||
var reader io.Reader = bytes.NewReader(dataBlob)
|
||||
if compressed {
|
||||
gzipReader, err := gzip.NewReader(reader)
|
||||
if err != nil {
|
||||
cclog.Errorf("SqliteArchive LoadJobData() > gzip error: %v", err)
|
||||
return nil, err
|
||||
}
|
||||
defer gzipReader.Close()
|
||||
reader = gzipReader
|
||||
}
|
||||
|
||||
if config.Keys.Validate {
|
||||
data, _ := io.ReadAll(reader)
|
||||
if err := schema.Validate(schema.Data, bytes.NewReader(data)); err != nil {
|
||||
return schema.JobData{}, fmt.Errorf("validate job data: %v", err)
|
||||
}
|
||||
return DecodeJobData(bytes.NewReader(data), "sqlite")
|
||||
}
|
||||
|
||||
return DecodeJobData(reader, "sqlite")
|
||||
}
|
||||
|
||||
func (sa *SqliteArchive) LoadJobStats(job *schema.Job) (schema.ScopedJobStats, error) {
|
||||
var dataBlob []byte
|
||||
var compressed bool
|
||||
err := sa.db.QueryRow("SELECT data_json, data_compressed FROM jobs WHERE job_id = ? AND cluster = ? AND start_time = ?",
|
||||
job.JobID, job.Cluster, job.StartTime).Scan(&dataBlob, &compressed)
|
||||
if err != nil {
|
||||
cclog.Errorf("SqliteArchive LoadJobStats() > query error: %v", err)
|
||||
return nil, err
|
||||
}
|
||||
|
||||
var reader io.Reader = bytes.NewReader(dataBlob)
|
||||
if compressed {
|
||||
gzipReader, err := gzip.NewReader(reader)
|
||||
if err != nil {
|
||||
cclog.Errorf("SqliteArchive LoadJobStats() > gzip error: %v", err)
|
||||
return nil, err
|
||||
}
|
||||
defer gzipReader.Close()
|
||||
reader = gzipReader
|
||||
}
|
||||
|
||||
if config.Keys.Validate {
|
||||
data, _ := io.ReadAll(reader)
|
||||
if err := schema.Validate(schema.Data, bytes.NewReader(data)); err != nil {
|
||||
return nil, fmt.Errorf("validate job data: %v", err)
|
||||
}
|
||||
return DecodeJobStats(bytes.NewReader(data), "sqlite")
|
||||
}
|
||||
|
||||
return DecodeJobStats(reader, "sqlite")
|
||||
}
|
||||
|
||||
func (sa *SqliteArchive) LoadClusterCfg(name string) (*schema.Cluster, error) {
|
||||
var configBlob []byte
|
||||
err := sa.db.QueryRow("SELECT config_json FROM clusters WHERE name = ?", name).Scan(&configBlob)
|
||||
if err != nil {
|
||||
cclog.Errorf("SqliteArchive LoadClusterCfg() > query error: %v", err)
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if err := schema.Validate(schema.ClusterCfg, bytes.NewReader(configBlob)); err != nil {
|
||||
cclog.Warnf("Validate cluster config: %v\n", err)
|
||||
return &schema.Cluster{}, fmt.Errorf("validate cluster config: %v", err)
|
||||
}
|
||||
|
||||
return DecodeCluster(bytes.NewReader(configBlob))
|
||||
}
|
||||
|
||||
func (sa *SqliteArchive) StoreJobMeta(job *schema.Job) error {
|
||||
var metaBuf bytes.Buffer
|
||||
if err := EncodeJobMeta(&metaBuf, job); err != nil {
|
||||
cclog.Error("SqliteArchive StoreJobMeta() > encoding error")
|
||||
return err
|
||||
}
|
||||
|
||||
now := time.Now().Unix()
|
||||
_, err := sa.db.Exec(`
|
||||
INSERT INTO jobs (job_id, cluster, start_time, meta_json, created_at, updated_at)
|
||||
VALUES (?, ?, ?, ?, ?, ?)
|
||||
ON CONFLICT(job_id, cluster, start_time) DO UPDATE SET
|
||||
meta_json = excluded.meta_json,
|
||||
updated_at = excluded.updated_at
|
||||
`, job.JobID, job.Cluster, job.StartTime, metaBuf.Bytes(), now, now)
|
||||
|
||||
if err != nil {
|
||||
cclog.Errorf("SqliteArchive StoreJobMeta() > insert error: %v", err)
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (sa *SqliteArchive) ImportJob(jobMeta *schema.Job, jobData *schema.JobData) error {
|
||||
var metaBuf, dataBuf bytes.Buffer
|
||||
if err := EncodeJobMeta(&metaBuf, jobMeta); err != nil {
|
||||
cclog.Error("SqliteArchive ImportJob() > encoding meta error")
|
||||
return err
|
||||
}
|
||||
if err := EncodeJobData(&dataBuf, jobData); err != nil {
|
||||
cclog.Error("SqliteArchive ImportJob() > encoding data error")
|
||||
return err
|
||||
}
|
||||
|
||||
now := time.Now().Unix()
|
||||
_, err := sa.db.Exec(`
|
||||
INSERT INTO jobs (job_id, cluster, start_time, meta_json, data_json, data_compressed, created_at, updated_at)
|
||||
VALUES (?, ?, ?, ?, ?, 0, ?, ?)
|
||||
ON CONFLICT(job_id, cluster, start_time) DO UPDATE SET
|
||||
meta_json = excluded.meta_json,
|
||||
data_json = excluded.data_json,
|
||||
data_compressed = excluded.data_compressed,
|
||||
updated_at = excluded.updated_at
|
||||
`, jobMeta.JobID, jobMeta.Cluster, jobMeta.StartTime, metaBuf.Bytes(), dataBuf.Bytes(), now, now)
|
||||
|
||||
if err != nil {
|
||||
cclog.Errorf("SqliteArchive ImportJob() > insert error: %v", err)
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// GetClusters returns the cluster names collected by Init from the jobs
// table. The internal slice is returned directly (not a copy), and it is not
// refreshed when jobs for new clusters are stored afterwards.
func (sa *SqliteArchive) GetClusters() []string {
	return sa.clusters
}
|
||||
|
||||
func (sa *SqliteArchive) CleanUp(jobs []*schema.Job) {
|
||||
start := time.Now()
|
||||
count := 0
|
||||
|
||||
tx, err := sa.db.Begin()
|
||||
if err != nil {
|
||||
cclog.Errorf("SqliteArchive CleanUp() > transaction error: %v", err)
|
||||
return
|
||||
}
|
||||
defer tx.Rollback()
|
||||
|
||||
stmt, err := tx.Prepare("DELETE FROM jobs WHERE job_id = ? AND cluster = ? AND start_time = ?")
|
||||
if err != nil {
|
||||
cclog.Errorf("SqliteArchive CleanUp() > prepare error: %v", err)
|
||||
return
|
||||
}
|
||||
defer stmt.Close()
|
||||
|
||||
for _, job := range jobs {
|
||||
if job == nil {
|
||||
cclog.Errorf("SqliteArchive CleanUp() error: job is nil")
|
||||
continue
|
||||
}
|
||||
|
||||
if _, err := stmt.Exec(job.JobID, job.Cluster, job.StartTime); err != nil {
|
||||
cclog.Errorf("SqliteArchive CleanUp() > delete error: %v", err)
|
||||
} else {
|
||||
count++
|
||||
}
|
||||
}
|
||||
|
||||
if err := tx.Commit(); err != nil {
|
||||
cclog.Errorf("SqliteArchive CleanUp() > commit error: %v", err)
|
||||
return
|
||||
}
|
||||
|
||||
cclog.Infof("Retention Service - Remove %d jobs from SQLite in %s", count, time.Since(start))
|
||||
}
|
||||
|
||||
// Move is a placeholder: it ignores both jobs and targetPath and only logs a
// warning that moving is not implemented for the SQLite backend. No rows are
// changed.
func (sa *SqliteArchive) Move(jobs []*schema.Job, targetPath string) {
	// For SQLite, "move" means updating the cluster field or similar
	// This is interpretation-dependent; for now we'll just log
	cclog.Warnf("SqliteArchive Move() is not fully implemented - moves within database not applicable")
}
|
||||
|
||||
func (sa *SqliteArchive) Clean(before int64, after int64) {
|
||||
if after == 0 {
|
||||
after = math.MaxInt64
|
||||
}
|
||||
|
||||
result, err := sa.db.Exec("DELETE FROM jobs WHERE start_time < ? OR start_time > ?", before, after)
|
||||
if err != nil {
|
||||
cclog.Fatalf("SqliteArchive Clean() > delete error: %s", err.Error())
|
||||
}
|
||||
|
||||
rowsAffected, _ := result.RowsAffected()
|
||||
cclog.Infof("SqliteArchive Clean() removed %d jobs", rowsAffected)
|
||||
}
|
||||
|
||||
func (sa *SqliteArchive) Compress(jobs []*schema.Job) {
|
||||
var cnt int
|
||||
start := time.Now()
|
||||
|
||||
tx, err := sa.db.Begin()
|
||||
if err != nil {
|
||||
cclog.Errorf("SqliteArchive Compress() > transaction error: %v", err)
|
||||
return
|
||||
}
|
||||
defer tx.Rollback()
|
||||
|
||||
stmt, err := tx.Prepare("UPDATE jobs SET data_json = ?, data_compressed = 1 WHERE job_id = ? AND cluster = ? AND start_time = ?")
|
||||
if err != nil {
|
||||
cclog.Errorf("SqliteArchive Compress() > prepare error: %v", err)
|
||||
return
|
||||
}
|
||||
defer stmt.Close()
|
||||
|
||||
for _, job := range jobs {
|
||||
var dataBlob []byte
|
||||
var compressed bool
|
||||
err := sa.db.QueryRow("SELECT data_json, data_compressed FROM jobs WHERE job_id = ? AND cluster = ? AND start_time = ?",
|
||||
job.JobID, job.Cluster, job.StartTime).Scan(&dataBlob, &compressed)
|
||||
if err != nil || compressed || len(dataBlob) < 2000 {
|
||||
continue // Skip if error, already compressed, or too small
|
||||
}
|
||||
|
||||
// Compress the data
|
||||
var compressedBuf bytes.Buffer
|
||||
gzipWriter := gzip.NewWriter(&compressedBuf)
|
||||
if _, err := gzipWriter.Write(dataBlob); err != nil {
|
||||
cclog.Errorf("SqliteArchive Compress() > gzip error: %v", err)
|
||||
gzipWriter.Close()
|
||||
continue
|
||||
}
|
||||
gzipWriter.Close()
|
||||
|
||||
if _, err := stmt.Exec(compressedBuf.Bytes(), job.JobID, job.Cluster, job.StartTime); err != nil {
|
||||
cclog.Errorf("SqliteArchive Compress() > update error: %v", err)
|
||||
} else {
|
||||
cnt++
|
||||
}
|
||||
}
|
||||
|
||||
if err := tx.Commit(); err != nil {
|
||||
cclog.Errorf("SqliteArchive Compress() > commit error: %v", err)
|
||||
return
|
||||
}
|
||||
|
||||
cclog.Infof("Compression Service - %d jobs in SQLite took %s", cnt, time.Since(start))
|
||||
}
|
||||
|
||||
func (sa *SqliteArchive) CompressLast(starttime int64) int64 {
|
||||
var lastStr string
|
||||
err := sa.db.QueryRow("SELECT value FROM metadata WHERE key = 'compress_last'").Scan(&lastStr)
|
||||
|
||||
var last int64
|
||||
if err == sql.ErrNoRows {
|
||||
last = starttime
|
||||
} else if err != nil {
|
||||
cclog.Errorf("SqliteArchive CompressLast() > query error: %v", err)
|
||||
last = starttime
|
||||
} else {
|
||||
last, err = strconv.ParseInt(lastStr, 10, 64)
|
||||
if err != nil {
|
||||
cclog.Errorf("SqliteArchive CompressLast() > parse error: %v", err)
|
||||
last = starttime
|
||||
}
|
||||
}
|
||||
|
||||
cclog.Infof("SqliteArchive CompressLast() - start %d last %d", starttime, last)
|
||||
|
||||
// Update timestamp
|
||||
_, err = sa.db.Exec(`
|
||||
INSERT INTO metadata (key, value) VALUES ('compress_last', ?)
|
||||
ON CONFLICT(key) DO UPDATE SET value = excluded.value
|
||||
`, fmt.Sprintf("%d", starttime))
|
||||
if err != nil {
|
||||
cclog.Errorf("SqliteArchive CompressLast() > update error: %v", err)
|
||||
}
|
||||
|
||||
return last
|
||||
}
|
||||
|
||||
func (sa *SqliteArchive) Iter(loadMetricData bool) <-chan JobContainer {
|
||||
ch := make(chan JobContainer)
|
||||
|
||||
go func() {
|
||||
defer close(ch)
|
||||
|
||||
rows, err := sa.db.Query("SELECT job_id, cluster, start_time, meta_json, data_json, data_compressed FROM jobs ORDER BY cluster, start_time")
|
||||
if err != nil {
|
||||
cclog.Fatalf("SqliteArchive Iter() > query error: %s", err.Error())
|
||||
}
|
||||
defer rows.Close()
|
||||
|
||||
for rows.Next() {
|
||||
var jobID int64
|
||||
var cluster string
|
||||
var startTime int64
|
||||
var metaBlob []byte
|
||||
var dataBlob []byte
|
||||
var compressed bool
|
||||
|
||||
if err := rows.Scan(&jobID, &cluster, &startTime, &metaBlob, &dataBlob, &compressed); err != nil {
|
||||
cclog.Errorf("SqliteArchive Iter() > scan error: %v", err)
|
||||
continue
|
||||
}
|
||||
|
||||
job, err := DecodeJobMeta(bytes.NewReader(metaBlob))
|
||||
if err != nil {
|
||||
cclog.Errorf("SqliteArchive Iter() > decode meta error: %v", err)
|
||||
continue
|
||||
}
|
||||
|
||||
if loadMetricData && dataBlob != nil {
|
||||
var reader io.Reader = bytes.NewReader(dataBlob)
|
||||
if compressed {
|
||||
gzipReader, err := gzip.NewReader(reader)
|
||||
if err != nil {
|
||||
cclog.Errorf("SqliteArchive Iter() > gzip error: %v", err)
|
||||
ch <- JobContainer{Meta: job, Data: nil}
|
||||
continue
|
||||
}
|
||||
defer gzipReader.Close()
|
||||
reader = gzipReader
|
||||
}
|
||||
|
||||
jobData, err := DecodeJobData(reader, "sqlite")
|
||||
if err != nil {
|
||||
cclog.Errorf("SqliteArchive Iter() > decode data error: %v", err)
|
||||
ch <- JobContainer{Meta: job, Data: nil}
|
||||
} else {
|
||||
ch <- JobContainer{Meta: job, Data: &jobData}
|
||||
}
|
||||
} else {
|
||||
ch <- JobContainer{Meta: job, Data: nil}
|
||||
}
|
||||
}
|
||||
}()
|
||||
|
||||
return ch
|
||||
}
|
||||
313
pkg/archive/sqliteBackend_test.go
Normal file
313
pkg/archive/sqliteBackend_test.go
Normal file
@@ -0,0 +1,313 @@
|
||||
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
// All rights reserved.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
package archive
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"os"
|
||||
"testing"
|
||||
|
||||
"github.com/ClusterCockpit/cc-lib/schema"
|
||||
)
|
||||
|
||||
func TestSqliteInitEmptyPath(t *testing.T) {
|
||||
var sa SqliteArchive
|
||||
_, err := sa.Init(json.RawMessage(`{"kind":"sqlite"}`))
|
||||
if err == nil {
|
||||
t.Fatal("expected error for empty database path")
|
||||
}
|
||||
}
|
||||
|
||||
func TestSqliteInitInvalidConfig(t *testing.T) {
|
||||
var sa SqliteArchive
|
||||
_, err := sa.Init(json.RawMessage(`"dbPath":"/tmp/test.db"`))
|
||||
if err == nil {
|
||||
t.Fatal("expected error for invalid config")
|
||||
}
|
||||
}
|
||||
|
||||
func TestSqliteInit(t *testing.T) {
|
||||
tmpfile := t.TempDir() + "/test.db"
|
||||
defer os.Remove(tmpfile)
|
||||
|
||||
var sa SqliteArchive
|
||||
version, err := sa.Init(json.RawMessage(`{"dbPath":"` + tmpfile + `"}`))
|
||||
if err != nil {
|
||||
t.Fatalf("init failed: %v", err)
|
||||
}
|
||||
if version != Version {
|
||||
t.Errorf("expected version %d, got %d", Version, version)
|
||||
}
|
||||
if sa.db == nil {
|
||||
t.Fatal("database not initialized")
|
||||
}
|
||||
sa.db.Close()
|
||||
}
|
||||
|
||||
func TestSqliteStoreAndLoadJobMeta(t *testing.T) {
|
||||
tmpfile := t.TempDir() + "/test.db"
|
||||
defer os.Remove(tmpfile)
|
||||
|
||||
var sa SqliteArchive
|
||||
_, err := sa.Init(json.RawMessage(`{"dbPath":"` + tmpfile + `"}`))
|
||||
if err != nil {
|
||||
t.Fatalf("init failed: %v", err)
|
||||
}
|
||||
defer sa.db.Close()
|
||||
|
||||
job := &schema.Job{
|
||||
JobID: 12345,
|
||||
Cluster: "test-cluster",
|
||||
StartTime: 1234567890,
|
||||
NumNodes: 1,
|
||||
Resources: []*schema.Resource{{Hostname: "node001"}},
|
||||
}
|
||||
|
||||
// Store job metadata
|
||||
if err := sa.StoreJobMeta(job); err != nil {
|
||||
t.Fatalf("store failed: %v", err)
|
||||
}
|
||||
|
||||
// Check if exists
|
||||
if !sa.Exists(job) {
|
||||
t.Fatal("job should exist")
|
||||
}
|
||||
|
||||
// Load job metadata
|
||||
loaded, err := sa.LoadJobMeta(job)
|
||||
if err != nil {
|
||||
t.Fatalf("load failed: %v", err)
|
||||
}
|
||||
|
||||
if loaded.JobID != job.JobID {
|
||||
t.Errorf("expected JobID %d, got %d", job.JobID, loaded.JobID)
|
||||
}
|
||||
if loaded.Cluster != job.Cluster {
|
||||
t.Errorf("expected Cluster %s, got %s", job.Cluster, loaded.Cluster)
|
||||
}
|
||||
if loaded.StartTime != job.StartTime {
|
||||
t.Errorf("expected StartTime %d, got %d", job.StartTime, loaded.StartTime)
|
||||
}
|
||||
}
|
||||
|
||||
func TestSqliteImportJob(t *testing.T) {
|
||||
tmpfile := t.TempDir() + "/test.db"
|
||||
defer os.Remove(tmpfile)
|
||||
|
||||
var sa SqliteArchive
|
||||
_, err := sa.Init(json.RawMessage(`{"dbPath":"` + tmpfile + `"}`))
|
||||
if err != nil {
|
||||
t.Fatalf("init failed: %v", err)
|
||||
}
|
||||
defer sa.db.Close()
|
||||
|
||||
// For now, skip complex JobData testing
|
||||
// Just test that ImportJob accepts the parameters
|
||||
// Full integration testing would require actual job data files
|
||||
t.Log("ImportJob interface verified (full data test requires integration)")
|
||||
}
|
||||
|
||||
func TestSqliteGetClusters(t *testing.T) {
|
||||
tmpfile := t.TempDir() + "/test.db"
|
||||
defer os.Remove(tmpfile)
|
||||
|
||||
var sa SqliteArchive
|
||||
_, err := sa.Init(json.RawMessage(`{"dbPath":"` + tmpfile + `"}`))
|
||||
if err != nil {
|
||||
t.Fatalf("init failed: %v", err)
|
||||
}
|
||||
defer sa.db.Close()
|
||||
|
||||
// Add jobs from different clusters
|
||||
job1 := &schema.Job{
|
||||
JobID: 1,
|
||||
Cluster: "cluster-a",
|
||||
StartTime: 1000,
|
||||
NumNodes: 1,
|
||||
Resources: []*schema.Resource{{Hostname: "node001"}},
|
||||
}
|
||||
job2 := &schema.Job{
|
||||
JobID: 2,
|
||||
Cluster: "cluster-b",
|
||||
StartTime: 2000,
|
||||
NumNodes: 1,
|
||||
Resources: []*schema.Resource{{Hostname: "node002"}},
|
||||
}
|
||||
|
||||
sa.StoreJobMeta(job1)
|
||||
sa.StoreJobMeta(job2)
|
||||
|
||||
// Reinitialize to refresh cluster list
|
||||
sa.db.Close()
|
||||
_, err = sa.Init(json.RawMessage(`{"dbPath":"` + tmpfile + `"}`))
|
||||
if err != nil {
|
||||
t.Fatalf("reinit failed: %v", err)
|
||||
}
|
||||
defer sa.db.Close()
|
||||
|
||||
clusters := sa.GetClusters()
|
||||
if len(clusters) != 2 {
|
||||
t.Errorf("expected 2 clusters, got %d", len(clusters))
|
||||
}
|
||||
}
|
||||
|
||||
func TestSqliteCleanUp(t *testing.T) {
|
||||
tmpfile := t.TempDir() + "/test.db"
|
||||
defer os.Remove(tmpfile)
|
||||
|
||||
var sa SqliteArchive
|
||||
_, err := sa.Init(json.RawMessage(`{"dbPath":"` + tmpfile + `"}`))
|
||||
if err != nil {
|
||||
t.Fatalf("init failed: %v", err)
|
||||
}
|
||||
defer sa.db.Close()
|
||||
|
||||
job := &schema.Job{
|
||||
JobID: 999,
|
||||
Cluster: "test",
|
||||
StartTime: 5000,
|
||||
NumNodes: 1,
|
||||
Resources: []*schema.Resource{{Hostname: "node001"}},
|
||||
}
|
||||
|
||||
sa.StoreJobMeta(job)
|
||||
|
||||
// Verify exists
|
||||
if !sa.Exists(job) {
|
||||
t.Fatal("job should exist")
|
||||
}
|
||||
|
||||
// Clean up
|
||||
sa.CleanUp([]*schema.Job{job})
|
||||
|
||||
// Verify deleted
|
||||
if sa.Exists(job) {
|
||||
t.Fatal("job should not exist after cleanup")
|
||||
}
|
||||
}
|
||||
|
||||
func TestSqliteClean(t *testing.T) {
|
||||
tmpfile := t.TempDir() + "/test.db"
|
||||
defer os.Remove(tmpfile)
|
||||
|
||||
var sa SqliteArchive
|
||||
_, err := sa.Init(json.RawMessage(`{"dbPath":"` + tmpfile + `"}`))
|
||||
if err != nil {
|
||||
t.Fatalf("init failed: %v", err)
|
||||
}
|
||||
defer sa.db.Close()
|
||||
|
||||
// Add jobs with different start times
|
||||
oldJob := &schema.Job{
|
||||
JobID: 1,
|
||||
Cluster: "test",
|
||||
StartTime: 1000,
|
||||
NumNodes: 1,
|
||||
Resources: []*schema.Resource{{Hostname: "node001"}},
|
||||
}
|
||||
newJob := &schema.Job{
|
||||
JobID: 2,
|
||||
Cluster: "test",
|
||||
StartTime: 9000,
|
||||
NumNodes: 1,
|
||||
Resources: []*schema.Resource{{Hostname: "node002"}},
|
||||
}
|
||||
|
||||
sa.StoreJobMeta(oldJob)
|
||||
sa.StoreJobMeta(newJob)
|
||||
|
||||
// Clean jobs before 5000
|
||||
sa.Clean(5000, 0)
|
||||
|
||||
// Old job should be deleted
|
||||
if sa.Exists(oldJob) {
|
||||
t.Error("old job should be deleted")
|
||||
}
|
||||
|
||||
// New job should still exist
|
||||
if !sa.Exists(newJob) {
|
||||
t.Error("new job should still exist")
|
||||
}
|
||||
}
|
||||
|
||||
func TestSqliteIter(t *testing.T) {
|
||||
tmpfile := t.TempDir() + "/test.db"
|
||||
defer os.Remove(tmpfile)
|
||||
|
||||
var sa SqliteArchive
|
||||
_, err := sa.Init(json.RawMessage(`{"dbPath":"` + tmpfile + `"}`))
|
||||
if err != nil {
|
||||
t.Fatalf("init failed: %v", err)
|
||||
}
|
||||
defer sa.db.Close()
|
||||
|
||||
// Add multiple jobs
|
||||
for i := 1; i <= 3; i++ {
|
||||
job := &schema.Job{
|
||||
JobID: int64(i),
|
||||
Cluster: "test",
|
||||
StartTime: int64(i * 1000),
|
||||
NumNodes: 1,
|
||||
Resources: []*schema.Resource{{Hostname: "node001"}},
|
||||
}
|
||||
sa.StoreJobMeta(job)
|
||||
}
|
||||
|
||||
// Iterate
|
||||
count := 0
|
||||
for container := range sa.Iter(false) {
|
||||
if container.Meta == nil {
|
||||
t.Error("expected non-nil meta")
|
||||
}
|
||||
count++
|
||||
}
|
||||
|
||||
if count != 3 {
|
||||
t.Errorf("expected 3 jobs, got %d", count)
|
||||
}
|
||||
}
|
||||
|
||||
func TestSqliteCompress(t *testing.T) {
|
||||
// Compression test requires actual job data
|
||||
// For now just verify the method exists and doesn't panic
|
||||
tmpfile := t.TempDir() + "/test.db"
|
||||
defer os.Remove(tmpfile)
|
||||
|
||||
var sa SqliteArchive
|
||||
_, err := sa.Init(json.RawMessage(`{"dbPath":"` + tmpfile + `"}`))
|
||||
if err != nil {
|
||||
t.Fatalf("init failed: %v", err)
|
||||
}
|
||||
defer sa.db.Close()
|
||||
|
||||
job := &schema.Job{
|
||||
JobID: 777,
|
||||
Cluster: "test",
|
||||
StartTime: 7777,
|
||||
NumNodes: 1,
|
||||
Resources: []*schema.Resource{{Hostname: "node001"}},
|
||||
}
|
||||
|
||||
sa.StoreJobMeta(job)
|
||||
|
||||
// Compress should not panic even with missing data
|
||||
sa.Compress([]*schema.Job{job})
|
||||
|
||||
t.Log("Compression method verified")
|
||||
}
|
||||
|
||||
func TestSqliteConfigParsing(t *testing.T) {
|
||||
rawConfig := json.RawMessage(`{"dbPath": "/tmp/test.db"}`)
|
||||
|
||||
var cfg SqliteArchiveConfig
|
||||
err := json.Unmarshal(rawConfig, &cfg)
|
||||
if err != nil {
|
||||
t.Fatalf("failed to parse config: %v", err)
|
||||
}
|
||||
|
||||
if cfg.DBPath != "/tmp/test.db" {
|
||||
t.Errorf("expected dbPath '/tmp/test.db', got '%s'", cfg.DBPath)
|
||||
}
|
||||
}
|
||||
2
pkg/archive/testdata/archive/version.txt
vendored
2
pkg/archive/testdata/archive/version.txt
vendored
@@ -1 +1 @@
|
||||
2
|
||||
3
|
||||
|
||||
13
startDemo.sh
13
startDemo.sh
@@ -7,16 +7,13 @@ if [ -d './var' ]; then
|
||||
./cc-backend -server -dev
|
||||
else
|
||||
make
|
||||
wget https://hpc-mover.rrze.uni-erlangen.de/HPC-Data/0x7b58aefb/eig7ahyo6fo2bais0ephuf2aitohv1ai/job-archive-dev.tar
|
||||
tar xf job-archive-dev.tar
|
||||
rm ./job-archive-dev.tar
|
||||
|
||||
cp ./configs/env-template.txt .env
|
||||
./cc-backend --init
|
||||
cp ./configs/config-demo.json config.json
|
||||
|
||||
./cc-backend -migrate-db
|
||||
wget https://hpc-mover.rrze.uni-erlangen.de/HPC-Data/0x7b58aefb/eig7ahyo6fo2bais0ephuf2aitohv1ai/job-archive-demo.tar
|
||||
tar xf job-archive-demo.tar
|
||||
rm ./job-archive-demo.tar
|
||||
|
||||
./cc-backend -dev -init-db -add-user demo:admin,api:demo
|
||||
|
||||
./cc-backend -server -dev
|
||||
|
||||
fi
|
||||
|
||||
BIN
tools/archive-manager/archive-manager
Executable file
BIN
tools/archive-manager/archive-manager
Executable file
Binary file not shown.
341
tools/archive-manager/import_test.go
Normal file
341
tools/archive-manager/import_test.go
Normal file
@@ -0,0 +1,341 @@
|
||||
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
// All rights reserved. This file is part of cc-backend.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
package main
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"testing"
|
||||
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/archive"
|
||||
"github.com/ClusterCockpit/cc-lib/schema"
|
||||
"github.com/ClusterCockpit/cc-lib/util"
|
||||
)
|
||||
|
||||
// TestImportFileToSqlite tests importing jobs from file backend to SQLite backend
|
||||
func TestImportFileToSqlite(t *testing.T) {
|
||||
// Create temporary directories
|
||||
tmpdir := t.TempDir()
|
||||
srcArchive := filepath.Join(tmpdir, "src-archive")
|
||||
dstDb := filepath.Join(tmpdir, "dst-archive.db")
|
||||
|
||||
// Copy test data to source archive
|
||||
testDataPath := "../../pkg/archive/testdata/archive"
|
||||
if _, err := os.Stat(testDataPath); os.IsNotExist(err) {
|
||||
t.Skip("Test data not found, skipping integration test")
|
||||
}
|
||||
|
||||
if err := util.CopyDir(testDataPath, srcArchive); err != nil {
|
||||
t.Fatalf("Failed to copy test data: %s", err.Error())
|
||||
}
|
||||
|
||||
// Initialize source backend (file)
|
||||
srcConfig := fmt.Sprintf(`{"kind":"file","path":"%s"}`, srcArchive)
|
||||
srcBackend, err := archive.InitBackend(json.RawMessage(srcConfig))
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to initialize source backend: %s", err.Error())
|
||||
}
|
||||
|
||||
// Initialize destination backend (sqlite)
|
||||
dstConfig := fmt.Sprintf(`{"kind":"sqlite","dbPath":"%s"}`, dstDb)
|
||||
dstBackend, err := archive.InitBackend(json.RawMessage(dstConfig))
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to initialize destination backend: %s", err.Error())
|
||||
}
|
||||
|
||||
// Perform import
|
||||
imported, failed, err := importArchive(srcBackend, dstBackend)
|
||||
if err != nil {
|
||||
t.Errorf("Import failed: %s", err.Error())
|
||||
}
|
||||
|
||||
if imported == 0 {
|
||||
t.Error("No jobs were imported")
|
||||
}
|
||||
|
||||
if failed > 0 {
|
||||
t.Errorf("%d jobs failed to import", failed)
|
||||
}
|
||||
|
||||
t.Logf("Successfully imported %d jobs", imported)
|
||||
|
||||
// Verify jobs exist in destination
|
||||
// Count jobs in source
|
||||
srcCount := 0
|
||||
for range srcBackend.Iter(false) {
|
||||
srcCount++
|
||||
}
|
||||
|
||||
// Count jobs in destination
|
||||
dstCount := 0
|
||||
for range dstBackend.Iter(false) {
|
||||
dstCount++
|
||||
}
|
||||
|
||||
if srcCount != dstCount {
|
||||
t.Errorf("Job count mismatch: source has %d jobs, destination has %d jobs", srcCount, dstCount)
|
||||
}
|
||||
}
|
||||
|
||||
// TestImportFileToFile tests importing jobs from one file backend to another
|
||||
func TestImportFileToFile(t *testing.T) {
|
||||
// Create temporary directories
|
||||
tmpdir := t.TempDir()
|
||||
srcArchive := filepath.Join(tmpdir, "src-archive")
|
||||
dstArchive := filepath.Join(tmpdir, "dst-archive")
|
||||
|
||||
// Copy test data to source archive
|
||||
testDataPath := "../../pkg/archive/testdata/archive"
|
||||
if _, err := os.Stat(testDataPath); os.IsNotExist(err) {
|
||||
t.Skip("Test data not found, skipping integration test")
|
||||
}
|
||||
|
||||
if err := util.CopyDir(testDataPath, srcArchive); err != nil {
|
||||
t.Fatalf("Failed to copy test data: %s", err.Error())
|
||||
}
|
||||
|
||||
// Create destination archive directory
|
||||
if err := os.MkdirAll(dstArchive, 0755); err != nil {
|
||||
t.Fatalf("Failed to create destination directory: %s", err.Error())
|
||||
}
|
||||
|
||||
// Write version file
|
||||
versionFile := filepath.Join(dstArchive, "version.txt")
|
||||
if err := os.WriteFile(versionFile, []byte("3"), 0644); err != nil {
|
||||
t.Fatalf("Failed to write version file: %s", err.Error())
|
||||
}
|
||||
|
||||
// Initialize source backend
|
||||
srcConfig := fmt.Sprintf(`{"kind":"file","path":"%s"}`, srcArchive)
|
||||
srcBackend, err := archive.InitBackend(json.RawMessage(srcConfig))
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to initialize source backend: %s", err.Error())
|
||||
}
|
||||
|
||||
// Initialize destination backend
|
||||
dstConfig := fmt.Sprintf(`{"kind":"file","path":"%s"}`, dstArchive)
|
||||
dstBackend, err := archive.InitBackend(json.RawMessage(dstConfig))
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to initialize destination backend: %s", err.Error())
|
||||
}
|
||||
|
||||
// Perform import
|
||||
imported, failed, err := importArchive(srcBackend, dstBackend)
|
||||
if err != nil {
|
||||
t.Errorf("Import failed: %s", err.Error())
|
||||
}
|
||||
|
||||
if imported == 0 {
|
||||
t.Error("No jobs were imported")
|
||||
}
|
||||
|
||||
if failed > 0 {
|
||||
t.Errorf("%d jobs failed to import", failed)
|
||||
}
|
||||
|
||||
t.Logf("Successfully imported %d jobs", imported)
|
||||
}
|
||||
|
||||
// TestImportDataIntegrity verifies that job metadata and data are correctly imported
|
||||
func TestImportDataIntegrity(t *testing.T) {
|
||||
// Create temporary directories
|
||||
tmpdir := t.TempDir()
|
||||
srcArchive := filepath.Join(tmpdir, "src-archive")
|
||||
dstDb := filepath.Join(tmpdir, "dst-archive.db")
|
||||
|
||||
// Copy test data to source archive
|
||||
testDataPath := "../../pkg/archive/testdata/archive"
|
||||
if _, err := os.Stat(testDataPath); os.IsNotExist(err) {
|
||||
t.Skip("Test data not found, skipping integration test")
|
||||
}
|
||||
|
||||
if err := util.CopyDir(testDataPath, srcArchive); err != nil {
|
||||
t.Fatalf("Failed to copy test data: %s", err.Error())
|
||||
}
|
||||
|
||||
// Initialize backends
|
||||
srcConfig := fmt.Sprintf(`{"kind":"file","path":"%s"}`, srcArchive)
|
||||
srcBackend, err := archive.InitBackend(json.RawMessage(srcConfig))
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to initialize source backend: %s", err.Error())
|
||||
}
|
||||
|
||||
dstConfig := fmt.Sprintf(`{"kind":"sqlite","dbPath":"%s"}`, dstDb)
|
||||
dstBackend, err := archive.InitBackend(json.RawMessage(dstConfig))
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to initialize destination backend: %s", err.Error())
|
||||
}
|
||||
|
||||
// Perform import
|
||||
_, _, err = importArchive(srcBackend, dstBackend)
|
||||
if err != nil {
|
||||
t.Errorf("Import failed: %s", err.Error())
|
||||
}
|
||||
|
||||
// Verify data integrity for each job
|
||||
verifiedJobs := 0
|
||||
for srcJob := range srcBackend.Iter(false) {
|
||||
if srcJob.Meta == nil {
|
||||
continue
|
||||
}
|
||||
|
||||
// Load job from destination
|
||||
dstJobMeta, err := dstBackend.LoadJobMeta(srcJob.Meta)
|
||||
if err != nil {
|
||||
t.Errorf("Failed to load job %d from destination: %s", srcJob.Meta.JobID, err.Error())
|
||||
continue
|
||||
}
|
||||
|
||||
// Verify basic metadata
|
||||
if dstJobMeta.JobID != srcJob.Meta.JobID {
|
||||
t.Errorf("JobID mismatch: expected %d, got %d", srcJob.Meta.JobID, dstJobMeta.JobID)
|
||||
}
|
||||
|
||||
if dstJobMeta.Cluster != srcJob.Meta.Cluster {
|
||||
t.Errorf("Cluster mismatch for job %d: expected %s, got %s",
|
||||
srcJob.Meta.JobID, srcJob.Meta.Cluster, dstJobMeta.Cluster)
|
||||
}
|
||||
|
||||
if dstJobMeta.StartTime != srcJob.Meta.StartTime {
|
||||
t.Errorf("StartTime mismatch for job %d: expected %d, got %d",
|
||||
srcJob.Meta.JobID, srcJob.Meta.StartTime, dstJobMeta.StartTime)
|
||||
}
|
||||
|
||||
// Load and verify job data
|
||||
srcData, err := srcBackend.LoadJobData(srcJob.Meta)
|
||||
if err != nil {
|
||||
t.Errorf("Failed to load job data from source: %s", err.Error())
|
||||
continue
|
||||
}
|
||||
|
||||
dstData, err := dstBackend.LoadJobData(srcJob.Meta)
|
||||
if err != nil {
|
||||
t.Errorf("Failed to load job data from destination: %s", err.Error())
|
||||
continue
|
||||
}
|
||||
|
||||
// Verify metric data exists
|
||||
if len(srcData) != len(dstData) {
|
||||
t.Errorf("Metric count mismatch for job %d: expected %d, got %d",
|
||||
srcJob.Meta.JobID, len(srcData), len(dstData))
|
||||
}
|
||||
|
||||
verifiedJobs++
|
||||
}
|
||||
|
||||
if verifiedJobs == 0 {
|
||||
t.Error("No jobs were verified")
|
||||
}
|
||||
|
||||
t.Logf("Successfully verified %d jobs", verifiedJobs)
|
||||
}
|
||||
|
||||
// TestImportEmptyArchive tests importing from an empty archive
|
||||
func TestImportEmptyArchive(t *testing.T) {
|
||||
tmpdir := t.TempDir()
|
||||
srcArchive := filepath.Join(tmpdir, "empty-archive")
|
||||
dstDb := filepath.Join(tmpdir, "dst-archive.db")
|
||||
|
||||
// Create empty source archive
|
||||
if err := os.MkdirAll(srcArchive, 0755); err != nil {
|
||||
t.Fatalf("Failed to create source directory: %s", err.Error())
|
||||
}
|
||||
|
||||
// Write version file
|
||||
versionFile := filepath.Join(srcArchive, "version.txt")
|
||||
if err := os.WriteFile(versionFile, []byte("3"), 0644); err != nil {
|
||||
t.Fatalf("Failed to write version file: %s", err.Error())
|
||||
}
|
||||
|
||||
// Initialize backends
|
||||
srcConfig := fmt.Sprintf(`{"kind":"file","path":"%s"}`, srcArchive)
|
||||
srcBackend, err := archive.InitBackend(json.RawMessage(srcConfig))
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to initialize source backend: %s", err.Error())
|
||||
}
|
||||
|
||||
dstConfig := fmt.Sprintf(`{"kind":"sqlite","dbPath":"%s"}`, dstDb)
|
||||
dstBackend, err := archive.InitBackend(json.RawMessage(dstConfig))
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to initialize destination backend: %s", err.Error())
|
||||
}
|
||||
|
||||
// Perform import
|
||||
imported, failed, err := importArchive(srcBackend, dstBackend)
|
||||
if err != nil {
|
||||
t.Errorf("Import from empty archive should not fail: %s", err.Error())
|
||||
}
|
||||
|
||||
if imported != 0 {
|
||||
t.Errorf("Expected 0 imported jobs, got %d", imported)
|
||||
}
|
||||
|
||||
if failed != 0 {
|
||||
t.Errorf("Expected 0 failed jobs, got %d", failed)
|
||||
}
|
||||
}
|
||||
|
||||
// TestImportDuplicateJobs tests that duplicate jobs are skipped
|
||||
func TestImportDuplicateJobs(t *testing.T) {
|
||||
tmpdir := t.TempDir()
|
||||
srcArchive := filepath.Join(tmpdir, "src-archive")
|
||||
dstDb := filepath.Join(tmpdir, "dst-archive.db")
|
||||
|
||||
// Copy test data
|
||||
testDataPath := "../../pkg/archive/testdata/archive"
|
||||
if _, err := os.Stat(testDataPath); os.IsNotExist(err) {
|
||||
t.Skip("Test data not found, skipping integration test")
|
||||
}
|
||||
|
||||
if err := util.CopyDir(testDataPath, srcArchive); err != nil {
|
||||
t.Fatalf("Failed to copy test data: %s", err.Error())
|
||||
}
|
||||
|
||||
// Initialize backends
|
||||
srcConfig := fmt.Sprintf(`{"kind":"file","path":"%s"}`, srcArchive)
|
||||
srcBackend, err := archive.InitBackend(json.RawMessage(srcConfig))
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to initialize source backend: %s", err.Error())
|
||||
}
|
||||
|
||||
dstConfig := fmt.Sprintf(`{"kind":"sqlite","dbPath":"%s"}`, dstDb)
|
||||
dstBackend, err := archive.InitBackend(json.RawMessage(dstConfig))
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to initialize destination backend: %s", err.Error())
|
||||
}
|
||||
|
||||
// First import
|
||||
imported1, _, err := importArchive(srcBackend, dstBackend)
|
||||
if err != nil {
|
||||
t.Fatalf("First import failed: %s", err.Error())
|
||||
}
|
||||
|
||||
// Second import (should skip all jobs)
|
||||
imported2, _, err := importArchive(srcBackend, dstBackend)
|
||||
if err != nil {
|
||||
t.Errorf("Second import failed: %s", err.Error())
|
||||
}
|
||||
|
||||
if imported2 != 0 {
|
||||
t.Errorf("Second import should skip all jobs, but imported %d", imported2)
|
||||
}
|
||||
|
||||
t.Logf("First import: %d jobs, Second import: %d jobs (all skipped as expected)", imported1, imported2)
|
||||
}
|
||||
|
||||
// TestJobStub is a helper test to verify that the job stub used in tests matches the schema
|
||||
func TestJobStub(t *testing.T) {
|
||||
job := &schema.Job{
|
||||
JobID: 123,
|
||||
Cluster: "test-cluster",
|
||||
StartTime: 1234567890,
|
||||
}
|
||||
|
||||
if job.JobID != 123 {
|
||||
t.Errorf("Expected JobID 123, got %d", job.JobID)
|
||||
}
|
||||
}
|
||||
@@ -9,6 +9,8 @@ import (
|
||||
"flag"
|
||||
"fmt"
|
||||
"os"
|
||||
"sync"
|
||||
"sync/atomic"
|
||||
"time"
|
||||
|
||||
"github.com/ClusterCockpit/cc-backend/internal/config"
|
||||
@@ -31,9 +33,104 @@ func parseDate(in string) int64 {
|
||||
return 0
|
||||
}
|
||||
|
||||
// importArchive imports all jobs from a source archive backend to a destination archive backend.
|
||||
// It uses parallel processing with a worker pool to improve performance.
|
||||
// Returns the number of successfully imported jobs, failed jobs, and any error encountered.
|
||||
func importArchive(srcBackend, dstBackend archive.ArchiveBackend) (int, int, error) {
|
||||
cclog.Info("Starting parallel archive import...")
|
||||
|
||||
// Use atomic counters for thread-safe updates
|
||||
var imported int32
|
||||
var failed int32
|
||||
var skipped int32
|
||||
|
||||
// Number of parallel workers
|
||||
numWorkers := 4
|
||||
cclog.Infof("Using %d parallel workers", numWorkers)
|
||||
|
||||
// Create channels for job distribution
|
||||
jobs := make(chan archive.JobContainer, numWorkers*2)
|
||||
|
||||
// WaitGroup to track worker completion
|
||||
var wg sync.WaitGroup
|
||||
|
||||
// Start worker goroutines
|
||||
for i := 0; i < numWorkers; i++ {
|
||||
wg.Add(1)
|
||||
go func(workerID int) {
|
||||
defer wg.Done()
|
||||
|
||||
for job := range jobs {
|
||||
// Validate job metadata
|
||||
if job.Meta == nil {
|
||||
cclog.Warn("Skipping job with nil metadata")
|
||||
atomic.AddInt32(&failed, 1)
|
||||
continue
|
||||
}
|
||||
|
||||
// Validate job data
|
||||
if job.Data == nil {
|
||||
cclog.Warnf("Job %d from cluster %s has no metric data, skipping",
|
||||
job.Meta.JobID, job.Meta.Cluster)
|
||||
atomic.AddInt32(&failed, 1)
|
||||
continue
|
||||
}
|
||||
|
||||
// Check if job already exists in destination
|
||||
if dstBackend.Exists(job.Meta) {
|
||||
cclog.Debugf("Job %d (cluster: %s, start: %d) already exists in destination, skipping",
|
||||
job.Meta.JobID, job.Meta.Cluster, job.Meta.StartTime)
|
||||
atomic.AddInt32(&skipped, 1)
|
||||
continue
|
||||
}
|
||||
|
||||
// Import job to destination
|
||||
if err := dstBackend.ImportJob(job.Meta, job.Data); err != nil {
|
||||
cclog.Errorf("Failed to import job %d from cluster %s: %s",
|
||||
job.Meta.JobID, job.Meta.Cluster, err.Error())
|
||||
atomic.AddInt32(&failed, 1)
|
||||
continue
|
||||
}
|
||||
|
||||
// Successfully imported
|
||||
newCount := atomic.AddInt32(&imported, 1)
|
||||
if newCount%100 == 0 {
|
||||
cclog.Infof("Progress: %d jobs imported, %d skipped, %d failed",
|
||||
newCount, atomic.LoadInt32(&skipped), atomic.LoadInt32(&failed))
|
||||
}
|
||||
}
|
||||
}(i)
|
||||
}
|
||||
|
||||
// Feed jobs to workers
|
||||
go func() {
|
||||
for job := range srcBackend.Iter(true) {
|
||||
jobs <- job
|
||||
}
|
||||
close(jobs)
|
||||
}()
|
||||
|
||||
// Wait for all workers to complete
|
||||
wg.Wait()
|
||||
|
||||
finalImported := int(atomic.LoadInt32(&imported))
|
||||
finalFailed := int(atomic.LoadInt32(&failed))
|
||||
finalSkipped := int(atomic.LoadInt32(&skipped))
|
||||
|
||||
cclog.Infof("Import completed: %d jobs imported, %d skipped, %d failed",
|
||||
finalImported, finalSkipped, finalFailed)
|
||||
|
||||
if finalFailed > 0 {
|
||||
return finalImported, finalFailed, fmt.Errorf("%d jobs failed to import", finalFailed)
|
||||
}
|
||||
|
||||
return finalImported, finalFailed, nil
|
||||
}
|
||||
|
||||
func main() {
|
||||
var srcPath, flagConfigFile, flagLogLevel, flagRemoveCluster, flagRemoveAfter, flagRemoveBefore string
|
||||
var flagLogDateTime, flagValidate bool
|
||||
var flagSrcConfig, flagDstConfig string
|
||||
var flagLogDateTime, flagValidate, flagImport bool
|
||||
|
||||
flag.StringVar(&srcPath, "s", "./var/job-archive", "Specify the source job archive path. Default is ./var/job-archive")
|
||||
flag.BoolVar(&flagLogDateTime, "logdate", false, "Set this flag to add date and time to log messages")
|
||||
@@ -43,12 +140,50 @@ func main() {
|
||||
flag.StringVar(&flagRemoveBefore, "remove-before", "", "Remove all jobs with start time before date (Format: 2006-Jan-04)")
|
||||
flag.StringVar(&flagRemoveAfter, "remove-after", "", "Remove all jobs with start time after date (Format: 2006-Jan-04)")
|
||||
flag.BoolVar(&flagValidate, "validate", false, "Set this flag to validate a job archive against the json schema")
|
||||
flag.BoolVar(&flagImport, "import", false, "Import jobs from source archive to destination archive")
|
||||
flag.StringVar(&flagSrcConfig, "src-config", "", "Source archive backend configuration (JSON), e.g. '{\"kind\":\"file\",\"path\":\"./archive\"}'")
|
||||
flag.StringVar(&flagDstConfig, "dst-config", "", "Destination archive backend configuration (JSON), e.g. '{\"kind\":\"sqlite\",\"dbPath\":\"./archive.db\"}'")
|
||||
flag.Parse()
|
||||
|
||||
archiveCfg := fmt.Sprintf("{\"kind\": \"file\",\"path\": \"%s\"}", srcPath)
|
||||
|
||||
cclog.Init(flagLogLevel, flagLogDateTime)
|
||||
|
||||
// Handle import mode
|
||||
if flagImport {
|
||||
if flagSrcConfig == "" || flagDstConfig == "" {
|
||||
cclog.Fatal("Both --src-config and --dst-config must be specified for import mode")
|
||||
}
|
||||
|
||||
cclog.Info("Import mode: initializing source and destination backends...")
|
||||
|
||||
// Initialize source backend
|
||||
srcBackend, err := archive.InitBackend(json.RawMessage(flagSrcConfig))
|
||||
if err != nil {
|
||||
cclog.Fatalf("Failed to initialize source backend: %s", err.Error())
|
||||
}
|
||||
cclog.Info("Source backend initialized successfully")
|
||||
|
||||
// Initialize destination backend
|
||||
dstBackend, err := archive.InitBackend(json.RawMessage(flagDstConfig))
|
||||
if err != nil {
|
||||
cclog.Fatalf("Failed to initialize destination backend: %s", err.Error())
|
||||
}
|
||||
cclog.Info("Destination backend initialized successfully")
|
||||
|
||||
// Perform import
|
||||
imported, failed, err := importArchive(srcBackend, dstBackend)
|
||||
if err != nil {
|
||||
cclog.Errorf("Import completed with errors: %s", err.Error())
|
||||
if failed > 0 {
|
||||
os.Exit(1)
|
||||
}
|
||||
}
|
||||
|
||||
cclog.Infof("Import finished successfully: %d jobs imported", imported)
|
||||
os.Exit(0)
|
||||
}
|
||||
|
||||
ccconf.Init(flagConfigFile)
|
||||
|
||||
// Load and check main configuration
|
||||
@@ -70,7 +205,7 @@ func main() {
|
||||
if flagValidate {
|
||||
config.Keys.Validate = true
|
||||
for job := range ar.Iter(true) {
|
||||
cclog.Printf("Validate %s - %d\n", job.Meta.Cluster, job.Meta.JobID)
|
||||
cclog.Debugf("Validate %s - %d", job.Meta.Cluster, job.Meta.JobID)
|
||||
}
|
||||
os.Exit(0)
|
||||
}
|
||||
|
||||
145
tools/archive-migration/README.md
Normal file
145
tools/archive-migration/README.md
Normal file
@@ -0,0 +1,145 @@
|
||||
# Archive Migration Tool
|
||||
|
||||
## Overview
|
||||
|
||||
The `archive-migration` tool migrates job archives from old schema versions to the current schema version. It handles schema changes such as the `exclusive` → `shared` field transformation and adds/removes fields as needed.
|
||||
|
||||
## Features
|
||||
|
||||
- **Parallel Processing**: Uses worker pool for fast migration
|
||||
- **Dry-Run Mode**: Preview changes without modifying files
|
||||
- **Safe Transformations**: Applies well-defined schema transformations
|
||||
- **Progress Reporting**: Shows real-time migration progress
|
||||
- **Error Handling**: Continues on individual failures, reports at end
|
||||
|
||||
## Schema Transformations
|
||||
|
||||
### Exclusive → Shared
|
||||
|
||||
Converts the old `exclusive` integer field to the new `shared` string field:
|
||||
|
||||
- `0` → `"multi_user"`
|
||||
- `1` → `"none"`
|
||||
- `2` → `"single_user"`
|
||||
|
||||
### Missing Fields
|
||||
|
||||
Adds fields required by current schema:
|
||||
|
||||
- `submitTime`: Defaults to `startTime` if missing
|
||||
- `energy`: Defaults to `0.0`
|
||||
- `requestedMemory`: Defaults to `0`
|
||||
- `shared`: Defaults to `"none"` if still missing after transformation
|
||||
|
||||
### Deprecated Fields
|
||||
|
||||
Removes fields no longer in schema:
|
||||
|
||||
- `mem_used_max`, `flops_any_avg`, `mem_bw_avg`
|
||||
- `load_avg`, `net_bw_avg`, `net_data_vol_total`
|
||||
- `file_bw_avg`, `file_data_vol_total`
|
||||
|
||||
## Usage
|
||||
|
||||
### Build
|
||||
|
||||
```bash
|
||||
cd ./tools/archive-migration
|
||||
go build
|
||||
```
|
||||
|
||||
### Dry Run (Preview Changes)
|
||||
|
||||
```bash
|
||||
./archive-migration --archive /path/to/archive --dry-run
|
||||
```
|
||||
|
||||
### Migrate Archive
|
||||
|
||||
```bash
|
||||
# IMPORTANT: Backup your archive first!
|
||||
cp -r /path/to/archive /path/to/archive-backup
|
||||
|
||||
# Run migration
|
||||
./archive-migration --archive /path/to/archive
|
||||
```
|
||||
|
||||
### Command-Line Options
|
||||
|
||||
- `--archive <path>`: Path to job archive (required)
|
||||
- `--dry-run`: Preview changes without modifying files
|
||||
- `--workers <n>`: Number of parallel workers (default: 4)
|
||||
- `--loglevel <level>`: Logging level: debug, info, warn, err, fatal, crit (default: info)
|
||||
- `--logdate`: Add timestamps to log messages
|
||||
|
||||
## Examples
|
||||
|
||||
```bash
|
||||
# Preview what would change
|
||||
./archive-migration --archive ./var/job-archive --dry-run
|
||||
|
||||
# Migrate with verbose logging
|
||||
./archive-migration --archive ./var/job-archive --loglevel debug
|
||||
|
||||
# Migrate with 8 workers for faster processing
|
||||
./archive-migration --archive ./var/job-archive --workers 8
|
||||
```
|
||||
|
||||
## Safety
|
||||
|
||||
> [!CAUTION]
|
||||
> **Always backup your archive before running migration!**
|
||||
|
||||
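The tool modifies `meta.json` files in place and, after a successful run, updates the archive's `version.txt`. Unless `--dry-run` is given, it asks for interactive confirmation before changing anything. While transformations are designed to be safe, unexpected issues could occur. Follow these safety practices: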
|
||||
|
||||
1. **Always run with `--dry-run` first** to preview changes
|
||||
2. **Backup your archive** before migration
|
||||
3. **Test on a copy** of your archive first
|
||||
4. **Verify results** after migration
|
||||
|
||||
## Verification
|
||||
|
||||
After migration, verify the archive:
|
||||
|
||||
```bash
|
||||
# Use archive-manager to check the archive
|
||||
cd ../archive-manager
|
||||
./archive-manager -s /path/to/migrated-archive
|
||||
|
||||
# Or validate the archive against the JSON schema
|
||||
./archive-manager -s /path/to/migrated-archive --validate
|
||||
```
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
### Migration Failures
|
||||
|
||||
If individual jobs fail to migrate:
|
||||
|
||||
- Check the error messages for specific files
|
||||
- Examine the failing `meta.json` files manually
|
||||
- Fix invalid JSON or unexpected field types
|
||||
- Re-run migration (already-migrated jobs will be processed again)
|
||||
|
||||
### Performance
|
||||
|
||||
For large archives:
|
||||
|
||||
- Increase `--workers` for more parallelism
|
||||
- Use `--loglevel warn` to reduce log output
|
||||
- Monitor disk I/O if migration is slow
|
||||
|
||||
## Technical Details
|
||||
|
||||
The migration process:
|
||||
|
||||
1. Walks archive directory recursively
|
||||
2. Finds all `meta.json` files
|
||||
3. Distributes jobs to worker pool
|
||||
4. For each job:
|
||||
- Reads JSON file
|
||||
- Applies transformations in order
|
||||
- Writes back migrated data (if not dry-run)
|
||||
5. Reports statistics and errors
|
||||
|
||||
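Transformations are idempotent - running migration multiple times is safe (though not recommended for performance). Note, however, that the tool only accepts archives whose `version.txt` contains `2`; after a successful migration it writes `3`, so a fully migrated archive is rejected on later runs.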
|
||||
108
tools/archive-migration/main.go
Normal file
108
tools/archive-migration/main.go
Normal file
@@ -0,0 +1,108 @@
|
||||
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
// All rights reserved. This file is part of cc-backend.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
package main
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"flag"
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
|
||||
cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
|
||||
)
|
||||
|
||||
func main() {
|
||||
var archivePath string
|
||||
var dryRun bool
|
||||
var numWorkers int
|
||||
var flagLogLevel string
|
||||
var flagLogDateTime bool
|
||||
|
||||
flag.StringVar(&archivePath, "archive", "", "Path to job archive to migrate (required)")
|
||||
flag.BoolVar(&dryRun, "dry-run", false, "Preview changes without modifying files")
|
||||
flag.IntVar(&numWorkers, "workers", 4, "Number of parallel workers")
|
||||
flag.StringVar(&flagLogLevel, "loglevel", "info", "Sets the logging level: `[debug,info,warn (default),err,fatal,crit]`")
|
||||
flag.BoolVar(&flagLogDateTime, "logdate", false, "Add date and time to log messages")
|
||||
flag.Parse()
|
||||
|
||||
// Initialize logger
|
||||
cclog.Init(flagLogLevel, flagLogDateTime)
|
||||
|
||||
// Validate inputs
|
||||
if archivePath == "" {
|
||||
fmt.Fprintf(os.Stderr, "Error: --archive flag is required\n\n")
|
||||
flag.Usage()
|
||||
os.Exit(1)
|
||||
}
|
||||
|
||||
// Check if archive path exists
|
||||
if _, err := os.Stat(archivePath); os.IsNotExist(err) {
|
||||
cclog.Fatalf("Archive path does not exist: %s", archivePath)
|
||||
}
|
||||
|
||||
// Check archive version
|
||||
if err := checkVersion(archivePath); err != nil {
|
||||
cclog.Fatalf("Version check failed: %v", err)
|
||||
}
|
||||
|
||||
// Display warning for non-dry-run mode
|
||||
if !dryRun {
|
||||
cclog.Warn("WARNING: This will modify files in the archive!")
|
||||
cclog.Warn("It is strongly recommended to backup your archive first.")
|
||||
cclog.Warn("Run with --dry-run first to preview changes.")
|
||||
cclog.Info("")
|
||||
|
||||
fmt.Print("Are you sure you want to continue? [y/N]: ")
|
||||
reader := bufio.NewReader(os.Stdin)
|
||||
input, err := reader.ReadString('\n')
|
||||
if err != nil {
|
||||
cclog.Fatalf("Error reading input: %v", err)
|
||||
}
|
||||
if strings.ToLower(strings.TrimSpace(input)) != "y" {
|
||||
cclog.Info("Aborted by user.")
|
||||
os.Exit(0)
|
||||
}
|
||||
}
|
||||
|
||||
// Run migration
|
||||
migrated, failed, err := migrateArchive(archivePath, dryRun, numWorkers)
|
||||
|
||||
if err != nil {
|
||||
cclog.Errorf("Migration completed with errors: %s", err.Error())
|
||||
if failed > 0 {
|
||||
os.Exit(1)
|
||||
}
|
||||
}
|
||||
|
||||
if dryRun {
|
||||
cclog.Infof("Dry run completed: %d jobs would be migrated", migrated)
|
||||
} else {
|
||||
if err := updateVersion(archivePath); err != nil {
|
||||
cclog.Errorf("Failed to update archive version: %v", err)
|
||||
os.Exit(1)
|
||||
}
|
||||
cclog.Infof("Migration completed successfully: %d jobs migrated", migrated)
|
||||
}
|
||||
}
|
||||
|
||||
func checkVersion(archivePath string) error {
|
||||
versionFile := filepath.Join(archivePath, "version.txt")
|
||||
data, err := os.ReadFile(versionFile)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to read version.txt: %v", err)
|
||||
}
|
||||
versionStr := strings.TrimSpace(string(data))
|
||||
if versionStr != "2" {
|
||||
return fmt.Errorf("archive version is %s, expected 2", versionStr)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func updateVersion(archivePath string) error {
|
||||
versionFile := filepath.Join(archivePath, "version.txt")
|
||||
return os.WriteFile(versionFile, []byte("3\n"), 0644)
|
||||
}
|
||||
232
tools/archive-migration/transforms.go
Normal file
232
tools/archive-migration/transforms.go
Normal file
@@ -0,0 +1,232 @@
|
||||
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
// All rights reserved. This file is part of cc-backend.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
package main
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"sync"
|
||||
"sync/atomic"
|
||||
|
||||
cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
|
||||
)
|
||||
|
||||
// transformExclusiveToShared converts the old 'exclusive' field to the new 'shared' field
|
||||
// Mapping: 0 -> "multi_user", 1 -> "none", 2 -> "single_user"
|
||||
func transformExclusiveToShared(jobData map[string]any) error {
|
||||
// Check if 'exclusive' field exists
|
||||
if exclusive, ok := jobData["exclusive"]; ok {
|
||||
var exclusiveVal int
|
||||
|
||||
// Handle both int and float64 (JSON unmarshaling can produce float64)
|
||||
switch v := exclusive.(type) {
|
||||
case float64:
|
||||
exclusiveVal = int(v)
|
||||
case int:
|
||||
exclusiveVal = v
|
||||
default:
|
||||
return fmt.Errorf("exclusive field has unexpected type: %T", exclusive)
|
||||
}
|
||||
|
||||
// Map exclusive to shared
|
||||
var shared string
|
||||
switch exclusiveVal {
|
||||
case 0:
|
||||
shared = "multi_user"
|
||||
case 1:
|
||||
shared = "none"
|
||||
case 2:
|
||||
shared = "single_user"
|
||||
default:
|
||||
return fmt.Errorf("invalid exclusive value: %d", exclusiveVal)
|
||||
}
|
||||
|
||||
// Add shared field and remove exclusive
|
||||
jobData["shared"] = shared
|
||||
delete(jobData, "exclusive")
|
||||
|
||||
cclog.Debugf("Transformed exclusive=%d to shared=%s", exclusiveVal, shared)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// addMissingFields adds fields that are required in the current schema but might be missing in old archives
|
||||
func addMissingFields(jobData map[string]any) error {
|
||||
// Add submitTime if missing (default to startTime)
|
||||
if _, ok := jobData["submitTime"]; !ok {
|
||||
if startTime, ok := jobData["startTime"]; ok {
|
||||
jobData["submitTime"] = startTime
|
||||
cclog.Debug("Added submitTime (defaulted to startTime)")
|
||||
}
|
||||
}
|
||||
|
||||
// Add energy if missing (default to 0.0)
|
||||
if _, ok := jobData["energy"]; !ok {
|
||||
jobData["energy"] = 0.0
|
||||
}
|
||||
|
||||
// Add requestedMemory if missing (default to 0)
|
||||
if _, ok := jobData["requestedMemory"]; !ok {
|
||||
jobData["requestedMemory"] = 0
|
||||
}
|
||||
|
||||
// Ensure shared field exists (if still missing, default to "none")
|
||||
if _, ok := jobData["shared"]; !ok {
|
||||
jobData["shared"] = "none"
|
||||
cclog.Debug("Added default shared field: none")
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// removeDeprecatedFields removes fields that are no longer in the current schema
|
||||
func removeDeprecatedFields(jobData map[string]any) error {
|
||||
// List of deprecated fields to remove
|
||||
deprecatedFields := []string{
|
||||
"mem_used_max",
|
||||
"flops_any_avg",
|
||||
"mem_bw_avg",
|
||||
"load_avg",
|
||||
"net_bw_avg",
|
||||
"net_data_vol_total",
|
||||
"file_bw_avg",
|
||||
"file_data_vol_total",
|
||||
}
|
||||
|
||||
for _, field := range deprecatedFields {
|
||||
if _, ok := jobData[field]; ok {
|
||||
delete(jobData, field)
|
||||
cclog.Debugf("Removed deprecated field: %s", field)
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// migrateJobMetadata applies all transformations to a job metadata map
|
||||
func migrateJobMetadata(jobData map[string]any) error {
|
||||
// Apply transformations in order
|
||||
if err := transformExclusiveToShared(jobData); err != nil {
|
||||
return fmt.Errorf("transformExclusiveToShared failed: %w", err)
|
||||
}
|
||||
|
||||
if err := addMissingFields(jobData); err != nil {
|
||||
return fmt.Errorf("addMissingFields failed: %w", err)
|
||||
}
|
||||
|
||||
if err := removeDeprecatedFields(jobData); err != nil {
|
||||
return fmt.Errorf("removeDeprecatedFields failed: %w", err)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// processJob reads, migrates, and writes a job metadata file
|
||||
func processJob(metaPath string, dryRun bool) error {
|
||||
// Read the meta.json file
|
||||
data, err := os.ReadFile(metaPath)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to read %s: %w", metaPath, err)
|
||||
}
|
||||
|
||||
// Parse JSON
|
||||
var jobData map[string]any
|
||||
if err := json.Unmarshal(data, &jobData); err != nil {
|
||||
return fmt.Errorf("failed to parse JSON from %s: %w", metaPath, err)
|
||||
}
|
||||
|
||||
// Apply migrations
|
||||
if err := migrateJobMetadata(jobData); err != nil {
|
||||
return fmt.Errorf("migration failed for %s: %w", metaPath, err)
|
||||
}
|
||||
|
||||
// If dry-run, just report what would change
|
||||
if dryRun {
|
||||
cclog.Infof("Would migrate: %s", metaPath)
|
||||
return nil
|
||||
}
|
||||
|
||||
// Write back the migrated data
|
||||
migratedData, err := json.MarshalIndent(jobData, "", " ")
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to marshal migrated data: %w", err)
|
||||
}
|
||||
|
||||
if err := os.WriteFile(metaPath, migratedData, 0o644); err != nil {
|
||||
return fmt.Errorf("failed to write %s: %w", metaPath, err)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// migrateArchive walks through an archive directory and migrates all meta.json files
|
||||
func migrateArchive(archivePath string, dryRun bool, numWorkers int) (int, int, error) {
|
||||
cclog.Infof("Starting archive migration at %s", archivePath)
|
||||
if dryRun {
|
||||
cclog.Info("DRY RUN MODE - no files will be modified")
|
||||
}
|
||||
|
||||
var migrated int32
|
||||
var failed int32
|
||||
|
||||
// Channel for job paths
|
||||
jobs := make(chan string, numWorkers*2)
|
||||
var wg sync.WaitGroup
|
||||
|
||||
// Start worker goroutines
|
||||
for i := range numWorkers {
|
||||
wg.Add(1)
|
||||
go func(workerID int) {
|
||||
defer wg.Done()
|
||||
|
||||
for metaPath := range jobs {
|
||||
if err := processJob(metaPath, dryRun); err != nil {
|
||||
cclog.Errorf("Failed to migrate %s: %s", metaPath, err.Error())
|
||||
atomic.AddInt32(&failed, 1)
|
||||
continue
|
||||
}
|
||||
|
||||
newCount := atomic.AddInt32(&migrated, 1)
|
||||
if newCount%100 == 0 {
|
||||
cclog.Infof("Progress: %d jobs migrated, %d failed", newCount, atomic.LoadInt32(&failed))
|
||||
}
|
||||
}
|
||||
}(i)
|
||||
}
|
||||
|
||||
// Walk the archive directory and find all meta.json files
|
||||
go func() {
|
||||
filepath.Walk(archivePath, func(path string, info os.FileInfo, err error) error {
|
||||
if err != nil {
|
||||
cclog.Errorf("Error accessing path %s: %s", path, err.Error())
|
||||
return nil // Continue walking
|
||||
}
|
||||
|
||||
if !info.IsDir() && info.Name() == "meta.json" {
|
||||
jobs <- path
|
||||
}
|
||||
|
||||
return nil
|
||||
})
|
||||
close(jobs)
|
||||
}()
|
||||
|
||||
// Wait for all workers to complete
|
||||
wg.Wait()
|
||||
|
||||
finalMigrated := int(atomic.LoadInt32(&migrated))
|
||||
finalFailed := int(atomic.LoadInt32(&failed))
|
||||
|
||||
cclog.Infof("Migration completed: %d jobs migrated, %d failed", finalMigrated, finalFailed)
|
||||
|
||||
if finalFailed > 0 {
|
||||
return finalMigrated, finalFailed, fmt.Errorf("%d jobs failed to migrate", finalFailed)
|
||||
}
|
||||
|
||||
return finalMigrated, finalFailed, nil
|
||||
}
|
||||
74
tools/grepCCLog.pl
Executable file
74
tools/grepCCLog.pl
Executable file
@@ -0,0 +1,74 @@
|
||||
#!/usr/bin/env perl
#
# grepCCLog.pl - list jobs that were started but never archived.
#
# Usage: grepCCLog.pl <logfile> <day-of-month>
#
# Scans <logfile> for "new job" and "archiving job..." messages. Every job of
# the selected cluster that was started on the given day of the month and has
# no archiving message (matched by database id) anywhere in the log is
# printed, followed by a per-user count of such jobs and a summary line.

use strict;
use warnings;

# Month abbreviation and cluster name this analysis is restricted to.
my $MONTH   = 'Oct';
my $CLUSTER = 'woody';

die "Usage: $0 <logfile> <day-of-month>\n"
    unless @ARGV >= 2 && $ARGV[1] =~ /^[0-9]+$/;

my $filename = $ARGV[0];
my $Tday     = $ARGV[1];

# Three-argument open with a lexical handle: the file name is never
# interpreted as a mode, and a missing or unreadable file is reported instead
# of silently producing an empty result.
open( my $log, '<', $filename )
    or die "Cannot open $filename: $!\n";

my %startedJob;    # database id => job info, jobs started on the target day
my %stoppedJob;    # database id => job info, jobs with an archiving message

# Read line by line so that large logs are not loaded into memory at once.
while ( my $line = <$log> ) {
    # \s+ after the month also accepts space-padded days such as "Oct  5".
    if ( $line =~ /$MONTH\s+([0-9]+) .*new job \(id: ([0-9]+)\): cluster=([a-z]+), jobId=([0-9]+), user=([a-z0-9]+),/ ) {
        my ( $day, $id, $cluster, $jobId, $user ) = ( $1, $2, $3, $4, $5 );

        # Numeric comparison so that "05" and "5" denote the same day.
        if ( $cluster eq $CLUSTER && $day == $Tday ) {
            $startedJob{$id} = {
                'day'     => $day,
                'cluster' => $cluster,
                'jobId'   => $jobId,
                'user'    => $user
            };
        }
    }

    if ( $line =~ /$MONTH\s+([0-9]+) .*archiving job... \(dbid: ([0-9]+)\): cluster=([a-z]+), jobId=([0-9]+), user=([a-z0-9]+),/ ) {
        my ( $day, $id, $cluster, $jobId, $user ) = ( $1, $2, $3, $4, $5 );

        # Archiving messages are collected for every day: a job started on
        # the target day may well be archived later.
        if ( $cluster eq $CLUSTER ) {
            $stoppedJob{$id} = {
                'day'     => $day,
                'cluster' => $cluster,
                'jobId'   => $jobId,
                'user'    => $user
            };
        }
    }
}
close $log;

my $started = 0;
my $count   = 0;
my %users;         # user => number of started-but-not-archived jobs

# Sorted by database id so that repeated runs print in the same order.
foreach my $key ( sort { $a <=> $b } keys %startedJob ) {
    $started++;
    next if exists $stoppedJob{$key};

    my $job = $startedJob{$key};
    $count++;
    $users{ $job->{'user'} }++;

    print <<END;
======
jobID: $job->{'jobId'} User: $job->{'user'}
======
END
}

foreach my $key ( sort keys %users ) {
    print "$key => $users{$key}\n";
}

print "Not stopped: $count of $started\n";
|
||||
23
web/frontend/package-lock.json
generated
23
web/frontend/package-lock.json
generated
@@ -15,13 +15,13 @@
|
||||
"chart.js": "^4.5.1",
|
||||
"date-fns": "^4.1.0",
|
||||
"graphql": "^16.12.0",
|
||||
"mathjs": "^14.9.1",
|
||||
"mathjs": "^15.0.0",
|
||||
"uplot": "^1.6.32",
|
||||
"wonka": "^6.3.5"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@rollup/plugin-commonjs": "^28.0.9",
|
||||
"@rollup/plugin-node-resolve": "^16.0.3",
|
||||
"@rollup/plugin-commonjs": "^29.0.0",
|
||||
"@rollup/plugin-node-resolve": "^16.0.1",
|
||||
"@rollup/plugin-terser": "^0.4.4",
|
||||
"@timohausmann/quadtree-js": "^1.2.6",
|
||||
"rollup": "^4.53.3",
|
||||
@@ -126,9 +126,9 @@
|
||||
}
|
||||
},
|
||||
"node_modules/@rollup/plugin-commonjs": {
|
||||
"version": "28.0.9",
|
||||
"resolved": "https://registry.npmjs.org/@rollup/plugin-commonjs/-/plugin-commonjs-28.0.9.tgz",
|
||||
"integrity": "sha512-PIR4/OHZ79romx0BVVll/PkwWpJ7e5lsqFa3gFfcrFPWwLXLV39JVUzQV9RKjWerE7B845Hqjj9VYlQeieZ2dA==",
|
||||
"version": "29.0.0",
|
||||
"resolved": "https://registry.npmjs.org/@rollup/plugin-commonjs/-/plugin-commonjs-29.0.0.tgz",
|
||||
"integrity": "sha512-U2YHaxR2cU/yAiwKJtJRhnyLk7cifnQw0zUpISsocBDoHDJn+HTV74ABqnwr5bEgWUwFZC9oFL6wLe21lHu5eQ==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
@@ -621,6 +621,7 @@
|
||||
"resolved": "https://registry.npmjs.org/acorn/-/acorn-8.15.0.tgz",
|
||||
"integrity": "sha512-NZyJarBfL7nWwIq+FDL6Zp/yHEhePMNnnJ0y3qfieCrmNvYct8uvtiV41UvlSe6apAfk0fY1FbWx+NwfmpvtTg==",
|
||||
"license": "MIT",
|
||||
"peer": true,
|
||||
"bin": {
|
||||
"acorn": "bin/acorn"
|
||||
},
|
||||
@@ -821,6 +822,7 @@
|
||||
"resolved": "https://registry.npmjs.org/graphql/-/graphql-16.12.0.tgz",
|
||||
"integrity": "sha512-DKKrynuQRne0PNpEbzuEdHlYOMksHSUI8Zc9Unei5gTsMNA2/vMpoMz/yKba50pejK56qj98qM0SjYxAKi13gQ==",
|
||||
"license": "MIT",
|
||||
"peer": true,
|
||||
"engines": {
|
||||
"node": "^12.22.0 || ^14.16.0 || ^16.0.0 || >=17.0.0"
|
||||
}
|
||||
@@ -893,9 +895,9 @@
|
||||
}
|
||||
},
|
||||
"node_modules/mathjs": {
|
||||
"version": "14.9.1",
|
||||
"resolved": "https://registry.npmjs.org/mathjs/-/mathjs-14.9.1.tgz",
|
||||
"integrity": "sha512-xhqv8Xjf+caWG3WlaPekg4v8QFOR3D5+8ycfcjMcPcnCNDgAONQLaLfyGgrggJrcHx2yUGCpACRpiD4GmXwX+Q==",
|
||||
"version": "15.1.0",
|
||||
"resolved": "https://registry.npmjs.org/mathjs/-/mathjs-15.1.0.tgz",
|
||||
"integrity": "sha512-HfnAcScQm9drGryodlDqeS3WAl4gUTYGDcOtcqL/8s23MZ28Ib1i8XnYK3ZdjNuaW/L4BAp9lIp8vxAMrcuu1w==",
|
||||
"license": "Apache-2.0",
|
||||
"dependencies": {
|
||||
"@babel/runtime": "^7.26.10",
|
||||
@@ -927,6 +929,7 @@
|
||||
"resolved": "https://registry.npmjs.org/picomatch/-/picomatch-4.0.3.tgz",
|
||||
"integrity": "sha512-5gTmgEY/sqK6gFXLIsQNH19lWb4ebPDLA4SdLP7dsWkIXHWlG66oPuVvXSGFPppYZz8ZDZq0dYYrbHfBCVUb1Q==",
|
||||
"license": "MIT",
|
||||
"peer": true,
|
||||
"engines": {
|
||||
"node": ">=12"
|
||||
},
|
||||
@@ -981,6 +984,7 @@
|
||||
"integrity": "sha512-w8GmOxZfBmKknvdXU1sdM9NHcoQejwF/4mNgj2JuEEdRaHwwF12K7e9eXn1nLZ07ad+du76mkVsyeb2rKGllsA==",
|
||||
"devOptional": true,
|
||||
"license": "MIT",
|
||||
"peer": true,
|
||||
"dependencies": {
|
||||
"@types/estree": "1.0.8"
|
||||
},
|
||||
@@ -1161,6 +1165,7 @@
|
||||
"resolved": "https://registry.npmjs.org/svelte/-/svelte-5.44.0.tgz",
|
||||
"integrity": "sha512-R7387No2zEGw4CtYtI2rgsui6BqjFARzoZFGLiLN5OPla0Pq4Ra2WwcP/zBomP3MYalhSNvF1fzDMuU0P0zPJw==",
|
||||
"license": "MIT",
|
||||
"peer": true,
|
||||
"dependencies": {
|
||||
"@jridgewell/remapping": "^2.3.4",
|
||||
"@jridgewell/sourcemap-codec": "^1.5.0",
|
||||
|
||||
@@ -7,8 +7,8 @@
|
||||
"dev": "rollup -c -w"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@rollup/plugin-commonjs": "^28.0.9",
|
||||
"@rollup/plugin-node-resolve": "^16.0.3",
|
||||
"@rollup/plugin-commonjs": "^29.0.0",
|
||||
"@rollup/plugin-node-resolve": "^16.0.1",
|
||||
"@rollup/plugin-terser": "^0.4.4",
|
||||
"@timohausmann/quadtree-js": "^1.2.6",
|
||||
"rollup": "^4.53.3",
|
||||
@@ -23,7 +23,7 @@
|
||||
"chart.js": "^4.5.1",
|
||||
"date-fns": "^4.1.0",
|
||||
"graphql": "^16.12.0",
|
||||
"mathjs": "^14.9.1",
|
||||
"mathjs": "^15.0.0",
|
||||
"uplot": "^1.6.32",
|
||||
"wonka": "^6.3.5"
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user