Merge branch 'ai-review' into dev

2025-12-03 15:01:18 +01:00
64 changed files with 9328 additions and 8624 deletions

.github/dependabot.yml

@@ -0,0 +1,15 @@
# To get started with Dependabot version updates, you'll need to specify which
# package ecosystems to update and where the package manifests are located.
# Please see the documentation for all configuration options:
# https://docs.github.com/code-security/dependabot/dependabot-version-updates/configuration-options-for-the-dependabot.yml-file
version: 2
updates:
- package-ecosystem: "gomod"
directory: "/"
schedule:
interval: "weekly"
- package-ecosystem: "npm"
directory: "/web/frontend"
schedule:
interval: "weekly"

.gitignore

@@ -27,3 +27,4 @@ test_ccms_write_api.sh
/.vscode/*
dist/
*.db
.idea


@@ -1,5 +1,8 @@
# NOTE
While we do our best to keep the master branch in a usable state, there is no guarantee the master branch works.
Please do not use it for production!
Please have a look at the [Release
Notes](https://github.com/ClusterCockpit/cc-backend/blob/master/ReleaseNotes.md)
for breaking changes!


@@ -2,6 +2,9 @@
// All rights reserved. This file is part of cc-backend.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
// Package main provides the entry point for the ClusterCockpit backend server.
// This file defines all command-line flags and their default values.
package main
import "flag"


@@ -2,6 +2,10 @@
// All rights reserved. This file is part of cc-backend.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
// Package main provides the entry point for the ClusterCockpit backend server.
// This file contains bootstrap logic for initializing the environment,
// creating default configuration files, and setting up the database.
package main
import (


@@ -2,9 +2,14 @@
// All rights reserved. This file is part of cc-backend.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
// Package main provides the entry point for the ClusterCockpit backend server.
// It orchestrates initialization of all subsystems including configuration,
// database, authentication, and the HTTP server.
package main
import (
"context"
"encoding/json"
"fmt"
"os"
@@ -13,6 +18,7 @@ import (
"strings"
"sync"
"syscall"
"time"
"github.com/ClusterCockpit/cc-backend/internal/archiver"
"github.com/ClusterCockpit/cc-backend/internal/auth"
@@ -46,90 +52,108 @@ const logoString = `
|_|
`
// Environment variable names
const (
envGOGC = "GOGC"
)
// Default configurations
const (
defaultArchiveConfig = `{"kind":"file","path":"./var/job-archive"}`
)
var (
date string
commit string
version string
)
func main() {
cliInit()
func printVersion() {
fmt.Print(logoString)
fmt.Printf("Version:\t%s\n", version)
fmt.Printf("Git hash:\t%s\n", commit)
fmt.Printf("Build time:\t%s\n", date)
fmt.Printf("SQL db version:\t%d\n", repository.Version)
fmt.Printf("Job archive version:\t%d\n", archive.Version)
}
if flagVersion {
fmt.Print(logoString)
fmt.Printf("Version:\t%s\n", version)
fmt.Printf("Git hash:\t%s\n", commit)
fmt.Printf("Build time:\t%s\n", date)
fmt.Printf("SQL db version:\t%d\n", repository.Version)
fmt.Printf("Job archive version:\t%d\n", archive.Version)
os.Exit(0)
func initGops() error {
if !flagGops {
return nil
}
cclog.Init(flagLogLevel, flagLogDateTime)
// If init flag set, run tasks here before any file dependencies cause errors
if flagInit {
initEnv()
cclog.Exit("Successfully setup environment!\n" +
"Please review config.json and .env and adjust it to your needs.\n" +
"Add your job-archive at ./var/job-archive.")
if err := agent.Listen(agent.Options{}); err != nil {
return fmt.Errorf("starting gops agent: %w", err)
}
return nil
}
// See https://github.com/google/gops (Runtime overhead is almost zero)
if flagGops {
if err := agent.Listen(agent.Options{}); err != nil {
cclog.Abortf("Could not start gops agent with 'gops/agent.Listen(agent.Options{})'. Application startup failed, exited.\nError: %s\n", err.Error())
}
func loadEnvironment() error {
if err := godotenv.Load(); err != nil {
return fmt.Errorf("loading .env file: %w", err)
}
return nil
}
err := godotenv.Load()
if err != nil {
cclog.Abortf("Could not parse existing .env file at location './.env'. Application startup failed, exited.\nError: %s\n", err.Error())
}
// Initialize sub-modules and handle command line flags.
// The order here is important!
func initConfiguration() error {
ccconf.Init(flagConfigFile)
// Load and check main configuration
if cfg := ccconf.GetPackageConfig("main"); cfg != nil {
if clustercfg := ccconf.GetPackageConfig("clusters"); clustercfg != nil {
config.Init(cfg, clustercfg)
} else {
cclog.Abort("Cluster configuration must be present")
}
} else {
cclog.Abort("Main configuration must be present")
cfg := ccconf.GetPackageConfig("main")
if cfg == nil {
return fmt.Errorf("main configuration must be present")
}
clustercfg := ccconf.GetPackageConfig("clusters")
if clustercfg == nil {
return fmt.Errorf("cluster configuration must be present")
}
config.Init(cfg, clustercfg)
return nil
}
func initDatabase() error {
repository.Connect(config.Keys.DBDriver, config.Keys.DB)
return nil
}
func handleDatabaseCommands() error {
if flagMigrateDB {
err := repository.MigrateDB(config.Keys.DBDriver, config.Keys.DB)
if err != nil {
cclog.Abortf("MigrateDB Failed: Could not migrate '%s' database at location '%s' to version %d.\nError: %s\n", config.Keys.DBDriver, config.Keys.DB, repository.Version, err.Error())
return fmt.Errorf("migrating database to version %d: %w", repository.Version, err)
}
cclog.Exitf("MigrateDB Success: Migrated '%s' database at location '%s' to version %d.\n", config.Keys.DBDriver, config.Keys.DB, repository.Version)
cclog.Exitf("MigrateDB Success: Migrated '%s' database at location '%s' to version %d.\n",
config.Keys.DBDriver, config.Keys.DB, repository.Version)
}
if flagRevertDB {
err := repository.RevertDB(config.Keys.DBDriver, config.Keys.DB)
if err != nil {
cclog.Abortf("RevertDB Failed: Could not revert '%s' database at location '%s' to version %d.\nError: %s\n", config.Keys.DBDriver, config.Keys.DB, (repository.Version - 1), err.Error())
return fmt.Errorf("reverting database to version %d: %w", repository.Version-1, err)
}
cclog.Exitf("RevertDB Success: Reverted '%s' database at location '%s' to version %d.\n", config.Keys.DBDriver, config.Keys.DB, (repository.Version - 1))
cclog.Exitf("RevertDB Success: Reverted '%s' database at location '%s' to version %d.\n",
config.Keys.DBDriver, config.Keys.DB, repository.Version-1)
}
if flagForceDB {
err := repository.ForceDB(config.Keys.DBDriver, config.Keys.DB)
if err != nil {
cclog.Abortf("ForceDB Failed: Could not force '%s' database at location '%s' to version %d.\nError: %s\n", config.Keys.DBDriver, config.Keys.DB, repository.Version, err.Error())
return fmt.Errorf("forcing database to version %d: %w", repository.Version, err)
}
cclog.Exitf("ForceDB Success: Forced '%s' database at location '%s' to version %d.\n", config.Keys.DBDriver, config.Keys.DB, repository.Version)
cclog.Exitf("ForceDB Success: Forced '%s' database at location '%s' to version %d.\n",
config.Keys.DBDriver, config.Keys.DB, repository.Version)
}
repository.Connect(config.Keys.DBDriver, config.Keys.DB)
return nil
}
func handleUserCommands() error {
if config.Keys.DisableAuthentication && (flagNewUser != "" || flagDelUser != "") {
return fmt.Errorf("--add-user and --del-user can only be used if authentication is enabled")
}
if !config.Keys.DisableAuthentication {
if cfg := ccconf.GetPackageConfig("auth"); cfg != nil {
auth.Init(&cfg)
} else {
@@ -137,157 +161,318 @@ func main() {
auth.Init(nil)
}
if flagNewUser != "" {
parts := strings.SplitN(flagNewUser, ":", 3)
if len(parts) != 3 || len(parts[0]) == 0 {
cclog.Abortf("Add User: Could not parse supplied argument format: No changes.\n"+
"Want: <username>:[admin,support,manager,api,user]:<password>\n"+
"Have: %s\n", flagNewUser)
}
// Check for default security keys
checkDefaultSecurityKeys()
ur := repository.GetUserRepository()
if err := ur.AddUser(&schema.User{
Username: parts[0], Projects: make([]string, 0), Password: parts[2], Roles: strings.Split(parts[1], ","),
}); err != nil {
cclog.Abortf("Add User: Could not add new user authentication for '%s' and roles '%s'.\nError: %s\n", parts[0], parts[1], err.Error())
} else {
cclog.Printf("Add User: Added new user '%s' with roles '%s'.\n", parts[0], parts[1])
if flagNewUser != "" {
if err := addUser(flagNewUser); err != nil {
return err
}
}
if flagDelUser != "" {
ur := repository.GetUserRepository()
if err := ur.DelUser(flagDelUser); err != nil {
cclog.Abortf("Delete User: Could not delete user '%s' from DB.\nError: %s\n", flagDelUser, err.Error())
} else {
cclog.Printf("Delete User: Deleted user '%s' from DB.\n", flagDelUser)
if err := delUser(flagDelUser); err != nil {
return err
}
}
authHandle := auth.GetAuthInstance()
if flagSyncLDAP {
if authHandle.LdapAuth == nil {
cclog.Abort("Sync LDAP: LDAP authentication is not configured, could not synchronize. No changes, exited.")
if err := syncLDAP(authHandle); err != nil {
return err
}
if err := authHandle.LdapAuth.Sync(); err != nil {
cclog.Abortf("Sync LDAP: Could not synchronize, failed with error.\nError: %s\n", err.Error())
}
cclog.Print("Sync LDAP: LDAP synchronization successfull.")
}
if flagGenJWT != "" {
ur := repository.GetUserRepository()
user, err := ur.GetUser(flagGenJWT)
if err != nil {
cclog.Abortf("JWT: Could not get supplied user '%s' from DB. No changes, exited.\nError: %s\n", flagGenJWT, err.Error())
if err := generateJWT(authHandle, flagGenJWT); err != nil {
return err
}
if !user.HasRole(schema.RoleApi) {
cclog.Warnf("JWT: User '%s' does not have the role 'api'. REST API endpoints will return error!\n", user.Username)
}
jwt, err := authHandle.JwtAuth.ProvideJWT(user)
if err != nil {
cclog.Abortf("JWT: User '%s' found in DB, but failed to provide JWT.\nError: %s\n", user.Username, err.Error())
}
cclog.Printf("JWT: Successfully generated JWT for user '%s': %s\n", user.Username, jwt)
}
} else if flagNewUser != "" || flagDelUser != "" {
cclog.Abort("Error: Arguments '--add-user' and '--del-user' can only be used if authentication is enabled. No changes, exited.")
}
if archiveCfg := ccconf.GetPackageConfig("archive"); archiveCfg != nil {
err = archive.Init(archiveCfg, config.Keys.DisableArchive)
} else {
err = archive.Init(json.RawMessage("{\"kind\":\"file\",\"path\":\"./var/job-archive\"}"), config.Keys.DisableArchive)
return nil
}
// checkDefaultSecurityKeys warns if default JWT keys are detected
func checkDefaultSecurityKeys() {
// Default JWT public key from init.go
defaultJWTPublic := "kzfYrYy+TzpanWZHJ5qSdMj5uKUWgq74BWhQG6copP0="
if os.Getenv("JWT_PUBLIC_KEY") == defaultJWTPublic {
cclog.Warn("Using default JWT keys - not recommended for production environments")
}
}
func addUser(userSpec string) error {
parts := strings.SplitN(userSpec, ":", 3)
if len(parts) != 3 || len(parts[0]) == 0 {
return fmt.Errorf("invalid user format, want: <username>:[admin,support,manager,api,user]:<password>, have: %s", userSpec)
}
ur := repository.GetUserRepository()
if err := ur.AddUser(&schema.User{
Username: parts[0],
Projects: make([]string, 0),
Password: parts[2],
Roles: strings.Split(parts[1], ","),
}); err != nil {
return fmt.Errorf("adding user '%s' with roles '%s': %w", parts[0], parts[1], err)
}
cclog.Printf("Add User: Added new user '%s' with roles '%s'.\n", parts[0], parts[1])
return nil
}
func delUser(username string) error {
ur := repository.GetUserRepository()
if err := ur.DelUser(username); err != nil {
return fmt.Errorf("deleting user '%s': %w", username, err)
}
cclog.Printf("Delete User: Deleted user '%s' from DB.\n", username)
return nil
}
func syncLDAP(authHandle *auth.Authentication) error {
if authHandle.LdapAuth == nil {
return fmt.Errorf("LDAP authentication is not configured")
}
if err := authHandle.LdapAuth.Sync(); err != nil {
return fmt.Errorf("synchronizing LDAP: %w", err)
}
cclog.Print("Sync LDAP: LDAP synchronization successful.")
return nil
}
func generateJWT(authHandle *auth.Authentication, username string) error {
ur := repository.GetUserRepository()
user, err := ur.GetUser(username)
if err != nil {
cclog.Abortf("Init: Failed to initialize archive.\nError: %s\n", err.Error())
return fmt.Errorf("getting user '%s': %w", username, err)
}
if !user.HasRole(schema.RoleApi) {
cclog.Warnf("JWT: User '%s' does not have the role 'api'. REST API endpoints will return error!\n", user.Username)
}
jwt, err := authHandle.JwtAuth.ProvideJWT(user)
if err != nil {
return fmt.Errorf("generating JWT for user '%s': %w", user.Username, err)
}
cclog.Printf("JWT: Successfully generated JWT for user '%s': %s\n", user.Username, jwt)
return nil
}
func initSubsystems() error {
// Initialize job archive
archiveCfg := ccconf.GetPackageConfig("archive")
if archiveCfg == nil {
archiveCfg = json.RawMessage(defaultArchiveConfig)
}
if err := archive.Init(archiveCfg, config.Keys.DisableArchive); err != nil {
return fmt.Errorf("initializing archive: %w", err)
}
// Initialize metricdata
if err := metricdata.Init(); err != nil {
cclog.Abortf("Init: Failed to initialize metricdata repository.\nError %s\n", err.Error())
return fmt.Errorf("initializing metricdata repository: %w", err)
}
// Handle database re-initialization
if flagReinitDB {
if err := importer.InitDB(); err != nil {
cclog.Abortf("Init DB: Failed to re-initialize repository DB.\nError: %s\n", err.Error())
} else {
cclog.Print("Init DB: Sucessfully re-initialized repository DB.")
return fmt.Errorf("re-initializing repository DB: %w", err)
}
cclog.Print("Init DB: Successfully re-initialized repository DB.")
}
// Handle job import
if flagImportJob != "" {
if err := importer.HandleImportFlag(flagImportJob); err != nil {
cclog.Abortf("Import Job: Job import failed.\nError: %s\n", err.Error())
} else {
cclog.Printf("Import Job: Imported Job '%s' into DB.\n", flagImportJob)
return fmt.Errorf("importing job: %w", err)
}
cclog.Printf("Import Job: Imported Job '%s' into DB.\n", flagImportJob)
}
// Initialize taggers
if config.Keys.EnableJobTaggers {
tagger.Init()
}
// Apply tags if requested
if flagApplyTags {
if err := tagger.RunTaggers(); err != nil {
cclog.Abortf("Running job taggers.\nError: %s\n", err.Error())
return fmt.Errorf("running job taggers: %w", err)
}
}
if !flagServer {
cclog.Exit("No errors, server flag not set. Exiting cc-backend.")
}
return nil
}
func runServer(ctx context.Context) error {
var wg sync.WaitGroup
// Metric Store starts after all flags have been processes
// Start metric store if enabled
if memorystore.InternalCCMSFlag {
if mscfg := ccconf.GetPackageConfig("metric-store"); mscfg != nil {
memorystore.Init(mscfg, &wg)
} else {
cclog.Abort("Metric Store configuration must be present")
mscfg := ccconf.GetPackageConfig("metric-store")
if mscfg == nil {
return fmt.Errorf("metric store configuration must be present")
}
memorystore.Init(mscfg, &wg)
}
// Start archiver and task manager
archiver.Start(repository.GetJobRepository())
taskManager.Start(ccconf.GetPackageConfig("cron"), ccconf.GetPackageConfig("archive"))
taskManager.Start(ccconf.GetPackageConfig("cron"),
ccconf.GetPackageConfig("archive"))
// Initialize web UI
cfg := ccconf.GetPackageConfig("ui")
web.Init(cfg)
serverInit()
// Initialize HTTP server
srv, err := NewServer(version, commit, date)
if err != nil {
return fmt.Errorf("creating server: %w", err)
}
// Channel to collect errors from server
errChan := make(chan error, 1)
// Start HTTP server
wg.Add(1)
go func() {
defer wg.Done()
serverStart()
if err := srv.Start(ctx); err != nil {
errChan <- err
}
}()
// Handle shutdown signals
wg.Add(1)
sigs := make(chan os.Signal, 1)
signal.Notify(sigs, syscall.SIGINT, syscall.SIGTERM)
go func() {
defer wg.Done()
<-sigs
select {
case <-sigs:
cclog.Info("Shutdown signal received")
case <-ctx.Done():
}
runtimeEnv.SystemdNotifiy(false, "Shutting down ...")
serverShutdown()
srv.Shutdown(ctx)
util.FsWatcherShutdown()
taskManager.Shutdown()
}()
if os.Getenv("GOGC") == "" {
// Set GC percent if not configured
if os.Getenv(envGOGC) == "" {
debug.SetGCPercent(25)
}
runtimeEnv.SystemdNotifiy(true, "running")
wg.Wait()
// Wait for completion or error
go func() {
wg.Wait()
close(errChan)
}()
// Check for server startup errors
select {
case err := <-errChan:
if err != nil {
return err
}
case <-time.After(100 * time.Millisecond):
// Server started successfully, wait for completion
if err := <-errChan; err != nil {
return err
}
}
cclog.Print("Graceful shutdown completed!")
return nil
}
func run() error {
cliInit()
if flagVersion {
printVersion()
return nil
}
// Initialize logger
cclog.Init(flagLogLevel, flagLogDateTime)
// Handle init flag
if flagInit {
initEnv()
cclog.Exit("Successfully setup environment!\n" +
"Please review config.json and .env and adjust it to your needs.\n" +
"Add your job-archive at ./var/job-archive.")
}
// Initialize gops agent
if err := initGops(); err != nil {
return err
}
// Initialize subsystems in dependency order:
// 1. Load environment variables from .env file (contains sensitive configuration)
// 2. Load configuration from config.json (may reference environment variables)
// 3. Handle database migration commands if requested
// 4. Initialize database connection (requires config for connection string)
// 5. Handle user commands if requested (requires database and authentication config)
// 6. Initialize subsystems like archive and metrics (require config and database)
// Load environment and configuration
if err := loadEnvironment(); err != nil {
return err
}
if err := initConfiguration(); err != nil {
return err
}
// Handle database migration (migrate, revert, force)
if err := handleDatabaseCommands(); err != nil {
return err
}
// Initialize database
if err := initDatabase(); err != nil {
return err
}
// Handle user commands (add, delete, sync, JWT)
if err := handleUserCommands(); err != nil {
return err
}
// Initialize subsystems (archive, metrics, taggers)
if err := initSubsystems(); err != nil {
return err
}
// Exit if start server is not requested
if !flagServer {
cclog.Exit("No errors, server flag not set. Exiting cc-backend.")
}
// Run server with context
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
return runServer(ctx)
}
func main() {
if err := run(); err != nil {
cclog.Error(err.Error())
os.Exit(1)
}
}
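The recurring change in this file is the move from cclog.Abortf/Exitf calls inside main to helpers that return wrapped errors, which run() propagates and main() reports once. A minimal standalone sketch of that pattern, assuming a hypothetical sentinel error and caller that are not part of this commit:

package main

import (
	"errors"
	"fmt"
	"os"
)

// errMissingConfig is a hypothetical sentinel used only for this sketch.
var errMissingConfig = errors.New("main configuration must be present")

// initConfiguration mirrors the shape of the refactored helpers:
// it returns an error instead of aborting the process.
func initConfiguration(haveConfig bool) error {
	if !haveConfig {
		// %w keeps the underlying error inspectable via errors.Is/errors.As.
		return fmt.Errorf("initializing configuration: %w", errMissingConfig)
	}
	return nil
}

func run() error {
	if err := initConfiguration(false); err != nil {
		return err
	}
	return nil
}

func main() {
	if err := run(); err != nil {
		// A single exit point logs the wrapped chain and sets the exit code.
		fmt.Fprintln(os.Stderr, err)
		if errors.Is(err, errMissingConfig) {
			os.Exit(2)
		}
		os.Exit(1)
	}
}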


@@ -2,6 +2,9 @@
// All rights reserved. This file is part of cc-backend.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
// Package main provides the entry point for the ClusterCockpit backend server.
// This file contains HTTP server setup, routing configuration, and
// authentication middleware integration.
package main
import (
@@ -36,11 +39,19 @@ import (
httpSwagger "github.com/swaggo/http-swagger"
)
var (
var buildInfo web.Build
// Environment variable names
const (
envDebug = "DEBUG"
)
// Server encapsulates the HTTP server state and dependencies
type Server struct {
router *mux.Router
server *http.Server
apiHandle *api.RestApi
)
}
func onFailureResponse(rw http.ResponseWriter, r *http.Request, err error) {
rw.Header().Add("Content-Type", "application/json")
@@ -51,25 +62,31 @@ func onFailureResponse(rw http.ResponseWriter, r *http.Request, err error) {
})
}
func serverInit() {
// NewServer creates and initializes a new Server instance
func NewServer(version, commit, buildDate string) (*Server, error) {
buildInfo = web.Build{Version: version, Hash: commit, Buildtime: buildDate}
s := &Server{
router: mux.NewRouter(),
}
if err := s.init(); err != nil {
return nil, err
}
return s, nil
}
func (s *Server) init() error {
// Setup the http.Handler/Router used by the server
graph.Init()
resolver := graph.GetResolverInstance()
graphQLServer := handler.New(
generated.NewExecutableSchema(generated.Config{Resolvers: resolver}))
// graphQLServer.AddTransport(transport.SSE{})
graphQLServer.AddTransport(transport.POST{})
// graphQLServer.AddTransport(transport.Websocket{
// KeepAlivePingInterval: 10 * time.Second,
// Upgrader: websocket.Upgrader{
// CheckOrigin: func(r *http.Request) bool {
// return true
// },
// },
// })
if os.Getenv("DEBUG") != "1" {
if os.Getenv(envDebug) != "1" {
// Having this handler means that an error message is returned via GraphQL instead of the connection simply being closed.
// The problem with this is that then, no more stacktrace is printed to stderr.
graphQLServer.SetRecoverFunc(func(ctx context.Context, err any) error {
@@ -86,73 +103,56 @@ func serverInit() {
authHandle := auth.GetAuthInstance()
apiHandle = api.New()
router = mux.NewRouter()
buildInfo := web.Build{Version: version, Hash: commit, Buildtime: date}
s.apiHandle = api.New()
info := map[string]any{}
info["hasOpenIDConnect"] = false
if auth.Keys.OpenIDConfig != nil {
openIDConnect := auth.NewOIDC(authHandle)
openIDConnect.RegisterEndpoints(router)
openIDConnect.RegisterEndpoints(s.router)
info["hasOpenIDConnect"] = true
}
router.HandleFunc("/login", func(rw http.ResponseWriter, r *http.Request) {
s.router.HandleFunc("/login", func(rw http.ResponseWriter, r *http.Request) {
rw.Header().Add("Content-Type", "text/html; charset=utf-8")
cclog.Debugf("##%v##", info)
web.RenderTemplate(rw, "login.tmpl", &web.Page{Title: "Login", Build: buildInfo, Infos: info})
}).Methods(http.MethodGet)
router.HandleFunc("/imprint", func(rw http.ResponseWriter, r *http.Request) {
s.router.HandleFunc("/imprint", func(rw http.ResponseWriter, r *http.Request) {
rw.Header().Add("Content-Type", "text/html; charset=utf-8")
web.RenderTemplate(rw, "imprint.tmpl", &web.Page{Title: "Imprint", Build: buildInfo})
})
router.HandleFunc("/privacy", func(rw http.ResponseWriter, r *http.Request) {
s.router.HandleFunc("/privacy", func(rw http.ResponseWriter, r *http.Request) {
rw.Header().Add("Content-Type", "text/html; charset=utf-8")
web.RenderTemplate(rw, "privacy.tmpl", &web.Page{Title: "Privacy", Build: buildInfo})
})
secured := router.PathPrefix("/").Subrouter()
securedapi := router.PathPrefix("/api").Subrouter()
userapi := router.PathPrefix("/userapi").Subrouter()
configapi := router.PathPrefix("/config").Subrouter()
frontendapi := router.PathPrefix("/frontend").Subrouter()
metricstoreapi := router.PathPrefix("/metricstore").Subrouter()
secured := s.router.PathPrefix("/").Subrouter()
securedapi := s.router.PathPrefix("/api").Subrouter()
userapi := s.router.PathPrefix("/userapi").Subrouter()
configapi := s.router.PathPrefix("/config").Subrouter()
frontendapi := s.router.PathPrefix("/frontend").Subrouter()
metricstoreapi := s.router.PathPrefix("/metricstore").Subrouter()
if !config.Keys.DisableAuthentication {
router.Handle("/login", authHandle.Login(
// On success: Handled within Login()
// On failure:
func(rw http.ResponseWriter, r *http.Request, err error) {
rw.Header().Add("Content-Type", "text/html; charset=utf-8")
rw.WriteHeader(http.StatusUnauthorized)
web.RenderTemplate(rw, "login.tmpl", &web.Page{
Title: "Login failed - ClusterCockpit",
MsgType: "alert-warning",
Message: err.Error(),
Build: buildInfo,
Infos: info,
})
})).Methods(http.MethodPost)
// Create login failure handler (used by both /login and /jwt-login)
loginFailureHandler := func(rw http.ResponseWriter, r *http.Request, err error) {
rw.Header().Add("Content-Type", "text/html; charset=utf-8")
rw.WriteHeader(http.StatusUnauthorized)
web.RenderTemplate(rw, "login.tmpl", &web.Page{
Title: "Login failed - ClusterCockpit",
MsgType: "alert-warning",
Message: err.Error(),
Build: buildInfo,
Infos: info,
})
}
router.Handle("/jwt-login", authHandle.Login(
// On success: Handled within Login()
// On failure:
func(rw http.ResponseWriter, r *http.Request, err error) {
rw.Header().Add("Content-Type", "text/html; charset=utf-8")
rw.WriteHeader(http.StatusUnauthorized)
web.RenderTemplate(rw, "login.tmpl", &web.Page{
Title: "Login failed - ClusterCockpit",
MsgType: "alert-warning",
Message: err.Error(),
Build: buildInfo,
Infos: info,
})
}))
s.router.Handle("/login", authHandle.Login(loginFailureHandler)).Methods(http.MethodPost)
s.router.Handle("/jwt-login", authHandle.Login(loginFailureHandler))
router.Handle("/logout", authHandle.Logout(
s.router.Handle("/logout", authHandle.Logout(
http.HandlerFunc(func(rw http.ResponseWriter, r *http.Request) {
rw.Header().Add("Content-Type", "text/html; charset=utf-8")
rw.WriteHeader(http.StatusOK)
@@ -226,8 +226,8 @@ func serverInit() {
}
if flagDev {
router.Handle("/playground", playground.Handler("GraphQL playground", "/query"))
router.PathPrefix("/swagger/").Handler(httpSwagger.Handler(
s.router.Handle("/playground", playground.Handler("GraphQL playground", "/query"))
s.router.PathPrefix("/swagger/").Handler(httpSwagger.Handler(
httpSwagger.URL("http://" + config.Keys.Addr + "/swagger/doc.json"))).Methods(http.MethodGet)
}
secured.Handle("/query", graphQLServer)
@@ -239,67 +239,46 @@ func serverInit() {
// Mount all /monitoring/... and /api/... routes.
routerConfig.SetupRoutes(secured, buildInfo)
apiHandle.MountApiRoutes(securedapi)
apiHandle.MountUserApiRoutes(userapi)
apiHandle.MountConfigApiRoutes(configapi)
apiHandle.MountFrontendApiRoutes(frontendapi)
s.apiHandle.MountApiRoutes(securedapi)
s.apiHandle.MountUserApiRoutes(userapi)
s.apiHandle.MountConfigApiRoutes(configapi)
s.apiHandle.MountFrontendApiRoutes(frontendapi)
if memorystore.InternalCCMSFlag {
apiHandle.MountMetricStoreApiRoutes(metricstoreapi)
s.apiHandle.MountMetricStoreApiRoutes(metricstoreapi)
}
if config.Keys.EmbedStaticFiles {
if i, err := os.Stat("./var/img"); err == nil {
if i.IsDir() {
cclog.Info("Use local directory for static images")
router.PathPrefix("/img/").Handler(http.StripPrefix("/img/", http.FileServer(http.Dir("./var/img"))))
s.router.PathPrefix("/img/").Handler(http.StripPrefix("/img/", http.FileServer(http.Dir("./var/img"))))
}
}
router.PathPrefix("/").Handler(http.StripPrefix("/", web.ServeFiles()))
s.router.PathPrefix("/").Handler(http.StripPrefix("/", web.ServeFiles()))
} else {
router.PathPrefix("/").Handler(http.FileServer(http.Dir(config.Keys.StaticFiles)))
s.router.PathPrefix("/").Handler(http.FileServer(http.Dir(config.Keys.StaticFiles)))
}
router.Use(handlers.CompressHandler)
router.Use(handlers.RecoveryHandler(handlers.PrintRecoveryStack(true)))
router.Use(handlers.CORS(
s.router.Use(handlers.CompressHandler)
s.router.Use(handlers.RecoveryHandler(handlers.PrintRecoveryStack(true)))
s.router.Use(handlers.CORS(
handlers.AllowCredentials(),
handlers.AllowedHeaders([]string{"X-Requested-With", "Content-Type", "Authorization", "Origin"}),
handlers.AllowedMethods([]string{"GET", "POST", "HEAD", "OPTIONS"}),
handlers.AllowedOrigins([]string{"*"})))
// secured.NotFoundHandler = http.HandlerFunc(func(rw http.ResponseWriter, r *http.Request) {
// page := web.Page{
// Title: "ClusterCockpit - Not Found",
// Build: buildInfo,
// }
// rw.Header().Add("Content-Type", "text/html; charset=utf-8")
// web.RenderTemplate(rw, "404.tmpl", &page)
// })
// secured.NotFoundHandler = http.HandlerFunc(http.NotFound)
// router.NotFoundHandler = router.NewRoute().HandlerFunc(http.NotFound).GetHandler()
// printEndpoints(router)
return nil
}
// func printEndpoints(r *mux.Router) {
// r.Walk(func(route *mux.Route, router *mux.Router, ancestors []*mux.Route) error {
// path, err := route.GetPathTemplate()
// if err != nil {
// path = "nopath"
// }
// methods, err := route.GetMethods()
// if err != nil {
// methods = append(methods, "nomethod")
// }
// fmt.Printf("%v %s\n", methods, path)
// return nil
// })
// }
// Server timeout defaults (in seconds)
const (
defaultReadTimeout = 20
defaultWriteTimeout = 20
)
func serverStart() {
handler := handlers.CustomLoggingHandler(io.Discard, router, func(_ io.Writer, params handlers.LogFormatterParams) {
func (s *Server) Start(ctx context.Context) error {
handler := handlers.CustomLoggingHandler(io.Discard, s.router, func(_ io.Writer, params handlers.LogFormatterParams) {
if strings.HasPrefix(params.Request.RequestURI, "/api/") {
cclog.Debugf("%s %s (%d, %.02fkb, %dms)",
params.Request.Method, params.URL.RequestURI(),
@@ -313,9 +292,13 @@ func serverStart() {
}
})
server = &http.Server{
ReadTimeout: 20 * time.Second,
WriteTimeout: 20 * time.Second,
// Use configurable timeouts with defaults
readTimeout := time.Duration(defaultReadTimeout) * time.Second
writeTimeout := time.Duration(defaultWriteTimeout) * time.Second
s.server = &http.Server{
ReadTimeout: readTimeout,
WriteTimeout: writeTimeout,
Handler: handler,
Addr: config.Keys.Addr,
}
@@ -323,7 +306,7 @@ func serverStart() {
// Start http or https server
listener, err := net.Listen("tcp", config.Keys.Addr)
if err != nil {
cclog.Abortf("Server Start: Starting http listener on '%s' failed.\nError: %s\n", config.Keys.Addr, err.Error())
return fmt.Errorf("starting listener on '%s': %w", config.Keys.Addr, err)
}
if !strings.HasSuffix(config.Keys.Addr, ":80") && config.Keys.RedirectHTTPTo != "" {
@@ -336,7 +319,7 @@ func serverStart() {
cert, err := tls.LoadX509KeyPair(
config.Keys.HTTPSCertFile, config.Keys.HTTPSKeyFile)
if err != nil {
cclog.Abortf("Server Start: Loading X509 keypair failed. Check options 'https-cert-file' and 'https-key-file' in 'config.json'.\nError: %s\n", err.Error())
return fmt.Errorf("loading X509 keypair (check 'https-cert-file' and 'https-key-file' in config.json): %w", err)
}
listener = tls.NewListener(listener, &tls.Config{
Certificates: []tls.Certificate{cert},
@@ -356,17 +339,34 @@ func serverStart() {
// be established first, then the user can be changed, and after that,
// the actual http server can be started.
if err := runtimeEnv.DropPrivileges(config.Keys.Group, config.Keys.User); err != nil {
cclog.Abortf("Server Start: Error while preparing server start.\nError: %s\n", err.Error())
return fmt.Errorf("dropping privileges: %w", err)
}
if err = server.Serve(listener); err != nil && err != http.ErrServerClosed {
cclog.Abortf("Server Start: Starting server failed.\nError: %s\n", err.Error())
// Handle context cancellation for graceful shutdown
go func() {
<-ctx.Done()
shutdownCtx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
defer cancel()
if err := s.server.Shutdown(shutdownCtx); err != nil {
cclog.Errorf("Server shutdown error: %v", err)
}
}()
if err = s.server.Serve(listener); err != nil && err != http.ErrServerClosed {
return fmt.Errorf("server failed: %w", err)
}
return nil
}
func serverShutdown() {
func (s *Server) Shutdown(ctx context.Context) {
// Create a shutdown context with timeout
shutdownCtx, cancel := context.WithTimeout(ctx, 30*time.Second)
defer cancel()
// First shut down the server gracefully (waiting for all ongoing requests)
server.Shutdown(context.Background())
if err := s.server.Shutdown(shutdownCtx); err != nil {
cclog.Errorf("Server shutdown error: %v", err)
}
// Archive all the metric store data
if memorystore.InternalCCMSFlag {

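The Server type introduced above ties the listener's lifetime to a context: cancelling it triggers http.Server.Shutdown with a 30-second timeout. A generic standard-library sketch of the same shutdown pattern; the address, handler, and timeouts below are placeholders, not cc-backend configuration:

package main

import (
	"context"
	"errors"
	"log"
	"net/http"
	"os/signal"
	"syscall"
	"time"
)

func main() {
	// Cancelled on SIGINT/SIGTERM, mirroring the signal handling in runServer.
	ctx, stop := signal.NotifyContext(context.Background(), syscall.SIGINT, syscall.SIGTERM)
	defer stop()

	srv := &http.Server{
		Addr:         ":8080", // placeholder address
		Handler:      http.NewServeMux(),
		ReadTimeout:  20 * time.Second,
		WriteTimeout: 20 * time.Second,
	}

	// Shut down gracefully once the context is cancelled.
	go func() {
		<-ctx.Done()
		shutdownCtx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
		defer cancel()
		if err := srv.Shutdown(shutdownCtx); err != nil {
			log.Printf("shutdown error: %v", err)
		}
	}()

	if err := srv.ListenAndServe(); err != nil && !errors.Is(err, http.ErrServerClosed) {
		log.Fatalf("server failed: %v", err)
	}
	log.Print("graceful shutdown completed")
}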
go.mod

@@ -5,10 +5,14 @@ go 1.24.0
toolchain go1.24.1
require (
github.com/99designs/gqlgen v0.17.78
github.com/99designs/gqlgen v0.17.81
github.com/ClusterCockpit/cc-lib v0.10.1
github.com/Masterminds/squirrel v1.5.4
github.com/coreos/go-oidc/v3 v3.12.0
github.com/aws/aws-sdk-go-v2 v1.39.6
github.com/aws/aws-sdk-go-v2/config v1.31.20
github.com/aws/aws-sdk-go-v2/credentials v1.18.24
github.com/aws/aws-sdk-go-v2/service/s3 v1.90.2
github.com/coreos/go-oidc/v3 v3.16.0
github.com/expr-lang/expr v1.17.6
github.com/go-co-op/gocron/v2 v2.16.0
github.com/go-ldap/ldap/v3 v3.4.10
@@ -29,11 +33,12 @@ require (
github.com/prometheus/common v0.66.1
github.com/qustavo/sqlhooks/v2 v2.1.0
github.com/santhosh-tekuri/jsonschema/v5 v5.3.1
github.com/stretchr/testify v1.11.1
github.com/swaggo/http-swagger v1.3.4
github.com/swaggo/swag v1.16.6
github.com/vektah/gqlparser/v2 v2.5.30
golang.org/x/crypto v0.42.0
golang.org/x/oauth2 v0.30.0
golang.org/x/crypto v0.43.0
golang.org/x/oauth2 v0.32.0
golang.org/x/time v0.13.0
)
@@ -42,17 +47,38 @@ require (
github.com/Azure/go-ntlmssp v0.0.0-20221128193559-754e69321358 // indirect
github.com/KyleBanks/depth v1.2.1 // indirect
github.com/agnivade/levenshtein v1.2.1 // indirect
github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.7.3 // indirect
github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.18.13 // indirect
github.com/aws/aws-sdk-go-v2/internal/configsources v1.4.13 // indirect
github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.7.13 // indirect
github.com/aws/aws-sdk-go-v2/internal/ini v1.8.4 // indirect
github.com/aws/aws-sdk-go-v2/internal/v4a v1.4.13 // indirect
github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.13.3 // indirect
github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.9.4 // indirect
github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.13.13 // indirect
github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.19.13 // indirect
github.com/aws/aws-sdk-go-v2/service/sso v1.30.3 // indirect
github.com/aws/aws-sdk-go-v2/service/ssooidc v1.35.7 // indirect
github.com/aws/aws-sdk-go-v2/service/sts v1.40.2 // indirect
github.com/aws/smithy-go v1.23.2 // indirect
github.com/beorn7/perks v1.0.1 // indirect
github.com/cespare/xxhash/v2 v2.3.0 // indirect
github.com/cpuguy83/go-md2man/v2 v2.0.7 // indirect
github.com/davecgh/go-spew v1.1.1 // indirect
github.com/felixge/httpsnoop v1.0.4 // indirect
github.com/fsnotify/fsnotify v1.9.0 // indirect
github.com/go-asn1-ber/asn1-ber v1.5.7 // indirect
github.com/go-jose/go-jose/v4 v4.0.5 // indirect
github.com/go-openapi/jsonpointer v0.21.1 // indirect
github.com/go-openapi/jsonreference v0.21.0 // indirect
github.com/go-openapi/spec v0.21.0 // indirect
github.com/go-openapi/swag v0.23.1 // indirect
github.com/go-jose/go-jose/v4 v4.1.3 // indirect
github.com/go-openapi/jsonpointer v0.22.1 // indirect
github.com/go-openapi/jsonreference v0.21.2 // indirect
github.com/go-openapi/spec v0.22.0 // indirect
github.com/go-openapi/swag/conv v0.25.1 // indirect
github.com/go-openapi/swag/jsonname v0.25.1 // indirect
github.com/go-openapi/swag/jsonutils v0.25.1 // indirect
github.com/go-openapi/swag/loading v0.25.1 // indirect
github.com/go-openapi/swag/stringutils v0.25.1 // indirect
github.com/go-openapi/swag/typeutils v0.25.1 // indirect
github.com/go-openapi/swag/yamlutils v0.25.1 // indirect
github.com/go-viper/mapstructure/v2 v2.4.0 // indirect
github.com/golang/snappy v0.0.4 // indirect
github.com/google/uuid v1.6.0 // indirect
@@ -62,37 +88,38 @@ require (
github.com/hashicorp/go-multierror v1.1.1 // indirect
github.com/hashicorp/golang-lru/v2 v2.0.7 // indirect
github.com/jonboulle/clockwork v0.5.0 // indirect
github.com/josharian/intern v1.0.0 // indirect
github.com/jpillora/backoff v1.0.0 // indirect
github.com/json-iterator/go v1.1.12 // indirect
github.com/klauspost/compress v1.18.0 // indirect
github.com/lann/builder v0.0.0-20180802200727-47ae307949d0 // indirect
github.com/lann/ps v0.0.0-20150810152359-62de8c46ede0 // indirect
github.com/mailru/easyjson v0.9.0 // indirect
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
github.com/modern-go/reflect2 v1.0.2 // indirect
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect
github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f // indirect
github.com/nats-io/nkeys v0.4.11 // indirect
github.com/nats-io/nuid v1.0.1 // indirect
github.com/pmezard/go-difflib v1.0.0 // indirect
github.com/prometheus/client_model v0.6.2 // indirect
github.com/prometheus/procfs v0.16.1 // indirect
github.com/robfig/cron/v3 v3.0.1 // indirect
github.com/russross/blackfriday/v2 v2.1.0 // indirect
github.com/sosodev/duration v1.3.1 // indirect
github.com/stretchr/objx v0.5.2 // indirect
github.com/swaggo/files v1.0.1 // indirect
github.com/urfave/cli/v2 v2.27.7 // indirect
github.com/xrash/smetrics v0.0.0-20250705151800-55b8f293f342 // indirect
go.uber.org/atomic v1.11.0 // indirect
go.yaml.in/yaml/v2 v2.4.2 // indirect
go.yaml.in/yaml/v2 v2.4.3 // indirect
go.yaml.in/yaml/v3 v3.0.4 // indirect
golang.org/x/exp v0.0.0-20250620022241-b7579e27df2b // indirect
golang.org/x/mod v0.27.0 // indirect
golang.org/x/net v0.43.0 // indirect
golang.org/x/mod v0.29.0 // indirect
golang.org/x/net v0.46.0 // indirect
golang.org/x/sync v0.17.0 // indirect
golang.org/x/sys v0.36.0 // indirect
golang.org/x/text v0.29.0 // indirect
golang.org/x/tools v0.36.0 // indirect
google.golang.org/protobuf v1.36.8 // indirect
golang.org/x/sys v0.37.0 // indirect
golang.org/x/text v0.30.0 // indirect
golang.org/x/tools v0.38.0 // indirect
google.golang.org/protobuf v1.36.9 // indirect
gopkg.in/yaml.v3 v3.0.1 // indirect
sigs.k8s.io/yaml v1.6.0 // indirect
)
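This commit also adds the AWS SDK for Go v2 modules (aws, config, credentials, s3) as direct dependencies. A minimal sketch of how those packages are conventionally wired together, independent of how cc-backend uses them; credentials and region are assumed to come from the environment:

package main

import (
	"context"
	"log"

	"github.com/aws/aws-sdk-go-v2/config"
	"github.com/aws/aws-sdk-go-v2/service/s3"
)

func main() {
	ctx := context.Background()
	// Loads credentials and region from the environment or shared config files.
	cfg, err := config.LoadDefaultConfig(ctx)
	if err != nil {
		log.Fatalf("loading AWS config: %v", err)
	}
	client := s3.NewFromConfig(cfg)

	// List buckets as a simple connectivity check.
	out, err := client.ListBuckets(ctx, &s3.ListBucketsInput{})
	if err != nil {
		log.Fatalf("listing buckets: %v", err)
	}
	for _, b := range out.Buckets {
		log.Printf("bucket: %s", *b.Name)
	}
}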

go.sum

@@ -1,7 +1,7 @@
filippo.io/edwards25519 v1.1.0 h1:FNf4tywRC1HmFuKW5xopWpigGjJKiJSV0Cqo0cJWDaA=
filippo.io/edwards25519 v1.1.0/go.mod h1:BxyFTGdWcka3PhytdK4V28tE5sGfRvvvRV7EaN4VDT4=
github.com/99designs/gqlgen v0.17.78 h1:bhIi7ynrc3js2O8wu1sMQj1YHPENDt3jQGyifoBvoVI=
github.com/99designs/gqlgen v0.17.78/go.mod h1:yI/o31IauG2kX0IsskM4R894OCCG1jXJORhtLQqB7Oc=
github.com/99designs/gqlgen v0.17.81 h1:kCkN/xVyRb5rEQpuwOHRTYq83i0IuTQg9vdIiwEerTs=
github.com/99designs/gqlgen v0.17.81/go.mod h1:vgNcZlLwemsUhYim4dC1pvFP5FX0pr2Y+uYUoHFb1ig=
github.com/Azure/go-ansiterm v0.0.0-20230124172434-306776ec8161 h1:L/gRVlceqvL25UVaW/CKtUDjefjrs0SPonmDGUVOYP0=
github.com/Azure/go-ansiterm v0.0.0-20230124172434-306776ec8161/go.mod h1:xomTg63KZ2rFqZQzSB4Vz2SUXa1BpHTVz9L5PTmPC4E=
github.com/Azure/go-ntlmssp v0.0.0-20221128193559-754e69321358 h1:mFRzDkZVAjdal+s7s0MwaRv9igoPqLRdzOLzw/8Xvq8=
@@ -30,12 +30,48 @@ github.com/apapsch/go-jsonmerge/v2 v2.0.0 h1:axGnT1gRIfimI7gJifB699GoE/oq+F2MU7D
github.com/apapsch/go-jsonmerge/v2 v2.0.0/go.mod h1:lvDnEdqiQrp0O42VQGgmlKpxL1AP2+08jFMw88y4klk=
github.com/arbovm/levenshtein v0.0.0-20160628152529-48b4e1c0c4d0 h1:jfIu9sQUG6Ig+0+Ap1h4unLjW6YQJpKZVmUzxsD4E/Q=
github.com/arbovm/levenshtein v0.0.0-20160628152529-48b4e1c0c4d0/go.mod h1:t2tdKJDJF9BV14lnkjHmOQgcvEKgtqs5a1N3LNdJhGE=
github.com/aws/aws-sdk-go-v2 v1.39.6 h1:2JrPCVgWJm7bm83BDwY5z8ietmeJUbh3O2ACnn+Xsqk=
github.com/aws/aws-sdk-go-v2 v1.39.6/go.mod h1:c9pm7VwuW0UPxAEYGyTmyurVcNrbF6Rt/wixFqDhcjE=
github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.7.3 h1:DHctwEM8P8iTXFxC/QK0MRjwEpWQeM9yzidCRjldUz0=
github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.7.3/go.mod h1:xdCzcZEtnSTKVDOmUZs4l/j3pSV6rpo1WXl5ugNsL8Y=
github.com/aws/aws-sdk-go-v2/config v1.31.20 h1:/jWF4Wu90EhKCgjTdy1DGxcbcbNrjfBHvksEL79tfQc=
github.com/aws/aws-sdk-go-v2/config v1.31.20/go.mod h1:95Hh1Tc5VYKL9NJ7tAkDcqeKt+MCXQB1hQZaRdJIZE0=
github.com/aws/aws-sdk-go-v2/credentials v1.18.24 h1:iJ2FmPT35EaIB0+kMa6TnQ+PwG5A1prEdAw+PsMzfHg=
github.com/aws/aws-sdk-go-v2/credentials v1.18.24/go.mod h1:U91+DrfjAiXPDEGYhh/x29o4p0qHX5HDqG7y5VViv64=
github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.18.13 h1:T1brd5dR3/fzNFAQch/iBKeX07/ffu/cLu+q+RuzEWk=
github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.18.13/go.mod h1:Peg/GBAQ6JDt+RoBf4meB1wylmAipb7Kg2ZFakZTlwk=
github.com/aws/aws-sdk-go-v2/internal/configsources v1.4.13 h1:a+8/MLcWlIxo1lF9xaGt3J/u3yOZx+CdSveSNwjhD40=
github.com/aws/aws-sdk-go-v2/internal/configsources v1.4.13/go.mod h1:oGnKwIYZ4XttyU2JWxFrwvhF6YKiK/9/wmE3v3Iu9K8=
github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.7.13 h1:HBSI2kDkMdWz4ZM7FjwE7e/pWDEZ+nR95x8Ztet1ooY=
github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.7.13/go.mod h1:YE94ZoDArI7awZqJzBAZ3PDD2zSfuP7w6P2knOzIn8M=
github.com/aws/aws-sdk-go-v2/internal/ini v1.8.4 h1:WKuaxf++XKWlHWu9ECbMlha8WOEGm0OUEZqm4K/Gcfk=
github.com/aws/aws-sdk-go-v2/internal/ini v1.8.4/go.mod h1:ZWy7j6v1vWGmPReu0iSGvRiise4YI5SkR3OHKTZ6Wuc=
github.com/aws/aws-sdk-go-v2/internal/v4a v1.4.13 h1:eg/WYAa12vqTphzIdWMzqYRVKKnCboVPRlvaybNCqPA=
github.com/aws/aws-sdk-go-v2/internal/v4a v1.4.13/go.mod h1:/FDdxWhz1486obGrKKC1HONd7krpk38LBt+dutLcN9k=
github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.13.3 h1:x2Ibm/Af8Fi+BH+Hsn9TXGdT+hKbDd5XOTZxTMxDk7o=
github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.13.3/go.mod h1:IW1jwyrQgMdhisceG8fQLmQIydcT/jWY21rFhzgaKwo=
github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.9.4 h1:NvMjwvv8hpGUILarKw7Z4Q0w1H9anXKsesMxtw++MA4=
github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.9.4/go.mod h1:455WPHSwaGj2waRSpQp7TsnpOnBfw8iDfPfbwl7KPJE=
github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.13.13 h1:kDqdFvMY4AtKoACfzIGD8A0+hbT41KTKF//gq7jITfM=
github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.13.13/go.mod h1:lmKuogqSU3HzQCwZ9ZtcqOc5XGMqtDK7OIc2+DxiUEg=
github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.19.13 h1:zhBJXdhWIFZ1acfDYIhu4+LCzdUS2Vbcum7D01dXlHQ=
github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.19.13/go.mod h1:JaaOeCE368qn2Hzi3sEzY6FgAZVCIYcC2nwbro2QCh8=
github.com/aws/aws-sdk-go-v2/service/s3 v1.90.2 h1:DhdbtDl4FdNlj31+xiRXANxEE+eC7n8JQz+/ilwQ8Uc=
github.com/aws/aws-sdk-go-v2/service/s3 v1.90.2/go.mod h1:+wArOOrcHUevqdto9k1tKOF5++YTe9JEcPSc9Tx2ZSw=
github.com/aws/aws-sdk-go-v2/service/sso v1.30.3 h1:NjShtS1t8r5LUfFVtFeI8xLAHQNTa7UI0VawXlrBMFQ=
github.com/aws/aws-sdk-go-v2/service/sso v1.30.3/go.mod h1:fKvyjJcz63iL/ftA6RaM8sRCtN4r4zl4tjL3qw5ec7k=
github.com/aws/aws-sdk-go-v2/service/ssooidc v1.35.7 h1:gTsnx0xXNQ6SBbymoDvcoRHL+q4l/dAFsQuKfDWSaGc=
github.com/aws/aws-sdk-go-v2/service/ssooidc v1.35.7/go.mod h1:klO+ejMvYsB4QATfEOIXk8WAEwN4N0aBfJpvC+5SZBo=
github.com/aws/aws-sdk-go-v2/service/sts v1.40.2 h1:HK5ON3KmQV2HcAunnx4sKLB9aPf3gKGwVAf7xnx0QT0=
github.com/aws/aws-sdk-go-v2/service/sts v1.40.2/go.mod h1:E19xDjpzPZC7LS2knI9E6BaRFDK43Eul7vd6rSq2HWk=
github.com/aws/smithy-go v1.23.2 h1:Crv0eatJUQhaManss33hS5r40CG3ZFH+21XSkqMrIUM=
github.com/aws/smithy-go v1.23.2/go.mod h1:LEj2LM3rBRQJxPZTB4KuzZkaZYnZPnvgIhb4pu07mx0=
github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM=
github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw=
github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs=
github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
github.com/coreos/go-oidc/v3 v3.12.0 h1:sJk+8G2qq94rDI6ehZ71Bol3oUHy63qNYmkiSjrc/Jo=
github.com/coreos/go-oidc/v3 v3.12.0/go.mod h1:gE3LgjOgFoHi9a4ce4/tJczr0Ai2/BoDhf0r5lltWI0=
github.com/coreos/go-oidc/v3 v3.16.0 h1:qRQUCFstKpXwmEjDQTIbyY/5jF00+asXzSkmkoa/mow=
github.com/coreos/go-oidc/v3 v3.16.0/go.mod h1:wqPbKFrVnE90vty060SB40FCJ8fTHTxSwyXJqZH+sI8=
github.com/cpuguy83/go-md2man/v2 v2.0.7 h1:zbFlGlXEAKlwXpmvle3d8Oe3YnkKIK4xSRTd3sHPnBo=
github.com/cpuguy83/go-md2man/v2 v2.0.7/go.mod h1:oOW0eioCTA6cOiMLiUPZOpcVxMig6NIQQ7OS05n1F4g=
github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E=
@@ -68,22 +104,37 @@ github.com/go-asn1-ber/asn1-ber v1.5.7 h1:DTX+lbVTWaTw1hQ+PbZPlnDZPEIs0SS/GCZAl5
github.com/go-asn1-ber/asn1-ber v1.5.7/go.mod h1:hEBeB/ic+5LoWskz+yKT7vGhhPYkProFKoKdwZRWMe0=
github.com/go-co-op/gocron/v2 v2.16.0 h1:uqUF6WFZ4enRU45pWFNcn1xpDLc+jBOTKhPQI16Z1xs=
github.com/go-co-op/gocron/v2 v2.16.0/go.mod h1:opexeOFy5BplhsKdA7bzY9zeYih8I8/WNJ4arTIFPVc=
github.com/go-jose/go-jose/v4 v4.0.5 h1:M6T8+mKZl/+fNNuFHvGIzDz7BTLQPIounk/b9dw3AaE=
github.com/go-jose/go-jose/v4 v4.0.5/go.mod h1:s3P1lRrkT8igV8D9OjyL4WRyHvjB6a4JSllnOrmmBOA=
github.com/go-jose/go-jose/v4 v4.1.3 h1:CVLmWDhDVRa6Mi/IgCgaopNosCaHz7zrMeF9MlZRkrs=
github.com/go-jose/go-jose/v4 v4.1.3/go.mod h1:x4oUasVrzR7071A4TnHLGSPpNOm2a21K9Kf04k1rs08=
github.com/go-ldap/ldap/v3 v3.4.10 h1:ot/iwPOhfpNVgB1o+AVXljizWZ9JTp7YF5oeyONmcJU=
github.com/go-ldap/ldap/v3 v3.4.10/go.mod h1:JXh4Uxgi40P6E9rdsYqpUtbW46D9UTjJ9QSwGRznplY=
github.com/go-logr/logr v1.4.2 h1:6pFjapn8bFcIbiKo3XT4j/BhANplGihG6tvd+8rYgrY=
github.com/go-logr/logr v1.4.2/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY=
github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag=
github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE=
github.com/go-openapi/jsonpointer v0.21.1 h1:whnzv/pNXtK2FbX/W9yJfRmE2gsmkfahjMKB0fZvcic=
github.com/go-openapi/jsonpointer v0.21.1/go.mod h1:50I1STOfbY1ycR8jGz8DaMeLCdXiI6aDteEdRNNzpdk=
github.com/go-openapi/jsonreference v0.21.0 h1:Rs+Y7hSXT83Jacb7kFyjn4ijOuVGSvOdF2+tg1TRrwQ=
github.com/go-openapi/jsonreference v0.21.0/go.mod h1:LmZmgsrTkVg9LG4EaHeY8cBDslNPMo06cago5JNLkm4=
github.com/go-openapi/spec v0.21.0 h1:LTVzPc3p/RzRnkQqLRndbAzjY0d0BCL72A6j3CdL9ZY=
github.com/go-openapi/spec v0.21.0/go.mod h1:78u6VdPw81XU44qEWGhtr982gJ5BWg2c0I5XwVMotYk=
github.com/go-openapi/swag v0.23.1 h1:lpsStH0n2ittzTnbaSloVZLuB5+fvSY/+hnagBjSNZU=
github.com/go-openapi/swag v0.23.1/go.mod h1:STZs8TbRvEQQKUA+JZNAm3EWlgaOBGpyFDqQnDHMef0=
github.com/go-openapi/jsonpointer v0.22.1 h1:sHYI1He3b9NqJ4wXLoJDKmUmHkWy/L7rtEo92JUxBNk=
github.com/go-openapi/jsonpointer v0.22.1/go.mod h1:pQT9OsLkfz1yWoMgYFy4x3U5GY5nUlsOn1qSBH5MkCM=
github.com/go-openapi/jsonreference v0.21.2 h1:Wxjda4M/BBQllegefXrY/9aq1fxBA8sI5M/lFU6tSWU=
github.com/go-openapi/jsonreference v0.21.2/go.mod h1:pp3PEjIsJ9CZDGCNOyXIQxsNuroxm8FAJ/+quA0yKzQ=
github.com/go-openapi/spec v0.22.0 h1:xT/EsX4frL3U09QviRIZXvkh80yibxQmtoEvyqug0Tw=
github.com/go-openapi/spec v0.22.0/go.mod h1:K0FhKxkez8YNS94XzF8YKEMULbFrRw4m15i2YUht4L0=
github.com/go-openapi/swag v0.19.15 h1:D2NRCBzS9/pEY3gP9Nl8aDqGUcPFrwG2p+CNFrLyrCM=
github.com/go-openapi/swag/conv v0.25.1 h1:+9o8YUg6QuqqBM5X6rYL/p1dpWeZRhoIt9x7CCP+he0=
github.com/go-openapi/swag/conv v0.25.1/go.mod h1:Z1mFEGPfyIKPu0806khI3zF+/EUXde+fdeksUl2NiDs=
github.com/go-openapi/swag/jsonname v0.25.1 h1:Sgx+qbwa4ej6AomWC6pEfXrA6uP2RkaNjA9BR8a1RJU=
github.com/go-openapi/swag/jsonname v0.25.1/go.mod h1:71Tekow6UOLBD3wS7XhdT98g5J5GR13NOTQ9/6Q11Zo=
github.com/go-openapi/swag/jsonutils v0.25.1 h1:AihLHaD0brrkJoMqEZOBNzTLnk81Kg9cWr+SPtxtgl8=
github.com/go-openapi/swag/jsonutils v0.25.1/go.mod h1:JpEkAjxQXpiaHmRO04N1zE4qbUEg3b7Udll7AMGTNOo=
github.com/go-openapi/swag/jsonutils/fixtures_test v0.25.1 h1:DSQGcdB6G0N9c/KhtpYc71PzzGEIc/fZ1no35x4/XBY=
github.com/go-openapi/swag/jsonutils/fixtures_test v0.25.1/go.mod h1:kjmweouyPwRUEYMSrbAidoLMGeJ5p6zdHi9BgZiqmsg=
github.com/go-openapi/swag/loading v0.25.1 h1:6OruqzjWoJyanZOim58iG2vj934TysYVptyaoXS24kw=
github.com/go-openapi/swag/loading v0.25.1/go.mod h1:xoIe2EG32NOYYbqxvXgPzne989bWvSNoWoyQVWEZicc=
github.com/go-openapi/swag/stringutils v0.25.1 h1:Xasqgjvk30eUe8VKdmyzKtjkVjeiXx1Iz0zDfMNpPbw=
github.com/go-openapi/swag/stringutils v0.25.1/go.mod h1:JLdSAq5169HaiDUbTvArA2yQxmgn4D6h4A+4HqVvAYg=
github.com/go-openapi/swag/typeutils v0.25.1 h1:rD/9HsEQieewNt6/k+JBwkxuAHktFtH3I3ysiFZqukA=
github.com/go-openapi/swag/typeutils v0.25.1/go.mod h1:9McMC/oCdS4BKwk2shEB7x17P6HmMmA6dQRtAkSnNb8=
github.com/go-openapi/swag/yamlutils v0.25.1 h1:mry5ez8joJwzvMbaTGLhw8pXUnhDK91oSJLDPF1bmGk=
github.com/go-openapi/swag/yamlutils v0.25.1/go.mod h1:cm9ywbzncy3y6uPm/97ysW8+wZ09qsks+9RS8fLWKqg=
github.com/go-sql-driver/mysql v1.4.1/go.mod h1:zAC/RDZ24gD3HViQzih4MyKcchzm+sOG5ZlKdlhCg5w=
github.com/go-sql-driver/mysql v1.8.1/go.mod h1:wEBSXgmK//2ZFJyE+qWnIsVGmvmEKlqwuVSjsCm7DZg=
github.com/go-sql-driver/mysql v1.9.0 h1:Y0zIbQXhQKmQgTp44Y1dp3wTXcn804QoTptLZT1vtvo=
@@ -162,8 +213,6 @@ github.com/joho/godotenv v1.5.1 h1:7eLL/+HRGLY0ldzfGMeQkb7vMd0as4CfYvUVzLqw0N0=
github.com/joho/godotenv v1.5.1/go.mod h1:f4LDr5Voq0i2e/R5DDNOoa2zzDfwtkZa6DnEwAbqwq4=
github.com/jonboulle/clockwork v0.5.0 h1:Hyh9A8u51kptdkR+cqRpT1EebBwTn1oK9YfGYbdFz6I=
github.com/jonboulle/clockwork v0.5.0/go.mod h1:3mZlmanh0g2NDKO5TWZVJAfofYk64M7XN3SzBPjZF60=
github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY=
github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y=
github.com/jpillora/backoff v1.0.0 h1:uvFg412JmmHBHw7iwprIxkPMI+sGQ4kzOWsMeHnm2EA=
github.com/jpillora/backoff v1.0.0/go.mod h1:J/6gKK9jxlEcS3zixgDgUAsiuZ7yrSoa/FX5e0EB2j4=
github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM=
@@ -186,8 +235,6 @@ github.com/lib/pq v1.10.9 h1:YXG7RB+JIjhP29X+OtkiDnYaXQwpS4JEWq7dtCCRUEw=
github.com/lib/pq v1.10.9/go.mod h1:AlVN5x4E4T544tWzH6hKfbfQvm3HdbOxrmggDNAPY9o=
github.com/linkedin/goavro/v2 v2.14.0 h1:aNO/js65U+Mwq4yB5f1h01c3wiM458qtRad1DN0CMUI=
github.com/linkedin/goavro/v2 v2.14.0/go.mod h1:KXx+erlq+RPlGSPmLF7xGo6SAbh8sCQ53x064+ioxhk=
github.com/mailru/easyjson v0.9.0 h1:PrnmzHw7262yW8sTBwxi1PdJA3Iw/EKBa8psRf7d9a4=
github.com/mailru/easyjson v0.9.0/go.mod h1:1+xMtQp2MRNVL/V1bOzuP3aP8VNwRW55fQUto+XFtTU=
github.com/mattn/go-sqlite3 v1.10.0/go.mod h1:FPy6KqzDD04eiIsT53CuJW3U88zkxoIYsOqkbpncsNc=
github.com/mattn/go-sqlite3 v1.14.22/go.mod h1:Uh1q+B4BYcTPb+yiD3kU8Ct7aC0hY9fxUwlHK0RXw+Y=
github.com/mattn/go-sqlite3 v1.14.24 h1:tpSp2G2KyMnnQu99ngJ47EIkWVmliIizyZBfPrBWDRM=
@@ -250,6 +297,8 @@ github.com/sosodev/duration v1.3.1/go.mod h1:RQIBBX0+fMLc/D9+Jb/fwvVmo0eZvDDEERA
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw=
github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo=
github.com/stretchr/objx v0.5.2 h1:xuMeJ0Sdp5ZMRXx/aWO6RZxdr3beISkG5/G/aIRr3pY=
github.com/stretchr/objx v0.5.2/go.mod h1:FRsXN1f5AsAjCGJKqEizvkpNtU+EGNCLh3NxZ/8L+MA=
github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs=
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4=
@@ -284,10 +333,10 @@ go.uber.org/atomic v1.11.0 h1:ZvwS0R+56ePWxUNi+Atn9dWONBPp/AUETXlHW0DxSjE=
go.uber.org/atomic v1.11.0/go.mod h1:LUxbIzbOniOlMKjJjyPfpl4v+PKK2cNJn91OQbhoJI0=
go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto=
go.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE=
go.yaml.in/yaml/v2 v2.4.2 h1:DzmwEr2rDGHl7lsFgAHxmNz/1NlQ7xLIrlN2h5d1eGI=
go.yaml.in/yaml/v2 v2.4.2/go.mod h1:081UH+NErpNdqlCXm3TtEran0rJZGxAYx9hb/ELlsPU=
go.yaml.in/yaml/v3 v3.0.3 h1:bXOww4E/J3f66rav3pX3m8w6jDE4knZjGOw8b5Y6iNE=
go.yaml.in/yaml/v3 v3.0.3/go.mod h1:tBHosrYAkRZjRAOREWbDnBXUf08JOwYq++0QNwQiWzI=
go.yaml.in/yaml/v2 v2.4.3 h1:6gvOSjQoTB3vt1l+CU+tSyi/HOjfOjRLJ4YwYZGwRO0=
go.yaml.in/yaml/v2 v2.4.3/go.mod h1:zSxWcmIDjOzPXpjlTTbAsKokqkDNAVtZO0WOMiT90s8=
go.yaml.in/yaml/v3 v3.0.4 h1:tfq32ie2Jv2UxXFdLJdh3jXuOzWiL1fo0bu/FbuKpbc=
go.yaml.in/yaml/v3 v3.0.4/go.mod h1:DhzuOOF2ATzADvBadXxruRBLzYTpT36CKvDb3+aBEFg=
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc=
golang.org/x/crypto v0.6.0/go.mod h1:OFC/31mSvZgRz0V1QTNCzfAI1aIRzbiufJtkMIlEp58=
@@ -295,8 +344,8 @@ golang.org/x/crypto v0.13.0/go.mod h1:y6Z2r+Rw4iayiXXAIxJIDAJ1zMW4yaTpebo8fPOliY
golang.org/x/crypto v0.19.0/go.mod h1:Iy9bg/ha4yyC70EfRS8jz+B6ybOBKMaSxLj6P6oBDfU=
golang.org/x/crypto v0.23.0/go.mod h1:CKFgDieR+mRhux2Lsu27y0fO304Db0wZe70UKqHu0v8=
golang.org/x/crypto v0.31.0/go.mod h1:kDsLvtWBEx7MV9tJOj9bnXsPbxwJQ6csT/x4KIN4Ssk=
golang.org/x/crypto v0.42.0 h1:chiH31gIWm57EkTXpwnqf8qeuMUi0yekh6mT2AvFlqI=
golang.org/x/crypto v0.42.0/go.mod h1:4+rDnOTJhQCx2q7/j6rAN5XDw8kPjeaXEUR2eL94ix8=
golang.org/x/crypto v0.43.0 h1:dduJYIi3A3KOfdGOHX8AVZ/jGiyPa3IbBozJ5kNuE04=
golang.org/x/crypto v0.43.0/go.mod h1:BFbav4mRNlXJL4wNeejLpWxB7wMbc79PdRGhWKncxR0=
golang.org/x/exp v0.0.0-20250620022241-b7579e27df2b h1:M2rDM6z3Fhozi9O7NWsxAkg/yqS/lQJ6PmkyIV3YP+o=
golang.org/x/exp v0.0.0-20250620022241-b7579e27df2b/go.mod h1:3//PLf8L/X+8b4vuAfHzxeRUl04Adcb341+IGKfnqS8=
golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4=
@@ -304,8 +353,8 @@ golang.org/x/mod v0.8.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs=
golang.org/x/mod v0.12.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs=
golang.org/x/mod v0.15.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c=
golang.org/x/mod v0.17.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c=
golang.org/x/mod v0.27.0 h1:kb+q2PyFnEADO2IEF935ehFUXlWiNjJWtRNgBLSfbxQ=
golang.org/x/mod v0.27.0/go.mod h1:rWI627Fq0DEoudcK+MBkNkCe0EetEaDSwJJkCcjpazc=
golang.org/x/mod v0.29.0 h1:HV8lRxZC4l2cr3Zq1LvtOsi/ThTgWnUk/y64QSs8GwA=
golang.org/x/mod v0.29.0/go.mod h1:NyhrlYXJ2H4eJiRy/WDBO6HMqZQ6q9nk4JzS3NuCK+w=
golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20200114155413-6afb5195e5aa/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=
@@ -317,10 +366,10 @@ golang.org/x/net v0.15.0/go.mod h1:idbUs1IY1+zTqbi8yxTbhexhEEk5ur9LInksu6HrEpk=
golang.org/x/net v0.21.0/go.mod h1:bIjVDfnllIU7BJ2DNgfnXvpSvtn8VRwhlsaeUTyUS44=
golang.org/x/net v0.25.0/go.mod h1:JkAGAh7GEvH74S6FOH42FLoXpXbE/aqXSrIQjXgsiwM=
golang.org/x/net v0.33.0/go.mod h1:HXLR5J+9DxmrqMwG9qjGCxZ+zKXxBru04zlTvWlWuN4=
golang.org/x/net v0.43.0 h1:lat02VYK2j4aLzMzecihNvTlJNQUq316m2Mr9rnM6YE=
golang.org/x/net v0.43.0/go.mod h1:vhO1fvI4dGsIjh73sWfUVjj3N7CA9WkKJNQm2svM6Jg=
golang.org/x/oauth2 v0.30.0 h1:dnDm7JmhM45NNpd8FDDeLhK6FwqbOf4MLCM9zb1BOHI=
golang.org/x/oauth2 v0.30.0/go.mod h1:B++QgG3ZKulg6sRPGD/mqlHQs5rB3Ml9erfeDY7xKlU=
golang.org/x/net v0.46.0 h1:giFlY12I07fugqwPuWJi68oOnpfqFnJIJzaIIm2JVV4=
golang.org/x/net v0.46.0/go.mod h1:Q9BGdFy1y4nkUwiLvT5qtyhAnEHgnQ/zd8PfU6nc210=
golang.org/x/oauth2 v0.32.0 h1:jsCblLleRMDrxMN29H3z/k1KliIvpLgCkE6R8FXXNgY=
golang.org/x/oauth2 v0.32.0/go.mod h1:lzm5WQJQwKZ3nwavOZ3IS5Aulzxi68dUSgRHujetwEA=
golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.1.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
@@ -341,8 +390,8 @@ golang.org/x/sys v0.12.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.17.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
golang.org/x/sys v0.20.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
golang.org/x/sys v0.28.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
golang.org/x/sys v0.36.0 h1:KVRy2GtZBrk1cBYA7MKu5bEZFxQk4NIDV6RLVcC8o0k=
golang.org/x/sys v0.36.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks=
golang.org/x/sys v0.37.0 h1:fdNQudmxPjkdUTPnLn5mdQv7Zwvbvpaxqs831goi9kQ=
golang.org/x/sys v0.37.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks=
golang.org/x/telemetry v0.0.0-20240228155512-f48c80bd79b2/go.mod h1:TeRTkGYfJXctD9OcfyVLyj2J3IxLnKwHJR8f4D8a3YE=
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
@@ -361,8 +410,8 @@ golang.org/x/text v0.13.0/go.mod h1:TvPlkZtksWOMsz7fbANvkp4WM8x/WCo/om8BMLbz+aE=
golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU=
golang.org/x/text v0.15.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU=
golang.org/x/text v0.21.0/go.mod h1:4IBbMaMmOPCJ8SecivzSH54+73PCFmPWxNTLm+vZkEQ=
golang.org/x/text v0.29.0 h1:1neNs90w9YzJ9BocxfsQNHKuAT4pkghyXc4nhZ6sJvk=
golang.org/x/text v0.29.0/go.mod h1:7MhJOA9CD2qZyOKYazxdYMF85OwPdEr9jTtBpO7ydH4=
golang.org/x/text v0.30.0 h1:yznKA/E9zq54KzlzBEAWn1NXSQ8DIp/NYMy88xJjl4k=
golang.org/x/text v0.30.0/go.mod h1:yDdHFIX9t+tORqspjENWgzaCVXgk0yYnYuSZ8UzzBVM=
golang.org/x/time v0.13.0 h1:eUlYslOIt32DgYD6utsuUeHs4d7AsEYLuIAdg7FlYgI=
golang.org/x/time v0.13.0/go.mod h1:eL/Oa2bBBK0TkX57Fyni+NgnyQQN4LitPmob2Hjnqw4=
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
@@ -371,12 +420,12 @@ golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc
golang.org/x/tools v0.6.0/go.mod h1:Xwgl3UAJ/d3gWutnCtw505GrjyAbvKui8lOU390QaIU=
golang.org/x/tools v0.13.0/go.mod h1:HvlwmtVNQAhOuCjW7xxvovg8wbNq7LwfXh/k7wXUl58=
golang.org/x/tools v0.21.1-0.20240508182429-e35e4ccd0d2d/go.mod h1:aiJjzUbINMkxbQROHiO6hDPo2LHcIPhhQsa9DLh0yGk=
golang.org/x/tools v0.36.0 h1:kWS0uv/zsvHEle1LbV5LE8QujrxB3wfQyxHfhOk0Qkg=
golang.org/x/tools v0.36.0/go.mod h1:WBDiHKJK8YgLHlcQPYQzNCkUxUypCaa5ZegCVutKm+s=
golang.org/x/tools v0.38.0 h1:Hx2Xv8hISq8Lm16jvBZ2VQf+RLmbd7wVUsALibYI/IQ=
golang.org/x/tools v0.38.0/go.mod h1:yEsQ/d/YK8cjh0L6rZlY8tgtlKiBNTL14pGDJPJpYQs=
golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
google.golang.org/protobuf v1.36.8 h1:xHScyCOEuuwZEc6UtSOvPbAT4zRh0xcNRYekJwfqyMc=
google.golang.org/protobuf v1.36.8/go.mod h1:fuxRtAxBytpl4zzqUh6/eyUujkJdNiuEkXntxiD/uRU=
google.golang.org/protobuf v1.36.9 h1:w2gp2mA27hUeUzj9Ex9FBjsBm40zfaDtEWow293U7Iw=
google.golang.org/protobuf v1.36.9/go.mod h1:fuxRtAxBytpl4zzqUh6/eyUujkJdNiuEkXntxiD/uRU=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/check.v1 v1.0.0-20200227125254-8fa46927fb4f/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk=

View File

@@ -65,7 +65,7 @@ func setup(t *testing.T) *api.RestApi {
}
]
}`
const testclusterJson = `{
const testclusterJSON = `{
"name": "testcluster",
"subClusters": [
{
@@ -128,7 +128,7 @@ func setup(t *testing.T) *api.RestApi {
t.Fatal(err)
}
if err := os.WriteFile(filepath.Join(jobarchive, "version.txt"), fmt.Appendf(nil, "%d", 2), 0o666); err != nil {
if err := os.WriteFile(filepath.Join(jobarchive, "version.txt"), fmt.Appendf(nil, "%d", 3), 0o666); err != nil {
t.Fatal(err)
}
@@ -136,7 +136,7 @@ func setup(t *testing.T) *api.RestApi {
t.Fatal(err)
}
if err := os.WriteFile(filepath.Join(jobarchive, "testcluster", "cluster.json"), []byte(testclusterJson), 0o666); err != nil {
if err := os.WriteFile(filepath.Join(jobarchive, "testcluster", "cluster.json"), []byte(testclusterJSON), 0o666); err != nil {
t.Fatal(err)
}

View File

@@ -2,6 +2,7 @@
// All rights reserved. This file is part of cc-backend.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package api
import (
@@ -29,9 +30,15 @@ import (
"github.com/gorilla/mux"
)
// StopJobApiRequest model
type StopJobApiRequest struct {
JobId *int64 `json:"jobId" example:"123000"`
const (
// secondsPerDay is the number of seconds in 24 hours.
// Used for duplicate job detection within a day window.
secondsPerDay = 86400
)
// StopJobAPIRequest model
type StopJobAPIRequest struct {
JobID *int64 `json:"jobId" example:"123000"`
Cluster *string `json:"cluster" example:"fritz"`
StartTime *int64 `json:"startTime" example:"1649723812"`
State schema.JobState `json:"jobState" validate:"required" example:"completed"`
@@ -40,7 +47,7 @@ type StopJobApiRequest struct {
// DeleteJobApiRequest model
type DeleteJobApiRequest struct {
JobId *int64 `json:"jobId" validate:"required" example:"123000"` // Cluster Job ID of job
JobID *int64 `json:"jobId" validate:"required" example:"123000"` // Cluster Job ID of job
Cluster *string `json:"cluster" example:"fritz"` // Cluster of job
StartTime *int64 `json:"startTime" example:"1649723812"` // Start Time of job as epoch
}
@@ -113,7 +120,8 @@ func (api *RestApi) getJobs(rw http.ResponseWriter, r *http.Request) {
for key, vals := range r.URL.Query() {
switch key {
// TODO: add project filter
case "project":
filter.Project = &model.StringInput{Eq: &vals[0]}
case "state":
for _, s := range vals {
state := schema.JobState(s)
@@ -363,7 +371,7 @@ func (api *RestApi) getJobById(rw http.ResponseWriter, r *http.Request) {
var metrics GetJobApiRequest
if err = decode(r.Body, &metrics); err != nil {
http.Error(rw, err.Error(), http.StatusBadRequest)
handleError(fmt.Errorf("decoding request failed: %w", err), http.StatusBadRequest, rw)
return
}
@@ -434,30 +442,32 @@ func (api *RestApi) getJobById(rw http.ResponseWriter, r *http.Request) {
func (api *RestApi) editMeta(rw http.ResponseWriter, r *http.Request) {
id, err := strconv.ParseInt(mux.Vars(r)["id"], 10, 64)
if err != nil {
http.Error(rw, err.Error(), http.StatusBadRequest)
handleError(fmt.Errorf("parsing job ID failed: %w", err), http.StatusBadRequest, rw)
return
}
job, err := api.JobRepository.FindById(r.Context(), id)
if err != nil {
http.Error(rw, err.Error(), http.StatusNotFound)
handleError(fmt.Errorf("finding job failed: %w", err), http.StatusNotFound, rw)
return
}
var req EditMetaRequest
if err := decode(r.Body, &req); err != nil {
http.Error(rw, err.Error(), http.StatusBadRequest)
handleError(fmt.Errorf("decoding request failed: %w", err), http.StatusBadRequest, rw)
return
}
if err := api.JobRepository.UpdateMetadata(job, req.Key, req.Value); err != nil {
http.Error(rw, err.Error(), http.StatusInternalServerError)
handleError(fmt.Errorf("updating metadata failed: %w", err), http.StatusInternalServerError, rw)
return
}
rw.Header().Add("Content-Type", "application/json")
rw.WriteHeader(http.StatusOK)
json.NewEncoder(rw).Encode(job)
if err := json.NewEncoder(rw).Encode(job); err != nil {
cclog.Errorf("Failed to encode job response: %v", err)
}
}
// tagJob godoc
@@ -480,32 +490,32 @@ func (api *RestApi) editMeta(rw http.ResponseWriter, r *http.Request) {
func (api *RestApi) tagJob(rw http.ResponseWriter, r *http.Request) {
id, err := strconv.ParseInt(mux.Vars(r)["id"], 10, 64)
if err != nil {
http.Error(rw, err.Error(), http.StatusBadRequest)
handleError(fmt.Errorf("parsing job ID failed: %w", err), http.StatusBadRequest, rw)
return
}
job, err := api.JobRepository.FindById(r.Context(), id)
if err != nil {
http.Error(rw, err.Error(), http.StatusNotFound)
handleError(fmt.Errorf("finding job failed: %w", err), http.StatusNotFound, rw)
return
}
job.Tags, err = api.JobRepository.GetTags(repository.GetUserFromContext(r.Context()), job.ID)
if err != nil {
http.Error(rw, err.Error(), http.StatusInternalServerError)
handleError(fmt.Errorf("getting tags failed: %w", err), http.StatusInternalServerError, rw)
return
}
var req TagJobApiRequest
if err := decode(r.Body, &req); err != nil {
http.Error(rw, err.Error(), http.StatusBadRequest)
handleError(fmt.Errorf("decoding request failed: %w", err), http.StatusBadRequest, rw)
return
}
for _, tag := range req {
tagId, err := api.JobRepository.AddTagOrCreate(repository.GetUserFromContext(r.Context()), *job.ID, tag.Type, tag.Name, tag.Scope)
if err != nil {
http.Error(rw, err.Error(), http.StatusInternalServerError)
handleError(fmt.Errorf("adding tag failed: %w", err), http.StatusInternalServerError, rw)
return
}
@@ -519,7 +529,9 @@ func (api *RestApi) tagJob(rw http.ResponseWriter, r *http.Request) {
rw.Header().Add("Content-Type", "application/json")
rw.WriteHeader(http.StatusOK)
json.NewEncoder(rw).Encode(job)
if err := json.NewEncoder(rw).Encode(job); err != nil {
cclog.Errorf("Failed to encode job response: %v", err)
}
}
// removeTagJob godoc
@@ -542,25 +554,25 @@ func (api *RestApi) tagJob(rw http.ResponseWriter, r *http.Request) {
func (api *RestApi) removeTagJob(rw http.ResponseWriter, r *http.Request) {
id, err := strconv.ParseInt(mux.Vars(r)["id"], 10, 64)
if err != nil {
http.Error(rw, err.Error(), http.StatusBadRequest)
handleError(fmt.Errorf("parsing job ID failed: %w", err), http.StatusBadRequest, rw)
return
}
job, err := api.JobRepository.FindById(r.Context(), id)
if err != nil {
http.Error(rw, err.Error(), http.StatusNotFound)
handleError(fmt.Errorf("finding job failed: %w", err), http.StatusNotFound, rw)
return
}
job.Tags, err = api.JobRepository.GetTags(repository.GetUserFromContext(r.Context()), job.ID)
if err != nil {
http.Error(rw, err.Error(), http.StatusInternalServerError)
handleError(fmt.Errorf("getting tags failed: %w", err), http.StatusInternalServerError, rw)
return
}
var req TagJobApiRequest
if err := decode(r.Body, &req); err != nil {
http.Error(rw, err.Error(), http.StatusBadRequest)
handleError(fmt.Errorf("decoding request failed: %w", err), http.StatusBadRequest, rw)
return
}
@@ -573,7 +585,7 @@ func (api *RestApi) removeTagJob(rw http.ResponseWriter, r *http.Request) {
remainingTags, err := api.JobRepository.RemoveJobTagByRequest(repository.GetUserFromContext(r.Context()), *job.ID, rtag.Type, rtag.Name, rtag.Scope)
if err != nil {
http.Error(rw, err.Error(), http.StatusInternalServerError)
handleError(fmt.Errorf("removing tag failed: %w", err), http.StatusInternalServerError, rw)
return
}
@@ -582,7 +594,9 @@ func (api *RestApi) removeTagJob(rw http.ResponseWriter, r *http.Request) {
rw.Header().Add("Content-Type", "application/json")
rw.WriteHeader(http.StatusOK)
json.NewEncoder(rw).Encode(job)
if err := json.NewEncoder(rw).Encode(job); err != nil {
cclog.Errorf("Failed to encode job response: %v", err)
}
}
// removeTags godoc
@@ -604,7 +618,7 @@ func (api *RestApi) removeTagJob(rw http.ResponseWriter, r *http.Request) {
func (api *RestApi) removeTags(rw http.ResponseWriter, r *http.Request) {
var req TagJobApiRequest
if err := decode(r.Body, &req); err != nil {
http.Error(rw, err.Error(), http.StatusBadRequest)
handleError(fmt.Errorf("decoding request failed: %w", err), http.StatusBadRequest, rw)
return
}
@@ -619,11 +633,10 @@ func (api *RestApi) removeTags(rw http.ResponseWriter, r *http.Request) {
err := api.JobRepository.RemoveTagByRequest(rtag.Type, rtag.Name, rtag.Scope)
if err != nil {
http.Error(rw, err.Error(), http.StatusInternalServerError)
handleError(fmt.Errorf("removing tag failed: %w", err), http.StatusInternalServerError, rw)
return
} else {
currentCount++
}
currentCount++
}
rw.WriteHeader(http.StatusOK)
@@ -674,9 +687,11 @@ func (api *RestApi) startJob(rw http.ResponseWriter, r *http.Request) {
if err != nil && err != sql.ErrNoRows {
handleError(fmt.Errorf("checking for duplicate failed: %w", err), http.StatusInternalServerError, rw)
return
} else if err == nil {
}
if err == nil {
for _, job := range jobs {
if (req.StartTime - job.StartTime) < 86400 {
// Check if jobs are within the same day (prevent duplicates)
if (req.StartTime - job.StartTime) < secondsPerDay {
handleError(fmt.Errorf("a job with that jobId, cluster and startTime already exists: dbid: %d, jobid: %d", job.ID, job.JobID), http.StatusUnprocessableEntity, rw)
return
}
@@ -693,7 +708,6 @@ func (api *RestApi) startJob(rw http.ResponseWriter, r *http.Request) {
for _, tag := range req.Tags {
if _, err := api.JobRepository.AddTagOrCreate(repository.GetUserFromContext(r.Context()), id, tag.Type, tag.Name, tag.Scope); err != nil {
http.Error(rw, err.Error(), http.StatusInternalServerError)
handleError(fmt.Errorf("adding tag to new job %d failed: %w", id, err), http.StatusInternalServerError, rw)
return
}
@@ -702,9 +716,11 @@ func (api *RestApi) startJob(rw http.ResponseWriter, r *http.Request) {
cclog.Printf("new job (id: %d): cluster=%s, jobId=%d, user=%s, startTime=%d", id, req.Cluster, req.JobID, req.User, req.StartTime)
rw.Header().Add("Content-Type", "application/json")
rw.WriteHeader(http.StatusCreated)
json.NewEncoder(rw).Encode(DefaultApiResponse{
if err := json.NewEncoder(rw).Encode(DefaultApiResponse{
Message: "success",
})
}); err != nil {
cclog.Errorf("Failed to encode response: %v", err)
}
}
// stopJobByRequest godoc
@@ -725,7 +741,7 @@ func (api *RestApi) startJob(rw http.ResponseWriter, r *http.Request) {
// @router /api/jobs/stop_job/ [post]
func (api *RestApi) stopJobByRequest(rw http.ResponseWriter, r *http.Request) {
// Parse request body
req := StopJobApiRequest{}
req := StopJobAPIRequest{}
if err := decode(r.Body, &req); err != nil {
handleError(fmt.Errorf("parsing request body failed: %w", err), http.StatusBadRequest, rw)
return
@@ -734,20 +750,22 @@ func (api *RestApi) stopJobByRequest(rw http.ResponseWriter, r *http.Request) {
// Fetch job (that will be stopped) from db
var job *schema.Job
var err error
if req.JobId == nil {
if req.JobID == nil {
handleError(errors.New("the field 'jobId' is required"), http.StatusBadRequest, rw)
return
}
// cclog.Printf("loading db job for stopJobByRequest... : stopJobApiRequest=%v", req)
job, err = api.JobRepository.Find(req.JobId, req.Cluster, req.StartTime)
job, err = api.JobRepository.Find(req.JobID, req.Cluster, req.StartTime)
if err != nil {
job, err = api.JobRepository.FindCached(req.JobId, req.Cluster, req.StartTime)
// FIXME: Previous error is hidden
if err != nil {
handleError(fmt.Errorf("finding job failed: %w", err), http.StatusUnprocessableEntity, rw)
// Try cached jobs if not found in main repository
cachedJob, cachedErr := api.JobRepository.FindCached(req.JobID, req.Cluster, req.StartTime)
if cachedErr != nil {
// Combine both errors for better debugging
handleError(fmt.Errorf("finding job failed: %w (cached lookup also failed: %v)", err, cachedErr), http.StatusNotFound, rw)
return
}
job = cachedJob
}
api.checkAndHandleStopJob(rw, job, req)
@@ -790,9 +808,11 @@ func (api *RestApi) deleteJobById(rw http.ResponseWriter, r *http.Request) {
}
rw.Header().Add("Content-Type", "application/json")
rw.WriteHeader(http.StatusOK)
json.NewEncoder(rw).Encode(DefaultApiResponse{
if err := json.NewEncoder(rw).Encode(DefaultApiResponse{
Message: fmt.Sprintf("Successfully deleted job %s", id),
})
}); err != nil {
cclog.Errorf("Failed to encode response: %v", err)
}
}
// deleteJobByRequest godoc
@@ -822,12 +842,12 @@ func (api *RestApi) deleteJobByRequest(rw http.ResponseWriter, r *http.Request)
// Fetch job (that will be deleted) from db
var job *schema.Job
var err error
if req.JobId == nil {
if req.JobID == nil {
handleError(errors.New("the field 'jobId' is required"), http.StatusBadRequest, rw)
return
}
job, err = api.JobRepository.Find(req.JobId, req.Cluster, req.StartTime)
job, err = api.JobRepository.Find(req.JobID, req.Cluster, req.StartTime)
if err != nil {
handleError(fmt.Errorf("finding job failed: %w", err), http.StatusUnprocessableEntity, rw)
return
@@ -841,9 +861,11 @@ func (api *RestApi) deleteJobByRequest(rw http.ResponseWriter, r *http.Request)
rw.Header().Add("Content-Type", "application/json")
rw.WriteHeader(http.StatusOK)
json.NewEncoder(rw).Encode(DefaultApiResponse{
if err := json.NewEncoder(rw).Encode(DefaultApiResponse{
Message: fmt.Sprintf("Successfully deleted job %d", job.ID),
})
}); err != nil {
cclog.Errorf("Failed to encode response: %v", err)
}
}
// deleteJobBefore godoc
@@ -885,19 +907,21 @@ func (api *RestApi) deleteJobBefore(rw http.ResponseWriter, r *http.Request) {
rw.Header().Add("Content-Type", "application/json")
rw.WriteHeader(http.StatusOK)
json.NewEncoder(rw).Encode(DefaultApiResponse{
if err := json.NewEncoder(rw).Encode(DefaultApiResponse{
Message: fmt.Sprintf("Successfully deleted %d jobs", cnt),
})
}); err != nil {
cclog.Errorf("Failed to encode response: %v", err)
}
}
func (api *RestApi) checkAndHandleStopJob(rw http.ResponseWriter, job *schema.Job, req StopJobApiRequest) {
func (api *RestApi) checkAndHandleStopJob(rw http.ResponseWriter, job *schema.Job, req StopJobAPIRequest) {
// Sanity checks
if job.State != schema.JobStateRunning {
handleError(fmt.Errorf("jobId %d (id %d) on %s : job has already been stopped (state is: %s)", job.JobID, job.ID, job.Cluster, job.State), http.StatusUnprocessableEntity, rw)
return
}
if job == nil || job.StartTime > req.StopTime {
if job.StartTime > req.StopTime {
handleError(fmt.Errorf("jobId %d (id %d) on %s : stopTime %d must be larger/equal than startTime %d", job.JobID, job.ID, job.Cluster, req.StopTime, job.StartTime), http.StatusBadRequest, rw)
return
}
@@ -913,14 +937,14 @@ func (api *RestApi) checkAndHandleStopJob(rw http.ResponseWriter, job *schema.Jo
job.Duration = int32(req.StopTime - job.StartTime)
job.State = req.State
api.JobRepository.Mutex.Lock()
defer api.JobRepository.Mutex.Unlock()
if err := api.JobRepository.Stop(*job.ID, job.Duration, job.State, job.MonitoringStatus); err != nil {
if err := api.JobRepository.StopCached(*job.ID, job.Duration, job.State, job.MonitoringStatus); err != nil {
api.JobRepository.Mutex.Unlock()
handleError(fmt.Errorf("jobId %d (id %d) on %s : marking job as '%s' (duration: %d) in DB failed: %w", job.JobID, job.ID, job.Cluster, job.State, job.Duration, err), http.StatusInternalServerError, rw)
return
}
}
api.JobRepository.Mutex.Unlock()
cclog.Printf("archiving job... (dbid: %d): cluster=%s, jobId=%d, user=%s, startTime=%d, duration=%d, state=%s", job.ID, job.Cluster, job.JobID, job.User, job.StartTime, job.Duration, job.State)
@@ -929,7 +953,9 @@ func (api *RestApi) checkAndHandleStopJob(rw http.ResponseWriter, job *schema.Jo
// writing to the filesystem fails, the client will not know.
rw.Header().Add("Content-Type", "application/json")
rw.WriteHeader(http.StatusOK)
json.NewEncoder(rw).Encode(job)
if err := json.NewEncoder(rw).Encode(job); err != nil {
cclog.Errorf("Failed to encode job response: %v", err)
}
// Monitoring is disabled...
if job.MonitoringStatus == schema.MonitoringStatusDisabled {
@@ -947,7 +973,7 @@ func (api *RestApi) getJobMetrics(rw http.ResponseWriter, r *http.Request) {
for _, scope := range r.URL.Query()["scope"] {
var s schema.MetricScope
if err := s.UnmarshalGQL(scope); err != nil {
http.Error(rw, err.Error(), http.StatusBadRequest)
handleError(fmt.Errorf("unmarshaling scope failed: %w", err), http.StatusBadRequest, rw)
return
}
scopes = append(scopes, s)
@@ -956,7 +982,7 @@ func (api *RestApi) getJobMetrics(rw http.ResponseWriter, r *http.Request) {
rw.Header().Add("Content-Type", "application/json")
rw.WriteHeader(http.StatusOK)
type Respone struct {
type Response struct {
Data *struct {
JobMetrics []*model.JobMetricWithName `json:"jobMetrics"`
} `json:"data"`
@@ -968,17 +994,21 @@ func (api *RestApi) getJobMetrics(rw http.ResponseWriter, r *http.Request) {
resolver := graph.GetResolverInstance()
data, err := resolver.Query().JobMetrics(r.Context(), id, metrics, scopes, nil)
if err != nil {
json.NewEncoder(rw).Encode(Respone{
if err := json.NewEncoder(rw).Encode(Response{
Error: &struct {
Message string "json:\"message\""
Message string `json:"message"`
}{Message: err.Error()},
})
}); err != nil {
cclog.Errorf("Failed to encode error response: %v", err)
}
return
}
json.NewEncoder(rw).Encode(Respone{
if err := json.NewEncoder(rw).Encode(Response{
Data: &struct {
JobMetrics []*model.JobMetricWithName "json:\"jobMetrics\""
JobMetrics []*model.JobMetricWithName `json:"jobMetrics"`
}{JobMetrics: data},
})
}); err != nil {
cclog.Errorf("Failed to encode response: %v", err)
}
}

View File

@@ -50,13 +50,6 @@ func freeMetrics(rw http.ResponseWriter, r *http.Request) {
return
}
// // TODO: lastCheckpoint might be modified by different go-routines.
// // Load it using the sync/atomic package?
// freeUpTo := lastCheckpoint.Unix()
// if to < freeUpTo {
// freeUpTo = to
// }
bodyDec := json.NewDecoder(r.Body)
var selectors [][]string
err = bodyDec.Decode(&selectors)

View File

@@ -2,6 +2,11 @@
// All rights reserved. This file is part of cc-backend.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
// Package api provides the REST API layer for ClusterCockpit.
// It handles HTTP requests for job management, user administration,
// cluster queries, node state updates, and metrics storage operations.
// The API supports both JWT token authentication and session-based authentication.
package api
import (
@@ -11,6 +16,7 @@ import (
"net/http"
"os"
"path/filepath"
"strings"
"sync"
"github.com/ClusterCockpit/cc-backend/internal/auth"
@@ -39,10 +45,19 @@ import (
// @in header
// @name X-Auth-Token
const (
noticeFilePath = "./var/notice.txt"
noticeFilePerms = 0o644
)
type RestApi struct {
JobRepository *repository.JobRepository
Authentication *auth.Authentication
MachineStateDir string
// RepositoryMutex protects job creation operations from race conditions
// when checking for duplicate jobs during startJob API calls.
// It prevents concurrent job starts with the same jobId/cluster/startTime
// from creating duplicate entries in the database.
RepositoryMutex sync.Mutex
}
@@ -66,7 +81,6 @@ func (api *RestApi) MountApiRoutes(r *mux.Router) {
// Job Handler
r.HandleFunc("/jobs/start_job/", api.startJob).Methods(http.MethodPost, http.MethodPut)
r.HandleFunc("/jobs/stop_job/", api.stopJobByRequest).Methods(http.MethodPost, http.MethodPut)
// r.HandleFunc("/jobs/import/", api.importJob).Methods(http.MethodPost, http.MethodPut)
r.HandleFunc("/jobs/", api.getJobs).Methods(http.MethodGet)
r.HandleFunc("/jobs/{id}", api.getJobById).Methods(http.MethodPost)
r.HandleFunc("/jobs/{id}", api.getCompleteJobById).Methods(http.MethodGet)
@@ -97,6 +111,7 @@ func (api *RestApi) MountUserApiRoutes(r *mux.Router) {
func (api *RestApi) MountMetricStoreApiRoutes(r *mux.Router) {
// REST API Uses TokenAuth
// Note: StrictSlash handles trailing slash variations automatically
r.HandleFunc("/api/free", freeMetrics).Methods(http.MethodPost)
r.HandleFunc("/api/write", writeMetrics).Methods(http.MethodPost)
r.HandleFunc("/api/debug", debugMetrics).Methods(http.MethodGet)
@@ -146,10 +161,12 @@ func handleError(err error, statusCode int, rw http.ResponseWriter) {
cclog.Warnf("REST ERROR : %s", err.Error())
rw.Header().Add("Content-Type", "application/json")
rw.WriteHeader(statusCode)
json.NewEncoder(rw).Encode(ErrorResponse{
if err := json.NewEncoder(rw).Encode(ErrorResponse{
Status: http.StatusText(statusCode),
Error: err.Error(),
})
}); err != nil {
cclog.Errorf("Failed to encode error response: %v", err)
}
}
func decode(r io.Reader, val any) error {
@@ -162,41 +179,41 @@ func (api *RestApi) editNotice(rw http.ResponseWriter, r *http.Request) {
// SecuredCheck() only worked with TokenAuth: Removed
if user := repository.GetUserFromContext(r.Context()); !user.HasRole(schema.RoleAdmin) {
http.Error(rw, "Only admins are allowed to update the notice.txt file", http.StatusForbidden)
handleError(fmt.Errorf("only admins are allowed to update the notice.txt file"), http.StatusForbidden, rw)
return
}
// Get Value
newContent := r.FormValue("new-content")
// Check FIle
noticeExists := util.CheckFileExists("./var/notice.txt")
// Validate content length to prevent DoS
if len(newContent) > 10000 {
handleError(fmt.Errorf("notice content exceeds maximum length of 10000 characters"), http.StatusBadRequest, rw)
return
}
// Check File
noticeExists := util.CheckFileExists(noticeFilePath)
if !noticeExists {
ntxt, err := os.Create("./var/notice.txt")
ntxt, err := os.Create(noticeFilePath)
if err != nil {
cclog.Errorf("Creating ./var/notice.txt failed: %s", err.Error())
http.Error(rw, err.Error(), http.StatusUnprocessableEntity)
handleError(fmt.Errorf("creating notice file failed: %w", err), http.StatusInternalServerError, rw)
return
}
ntxt.Close()
}
if err := os.WriteFile(noticeFilePath, []byte(newContent), noticeFilePerms); err != nil {
handleError(fmt.Errorf("writing to notice file failed: %w", err), http.StatusInternalServerError, rw)
return
}
rw.Header().Set("Content-Type", "text/plain")
rw.WriteHeader(http.StatusOK)
if newContent != "" {
if err := os.WriteFile("./var/notice.txt", []byte(newContent), 0o666); err != nil {
cclog.Errorf("Writing to ./var/notice.txt failed: %s", err.Error())
http.Error(rw, err.Error(), http.StatusUnprocessableEntity)
return
} else {
rw.Write([]byte("Update Notice Content Success"))
}
rw.Write([]byte("Update Notice Content Success"))
} else {
if err := os.WriteFile("./var/notice.txt", []byte(""), 0o666); err != nil {
cclog.Errorf("Writing to ./var/notice.txt failed: %s", err.Error())
http.Error(rw, err.Error(), http.StatusUnprocessableEntity)
return
} else {
rw.Write([]byte("Empty Notice Content Success"))
}
rw.Write([]byte("Empty Notice Content Success"))
}
}
@@ -206,21 +223,20 @@ func (api *RestApi) getJWT(rw http.ResponseWriter, r *http.Request) {
me := repository.GetUserFromContext(r.Context())
if !me.HasRole(schema.RoleAdmin) {
if username != me.Username {
http.Error(rw, "Only admins are allowed to sign JWTs not for themselves",
http.StatusForbidden)
handleError(fmt.Errorf("only admins are allowed to sign JWTs not for themselves"), http.StatusForbidden, rw)
return
}
}
user, err := repository.GetUserRepository().GetUser(username)
if err != nil {
http.Error(rw, err.Error(), http.StatusUnprocessableEntity)
handleError(fmt.Errorf("getting user failed: %w", err), http.StatusNotFound, rw)
return
}
jwt, err := api.Authentication.JwtAuth.ProvideJWT(user)
if err != nil {
http.Error(rw, err.Error(), http.StatusUnprocessableEntity)
handleError(fmt.Errorf("providing JWT failed: %w", err), http.StatusInternalServerError, rw)
return
}
@@ -233,17 +249,20 @@ func (api *RestApi) getRoles(rw http.ResponseWriter, r *http.Request) {
user := repository.GetUserFromContext(r.Context())
if !user.HasRole(schema.RoleAdmin) {
http.Error(rw, "only admins are allowed to fetch a list of roles", http.StatusForbidden)
handleError(fmt.Errorf("only admins are allowed to fetch a list of roles"), http.StatusForbidden, rw)
return
}
roles, err := schema.GetValidRoles(user)
if err != nil {
http.Error(rw, err.Error(), http.StatusInternalServerError)
handleError(fmt.Errorf("getting valid roles failed: %w", err), http.StatusInternalServerError, rw)
return
}
json.NewEncoder(rw).Encode(roles)
rw.Header().Set("Content-Type", "application/json")
if err := json.NewEncoder(rw).Encode(roles); err != nil {
cclog.Errorf("Failed to encode roles response: %v", err)
}
}
func (api *RestApi) updateConfiguration(rw http.ResponseWriter, r *http.Request) {
@@ -251,38 +270,50 @@ func (api *RestApi) updateConfiguration(rw http.ResponseWriter, r *http.Request)
key, value := r.FormValue("key"), r.FormValue("value")
if err := repository.GetUserCfgRepo().UpdateConfig(key, value, repository.GetUserFromContext(r.Context())); err != nil {
http.Error(rw, err.Error(), http.StatusUnprocessableEntity)
handleError(fmt.Errorf("updating configuration failed: %w", err), http.StatusInternalServerError, rw)
return
}
rw.WriteHeader(http.StatusOK)
rw.Write([]byte("success"))
}
func (api *RestApi) putMachineState(rw http.ResponseWriter, r *http.Request) {
if api.MachineStateDir == "" {
http.Error(rw, "REST > machine state not enabled", http.StatusNotFound)
handleError(fmt.Errorf("machine state not enabled"), http.StatusNotFound, rw)
return
}
vars := mux.Vars(r)
cluster := vars["cluster"]
host := vars["host"]
// Validate cluster and host to prevent path traversal attacks
if strings.Contains(cluster, "..") || strings.Contains(cluster, "/") || strings.Contains(cluster, "\\") {
handleError(fmt.Errorf("invalid cluster name"), http.StatusBadRequest, rw)
return
}
if strings.Contains(host, "..") || strings.Contains(host, "/") || strings.Contains(host, "\\") {
handleError(fmt.Errorf("invalid host name"), http.StatusBadRequest, rw)
return
}
dir := filepath.Join(api.MachineStateDir, cluster)
if err := os.MkdirAll(dir, 0o755); err != nil {
http.Error(rw, err.Error(), http.StatusInternalServerError)
handleError(fmt.Errorf("creating directory failed: %w", err), http.StatusInternalServerError, rw)
return
}
filename := filepath.Join(dir, fmt.Sprintf("%s.json", host))
f, err := os.Create(filename)
if err != nil {
http.Error(rw, err.Error(), http.StatusInternalServerError)
handleError(fmt.Errorf("creating file failed: %w", err), http.StatusInternalServerError, rw)
return
}
defer f.Close()
if _, err := io.Copy(f, r.Body); err != nil {
http.Error(rw, err.Error(), http.StatusInternalServerError)
handleError(fmt.Errorf("writing file failed: %w", err), http.StatusInternalServerError, rw)
return
}
@@ -291,12 +322,25 @@ func (api *RestApi) putMachineState(rw http.ResponseWriter, r *http.Request) {
func (api *RestApi) getMachineState(rw http.ResponseWriter, r *http.Request) {
if api.MachineStateDir == "" {
http.Error(rw, "REST > machine state not enabled", http.StatusNotFound)
handleError(fmt.Errorf("machine state not enabled"), http.StatusNotFound, rw)
return
}
vars := mux.Vars(r)
filename := filepath.Join(api.MachineStateDir, vars["cluster"], fmt.Sprintf("%s.json", vars["host"]))
cluster := vars["cluster"]
host := vars["host"]
// Validate cluster and host to prevent path traversal attacks
if strings.Contains(cluster, "..") || strings.Contains(cluster, "/") || strings.Contains(cluster, "\\") {
handleError(fmt.Errorf("invalid cluster name"), http.StatusBadRequest, rw)
return
}
if strings.Contains(host, "..") || strings.Contains(host, "/") || strings.Contains(host, "\\") {
handleError(fmt.Errorf("invalid host name"), http.StatusBadRequest, rw)
return
}
filename := filepath.Join(api.MachineStateDir, cluster, fmt.Sprintf("%s.json", host))
// Sets the content-type and 'Last-Modified' Header and so on automatically
http.ServeFile(rw, r, filename)
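The two validation blocks above repeat the same checks for cluster and host. A minimal sketch of a shared helper, assuming a free function in the same package (validPathSegment is a hypothetical name, not part of this change):

func validPathSegment(s string) bool {
	// Reject empty segments, path separators and parent-directory references.
	return s != "" &&
		!strings.ContainsAny(s, `/\`) &&
		!strings.Contains(s, "..")
}

Both putMachineState and getMachineState could then guard with validPathSegment(cluster) && validPathSegment(host) instead of duplicating the string checks.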

View File

@@ -2,6 +2,7 @@
// All rights reserved. This file is part of cc-backend.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package api
import (
@@ -10,11 +11,12 @@ import (
"net/http"
"github.com/ClusterCockpit/cc-backend/internal/repository"
cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
"github.com/ClusterCockpit/cc-lib/schema"
"github.com/gorilla/mux"
)
type ApiReturnedUser struct {
type APIReturnedUser struct {
Username string `json:"username"`
Name string `json:"name"`
Roles []string `json:"roles"`
@@ -40,24 +42,42 @@ func (api *RestApi) getUsers(rw http.ResponseWriter, r *http.Request) {
// SecuredCheck() only worked with TokenAuth: Removed
if user := repository.GetUserFromContext(r.Context()); !user.HasRole(schema.RoleAdmin) {
http.Error(rw, "Only admins are allowed to fetch a list of users", http.StatusForbidden)
handleError(fmt.Errorf("only admins are allowed to fetch a list of users"), http.StatusForbidden, rw)
return
}
users, err := repository.GetUserRepository().ListUsers(r.URL.Query().Get("not-just-user") == "true")
if err != nil {
http.Error(rw, err.Error(), http.StatusInternalServerError)
handleError(fmt.Errorf("listing users failed: %w", err), http.StatusInternalServerError, rw)
return
}
json.NewEncoder(rw).Encode(users)
rw.Header().Set("Content-Type", "application/json")
if err := json.NewEncoder(rw).Encode(users); err != nil {
cclog.Errorf("Failed to encode users response: %v", err)
}
}
// updateUser godoc
// @summary Update user roles and projects
// @tags User
// @description Allows admins to add/remove roles and projects for a user
// @produce plain
// @param id path string true "Username"
// @param add-role formData string false "Role to add"
// @param remove-role formData string false "Role to remove"
// @param add-project formData string false "Project to add"
// @param remove-project formData string false "Project to remove"
// @success 200 {string} string "Success message"
// @failure 403 {object} api.ErrorResponse "Forbidden"
// @failure 422 {object} api.ErrorResponse "Unprocessable Entity"
// @security ApiKeyAuth
// @router /api/user/{id} [post]
func (api *RestApi) updateUser(rw http.ResponseWriter, r *http.Request) {
// SecuredCheck() only worked with TokenAuth: Removed
if user := repository.GetUserFromContext(r.Context()); !user.HasRole(schema.RoleAdmin) {
http.Error(rw, "Only admins are allowed to update a user", http.StatusForbidden)
handleError(fmt.Errorf("only admins are allowed to update a user"), http.StatusForbidden, rw)
return
}
@@ -67,43 +87,70 @@ func (api *RestApi) updateUser(rw http.ResponseWriter, r *http.Request) {
newproj := r.FormValue("add-project")
delproj := r.FormValue("remove-project")
// TODO: Handle anything but roles...
rw.Header().Set("Content-Type", "application/json")
// Handle role updates
if newrole != "" {
if err := repository.GetUserRepository().AddRole(r.Context(), mux.Vars(r)["id"], newrole); err != nil {
http.Error(rw, err.Error(), http.StatusUnprocessableEntity)
handleError(fmt.Errorf("adding role failed: %w", err), http.StatusUnprocessableEntity, rw)
return
}
rw.Write([]byte("Add Role Success"))
if err := json.NewEncoder(rw).Encode(DefaultApiResponse{Message: "Add Role Success"}); err != nil {
cclog.Errorf("Failed to encode response: %v", err)
}
} else if delrole != "" {
if err := repository.GetUserRepository().RemoveRole(r.Context(), mux.Vars(r)["id"], delrole); err != nil {
http.Error(rw, err.Error(), http.StatusUnprocessableEntity)
handleError(fmt.Errorf("removing role failed: %w", err), http.StatusUnprocessableEntity, rw)
return
}
rw.Write([]byte("Remove Role Success"))
if err := json.NewEncoder(rw).Encode(DefaultApiResponse{Message: "Remove Role Success"}); err != nil {
cclog.Errorf("Failed to encode response: %v", err)
}
} else if newproj != "" {
if err := repository.GetUserRepository().AddProject(r.Context(), mux.Vars(r)["id"], newproj); err != nil {
http.Error(rw, err.Error(), http.StatusUnprocessableEntity)
handleError(fmt.Errorf("adding project failed: %w", err), http.StatusUnprocessableEntity, rw)
return
}
rw.Write([]byte("Add Project Success"))
if err := json.NewEncoder(rw).Encode(DefaultApiResponse{Message: "Add Project Success"}); err != nil {
cclog.Errorf("Failed to encode response: %v", err)
}
} else if delproj != "" {
if err := repository.GetUserRepository().RemoveProject(r.Context(), mux.Vars(r)["id"], delproj); err != nil {
http.Error(rw, err.Error(), http.StatusUnprocessableEntity)
handleError(fmt.Errorf("removing project failed: %w", err), http.StatusUnprocessableEntity, rw)
return
}
rw.Write([]byte("Remove Project Success"))
if err := json.NewEncoder(rw).Encode(DefaultApiResponse{Message: "Remove Project Success"}); err != nil {
cclog.Errorf("Failed to encode response: %v", err)
}
} else {
http.Error(rw, "Not Add or Del [role|project]?", http.StatusInternalServerError)
handleError(fmt.Errorf("no operation specified: must provide add-role, remove-role, add-project, or remove-project"), http.StatusBadRequest, rw)
}
}
// createUser godoc
// @summary Create a new user
// @tags User
// @description Creates a new user with specified credentials and role
// @produce plain
// @param username formData string true "Username"
// @param password formData string false "Password (not required for API users)"
// @param role formData string true "User role"
// @param name formData string false "Full name"
// @param email formData string false "Email address"
// @param project formData string false "Project (required for managers)"
// @success 200 {string} string "Success message"
// @failure 400 {object} api.ErrorResponse "Bad Request"
// @failure 403 {object} api.ErrorResponse "Forbidden"
// @failure 422 {object} api.ErrorResponse "Unprocessable Entity"
// @security ApiKeyAuth
// @router /api/users/ [post]
func (api *RestApi) createUser(rw http.ResponseWriter, r *http.Request) {
// SecuredCheck() only worked with TokenAuth: Removed
rw.Header().Set("Content-Type", "text/plain")
me := repository.GetUserFromContext(r.Context())
if !me.HasRole(schema.RoleAdmin) {
http.Error(rw, "Only admins are allowed to create new users", http.StatusForbidden)
handleError(fmt.Errorf("only admins are allowed to create new users"), http.StatusForbidden, rw)
return
}
@@ -111,18 +158,22 @@ func (api *RestApi) createUser(rw http.ResponseWriter, r *http.Request) {
r.FormValue("password"), r.FormValue("role"), r.FormValue("name"),
r.FormValue("email"), r.FormValue("project")
// Validate username length
if len(username) == 0 || len(username) > 100 {
handleError(fmt.Errorf("username must be between 1 and 100 characters"), http.StatusBadRequest, rw)
return
}
if len(password) == 0 && role != schema.GetRoleString(schema.RoleApi) {
http.Error(rw, "Only API users are allowed to have a blank password (login will be impossible)", http.StatusBadRequest)
handleError(fmt.Errorf("only API users are allowed to have a blank password (login will be impossible)"), http.StatusBadRequest, rw)
return
}
if len(project) != 0 && role != schema.GetRoleString(schema.RoleManager) {
http.Error(rw, "only managers require a project (can be changed later)",
http.StatusBadRequest)
handleError(fmt.Errorf("only managers require a project (can be changed later)"), http.StatusBadRequest, rw)
return
} else if len(project) == 0 && role == schema.GetRoleString(schema.RoleManager) {
http.Error(rw, "managers require a project to manage (can be changed later)",
http.StatusBadRequest)
handleError(fmt.Errorf("managers require a project to manage (can be changed later)"), http.StatusBadRequest, rw)
return
}
@@ -134,24 +185,35 @@ func (api *RestApi) createUser(rw http.ResponseWriter, r *http.Request) {
Projects: []string{project},
Roles: []string{role},
}); err != nil {
http.Error(rw, err.Error(), http.StatusUnprocessableEntity)
handleError(fmt.Errorf("adding user failed: %w", err), http.StatusUnprocessableEntity, rw)
return
}
fmt.Fprintf(rw, "User %v successfully created!\n", username)
}
// deleteUser godoc
// @summary Delete a user
// @tags User
// @description Deletes a user from the system
// @produce plain
// @param username formData string true "Username to delete"
// @success 200 {string} string "Success"
// @failure 403 {object} api.ErrorResponse "Forbidden"
// @failure 422 {object} api.ErrorResponse "Unprocessable Entity"
// @security ApiKeyAuth
// @router /api/users/ [delete]
func (api *RestApi) deleteUser(rw http.ResponseWriter, r *http.Request) {
// SecuredCheck() only worked with TokenAuth: Removed
if user := repository.GetUserFromContext(r.Context()); !user.HasRole(schema.RoleAdmin) {
http.Error(rw, "Only admins are allowed to delete a user", http.StatusForbidden)
handleError(fmt.Errorf("only admins are allowed to delete a user"), http.StatusForbidden, rw)
return
}
username := r.FormValue("username")
if err := repository.GetUserRepository().DelUser(username); err != nil {
http.Error(rw, err.Error(), http.StatusUnprocessableEntity)
handleError(fmt.Errorf("deleting user failed: %w", err), http.StatusUnprocessableEntity, rw)
return
}

View File

@@ -18,7 +18,6 @@ import (
"net"
"net/http"
"os"
"strings"
"sync"
"time"
@@ -32,8 +31,19 @@ import (
"github.com/gorilla/sessions"
)
// Authenticator is the interface for all authentication methods.
// Each authenticator determines if it can handle a login request (CanLogin)
// and performs the actual authentication (Login).
type Authenticator interface {
// CanLogin determines if this authenticator can handle the login request.
// It returns the user object if available and a boolean indicating if this
// authenticator should attempt the login. This method should not perform
// expensive operations or actual authentication.
CanLogin(user *schema.User, username string, rw http.ResponseWriter, r *http.Request) (*schema.User, bool)
// Login performs the actual authentication for the user.
// It returns the authenticated user or an error if authentication fails.
// The user parameter may be nil if the user doesn't exist in the database yet.
Login(user *schema.User, rw http.ResponseWriter, r *http.Request) (*schema.User, error)
}
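To make the contract concrete, here is a minimal sketch of a type satisfying this interface with a single hard-coded account; the name staticAuthenticator and the password check are purely illustrative assumptions, not part of the codebase:

// staticAuthenticator accepts exactly one hypothetical account and exists only
// to illustrate the CanLogin/Login split: a cheap claim check, then the real work.
type staticAuthenticator struct{ username string }

func (sa *staticAuthenticator) CanLogin(user *schema.User, username string,
	rw http.ResponseWriter, r *http.Request) (*schema.User, bool) {
	// Cheap check only: claim the request if the username matches.
	return user, username == sa.username
}

func (sa *staticAuthenticator) Login(user *schema.User,
	rw http.ResponseWriter, r *http.Request) (*schema.User, error) {
	if r.FormValue("password") != "let-me-in" { // hypothetical fixed secret
		return nil, errors.New("invalid credentials")
	}
	if user != nil {
		return user, nil
	}
	return &schema.User{Username: sa.username, AuthType: schema.AuthSession}, nil
}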
@@ -42,27 +52,70 @@ var (
authInstance *Authentication
)
var ipUserLimiters sync.Map
func getIPUserLimiter(ip, username string) *rate.Limiter {
key := ip + ":" + username
limiter, ok := ipUserLimiters.Load(key)
if !ok {
newLimiter := rate.NewLimiter(rate.Every(time.Hour/10), 10)
ipUserLimiters.Store(key, newLimiter)
return newLimiter
}
return limiter.(*rate.Limiter)
// rateLimiterEntry tracks a rate limiter and its last use time for cleanup
type rateLimiterEntry struct {
limiter *rate.Limiter
lastUsed time.Time
}
var ipUserLimiters sync.Map
// getIPUserLimiter returns a rate limiter for the given IP and username combination.
// Rate limiters are created on demand and track 5 attempts per 15 minutes.
func getIPUserLimiter(ip, username string) *rate.Limiter {
key := ip + ":" + username
now := time.Now()
if entry, ok := ipUserLimiters.Load(key); ok {
rle := entry.(*rateLimiterEntry)
rle.lastUsed = now
return rle.limiter
}
// Rate limit: 5 attempts per 15 minutes per IP/user combination
newLimiter := rate.NewLimiter(rate.Every(15*time.Minute/5), 5)
ipUserLimiters.Store(key, &rateLimiterEntry{
limiter: newLimiter,
lastUsed: now,
})
return newLimiter
}
// cleanupOldRateLimiters removes rate limiters that haven't been used recently
func cleanupOldRateLimiters(olderThan time.Time) {
ipUserLimiters.Range(func(key, value any) bool {
entry := value.(*rateLimiterEntry)
if entry.lastUsed.Before(olderThan) {
ipUserLimiters.Delete(key)
cclog.Debugf("Cleaned up rate limiter for %v", key)
}
return true
})
}
// startRateLimiterCleanup starts a background goroutine to clean up old rate limiters
func startRateLimiterCleanup() {
go func() {
ticker := time.NewTicker(1 * time.Hour)
defer ticker.Stop()
for range ticker.C {
// Clean up limiters not used in the last 24 hours
cleanupOldRateLimiters(time.Now().Add(-24 * time.Hour))
}
}()
}
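A minimal usage sketch (allowLoginAttempt is a hypothetical caller, not part of this change) showing how a login path could consult the limiter before doing any expensive work:

func allowLoginAttempt(r *http.Request, username string) bool {
	ip := r.RemoteAddr
	// Strip the port for both IPv4 and bracketed IPv6 remote addresses.
	if host, _, err := net.SplitHostPort(ip); err == nil {
		ip = host
	}
	return getIPUserLimiter(ip, username).Allow()
}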
// AuthConfig contains configuration for all authentication methods
type AuthConfig struct {
LdapConfig *LdapConfig `json:"ldap"`
JwtConfig *JWTAuthConfig `json:"jwts"`
OpenIDConfig *OpenIDConfig `json:"oidc"`
}
// Keys holds the global authentication configuration
var Keys AuthConfig
// Authentication manages all authentication methods and session handling
type Authentication struct {
sessionStore *sessions.CookieStore
LdapAuth *LdapAuthenticator
@@ -86,10 +139,31 @@ func (auth *Authentication) AuthViaSession(
return nil, nil
}
// TODO: Check if session keys exist
username, _ := session.Values["username"].(string)
projects, _ := session.Values["projects"].([]string)
roles, _ := session.Values["roles"].([]string)
// Validate session data with proper type checking
username, ok := session.Values["username"].(string)
if !ok || username == "" {
cclog.Warn("Invalid session: missing or invalid username")
// Invalidate the corrupted session
session.Options.MaxAge = -1
_ = auth.sessionStore.Save(r, rw, session)
return nil, errors.New("invalid session data")
}
projects, ok := session.Values["projects"].([]string)
if !ok {
cclog.Warn("Invalid session: projects not found or invalid type, using empty list")
projects = []string{}
}
roles, ok := session.Values["roles"].([]string)
if !ok || len(roles) == 0 {
cclog.Warn("Invalid session: missing or invalid roles")
// Invalidate the corrupted session
session.Options.MaxAge = -1
_ = auth.sessionStore.Save(r, rw, session)
return nil, errors.New("invalid session data")
}
return &schema.User{
Username: username,
Projects: projects,
@@ -102,6 +176,9 @@ func (auth *Authentication) AuthViaSession(
func Init(authCfg *json.RawMessage) {
initOnce.Do(func() {
authInstance = &Authentication{}
// Start background cleanup of rate limiters
startRateLimiterCleanup()
sessKey := os.Getenv("SESSION_KEY")
if sessKey == "" {
@@ -185,38 +262,36 @@ func GetAuthInstance() *Authentication {
return authInstance
}
func handleTokenUser(tokenUser *schema.User) {
// handleUserSync syncs or updates a user in the database based on configuration.
// This is used for both JWT and OIDC authentication when syncUserOnLogin or updateUserOnLogin is enabled.
func handleUserSync(user *schema.User, syncUserOnLogin, updateUserOnLogin bool) {
r := repository.GetUserRepository()
dbUser, err := r.GetUser(tokenUser.Username)
dbUser, err := r.GetUser(user.Username)
if err != nil && err != sql.ErrNoRows {
cclog.Errorf("Error while loading user '%s': %v", tokenUser.Username, err)
} else if err == sql.ErrNoRows && Keys.JwtConfig.SyncUserOnLogin { // Adds New User
if err := r.AddUser(tokenUser); err != nil {
cclog.Errorf("Error while adding user '%s' to DB: %v", tokenUser.Username, err)
cclog.Errorf("Error while loading user '%s': %v", user.Username, err)
return
}
if err == sql.ErrNoRows && syncUserOnLogin { // Add new user
if err := r.AddUser(user); err != nil {
cclog.Errorf("Error while adding user '%s' to DB: %v", user.Username, err)
}
} else if err == nil && Keys.JwtConfig.UpdateUserOnLogin { // Update Existing User
if err := r.UpdateUser(dbUser, tokenUser); err != nil {
cclog.Errorf("Error while updating user '%s' to DB: %v", dbUser.Username, err)
} else if err == nil && updateUserOnLogin { // Update existing user
if err := r.UpdateUser(dbUser, user); err != nil {
cclog.Errorf("Error while updating user '%s' in DB: %v", dbUser.Username, err)
}
}
}
func handleOIDCUser(OIDCUser *schema.User) {
r := repository.GetUserRepository()
dbUser, err := r.GetUser(OIDCUser.Username)
// handleTokenUser syncs JWT token user with database
func handleTokenUser(tokenUser *schema.User) {
handleUserSync(tokenUser, Keys.JwtConfig.SyncUserOnLogin, Keys.JwtConfig.UpdateUserOnLogin)
}
if err != nil && err != sql.ErrNoRows {
cclog.Errorf("Error while loading user '%s': %v", OIDCUser.Username, err)
} else if err == sql.ErrNoRows && Keys.OpenIDConfig.SyncUserOnLogin { // Adds New User
if err := r.AddUser(OIDCUser); err != nil {
cclog.Errorf("Error while adding user '%s' to DB: %v", OIDCUser.Username, err)
}
} else if err == nil && Keys.OpenIDConfig.UpdateUserOnLogin { // Update Existing User
if err := r.UpdateUser(dbUser, OIDCUser); err != nil {
cclog.Errorf("Error while updating user '%s' to DB: %v", dbUser.Username, err)
}
}
// handleOIDCUser syncs OIDC user with database
func handleOIDCUser(OIDCUser *schema.User) {
handleUserSync(OIDCUser, Keys.OpenIDConfig.SyncUserOnLogin, Keys.OpenIDConfig.UpdateUserOnLogin)
}
func (auth *Authentication) SaveSession(rw http.ResponseWriter, r *http.Request, user *schema.User) error {
@@ -231,6 +306,7 @@ func (auth *Authentication) SaveSession(rw http.ResponseWriter, r *http.Request,
session.Options.MaxAge = int(auth.SessionMaxAge.Seconds())
}
if config.Keys.HTTPSCertFile == "" && config.Keys.HTTPSKeyFile == "" {
cclog.Warn("HTTPS not configured - session cookies will not have Secure flag set (insecure for production)")
session.Options.Secure = false
}
session.Options.SameSite = http.SameSiteStrictMode
@@ -532,10 +608,13 @@ func securedCheck(user *schema.User, r *http.Request) error {
IPAddress = r.RemoteAddr
}
// FIXME: IPV6 not handled
if strings.Contains(IPAddress, ":") {
IPAddress = strings.Split(IPAddress, ":")[0]
// Handle both IPv4 and IPv6 addresses properly
// For IPv6, this will strip the port and brackets
// For IPv4, this will strip the port
if host, _, err := net.SplitHostPort(IPAddress); err == nil {
IPAddress = host
}
// If SplitHostPort fails, IPAddress is already just a host (no port)
// If nothing declared in config: deny all request to this api endpoint
if len(config.Keys.APIAllowedIPs) == 0 {

176
internal/auth/auth_test.go Normal file
View File

@@ -0,0 +1,176 @@
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved. This file is part of cc-backend.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package auth
import (
"net"
"testing"
"time"
)
// TestGetIPUserLimiter tests the rate limiter creation and retrieval
func TestGetIPUserLimiter(t *testing.T) {
ip := "192.168.1.1"
username := "testuser"
// Get limiter for the first time
limiter1 := getIPUserLimiter(ip, username)
if limiter1 == nil {
t.Fatal("Expected limiter to be created")
}
// Get the same limiter again
limiter2 := getIPUserLimiter(ip, username)
if limiter1 != limiter2 {
t.Error("Expected to get the same limiter instance")
}
// Get a different limiter for different user
limiter3 := getIPUserLimiter(ip, "otheruser")
if limiter1 == limiter3 {
t.Error("Expected different limiter for different user")
}
// Get a different limiter for different IP
limiter4 := getIPUserLimiter("192.168.1.2", username)
if limiter1 == limiter4 {
t.Error("Expected different limiter for different IP")
}
}
// TestRateLimiterBehavior tests that rate limiting works correctly
func TestRateLimiterBehavior(t *testing.T) {
ip := "10.0.0.1"
username := "ratelimituser"
limiter := getIPUserLimiter(ip, username)
// Should allow first 5 attempts
for i := 0; i < 5; i++ {
if !limiter.Allow() {
t.Errorf("Request %d should be allowed within rate limit", i+1)
}
}
// 6th attempt should be blocked
if limiter.Allow() {
t.Error("Request 6 should be blocked by rate limiter")
}
}
// TestCleanupOldRateLimiters tests the cleanup function
func TestCleanupOldRateLimiters(t *testing.T) {
// Clear all existing limiters first to avoid interference from other tests
cleanupOldRateLimiters(time.Now().Add(24 * time.Hour))
// Create some new rate limiters
limiter1 := getIPUserLimiter("1.1.1.1", "user1")
limiter2 := getIPUserLimiter("2.2.2.2", "user2")
if limiter1 == nil || limiter2 == nil {
t.Fatal("Failed to create test limiters")
}
// Cleanup limiters older than 1 second from now (should keep both)
time.Sleep(10 * time.Millisecond) // Small delay to ensure timestamp difference
cleanupOldRateLimiters(time.Now().Add(-1 * time.Second))
// Verify they still exist (should get same instance)
if getIPUserLimiter("1.1.1.1", "user1") != limiter1 {
t.Error("Limiter 1 was incorrectly cleaned up")
}
if getIPUserLimiter("2.2.2.2", "user2") != limiter2 {
t.Error("Limiter 2 was incorrectly cleaned up")
}
// Cleanup limiters older than 1 hour from now (should remove both)
cleanupOldRateLimiters(time.Now().Add(2 * time.Hour))
// Getting them again should create new instances
newLimiter1 := getIPUserLimiter("1.1.1.1", "user1")
if newLimiter1 == limiter1 {
t.Error("Old limiter should have been cleaned up")
}
}
// TestIPv4Extraction tests extracting IPv4 addresses
func TestIPv4Extraction(t *testing.T) {
tests := []struct {
name string
input string
expected string
}{
{"IPv4 with port", "192.168.1.1:8080", "192.168.1.1"},
{"IPv4 without port", "192.168.1.1", "192.168.1.1"},
{"Localhost with port", "127.0.0.1:3000", "127.0.0.1"},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
result := tt.input
if host, _, err := net.SplitHostPort(result); err == nil {
result = host
}
if result != tt.expected {
t.Errorf("Expected %s, got %s", tt.expected, result)
}
})
}
}
// TestIPv6Extraction tests extracting IPv6 addresses
func TestIPv6Extraction(t *testing.T) {
tests := []struct {
name string
input string
expected string
}{
{"IPv6 with port", "[2001:db8::1]:8080", "2001:db8::1"},
{"IPv6 localhost with port", "[::1]:3000", "::1"},
{"IPv6 without port", "2001:db8::1", "2001:db8::1"},
{"IPv6 localhost", "::1", "::1"},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
result := tt.input
if host, _, err := net.SplitHostPort(result); err == nil {
result = host
}
if result != tt.expected {
t.Errorf("Expected %s, got %s", tt.expected, result)
}
})
}
}
// TestIPExtractionEdgeCases tests edge cases for IP extraction
func TestIPExtractionEdgeCases(t *testing.T) {
tests := []struct {
name string
input string
expected string
}{
{"Hostname without port", "example.com", "example.com"},
{"Empty string", "", ""},
{"Just port", ":8080", ""},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
result := tt.input
if host, _, err := net.SplitHostPort(result); err == nil {
result = host
}
if result != tt.expected {
t.Errorf("Expected %s, got %s", tt.expected, result)
}
})
}
}

View File

@@ -14,7 +14,6 @@ import (
"strings"
"time"
"github.com/ClusterCockpit/cc-backend/internal/repository"
cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
"github.com/ClusterCockpit/cc-lib/schema"
"github.com/golang-jwt/jwt/v5"
@@ -102,38 +101,21 @@ func (ja *JWTAuthenticator) AuthViaJWT(
// Token is valid, extract payload
claims := token.Claims.(jwt.MapClaims)
sub, _ := claims["sub"].(string)
var roles []string
// Validate user + roles from JWT against database?
if Keys.JwtConfig.ValidateUser {
ur := repository.GetUserRepository()
user, err := ur.GetUser(sub)
// Deny any logins for unknown usernames
if err != nil {
cclog.Warn("Could not find user from JWT in internal database.")
return nil, errors.New("unknown user")
}
// Take user roles from database instead of trusting the JWT
roles = user.Roles
} else {
// Extract roles from JWT (if present)
if rawroles, ok := claims["roles"].([]any); ok {
for _, rr := range rawroles {
if r, ok := rr.(string); ok {
roles = append(roles, r)
}
}
}
// Use shared helper to get user from JWT claims
var user *schema.User
user, err = getUserFromJWT(claims, Keys.JwtConfig.ValidateUser, schema.AuthToken, -1)
if err != nil {
return nil, err
}
return &schema.User{
Username: sub,
Roles: roles,
AuthType: schema.AuthToken,
AuthSource: -1,
}, nil
// If not validating user, we only get roles from JWT (no projects for this auth method)
if !Keys.JwtConfig.ValidateUser {
user.Roles = extractRolesFromClaims(claims, false)
user.Projects = nil // Standard JWT auth doesn't include projects
}
return user, nil
}
// ProvideJWT generates a new JWT that can be used for authentication

View File

@@ -7,14 +7,11 @@ package auth
import (
"crypto/ed25519"
"database/sql"
"encoding/base64"
"errors"
"fmt"
"net/http"
"os"
"github.com/ClusterCockpit/cc-backend/internal/repository"
cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
"github.com/ClusterCockpit/cc-lib/schema"
"github.com/golang-jwt/jwt/v5"
@@ -149,57 +146,16 @@ func (ja *JWTCookieSessionAuthenticator) Login(
}
claims := token.Claims.(jwt.MapClaims)
sub, _ := claims["sub"].(string)
var roles []string
projects := make([]string, 0)
if jc.ValidateUser {
var err error
user, err = repository.GetUserRepository().GetUser(sub)
if err != nil && err != sql.ErrNoRows {
cclog.Errorf("Error while loading user '%v'", sub)
}
// Deny any logins for unknown usernames
if user == nil {
cclog.Warn("Could not find user from JWT in internal database.")
return nil, errors.New("unknown user")
}
} else {
var name string
if wrap, ok := claims["name"].(map[string]any); ok {
if vals, ok := wrap["values"].([]any); ok {
if len(vals) != 0 {
name = fmt.Sprintf("%v", vals[0])
for i := 1; i < len(vals); i++ {
name += fmt.Sprintf(" %v", vals[i])
}
}
}
}
// Extract roles from JWT (if present)
if rawroles, ok := claims["roles"].([]any); ok {
for _, rr := range rawroles {
if r, ok := rr.(string); ok {
roles = append(roles, r)
}
}
}
user = &schema.User{
Username: sub,
Name: name,
Roles: roles,
Projects: projects,
AuthType: schema.AuthSession,
AuthSource: schema.AuthViaToken,
}
if jc.SyncUserOnLogin || jc.UpdateUserOnLogin {
handleTokenUser(user)
}
// Use shared helper to get user from JWT claims
user, err = getUserFromJWT(claims, jc.ValidateUser, schema.AuthSession, schema.AuthViaToken)
if err != nil {
return nil, err
}
// Sync or update user if configured
if !jc.ValidateUser && (jc.SyncUserOnLogin || jc.UpdateUserOnLogin) {
handleTokenUser(user)
}
// (Ask browser to) Delete JWT cookie

136
internal/auth/jwtHelpers.go Normal file
View File

@@ -0,0 +1,136 @@
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved. This file is part of cc-backend.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package auth
import (
"database/sql"
"errors"
"fmt"
"github.com/ClusterCockpit/cc-backend/internal/repository"
cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
"github.com/ClusterCockpit/cc-lib/schema"
"github.com/golang-jwt/jwt/v5"
)
// extractStringFromClaims extracts a string value from JWT claims
func extractStringFromClaims(claims jwt.MapClaims, key string) string {
if val, ok := claims[key].(string); ok {
return val
}
return ""
}
// extractRolesFromClaims extracts roles from JWT claims
// If validateRoles is true, only valid roles are returned
func extractRolesFromClaims(claims jwt.MapClaims, validateRoles bool) []string {
var roles []string
if rawroles, ok := claims["roles"].([]any); ok {
for _, rr := range rawroles {
if r, ok := rr.(string); ok {
if validateRoles {
if schema.IsValidRole(r) {
roles = append(roles, r)
}
} else {
roles = append(roles, r)
}
}
}
}
return roles
}
// extractProjectsFromClaims extracts projects from JWT claims
func extractProjectsFromClaims(claims jwt.MapClaims) []string {
projects := make([]string, 0)
if rawprojs, ok := claims["projects"].([]any); ok {
for _, pp := range rawprojs {
if p, ok := pp.(string); ok {
projects = append(projects, p)
}
}
} else if rawprojs, ok := claims["projects"]; ok {
if projSlice, ok := rawprojs.([]string); ok {
projects = append(projects, projSlice...)
}
}
return projects
}
// extractNameFromClaims extracts name from JWT claims
// Handles both simple string and complex nested structure
func extractNameFromClaims(claims jwt.MapClaims) string {
// Try simple string first
if name, ok := claims["name"].(string); ok {
return name
}
// Try nested structure: {name: {values: [...]}}
if wrap, ok := claims["name"].(map[string]any); ok {
if vals, ok := wrap["values"].([]any); ok {
if len(vals) == 0 {
return ""
}
name := fmt.Sprintf("%v", vals[0])
for i := 1; i < len(vals); i++ {
name += fmt.Sprintf(" %v", vals[i])
}
return name
}
}
return ""
}
// getUserFromJWT creates or retrieves a user based on JWT claims
// If validateUser is true, the user must exist in the database
// Otherwise, a new user object is created from claims
// authSource should be a schema.AuthSource constant (like schema.AuthViaToken)
func getUserFromJWT(claims jwt.MapClaims, validateUser bool, authType schema.AuthType, authSource schema.AuthSource) (*schema.User, error) {
sub := extractStringFromClaims(claims, "sub")
if sub == "" {
return nil, errors.New("missing 'sub' claim in JWT")
}
if validateUser {
// Validate user against database
ur := repository.GetUserRepository()
user, err := ur.GetUser(sub)
if err != nil && err != sql.ErrNoRows {
cclog.Errorf("Error while loading user '%v': %v", sub, err)
return nil, fmt.Errorf("database error: %w", err)
}
// Deny any logins for unknown usernames
if user == nil || err == sql.ErrNoRows {
cclog.Warn("Could not find user from JWT in internal database.")
return nil, errors.New("unknown user")
}
// Return database user (with database roles)
return user, nil
}
// Create user from JWT claims
name := extractNameFromClaims(claims)
roles := extractRolesFromClaims(claims, true) // Validate roles
projects := extractProjectsFromClaims(claims)
return &schema.User{
Username: sub,
Name: name,
Roles: roles,
Projects: projects,
AuthType: authType,
AuthSource: authSource,
}, nil
}

View File

@@ -0,0 +1,281 @@
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved. This file is part of cc-backend.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package auth
import (
"testing"
"github.com/ClusterCockpit/cc-lib/schema"
"github.com/golang-jwt/jwt/v5"
)
// TestExtractStringFromClaims tests extracting string values from JWT claims
func TestExtractStringFromClaims(t *testing.T) {
claims := jwt.MapClaims{
"sub": "testuser",
"email": "test@example.com",
"age": 25, // not a string
}
tests := []struct {
name string
key string
expected string
}{
{"Existing string", "sub", "testuser"},
{"Another string", "email", "test@example.com"},
{"Non-existent key", "missing", ""},
{"Non-string value", "age", ""},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
result := extractStringFromClaims(claims, tt.key)
if result != tt.expected {
t.Errorf("Expected %s, got %s", tt.expected, result)
}
})
}
}
// TestExtractRolesFromClaims tests role extraction and validation
func TestExtractRolesFromClaims(t *testing.T) {
tests := []struct {
name string
claims jwt.MapClaims
validateRoles bool
expected []string
}{
{
name: "Valid roles without validation",
claims: jwt.MapClaims{
"roles": []any{"admin", "user", "invalid_role"},
},
validateRoles: false,
expected: []string{"admin", "user", "invalid_role"},
},
{
name: "Valid roles with validation",
claims: jwt.MapClaims{
"roles": []any{"admin", "user", "api"},
},
validateRoles: true,
expected: []string{"admin", "user", "api"},
},
{
name: "Invalid roles with validation",
claims: jwt.MapClaims{
"roles": []any{"invalid_role", "fake_role"},
},
validateRoles: true,
expected: []string{}, // Should filter out invalid roles
},
{
name: "No roles claim",
claims: jwt.MapClaims{},
validateRoles: false,
expected: []string{},
},
{
name: "Non-array roles",
claims: jwt.MapClaims{
"roles": "admin",
},
validateRoles: false,
expected: []string{},
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
result := extractRolesFromClaims(tt.claims, tt.validateRoles)
if len(result) != len(tt.expected) {
t.Errorf("Expected %d roles, got %d", len(tt.expected), len(result))
return
}
for i, role := range result {
if i >= len(tt.expected) || role != tt.expected[i] {
t.Errorf("Expected role %s at position %d, got %s", tt.expected[i], i, role)
}
}
})
}
}
// TestExtractProjectsFromClaims tests project extraction from claims
func TestExtractProjectsFromClaims(t *testing.T) {
tests := []struct {
name string
claims jwt.MapClaims
expected []string
}{
{
name: "Projects as array of interfaces",
claims: jwt.MapClaims{
"projects": []any{"project1", "project2", "project3"},
},
expected: []string{"project1", "project2", "project3"},
},
{
name: "Projects as string array",
claims: jwt.MapClaims{
"projects": []string{"projectA", "projectB"},
},
expected: []string{"projectA", "projectB"},
},
{
name: "No projects claim",
claims: jwt.MapClaims{},
expected: []string{},
},
{
name: "Mixed types in projects array",
claims: jwt.MapClaims{
"projects": []any{"project1", 123, "project2"},
},
expected: []string{"project1", "project2"}, // Should skip non-strings
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
result := extractProjectsFromClaims(tt.claims)
if len(result) != len(tt.expected) {
t.Errorf("Expected %d projects, got %d", len(tt.expected), len(result))
return
}
for i, project := range result {
if i >= len(tt.expected) || project != tt.expected[i] {
t.Errorf("Expected project %s at position %d, got %s", tt.expected[i], i, project)
}
}
})
}
}
// TestExtractNameFromClaims tests name extraction from various formats
func TestExtractNameFromClaims(t *testing.T) {
tests := []struct {
name string
claims jwt.MapClaims
expected string
}{
{
name: "Simple string name",
claims: jwt.MapClaims{
"name": "John Doe",
},
expected: "John Doe",
},
{
name: "Nested name structure",
claims: jwt.MapClaims{
"name": map[string]any{
"values": []any{"John", "Doe"},
},
},
expected: "John Doe",
},
{
name: "Nested name with single value",
claims: jwt.MapClaims{
"name": map[string]any{
"values": []any{"Alice"},
},
},
expected: "Alice",
},
{
name: "No name claim",
claims: jwt.MapClaims{},
expected: "",
},
{
name: "Empty nested values",
claims: jwt.MapClaims{
"name": map[string]any{
"values": []any{},
},
},
expected: "",
},
{
name: "Nested with non-string values",
claims: jwt.MapClaims{
"name": map[string]any{
"values": []any{123, "Smith"},
},
},
expected: "123 Smith", // Should convert to string
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
result := extractNameFromClaims(tt.claims)
if result != tt.expected {
t.Errorf("Expected '%s', got '%s'", tt.expected, result)
}
})
}
}
// TestGetUserFromJWT_NoValidation tests getUserFromJWT without database validation
func TestGetUserFromJWT_NoValidation(t *testing.T) {
claims := jwt.MapClaims{
"sub": "testuser",
"name": "Test User",
"roles": []any{"user", "admin"},
"projects": []any{"project1", "project2"},
}
user, err := getUserFromJWT(claims, false, schema.AuthToken, -1)
if err != nil {
t.Fatalf("Unexpected error: %v", err)
}
if user.Username != "testuser" {
t.Errorf("Expected username 'testuser', got '%s'", user.Username)
}
if user.Name != "Test User" {
t.Errorf("Expected name 'Test User', got '%s'", user.Name)
}
if len(user.Roles) != 2 {
t.Errorf("Expected 2 roles, got %d", len(user.Roles))
}
if len(user.Projects) != 2 {
t.Errorf("Expected 2 projects, got %d", len(user.Projects))
}
if user.AuthType != schema.AuthToken {
t.Errorf("Expected AuthType %v, got %v", schema.AuthToken, user.AuthType)
}
}
// TestGetUserFromJWT_MissingSub tests error when sub claim is missing
func TestGetUserFromJWT_MissingSub(t *testing.T) {
claims := jwt.MapClaims{
"name": "Test User",
}
_, err := getUserFromJWT(claims, false, schema.AuthToken, -1)
if err == nil {
t.Error("Expected error for missing sub claim")
}
if err.Error() != "missing 'sub' claim in JWT" {
t.Errorf("Expected specific error message, got: %v", err)
}
}

View File

@@ -6,7 +6,6 @@
package auth
import (
"database/sql"
"encoding/base64"
"errors"
"fmt"
@@ -14,7 +13,6 @@ import (
"os"
"strings"
"github.com/ClusterCockpit/cc-backend/internal/repository"
cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
"github.com/ClusterCockpit/cc-lib/schema"
"github.com/golang-jwt/jwt/v5"
@@ -77,70 +75,16 @@ func (ja *JWTSessionAuthenticator) Login(
}
claims := token.Claims.(jwt.MapClaims)
sub, _ := claims["sub"].(string)
var roles []string
projects := make([]string, 0)
if Keys.JwtConfig.ValidateUser {
var err error
user, err = repository.GetUserRepository().GetUser(sub)
if err != nil && err != sql.ErrNoRows {
cclog.Errorf("Error while loading user '%v'", sub)
}
// Deny any logins for unknown usernames
if user == nil {
cclog.Warn("Could not find user from JWT in internal database.")
return nil, errors.New("unknown user")
}
} else {
var name string
if wrap, ok := claims["name"].(map[string]any); ok {
if vals, ok := wrap["values"].([]any); ok {
if len(vals) != 0 {
name = fmt.Sprintf("%v", vals[0])
for i := 1; i < len(vals); i++ {
name += fmt.Sprintf(" %v", vals[i])
}
}
}
}
// Extract roles from JWT (if present)
if rawroles, ok := claims["roles"].([]any); ok {
for _, rr := range rawroles {
if r, ok := rr.(string); ok {
if schema.IsValidRole(r) {
roles = append(roles, r)
}
}
}
}
if rawprojs, ok := claims["projects"].([]any); ok {
for _, pp := range rawprojs {
if p, ok := pp.(string); ok {
projects = append(projects, p)
}
}
} else if rawprojs, ok := claims["projects"]; ok {
projects = append(projects, rawprojs.([]string)...)
}
user = &schema.User{
Username: sub,
Name: name,
Roles: roles,
Projects: projects,
AuthType: schema.AuthSession,
AuthSource: schema.AuthViaToken,
}
if Keys.JwtConfig.SyncUserOnLogin || Keys.JwtConfig.UpdateUserOnLogin {
handleTokenUser(user)
}
// Use shared helper to get user from JWT claims
user, err = getUserFromJWT(claims, Keys.JwtConfig.ValidateUser, schema.AuthSession, schema.AuthViaToken)
if err != nil {
return nil, err
}
// Sync or update user if configured
if !Keys.JwtConfig.ValidateUser && (Keys.JwtConfig.SyncUserOnLogin || Keys.JwtConfig.UpdateUserOnLogin) {
handleTokenUser(user)
}
return user, nil

View File

@@ -54,8 +54,13 @@ func setCallbackCookie(w http.ResponseWriter, r *http.Request, name, value strin
http.SetCookie(w, c)
}
// NewOIDC creates a new OIDC authenticator with the configured provider
func NewOIDC(a *Authentication) *OIDC {
provider, err := oidc.NewProvider(context.Background(), Keys.OpenIDConfig.Provider)
// Use context with timeout for provider initialization
ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
defer cancel()
provider, err := oidc.NewProvider(ctx, Keys.OpenIDConfig.Provider)
if err != nil {
cclog.Fatal(err)
}
@@ -111,13 +116,18 @@ func (oa *OIDC) OAuth2Callback(rw http.ResponseWriter, r *http.Request) {
http.Error(rw, "Code not found", http.StatusBadRequest)
return
}
token, err := oa.client.Exchange(context.Background(), code, oauth2.VerifierOption(codeVerifier))
// Exchange authorization code for token with timeout
ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
defer cancel()
token, err := oa.client.Exchange(ctx, code, oauth2.VerifierOption(codeVerifier))
if err != nil {
http.Error(rw, "Failed to exchange token: "+err.Error(), http.StatusInternalServerError)
return
}
userInfo, err := oa.provider.UserInfo(context.Background(), oauth2.StaticTokenSource(token))
// Get user info from OIDC provider with same timeout
userInfo, err := oa.provider.UserInfo(ctx, oauth2.StaticTokenSource(token))
if err != nil {
http.Error(rw, "Failed to get userinfo: "+err.Error(), http.StatusInternalServerError)
return
@@ -180,8 +190,8 @@ func (oa *OIDC) OAuth2Callback(rw http.ResponseWriter, r *http.Request) {
oa.authentication.SaveSession(rw, r, user)
cclog.Infof("login successfull: user: %#v (roles: %v, projects: %v)", user.Username, user.Roles, user.Projects)
ctx := context.WithValue(r.Context(), repository.ContextUserKey, user)
http.RedirectHandler("/", http.StatusTemporaryRedirect).ServeHTTP(rw, r.WithContext(ctx))
userCtx := context.WithValue(r.Context(), repository.ContextUserKey, user)
http.RedirectHandler("/", http.StatusTemporaryRedirect).ServeHTTP(rw, r.WithContext(userCtx))
}
func (oa *OIDC) OAuth2Login(rw http.ResponseWriter, r *http.Request) {
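As an aside, the timeout handling introduced above follows a standard Go pattern; a minimal, self-contained sketch (the URL and the 30-second bound are illustrative assumptions, mirroring the duration used in NewOIDC):
package main
import (
	"context"
	"fmt"
	"net/http"
	"time"
)
func main() {
	// Bound the outbound call instead of using context.Background() directly.
	ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
	defer cancel() // release the timer even on the success path
	req, err := http.NewRequestWithContext(ctx, http.MethodGet,
		"https://example.com/.well-known/openid-configuration", nil)
	if err != nil {
		fmt.Println("building request:", err)
		return
	}
	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		fmt.Println("request failed or timed out:", err)
		return
	}
	defer resp.Body.Close()
	fmt.Println("status:", resp.Status)
}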

File diff suppressed because it is too large

View File

@@ -2,7 +2,7 @@ package graph
// This file will be automatically regenerated based on the schema, any resolver implementations
// will be copied through when generating and any unknown code will be moved to the end.
// Code generated by github.com/99designs/gqlgen version v0.17.78
// Code generated by github.com/99designs/gqlgen version v0.17.81
import (
"context"

View File

@@ -81,14 +81,14 @@ func setup(t *testing.T) *repository.JobRepository {
tmpdir := t.TempDir()
jobarchive := filepath.Join(tmpdir, "job-archive")
if err := os.Mkdir(jobarchive, 0777); err != nil {
if err := os.Mkdir(jobarchive, 0o777); err != nil {
t.Fatal(err)
}
if err := os.WriteFile(filepath.Join(jobarchive, "version.txt"), []byte(fmt.Sprintf("%d", 2)), 0666); err != nil {
if err := os.WriteFile(filepath.Join(jobarchive, "version.txt"), fmt.Appendf(nil, "%d", 3), 0o666); err != nil {
t.Fatal(err)
}
fritzArchive := filepath.Join(tmpdir, "job-archive", "fritz")
if err := os.Mkdir(fritzArchive, 0777); err != nil {
if err := os.Mkdir(fritzArchive, 0o777); err != nil {
t.Fatal(err)
}
if err := copyFile(filepath.Join("testdata", "cluster-fritz.json"),
@@ -103,7 +103,7 @@ func setup(t *testing.T) *repository.JobRepository {
}
cfgFilePath := filepath.Join(tmpdir, "config.json")
if err := os.WriteFile(cfgFilePath, []byte(testconfig), 0666); err != nil {
if err := os.WriteFile(cfgFilePath, []byte(testconfig), 0o666); err != nil {
t.Fatal(err)
}

View File

@@ -55,7 +55,18 @@ func InitDB() error {
// Bundle 100 inserts into one transaction for better performance
if i%100 == 0 {
r.TransactionCommit(t)
if i > 0 {
if err := t.Commit(); err != nil {
cclog.Errorf("transaction commit error: %v", err)
return err
}
// Start a new transaction for the next batch
t, err = r.TransactionInit()
if err != nil {
cclog.Errorf("transaction init error: %v", err)
return err
}
}
fmt.Printf("%d jobs inserted...\r", i)
}
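The commit-and-reopen batching used in InitDB above can be sketched in isolation against database/sql; the table name, column, and batch size are placeholders, and the sqlite3 driver is assumed only because the project already uses it:
package main
import (
	"database/sql"
	"log"
	_ "github.com/mattn/go-sqlite3"
)
func insertBatched(db *sql.DB, values []int) error {
	tx, err := db.Begin()
	if err != nil {
		return err
	}
	for i, v := range values {
		// Commit every 100 inserts and open a fresh transaction,
		// mirroring the i%100 == 0 logic above.
		if i > 0 && i%100 == 0 {
			if err := tx.Commit(); err != nil {
				return err
			}
			if tx, err = db.Begin(); err != nil {
				return err
			}
		}
		if _, err := tx.Exec("INSERT INTO demo(val) VALUES (?)", v); err != nil {
			tx.Rollback()
			return err
		}
	}
	return tx.Commit()
}
func main() {
	db, err := sql.Open("sqlite3", ":memory:")
	if err != nil {
		log.Fatal(err)
	}
	defer db.Close()
	if _, err := db.Exec("CREATE TABLE demo(val INTEGER)"); err != nil {
		log.Fatal(err)
	}
	if err := insertBatched(db, make([]int, 250)); err != nil {
		log.Fatal(err)
	}
	log.Println("inserted 250 rows in batches of 100")
}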

View File

@@ -75,10 +75,10 @@ func ArchiveCheckpoints(checkpointsDir, archiveDir string, from int64, deleteIns
var wg sync.WaitGroup
n, errs := int32(0), int32(0)
work := make(chan workItem, NumWorkers)
work := make(chan workItem, Keys.NumWorkers)
wg.Add(NumWorkers)
for worker := 0; worker < NumWorkers; worker++ {
wg.Add(Keys.NumWorkers)
for worker := 0; worker < Keys.NumWorkers; worker++ {
go func() {
defer wg.Done()
for workItem := range work {
@@ -116,7 +116,7 @@ func ArchiveCheckpoints(checkpointsDir, archiveDir string, from int64, deleteIns
}
if errs > 0 {
return int(n), fmt.Errorf("%d errors happend while archiving (%d successes)", errs, n)
return int(n), fmt.Errorf("%d errors happened while archiving (%d successes)", errs, n)
}
return int(n), nil
}
@@ -147,11 +147,11 @@ func archiveCheckpoints(dir string, archiveDir string, from int64, deleteInstead
}
filename := filepath.Join(archiveDir, fmt.Sprintf("%d.zip", from))
f, err := os.OpenFile(filename, os.O_CREATE|os.O_WRONLY, 0o644)
f, err := os.OpenFile(filename, os.O_CREATE|os.O_WRONLY, CheckpointFilePerms)
if err != nil && os.IsNotExist(err) {
err = os.MkdirAll(archiveDir, 0o755)
err = os.MkdirAll(archiveDir, CheckpointDirPerms)
if err == nil {
f, err = os.OpenFile(filename, os.O_CREATE|os.O_WRONLY, 0o644)
f, err = os.OpenFile(filename, os.O_CREATE|os.O_WRONLY, CheckpointFilePerms)
}
}
if err != nil {

View File

@@ -105,46 +105,6 @@ func (b *buffer) firstWrite() int64 {
func (b *buffer) close() {}
/*
func (b *buffer) close() {
if b.closed {
return
}
b.closed = true
n, sum, min, max := 0, 0., math.MaxFloat64, -math.MaxFloat64
for _, x := range b.data {
if x.IsNaN() {
continue
}
n += 1
f := float64(x)
sum += f
min = math.Min(min, f)
max = math.Max(max, f)
}
b.statisticts.samples = n
if n > 0 {
b.statisticts.avg = Float(sum / float64(n))
b.statisticts.min = Float(min)
b.statisticts.max = Float(max)
} else {
b.statisticts.avg = NaN
b.statisticts.min = NaN
b.statisticts.max = NaN
}
}
*/
// func interpolate(idx int, data []Float) Float {
// if idx == 0 || idx+1 == len(data) {
// return NaN
// }
// return (data[idx-1] + data[idx+1]) / 2.0
// }
// Return all known values from `from` to `to`. Gaps of information are represented as NaN.
// Simple linear interpolation is done between the two neighboring cells if possible.
// If values at the start or end are missing, instead of NaN values, the second and third

View File

@@ -28,6 +28,17 @@ import (
"github.com/linkedin/goavro/v2"
)
// File operation constants
const (
// CheckpointFilePerms defines default permissions for checkpoint files
CheckpointFilePerms = 0o644
// CheckpointDirPerms defines default permissions for checkpoint directories
CheckpointDirPerms = 0o755
// GCTriggerInterval determines how often GC is forced during checkpoint loading
// GC is forced every Keys.NumWorkers loaded hosts once more than GCTriggerInterval hosts have been loaded
GCTriggerInterval = 100
)
// Whenever changed, update MarshalJSON as well!
type CheckpointMetrics struct {
Data []schema.Float `json:"data"`
@@ -171,9 +182,9 @@ func (m *MemoryStore) ToCheckpoint(dir string, from, to int64) (int, error) {
n, errs := int32(0), int32(0)
var wg sync.WaitGroup
wg.Add(NumWorkers)
work := make(chan workItem, NumWorkers*2)
for worker := 0; worker < NumWorkers; worker++ {
wg.Add(Keys.NumWorkers)
work := make(chan workItem, Keys.NumWorkers*2)
for worker := 0; worker < Keys.NumWorkers; worker++ {
go func() {
defer wg.Done()
@@ -205,7 +216,7 @@ func (m *MemoryStore) ToCheckpoint(dir string, from, to int64) (int, error) {
wg.Wait()
if errs > 0 {
return int(n), fmt.Errorf("[METRICSTORE]> %d errors happend while creating checkpoints (%d successes)", errs, n)
return int(n), fmt.Errorf("[METRICSTORE]> %d errors happened while creating checkpoints (%d successes)", errs, n)
}
return int(n), nil
}
@@ -285,11 +296,11 @@ func (l *Level) toCheckpoint(dir string, from, to int64, m *MemoryStore) error {
}
filepath := path.Join(dir, fmt.Sprintf("%d.json", from))
f, err := os.OpenFile(filepath, os.O_CREATE|os.O_WRONLY, 0o644)
f, err := os.OpenFile(filepath, os.O_CREATE|os.O_WRONLY, CheckpointFilePerms)
if err != nil && os.IsNotExist(err) {
err = os.MkdirAll(dir, 0o755)
err = os.MkdirAll(dir, CheckpointDirPerms)
if err == nil {
f, err = os.OpenFile(filepath, os.O_CREATE|os.O_WRONLY, 0o644)
f, err = os.OpenFile(filepath, os.O_CREATE|os.O_WRONLY, CheckpointFilePerms)
}
}
if err != nil {
@@ -307,11 +318,11 @@ func (l *Level) toCheckpoint(dir string, from, to int64, m *MemoryStore) error {
func (m *MemoryStore) FromCheckpoint(dir string, from int64, extension string) (int, error) {
var wg sync.WaitGroup
work := make(chan [2]string, NumWorkers)
work := make(chan [2]string, Keys.NumWorkers)
n, errs := int32(0), int32(0)
wg.Add(NumWorkers)
for worker := 0; worker < NumWorkers; worker++ {
wg.Add(Keys.NumWorkers)
for worker := 0; worker < Keys.NumWorkers; worker++ {
go func() {
defer wg.Done()
for host := range work {
@@ -347,7 +358,7 @@ func (m *MemoryStore) FromCheckpoint(dir string, from int64, extension string) (
}
i++
if i%NumWorkers == 0 && i > 100 {
if i%Keys.NumWorkers == 0 && i > GCTriggerInterval {
// Forcing garbage collection to run here regularly during the loading of checkpoints
// will decrease the total heap size after loading everything back to memory is done.
// While loading data, the heap will grow fast, so the GC target size will double
@@ -368,7 +379,7 @@ done:
}
if errs > 0 {
return int(n), fmt.Errorf("[METRICSTORE]> %d errors happend while creating checkpoints (%d successes)", errs, n)
return int(n), fmt.Errorf("[METRICSTORE]> %d errors happened while creating checkpoints (%d successes)", errs, n)
}
return int(n), nil
}
@@ -379,7 +390,7 @@ done:
func (m *MemoryStore) FromCheckpointFiles(dir string, from int64) (int, error) {
if _, err := os.Stat(dir); os.IsNotExist(err) {
// The directory does not exist, so create it using os.MkdirAll()
err := os.MkdirAll(dir, 0o755) // 0755 sets the permissions for the directory
err := os.MkdirAll(dir, CheckpointDirPerms) // CheckpointDirPerms sets the permissions for the directory
if err != nil {
cclog.Fatalf("[METRICSTORE]> Error creating directory: %#v\n", err)
}
@@ -464,7 +475,7 @@ func (l *Level) loadAvroFile(m *MemoryStore, f *os.File, from int64) error {
// Create a new OCF reader from the buffered reader
ocfReader, err := goavro.NewOCFReader(br)
if err != nil {
panic(err)
return fmt.Errorf("[METRICSTORE]> error creating OCF reader: %w", err)
}
metricsData := make(map[string]schema.FloatArray)
@@ -477,7 +488,7 @@ func (l *Level) loadAvroFile(m *MemoryStore, f *os.File, from int64) error {
record, ok := datum.(map[string]any)
if !ok {
panic("[METRICSTORE]> failed to assert datum as map[string]interface{}")
return fmt.Errorf("[METRICSTORE]> failed to assert datum as map[string]interface{}")
}
for key, value := range record {
@@ -559,7 +570,7 @@ func (l *Level) createBuffer(m *MemoryStore, metricName string, floatArray schem
l.metrics[minfo.offset] = b
} else {
if prev.start > b.start {
return errors.New("wooops")
return fmt.Errorf("[METRICSTORE]> buffer start time %d is before previous buffer start %d", b.start, prev.start)
}
b.prev = prev
@@ -623,7 +634,7 @@ func (l *Level) loadFile(cf *CheckpointFile, m *MemoryStore) error {
l.metrics[minfo.offset] = b
} else {
if prev.start > b.start {
return errors.New("wooops")
return fmt.Errorf("[METRICSTORE]> buffer start time %d is before previous buffer start %d", b.start, prev.start)
}
b.prev = prev
@@ -700,13 +711,17 @@ func (l *Level) fromCheckpoint(m *MemoryStore, dir string, from int64, extension
loader := loaders[extension]
for _, filename := range files {
f, err := os.Open(path.Join(dir, filename))
if err != nil {
return filesLoaded, err
}
defer f.Close()
// Use a closure to ensure file is closed immediately after use
err := func() error {
f, err := os.Open(path.Join(dir, filename))
if err != nil {
return err
}
defer f.Close()
if err = loader(m, f, from); err != nil {
return loader(m, f, from)
}()
if err != nil {
return filesLoaded, err
}
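The close-in-a-closure idiom introduced above (wrapping the per-file work in a func() error so defer f.Close() runs at the end of each iteration rather than piling up until the surrounding function returns) in a self-contained sketch; the temporary file only gives os.Open something to succeed on:
package main
import (
	"fmt"
	"os"
)
func processAll(names []string) error {
	for _, name := range names {
		err := func() error {
			f, err := os.Open(name)
			if err != nil {
				return err
			}
			defer f.Close() // closes when this iteration's closure returns
			// ... read and load the file here ...
			return nil
		}()
		if err != nil {
			return err
		}
	}
	return nil
}
func main() {
	tmp, err := os.CreateTemp("", "demo-*.json")
	if err != nil {
		fmt.Println(err)
		return
	}
	tmp.Close()
	defer os.Remove(tmp.Name())
	if err := processAll([]string{tmp.Name()}); err != nil {
		fmt.Println(err)
		return
	}
	fmt.Println("done")
}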

View File

@@ -12,6 +12,9 @@ import (
var InternalCCMSFlag bool = false
type MetricStoreConfig struct {
// Number of concurrent workers for checkpoint and archive operations.
// If not set or 0, defaults to min(runtime.NumCPU()/2+1, 10)
NumWorkers int `json:"num-workers"`
Checkpoints struct {
FileFormat string `json:"file-format"`
Interval string `json:"interval"`
@@ -62,7 +65,7 @@ const (
AvgAggregation
)
func AssignAggregationStratergy(str string) (AggregationStrategy, error) {
func AssignAggregationStrategy(str string) (AggregationStrategy, error) {
switch str {
case "":
return NoAggregation, nil

View File

@@ -39,7 +39,7 @@ func (l *Level) findLevelOrCreate(selector []string, nMetrics int) *Level {
// Children map needs to be created...
l.lock.RUnlock()
} else {
child, ok := l.children[selector[0]]
child, ok = l.children[selector[0]]
l.lock.RUnlock()
if ok {
return child.findLevelOrCreate(selector[1:], nMetrics)

View File

@@ -3,6 +3,20 @@
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
// Package memorystore provides an efficient in-memory time-series metric storage system
// with support for hierarchical data organization, checkpointing, and archiving.
//
// The package organizes metrics in a tree structure (cluster → host → component) and
// provides concurrent read/write access to metric data with configurable aggregation strategies.
// Background goroutines handle periodic checkpointing (JSON or Avro format), archiving old data,
// and enforcing retention policies.
//
// Key features:
// - In-memory metric storage with configurable retention
// - Hierarchical data organization (selectors)
// - Concurrent checkpoint/archive workers
// - Support for sum and average aggregation
// - NATS integration for metric ingestion
package memorystore
import (
@@ -10,18 +24,14 @@ import (
"context"
"encoding/json"
"errors"
"os"
"os/signal"
"runtime"
"sync"
"syscall"
"time"
"github.com/ClusterCockpit/cc-backend/internal/config"
"github.com/ClusterCockpit/cc-backend/pkg/archive"
cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
"github.com/ClusterCockpit/cc-lib/resampler"
"github.com/ClusterCockpit/cc-lib/runtimeEnv"
"github.com/ClusterCockpit/cc-lib/schema"
"github.com/ClusterCockpit/cc-lib/util"
)
@@ -29,14 +39,12 @@ import (
var (
singleton sync.Once
msInstance *MemoryStore
// shutdownFunc stores the context cancellation function created in Init
// and is called during Shutdown to cancel all background goroutines
shutdownFunc context.CancelFunc
)
var NumWorkers int = 4
func init() {
maxWorkers := 10
NumWorkers = min(runtime.NumCPU()/2+1, maxWorkers)
}
type Metric struct {
Name string
@@ -61,30 +69,34 @@ func Init(rawConfig json.RawMessage, wg *sync.WaitGroup) {
}
}
// Set NumWorkers from config or use default
if Keys.NumWorkers <= 0 {
maxWorkers := 10
Keys.NumWorkers = min(runtime.NumCPU()/2+1, maxWorkers)
}
cclog.Debugf("[METRICSTORE]> Using %d workers for checkpoint/archive operations\n", Keys.NumWorkers)
// Helper function to add metric configuration
addMetricConfig := func(mc schema.MetricConfig) {
agg, err := AssignAggregationStrategy(mc.Aggregation)
if err != nil {
cclog.Warnf("Could not find aggregation strategy for metric config '%s': %s", mc.Name, err.Error())
}
AddMetric(mc.Name, MetricConfig{
Frequency: int64(mc.Timestep),
Aggregation: agg,
})
}
for _, c := range archive.Clusters {
for _, mc := range c.MetricConfig {
agg, err := AssignAggregationStratergy(mc.Aggregation)
if err != nil {
cclog.Warnf("Could not find aggregation stratergy for metric config '%s': %s", mc.Name, err.Error())
}
AddMetric(mc.Name, MetricConfig{
Frequency: int64(mc.Timestep),
Aggregation: agg,
})
addMetricConfig(*mc)
}
for _, sc := range c.SubClusters {
for _, mc := range sc.MetricConfig {
agg, err := AssignAggregationStratergy(mc.Aggregation)
if err != nil {
cclog.Warnf("Could not find aggregation stratergy for metric config '%s': %s", mc.Name, err.Error())
}
AddMetric(mc.Name, MetricConfig{
Frequency: int64(mc.Timestep),
Aggregation: agg,
})
addMetricConfig(mc)
}
}
}
@@ -126,15 +138,11 @@ func Init(rawConfig json.RawMessage, wg *sync.WaitGroup) {
Archiving(wg, ctx)
DataStaging(wg, ctx)
wg.Add(1)
sigs := make(chan os.Signal, 1)
signal.Notify(sigs, syscall.SIGINT, syscall.SIGTERM)
go func() {
defer wg.Done()
<-sigs
runtimeEnv.SystemdNotifiy(false, "[METRICSTORE]> Shutting down ...")
shutdown()
}()
// Note: Signal handling has been removed from this function.
// The caller is responsible for handling shutdown signals and calling
// the shutdown() function when appropriate.
// Store the shutdown function for later use by Shutdown()
shutdownFunc = shutdown
if Keys.Nats != nil {
for _, natsConf := range Keys.Nats {
@@ -190,6 +198,11 @@ func GetMemoryStore() *MemoryStore {
}
func Shutdown() {
// Cancel the context to signal all background goroutines to stop
if shutdownFunc != nil {
shutdownFunc()
}
cclog.Infof("[METRICSTORE]> Writing to '%s'...\n", Keys.Checkpoints.RootDir)
var files int
var err error
@@ -207,70 +220,8 @@ func Shutdown() {
cclog.Errorf("[METRICSTORE]> Writing checkpoint failed: %s\n", err.Error())
}
cclog.Infof("[METRICSTORE]> Done! (%d files written)\n", files)
// ms.PrintHeirarchy()
}
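A minimal sketch of the shutdown wiring described above, under the assumption that Init stores the cancel function of a cancellable context and Shutdown later invokes it so background workers exit before checkpoints are written; names and timings are illustrative, not taken from the memorystore code:
package main
import (
	"context"
	"fmt"
	"sync"
	"time"
)
var shutdownFunc context.CancelFunc
func start(wg *sync.WaitGroup) {
	ctx, cancel := context.WithCancel(context.Background())
	shutdownFunc = cancel // stored for the later shutdown call
	wg.Add(1)
	go func() {
		defer wg.Done()
		ticker := time.NewTicker(100 * time.Millisecond)
		defer ticker.Stop()
		for {
			select {
			case <-ctx.Done():
				fmt.Println("background worker stopped")
				return
			case <-ticker.C:
				// periodic work (checkpointing, archiving, ...)
			}
		}
	}()
}
func shutdown(wg *sync.WaitGroup) {
	if shutdownFunc != nil {
		shutdownFunc()
	}
	wg.Wait() // wait for workers before writing final state
}
func main() {
	var wg sync.WaitGroup
	start(&wg)
	time.Sleep(250 * time.Millisecond)
	shutdown(&wg)
}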
// func (m *MemoryStore) PrintHeirarchy() {
// m.root.lock.Lock()
// defer m.root.lock.Unlock()
// fmt.Printf("Root : \n")
// for lvl1, sel1 := range m.root.children {
// fmt.Printf("\t%s\n", lvl1)
// for lvl2, sel2 := range sel1.children {
// fmt.Printf("\t\t%s\n", lvl2)
// if lvl1 == "fritz" && lvl2 == "f0201" {
// for name, met := range m.Metrics {
// mt := sel2.metrics[met.Offset]
// fmt.Printf("\t\t\t\t%s\n", name)
// fmt.Printf("\t\t\t\t")
// for mt != nil {
// // if name == "cpu_load" {
// fmt.Printf("%d(%d) -> %#v", mt.start, len(mt.data), mt.data)
// // }
// mt = mt.prev
// }
// fmt.Printf("\n")
// }
// }
// for lvl3, sel3 := range sel2.children {
// if lvl1 == "fritz" && lvl2 == "f0201" && lvl3 == "hwthread70" {
// fmt.Printf("\t\t\t\t\t%s\n", lvl3)
// for name, met := range m.Metrics {
// mt := sel3.metrics[met.Offset]
// fmt.Printf("\t\t\t\t\t\t%s\n", name)
// fmt.Printf("\t\t\t\t\t\t")
// for mt != nil {
// // if name == "clock" {
// fmt.Printf("%d(%d) -> %#v", mt.start, len(mt.data), mt.data)
// mt = mt.prev
// }
// fmt.Printf("\n")
// }
// // for i, _ := range sel3.metrics {
// // fmt.Printf("\t\t\t\t\t%s\n", getName(configmetrics, i))
// // }
// }
// }
// }
// }
// }
func getName(m *MemoryStore, i int) string {
for key, val := range m.Metrics {
if val.offset == i {

View File

@@ -0,0 +1,156 @@
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved. This file is part of cc-backend.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package memorystore
import (
"testing"
"github.com/ClusterCockpit/cc-lib/schema"
)
func TestAssignAggregationStrategy(t *testing.T) {
tests := []struct {
name string
input string
expected AggregationStrategy
wantErr bool
}{
{"empty string", "", NoAggregation, false},
{"sum", "sum", SumAggregation, false},
{"avg", "avg", AvgAggregation, false},
{"invalid", "invalid", NoAggregation, true},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
result, err := AssignAggregationStrategy(tt.input)
if (err != nil) != tt.wantErr {
t.Errorf("AssignAggregationStrategy(%q) error = %v, wantErr %v", tt.input, err, tt.wantErr)
return
}
if result != tt.expected {
t.Errorf("AssignAggregationStrategy(%q) = %v, want %v", tt.input, result, tt.expected)
}
})
}
}
func TestAddMetric(t *testing.T) {
// Reset Metrics before test
Metrics = make(map[string]MetricConfig)
err := AddMetric("test_metric", MetricConfig{
Frequency: 60,
Aggregation: SumAggregation,
})
if err != nil {
t.Errorf("AddMetric() error = %v", err)
}
if _, ok := Metrics["test_metric"]; !ok {
t.Error("AddMetric() did not add metric to Metrics map")
}
// Test updating with higher frequency
err = AddMetric("test_metric", MetricConfig{
Frequency: 120,
Aggregation: SumAggregation,
})
if err != nil {
t.Errorf("AddMetric() error = %v", err)
}
if Metrics["test_metric"].Frequency != 120 {
t.Errorf("AddMetric() frequency = %d, want 120", Metrics["test_metric"].Frequency)
}
// Test updating with lower frequency (should not update)
err = AddMetric("test_metric", MetricConfig{
Frequency: 30,
Aggregation: SumAggregation,
})
if err != nil {
t.Errorf("AddMetric() error = %v", err)
}
if Metrics["test_metric"].Frequency != 120 {
t.Errorf("AddMetric() frequency = %d, want 120 (should not downgrade)", Metrics["test_metric"].Frequency)
}
}
func TestGetMetricFrequency(t *testing.T) {
// Reset Metrics before test
Metrics = map[string]MetricConfig{
"test_metric": {
Frequency: 60,
Aggregation: SumAggregation,
},
}
freq, err := GetMetricFrequency("test_metric")
if err != nil {
t.Errorf("GetMetricFrequency() error = %v", err)
}
if freq != 60 {
t.Errorf("GetMetricFrequency() = %d, want 60", freq)
}
_, err = GetMetricFrequency("nonexistent")
if err == nil {
t.Error("GetMetricFrequency() expected error for nonexistent metric")
}
}
func TestBufferWrite(t *testing.T) {
b := newBuffer(100, 10)
// Test writing value
nb, err := b.write(100, schema.Float(42.0))
if err != nil {
t.Errorf("buffer.write() error = %v", err)
}
if nb != b {
t.Error("buffer.write() created new buffer unexpectedly")
}
if len(b.data) != 1 {
t.Errorf("buffer.write() len(data) = %d, want 1", len(b.data))
}
if b.data[0] != schema.Float(42.0) {
t.Errorf("buffer.write() data[0] = %v, want 42.0", b.data[0])
}
// Test writing value from past (should error)
_, err = b.write(50, schema.Float(10.0))
if err == nil {
t.Error("buffer.write() expected error for past timestamp")
}
}
func TestBufferRead(t *testing.T) {
b := newBuffer(100, 10)
// Write some test data
b.write(100, schema.Float(1.0))
b.write(110, schema.Float(2.0))
b.write(120, schema.Float(3.0))
// Read data
data := make([]schema.Float, 3)
result, from, to, err := b.read(100, 130, data)
if err != nil {
t.Errorf("buffer.read() error = %v", err)
}
// Buffer read should return from as firstWrite (start + freq/2)
if from != 100 {
t.Errorf("buffer.read() from = %d, want 100", from)
}
if to != 130 {
t.Errorf("buffer.read() to = %d, want 130", to)
}
if len(result) != 3 {
t.Errorf("buffer.read() len(result) = %d, want 3", len(result))
}
}

View File

@@ -0,0 +1,68 @@
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved. This file is part of cc-backend.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package repository
import "time"
// RepositoryConfig holds configuration for repository operations.
// All fields have sensible defaults, so this configuration is optional.
type RepositoryConfig struct {
// CacheSize is the LRU cache size in bytes for job metadata and energy footprints.
// Default: 1MB (1024 * 1024 bytes)
CacheSize int
// MaxOpenConnections is the maximum number of open database connections.
// Default: 4
MaxOpenConnections int
// MaxIdleConnections is the maximum number of idle database connections.
// Default: 4
MaxIdleConnections int
// ConnectionMaxLifetime is the maximum amount of time a connection may be reused.
// Default: 1 hour
ConnectionMaxLifetime time.Duration
// ConnectionMaxIdleTime is the maximum amount of time a connection may be idle.
// Default: 1 hour
ConnectionMaxIdleTime time.Duration
// MinRunningJobDuration is the minimum duration in seconds for a job to be
// considered in "running jobs" queries. This filters out very short jobs.
// Default: 600 seconds (10 minutes)
MinRunningJobDuration int
}
// DefaultConfig returns the default repository configuration.
// These values are optimized for typical deployments.
func DefaultConfig() *RepositoryConfig {
return &RepositoryConfig{
CacheSize: 1 * 1024 * 1024, // 1MB
MaxOpenConnections: 4,
MaxIdleConnections: 4,
ConnectionMaxLifetime: time.Hour,
ConnectionMaxIdleTime: time.Hour,
MinRunningJobDuration: 600, // 10 minutes
}
}
// repoConfig is the package-level configuration instance.
// It is initialized with defaults and can be overridden via SetConfig.
var repoConfig *RepositoryConfig = DefaultConfig()
// SetConfig sets the repository configuration.
// This must be called before any repository initialization (Connect, GetJobRepository, etc.).
// If not called, default values from DefaultConfig() are used.
func SetConfig(cfg *RepositoryConfig) {
if cfg != nil {
repoConfig = cfg
}
}
// GetConfig returns the current repository configuration.
func GetConfig() *RepositoryConfig {
return repoConfig
}
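A brief usage sketch (the call order matches the documented requirement; the specific override is an assumption): start from DefaultConfig so untouched fields keep their defaults, apply the result with SetConfig, then connect.
cfg := repository.DefaultConfig()
cfg.MaxOpenConnections = 8 // every other field keeps its documented default
repository.SetConfig(cfg)  // must happen before Connect / GetJobRepository
repository.Connect("sqlite3", "./var/job.db")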

View File

@@ -2,6 +2,7 @@
// All rights reserved. This file is part of cc-backend.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package repository
import (
@@ -35,21 +36,15 @@ type DatabaseOptions struct {
ConnectionMaxIdleTime time.Duration
}
func setupSqlite(db *sql.DB) (err error) {
func setupSqlite(db *sql.DB) error {
pragmas := []string{
// "journal_mode = WAL",
// "busy_timeout = 5000",
// "synchronous = NORMAL",
// "cache_size = 1000000000", // 1GB
// "foreign_keys = true",
"temp_store = memory",
// "mmap_size = 3000000000",
}
for _, pragma := range pragmas {
_, err = db.Exec("PRAGMA " + pragma)
_, err := db.Exec("PRAGMA " + pragma)
if err != nil {
return
return err
}
}
@@ -63,24 +58,24 @@ func Connect(driver string, db string) {
dbConnOnce.Do(func() {
opts := DatabaseOptions{
URL: db,
MaxOpenConnections: 4,
MaxIdleConnections: 4,
ConnectionMaxLifetime: time.Hour,
ConnectionMaxIdleTime: time.Hour,
MaxOpenConnections: repoConfig.MaxOpenConnections,
MaxIdleConnections: repoConfig.MaxIdleConnections,
ConnectionMaxLifetime: repoConfig.ConnectionMaxLifetime,
ConnectionMaxIdleTime: repoConfig.ConnectionMaxIdleTime,
}
switch driver {
case "sqlite3":
// TODO: Have separate DB handles for Writes and Reads
// Optimize SQLite connection: https://kerkour.com/sqlite-for-servers
connectionUrlParams := make(url.Values)
connectionUrlParams.Add("_txlock", "immediate")
connectionUrlParams.Add("_journal_mode", "WAL")
connectionUrlParams.Add("_busy_timeout", "5000")
connectionUrlParams.Add("_synchronous", "NORMAL")
connectionUrlParams.Add("_cache_size", "1000000000")
connectionUrlParams.Add("_foreign_keys", "true")
opts.URL = fmt.Sprintf("file:%s?%s", opts.URL, connectionUrlParams.Encode())
connectionURLParams := make(url.Values)
connectionURLParams.Add("_txlock", "immediate")
connectionURLParams.Add("_journal_mode", "WAL")
connectionURLParams.Add("_busy_timeout", "5000")
connectionURLParams.Add("_synchronous", "NORMAL")
connectionURLParams.Add("_cache_size", "1000000000")
connectionURLParams.Add("_foreign_keys", "true")
opts.URL = fmt.Sprintf("file:%s?%s", opts.URL, connectionURLParams.Encode())
if cclog.Loglevel() == "debug" {
sql.Register("sqlite3WithHooks", sqlhooks.Wrap(&sqlite3.SQLiteDriver{}, &Hooks{}))
@@ -89,7 +84,10 @@ func Connect(driver string, db string) {
dbHandle, err = sqlx.Open("sqlite3", opts.URL)
}
setupSqlite(dbHandle.DB)
err = setupSqlite(dbHandle.DB)
if err != nil {
cclog.Abortf("Failed sqlite db setup.\nError: %s\n", err.Error())
}
case "mysql":
opts.URL += "?multiStatements=true"
dbHandle, err = sqlx.Open("mysql", opts.URL)

View File

@@ -2,6 +2,63 @@
// All rights reserved. This file is part of cc-backend.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
// Package repository provides the data access layer for cc-backend using the repository pattern.
//
// The repository pattern abstracts database operations and provides a clean interface for
// data access. Each major entity (Job, User, Node, Tag) has its own repository with CRUD
// operations and specialized queries.
//
// # Database Connection
//
// Initialize the database connection before using any repository:
//
// repository.Connect("sqlite3", "./var/job.db")
// // or for MySQL:
// repository.Connect("mysql", "user:password@tcp(localhost:3306)/dbname")
//
// # Configuration
//
// Optional: Configure repository settings before initialization:
//
// repository.SetConfig(&repository.RepositoryConfig{
// CacheSize: 2 * 1024 * 1024, // 2MB cache
// MaxOpenConnections: 8, // Connection pool size
// MinRunningJobDuration: 300, // Filter threshold
// })
//
// If not configured, sensible defaults are used automatically.
//
// # Repositories
//
// - JobRepository: Job lifecycle management and querying
// - UserRepository: User management and authentication
// - NodeRepository: Cluster node state tracking
// - Tags: Job tagging and categorization
//
// # Caching
//
// Repositories use LRU caching to improve performance. Cache keys are constructed
// as "type:id" (e.g., "metadata:123"). Cache is automatically invalidated on
// mutations to maintain consistency.
//
// # Transaction Support
//
// For batch operations, use transactions:
//
// t, err := jobRepo.TransactionInit()
// if err != nil {
// return err
// }
// defer t.Rollback() // Rollback if not committed
//
// // Perform operations...
// jobRepo.TransactionAdd(t, query, args...)
//
// // Commit when done
// if err := t.Commit(); err != nil {
// return err
// }
package repository
import (
@@ -45,7 +102,7 @@ func GetJobRepository() *JobRepository {
driver: db.Driver,
stmtCache: sq.NewStmtCache(db.DB),
cache: lrucache.New(1024 * 1024),
cache: lrucache.New(repoConfig.CacheSize),
}
})
return jobRepoInstance
@@ -267,7 +324,31 @@ func (r *JobRepository) FetchEnergyFootprint(job *schema.Job) (map[string]float6
func (r *JobRepository) DeleteJobsBefore(startTime int64) (int, error) {
var cnt int
q := sq.Select("count(*)").From("job").Where("job.start_time < ?", startTime)
q.RunWith(r.DB).QueryRow().Scan(cnt)
if err := q.RunWith(r.DB).QueryRow().Scan(&cnt); err != nil {
cclog.Errorf("Error counting jobs before %d: %v", startTime, err)
return 0, err
}
// Invalidate cache for jobs being deleted (get job IDs first)
if cnt > 0 {
var jobIds []int64
rows, err := sq.Select("id").From("job").Where("job.start_time < ?", startTime).RunWith(r.DB).Query()
if err == nil {
defer rows.Close()
for rows.Next() {
var id int64
if err := rows.Scan(&id); err == nil {
jobIds = append(jobIds, id)
}
}
// Invalidate cache entries
for _, id := range jobIds {
r.cache.Del(fmt.Sprintf("metadata:%d", id))
r.cache.Del(fmt.Sprintf("energyFootprint:%d", id))
}
}
}
qd := sq.Delete("job").Where("job.start_time < ?", startTime)
_, err := qd.RunWith(r.DB).Exec()
@@ -281,6 +362,10 @@ func (r *JobRepository) DeleteJobsBefore(startTime int64) (int, error) {
}
func (r *JobRepository) DeleteJobById(id int64) error {
// Invalidate cache entries before deletion
r.cache.Del(fmt.Sprintf("metadata:%d", id))
r.cache.Del(fmt.Sprintf("energyFootprint:%d", id))
qd := sq.Delete("job").Where("job.id = ?", id)
_, err := qd.RunWith(r.DB).Exec()
@@ -450,13 +535,14 @@ func (r *JobRepository) AllocatedNodes(cluster string) (map[string]map[string]in
// FIXME: Set duration to requested walltime?
func (r *JobRepository) StopJobsExceedingWalltimeBy(seconds int) error {
start := time.Now()
currentTime := time.Now().Unix()
res, err := sq.Update("job").
Set("monitoring_status", schema.MonitoringStatusArchivingFailed).
Set("duration", 0).
Set("job_state", schema.JobStateFailed).
Where("job.job_state = 'running'").
Where("job.walltime > 0").
Where(fmt.Sprintf("(%d - job.start_time) > (job.walltime + %d)", time.Now().Unix(), seconds)).
Where("(? - job.start_time) > (job.walltime + ?)", currentTime, seconds).
RunWith(r.DB).Exec()
if err != nil {
cclog.Warn("Error while stopping jobs exceeding walltime")
@@ -505,21 +591,21 @@ func (r *JobRepository) FindJobIdsByTag(tagId int64) ([]int64, error) {
// FIXME: Reconsider filtering short jobs with hardcoded threshold
func (r *JobRepository) FindRunningJobs(cluster string) ([]*schema.Job, error) {
query := sq.Select(jobColumns...).From("job").
Where(fmt.Sprintf("job.cluster = '%s'", cluster)).
Where("job.cluster = ?", cluster).
Where("job.job_state = 'running'").
Where("job.duration > 600")
Where("job.duration > ?", repoConfig.MinRunningJobDuration)
rows, err := query.RunWith(r.stmtCache).Query()
if err != nil {
cclog.Error("Error while running query")
return nil, err
}
defer rows.Close()
jobs := make([]*schema.Job, 0, 50)
for rows.Next() {
job, err := scanJob(rows)
if err != nil {
rows.Close()
cclog.Warn("Error while scanning rows")
return nil, err
}
@@ -552,12 +638,10 @@ func (r *JobRepository) FindJobsBetween(startTimeBegin int64, startTimeEnd int64
if startTimeBegin == 0 {
cclog.Infof("Find jobs before %d", startTimeEnd)
query = sq.Select(jobColumns...).From("job").Where(fmt.Sprintf(
"job.start_time < %d", startTimeEnd))
query = sq.Select(jobColumns...).From("job").Where("job.start_time < ?", startTimeEnd)
} else {
cclog.Infof("Find jobs between %d and %d", startTimeBegin, startTimeEnd)
query = sq.Select(jobColumns...).From("job").Where(fmt.Sprintf(
"job.start_time BETWEEN %d AND %d", startTimeBegin, startTimeEnd))
query = sq.Select(jobColumns...).From("job").Where("job.start_time BETWEEN ? AND ?", startTimeBegin, startTimeEnd)
}
rows, err := query.RunWith(r.stmtCache).Query()
@@ -565,12 +649,12 @@ func (r *JobRepository) FindJobsBetween(startTimeBegin int64, startTimeEnd int64
cclog.Error("Error while running query")
return nil, err
}
defer rows.Close()
jobs := make([]*schema.Job, 0, 50)
for rows.Next() {
job, err := scanJob(rows)
if err != nil {
rows.Close()
cclog.Warn("Error while scanning rows")
return nil, err
}
@@ -582,6 +666,10 @@ func (r *JobRepository) FindJobsBetween(startTimeBegin int64, startTimeEnd int64
}
func (r *JobRepository) UpdateMonitoringStatus(job int64, monitoringStatus int32) (err error) {
// Invalidate cache entries as monitoring status affects job state
r.cache.Del(fmt.Sprintf("metadata:%d", job))
r.cache.Del(fmt.Sprintf("energyFootprint:%d", job))
stmt := sq.Update("job").
Set("monitoring_status", monitoringStatus).
Where("job.id = ?", job)

View File

@@ -31,8 +31,9 @@ const NamedJobInsert string = `INSERT INTO job (
func (r *JobRepository) InsertJob(job *schema.Job) (int64, error) {
r.Mutex.Lock()
defer r.Mutex.Unlock()
res, err := r.DB.NamedExec(NamedJobCacheInsert, job)
r.Mutex.Unlock()
if err != nil {
cclog.Warn("Error while NamedJobInsert")
return 0, err
@@ -57,12 +58,12 @@ func (r *JobRepository) SyncJobs() ([]*schema.Job, error) {
cclog.Errorf("Error while running query %v", err)
return nil, err
}
defer rows.Close()
jobs := make([]*schema.Job, 0, 50)
for rows.Next() {
job, err := scanJob(rows)
if err != nil {
rows.Close()
cclog.Warn("Error while scanning rows")
return nil, err
}
@@ -113,6 +114,10 @@ func (r *JobRepository) Stop(
state schema.JobState,
monitoringStatus int32,
) (err error) {
// Invalidate cache entries as job state is changing
r.cache.Del(fmt.Sprintf("metadata:%d", jobId))
r.cache.Del(fmt.Sprintf("energyFootprint:%d", jobId))
stmt := sq.Update("job").
Set("job_state", state).
Set("duration", duration).
@@ -129,11 +134,13 @@ func (r *JobRepository) StopCached(
state schema.JobState,
monitoringStatus int32,
) (err error) {
// Note: StopCached updates job_cache table, not the main job table
// Cache invalidation happens when job is synced to main table
stmt := sq.Update("job_cache").
Set("job_state", state).
Set("duration", duration).
Set("monitoring_status", monitoringStatus).
Where("job.id = ?", jobId)
Where("job_cache.id = ?", jobId)
_, err = stmt.RunWith(r.stmtCache).Exec()
return err

View File

@@ -89,6 +89,7 @@ func (r *JobRepository) FindAll(
cclog.Error("Error while running query")
return nil, err
}
defer rows.Close()
jobs := make([]*schema.Job, 0, 10)
for rows.Next() {
@@ -103,25 +104,31 @@ func (r *JobRepository) FindAll(
return jobs, nil
}
// Get complete joblist only consisting of db ids.
// GetJobList returns job IDs for non-running jobs.
// This is useful to process large job counts and intended to be used
// together with FindById to process jobs one by one
func (r *JobRepository) GetJobList() ([]int64, error) {
// together with FindById to process jobs one by one.
// Use limit and offset for pagination. Use limit=0 to get all results (not recommended for large datasets).
func (r *JobRepository) GetJobList(limit int, offset int) ([]int64, error) {
query := sq.Select("id").From("job").
Where("job.job_state != 'running'")
// Add pagination if limit is specified
if limit > 0 {
query = query.Limit(uint64(limit)).Offset(uint64(offset))
}
rows, err := query.RunWith(r.stmtCache).Query()
if err != nil {
cclog.Error("Error while running query")
return nil, err
}
defer rows.Close()
jl := make([]int64, 0, 1000)
for rows.Next() {
var id int64
err := rows.Scan(&id)
if err != nil {
rows.Close()
cclog.Warn("Error while scanning rows")
return nil, err
}
@@ -256,6 +263,7 @@ func (r *JobRepository) FindConcurrentJobs(
cclog.Errorf("Error while running query: %v", err)
return nil, err
}
defer rows.Close()
items := make([]*model.JobLink, 0, 10)
queryString := fmt.Sprintf("cluster=%s", job.Cluster)
@@ -283,6 +291,7 @@ func (r *JobRepository) FindConcurrentJobs(
cclog.Errorf("Error while running query: %v", err)
return nil, err
}
defer rows.Close()
for rows.Next() {
var id, jobId, startTime sql.NullInt64
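A usage sketch for the paginated GetJobList above: iterate pages of non-running job IDs and load each job individually. The page size and the per-job processing are placeholders, and FindByIdDirect is used as it appears elsewhere in this change.
r := repository.GetJobRepository()
const pageSize = 1000
for offset := 0; ; offset += pageSize {
	ids, err := r.GetJobList(pageSize, offset)
	if err != nil {
		return err
	}
	if len(ids) == 0 {
		break // no further non-running jobs
	}
	for _, id := range ids {
		job, err := r.FindByIdDirect(id)
		if err != nil {
			return err
		}
		_ = job // process one job at a time
	}
}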

View File

@@ -43,7 +43,7 @@ func GetNodeRepository() *NodeRepository {
driver: db.Driver,
stmtCache: sq.NewStmtCache(db.DB),
cache: lrucache.New(1024 * 1024),
cache: lrucache.New(repoConfig.CacheSize),
}
})
return nodeRepoInstance
@@ -77,43 +77,6 @@ func (r *NodeRepository) FetchMetadata(hostname string, cluster string) (map[str
return MetaData, nil
}
//
// func (r *NodeRepository) UpdateMetadata(node *schema.Node, key, val string) (err error) {
// cachekey := fmt.Sprintf("metadata:%d", node.ID)
// r.cache.Del(cachekey)
// if node.MetaData == nil {
// if _, err = r.FetchMetadata(node); err != nil {
// cclog.Warnf("Error while fetching metadata for node, DB ID '%v'", node.ID)
// return err
// }
// }
//
// if node.MetaData != nil {
// cpy := make(map[string]string, len(node.MetaData)+1)
// maps.Copy(cpy, node.MetaData)
// cpy[key] = val
// node.MetaData = cpy
// } else {
// node.MetaData = map[string]string{key: val}
// }
//
// if node.RawMetaData, err = json.Marshal(node.MetaData); err != nil {
// cclog.Warnf("Error while marshaling metadata for node, DB ID '%v'", node.ID)
// return err
// }
//
// if _, err = sq.Update("node").
// Set("meta_data", node.RawMetaData).
// Where("node.id = ?", node.ID).
// RunWith(r.stmtCache).Exec(); err != nil {
// cclog.Warnf("Error while updating metadata for node, DB ID '%v'", node.ID)
// return err
// }
//
// r.cache.Put(cachekey, node.MetaData, len(node.RawMetaData), 24*time.Hour)
// return nil
// }
func (r *NodeRepository) GetNode(hostname string, cluster string, withMeta bool) (*schema.Node, error) {
node := &schema.Node{}
var timestamp int

View File

@@ -115,7 +115,7 @@ func nodeTestSetup(t *testing.T) {
}
if err := os.WriteFile(filepath.Join(jobarchive, "version.txt"),
fmt.Appendf(nil, "%d", 2), 0o666); err != nil {
fmt.Appendf(nil, "%d", 3), 0o666); err != nil {
t.Fatal(err)
}

View File

@@ -114,16 +114,6 @@ func (r *JobRepository) buildStatsQuery(
return query
}
// func (r *JobRepository) getUserName(ctx context.Context, id string) string {
// user := GetUserFromContext(ctx)
// name, _ := r.FindColumnValue(user, id, "hpc_user", "name", "username", false)
// if name != "" {
// return name
// } else {
// return "-"
// }
// }
func (r *JobRepository) getCastType() string {
var castType string

View File

@@ -5,6 +5,7 @@
package repository
import (
"errors"
"fmt"
"strings"
@@ -14,65 +15,32 @@ import (
sq "github.com/Masterminds/squirrel"
)
// Add the tag with id `tagId` to the job with the database id `jobId`.
// AddTag adds the tag with id `tagId` to the job with the database id `jobId`.
// Requires user authentication for security checks.
func (r *JobRepository) AddTag(user *schema.User, job int64, tag int64) ([]*schema.Tag, error) {
j, err := r.FindByIdWithUser(user, job)
if err != nil {
cclog.Warn("Error while finding job by id")
cclog.Warnf("Error finding job %d for user %s: %v", job, user.Username, err)
return nil, err
}
q := sq.Insert("jobtag").Columns("job_id", "tag_id").Values(job, tag)
if _, err := q.RunWith(r.stmtCache).Exec(); err != nil {
s, _, _ := q.ToSql()
cclog.Errorf("Error adding tag with %s: %v", s, err)
return nil, err
}
tags, err := r.GetTags(user, &job)
if err != nil {
cclog.Warn("Error while getting tags for job")
return nil, err
}
archiveTags, err := r.getArchiveTags(&job)
if err != nil {
cclog.Warn("Error while getting tags for job")
return nil, err
}
return tags, archive.UpdateTags(j, archiveTags)
return r.addJobTag(job, tag, j, func() ([]*schema.Tag, error) {
return r.GetTags(user, &job)
})
}
// AddTagDirect adds a tag without user security checks.
// Use only for internal/admin operations.
func (r *JobRepository) AddTagDirect(job int64, tag int64) ([]*schema.Tag, error) {
j, err := r.FindByIdDirect(job)
if err != nil {
cclog.Warn("Error while finding job by id")
cclog.Warnf("Error finding job %d: %v", job, err)
return nil, err
}
q := sq.Insert("jobtag").Columns("job_id", "tag_id").Values(job, tag)
if _, err := q.RunWith(r.stmtCache).Exec(); err != nil {
s, _, _ := q.ToSql()
cclog.Errorf("Error adding tag with %s: %v", s, err)
return nil, err
}
tags, err := r.GetTagsDirect(&job)
if err != nil {
cclog.Warn("Error while getting tags for job")
return nil, err
}
archiveTags, err := r.getArchiveTags(&job)
if err != nil {
cclog.Warn("Error while getting tags for job")
return nil, err
}
return tags, archive.UpdateTags(j, archiveTags)
return r.addJobTag(job, tag, j, func() ([]*schema.Tag, error) {
return r.GetTagsDirect(&job)
})
}
// Removes a tag from a job by tag id.
@@ -260,15 +228,18 @@ func (r *JobRepository) CountTags(user *schema.User) (tags []schema.Tag, counts
LeftJoin("jobtag jt ON t.id = jt.tag_id").
GroupBy("t.tag_name")
// Handle Scope Filtering
scopeList := "\"global\""
// Build scope list for filtering
var scopeBuilder strings.Builder
scopeBuilder.WriteString(`"global"`)
if user != nil {
scopeList += ",\"" + user.Username + "\""
scopeBuilder.WriteString(`,"`)
scopeBuilder.WriteString(user.Username)
scopeBuilder.WriteString(`"`)
if user.HasAnyRole([]schema.Role{schema.RoleAdmin, schema.RoleSupport}) {
scopeBuilder.WriteString(`,"admin"`)
}
}
if user.HasAnyRole([]schema.Role{schema.RoleAdmin, schema.RoleSupport}) {
scopeList += ",\"admin\""
}
q = q.Where("t.tag_scope IN (" + scopeList + ")")
q = q.Where("t.tag_scope IN (" + scopeBuilder.String() + ")")
// Handle Job Ownership
if user != nil && user.HasAnyRole([]schema.Role{schema.RoleAdmin, schema.RoleSupport}) { // ADMIN || SUPPORT: Count all jobs
@@ -302,6 +273,41 @@ func (r *JobRepository) CountTags(user *schema.User) (tags []schema.Tag, counts
return tags, counts, err
}
var (
ErrTagNotFound = errors.New("the tag does not exist")
ErrJobNotOwned = errors.New("user is not owner of job")
ErrTagNoAccess = errors.New("user not permitted to use that tag")
ErrTagPrivateScope = errors.New("tag is private to another user")
ErrTagAdminScope = errors.New("tag requires admin privileges")
ErrTagsIncompatScopes = errors.New("combining admin and non-admin scoped tags not allowed")
)
// addJobTag is a helper function that inserts a job-tag association and updates the archive.
// Returns the updated tag list for the job.
func (r *JobRepository) addJobTag(jobId int64, tagId int64, job *schema.Job, getTags func() ([]*schema.Tag, error)) ([]*schema.Tag, error) {
q := sq.Insert("jobtag").Columns("job_id", "tag_id").Values(jobId, tagId)
if _, err := q.RunWith(r.stmtCache).Exec(); err != nil {
s, _, _ := q.ToSql()
cclog.Errorf("Error adding tag with %s: %v", s, err)
return nil, err
}
tags, err := getTags()
if err != nil {
cclog.Warnf("Error getting tags for job %d: %v", jobId, err)
return nil, err
}
archiveTags, err := r.getArchiveTags(&jobId)
if err != nil {
cclog.Warnf("Error getting archive tags for job %d: %v", jobId, err)
return nil, err
}
return tags, archive.UpdateTags(job, archiveTags)
}
// AddTagOrCreate adds the tag with the specified type and name to the job with the database id `jobId`.
// If such a tag does not yet exist, it is created.
func (r *JobRepository) AddTagOrCreate(user *schema.User, jobId int64, tagType string, tagName string, tagScope string) (tagId int64, err error) {

View File

@@ -5,84 +5,96 @@
package repository
import (
cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
"fmt"
"github.com/jmoiron/sqlx"
)
// Transaction wraps a database transaction for job-related operations.
type Transaction struct {
tx *sqlx.Tx
stmt *sqlx.NamedStmt
tx *sqlx.Tx
}
// TransactionInit begins a new transaction.
func (r *JobRepository) TransactionInit() (*Transaction, error) {
var err error
t := new(Transaction)
t.tx, err = r.DB.Beginx()
tx, err := r.DB.Beginx()
if err != nil {
cclog.Warn("Error while bundling transactions")
return nil, err
return nil, fmt.Errorf("beginning transaction: %w", err)
}
return t, nil
return &Transaction{tx: tx}, nil
}
func (r *JobRepository) TransactionCommit(t *Transaction) error {
var err error
if t.tx != nil {
if err = t.tx.Commit(); err != nil {
cclog.Warn("Error while committing transactions")
return err
}
// Commit commits the transaction.
// After calling Commit, the transaction should not be used again.
func (t *Transaction) Commit() error {
if t.tx == nil {
return fmt.Errorf("transaction already committed or rolled back")
}
t.tx, err = r.DB.Beginx()
err := t.tx.Commit()
t.tx = nil // Mark as completed
if err != nil {
cclog.Warn("Error while bundling transactions")
return err
return fmt.Errorf("committing transaction: %w", err)
}
return nil
}
// Rollback rolls back the transaction.
// It's safe to call Rollback on an already committed or rolled back transaction.
func (t *Transaction) Rollback() error {
if t.tx == nil {
return nil // Already committed/rolled back
}
err := t.tx.Rollback()
t.tx = nil // Mark as completed
if err != nil {
return fmt.Errorf("rolling back transaction: %w", err)
}
return nil
}
// TransactionEnd commits the transaction.
// Deprecated: Use Commit() instead.
func (r *JobRepository) TransactionEnd(t *Transaction) error {
if err := t.tx.Commit(); err != nil {
cclog.Warn("Error while committing SQL transactions")
return err
}
return nil
return t.Commit()
}
// TransactionAddNamed executes a named query within the transaction.
func (r *JobRepository) TransactionAddNamed(
t *Transaction,
query string,
args ...interface{},
) (int64, error) {
if t.tx == nil {
return 0, fmt.Errorf("transaction is nil or already completed")
}
res, err := t.tx.NamedExec(query, args)
if err != nil {
cclog.Errorf("Named Exec failed: %v", err)
return 0, err
return 0, fmt.Errorf("named exec: %w", err)
}
id, err := res.LastInsertId()
if err != nil {
cclog.Errorf("repository initDB(): %v", err)
return 0, err
return 0, fmt.Errorf("getting last insert id: %w", err)
}
return id, nil
}
// TransactionAdd executes a query within the transaction.
func (r *JobRepository) TransactionAdd(t *Transaction, query string, args ...interface{}) (int64, error) {
if t.tx == nil {
return 0, fmt.Errorf("transaction is nil or already completed")
}
res, err := t.tx.Exec(query, args...)
if err != nil {
cclog.Errorf("TransactionAdd(), Exec() Error: %v", err)
return 0, err
return 0, fmt.Errorf("exec: %w", err)
}
id, err := res.LastInsertId()
if err != nil {
cclog.Errorf("TransactionAdd(), LastInsertId() Error: %v", err)
return 0, err
return 0, fmt.Errorf("getting last insert id: %w", err)
}
return id, nil

View File

@@ -24,10 +24,14 @@ import (
)
//go:embed jobclasses/*
var jobclassFiles embed.FS
var jobClassFiles embed.FS
// Variable defines a named expression that can be computed and reused in rules.
// Variables are evaluated before the main rule and their results are added to the environment.
type Variable struct {
// Name is the variable identifier used in rule expressions
Name string `json:"name"`
// Expr is the expression to evaluate (must return a numeric value)
Expr string `json:"expr"`
}
@@ -36,14 +40,25 @@ type ruleVariable struct {
expr *vm.Program
}
// RuleFormat defines the JSON structure for job classification rules.
// Each rule specifies requirements, metrics to analyze, variables to compute,
// and the final rule expression that determines if the job matches the classification.
type RuleFormat struct {
// Name is a human-readable description of the rule
Name string `json:"name"`
// Tag is the classification tag to apply if the rule matches
Tag string `json:"tag"`
// Parameters are shared values referenced in the rule (e.g., thresholds)
Parameters []string `json:"parameters"`
// Metrics are the job metrics required for this rule (e.g., "cpu_load", "mem_used")
Metrics []string `json:"metrics"`
// Requirements are boolean expressions that must be true for the rule to apply
Requirements []string `json:"requirements"`
// Variables are computed values used in the rule expression
Variables []Variable `json:"variables"`
// Rule is the boolean expression that determines if the job matches
Rule string `json:"rule"`
// Hint is a template string that generates a message when the rule matches
Hint string `json:"hint"`
}
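A complete rule file following this format looks roughly like the example below. The values mirror the test fixture added later in this change set; real rules may additionally reference entries from parameters.json via the parameters field:

{
  "name": "High FLOPS job",
  "tag": "high_flops",
  "parameters": [],
  "metrics": ["flops_any"],
  "requirements": ["job.numNodes > 1"],
  "variables": [{ "name": "avg_flops", "expr": "flops_any.avg" }],
  "rule": "avg_flops > 100",
  "hint": "High FLOPS: {{.avg_flops}}"
}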
@@ -56,11 +71,35 @@ type ruleInfo struct {
hint *template.Template
}
// JobRepository defines the interface for job database operations needed by the tagger.
// This interface allows for easier testing and decoupling from the concrete repository implementation.
type JobRepository interface {
// HasTag checks if a job already has a specific tag
HasTag(jobId int64, tagType string, tagName string) bool
// AddTagOrCreateDirect adds a tag to a job or creates it if it doesn't exist
AddTagOrCreateDirect(jobId int64, tagType string, tagName string) (tagId int64, err error)
// UpdateMetadata updates job metadata with a key-value pair
UpdateMetadata(job *schema.Job, key, val string) (err error)
}
// JobClassTagger classifies jobs based on configurable rules that evaluate job metrics and properties.
// Rules are loaded from embedded JSON files and can be dynamically reloaded from a watched directory.
// When a job matches a rule, it is tagged with the corresponding classification and an optional hint message.
type JobClassTagger struct {
rules map[string]ruleInfo
parameters map[string]any
tagType string
cfgPath string
// rules maps classification tags to their compiled rule information
rules map[string]ruleInfo
// parameters are shared values (e.g., thresholds) used across multiple rules
parameters map[string]any
// tagType is the type of tag ("jobClass")
tagType string
// cfgPath is the path to watch for configuration changes
cfgPath string
// repo provides access to job database operations
repo JobRepository
// getStatistics retrieves job statistics for analysis
getStatistics func(job *schema.Job) (map[string]schema.JobStatistics, error)
// getMetricConfig retrieves metric configuration (limits) for a cluster
getMetricConfig func(cluster, subCluster string) map[string]*schema.Metric
}
func (t *JobClassTagger) prepareRule(b []byte, fns string) {
@@ -127,10 +166,14 @@ func (t *JobClassTagger) prepareRule(b []byte, fns string) {
t.rules[rule.Tag] = ri
}
// EventMatch checks if a filesystem event should trigger configuration reload.
// It returns true if the event path contains "jobclasses".
func (t *JobClassTagger) EventMatch(s string) bool {
return strings.Contains(s, "jobclasses")
}
// EventCallback is triggered when the configuration directory changes.
// It reloads parameters and all rule files from the watched directory.
// FIXME: Only process the file that caused the event
func (t *JobClassTagger) EventCallback() {
files, err := os.ReadDir(t.cfgPath)
@@ -170,7 +213,7 @@ func (t *JobClassTagger) EventCallback() {
func (t *JobClassTagger) initParameters() error {
cclog.Info("Initialize parameters")
b, err := jobclassFiles.ReadFile("jobclasses/parameters.json")
b, err := jobClassFiles.ReadFile("jobclasses/parameters.json")
if err != nil {
cclog.Warnf("prepareRule() > open file error: %v", err)
return err
@@ -184,6 +227,10 @@ func (t *JobClassTagger) initParameters() error {
return nil
}
// Register initializes the JobClassTagger by loading parameters and classification rules.
// It loads embedded configuration files and sets up a file watch on ./var/tagger/jobclasses
// if it exists, allowing for dynamic configuration updates without restarting the application.
// Returns an error if the embedded configuration files cannot be read or parsed.
func (t *JobClassTagger) Register() error {
t.cfgPath = "./var/tagger/jobclasses"
t.tagType = "jobClass"
@@ -194,18 +241,18 @@ func (t *JobClassTagger) Register() error {
return err
}
files, err := jobclassFiles.ReadDir("jobclasses")
files, err := jobClassFiles.ReadDir("jobclasses")
if err != nil {
return fmt.Errorf("error reading app folder: %#v", err)
}
t.rules = make(map[string]ruleInfo, 0)
t.rules = make(map[string]ruleInfo)
for _, fn := range files {
fns := fn.Name()
if fns != "parameters.json" {
filename := fmt.Sprintf("jobclasses/%s", fns)
cclog.Infof("Process: %s", fns)
b, err := jobclassFiles.ReadFile(filename)
b, err := jobClassFiles.ReadFile(filename)
if err != nil {
cclog.Warnf("prepareRule() > open file error: %v", err)
return err
@@ -220,13 +267,30 @@ func (t *JobClassTagger) Register() error {
util.AddListener(t.cfgPath, t)
}
t.repo = repository.GetJobRepository()
t.getStatistics = archive.GetStatistics
t.getMetricConfig = archive.GetMetricConfigSubCluster
return nil
}
// Match evaluates all classification rules against a job and applies matching tags.
// It retrieves job statistics and metric configurations, then tests each rule's requirements
// and main expression. For each matching rule, it:
// - Applies the classification tag to the job
// - Generates and stores a hint message based on the rule's template
//
// The function constructs an evaluation environment containing:
// - Job properties (duration, cores, nodes, state, etc.)
// - Metric statistics (min, max, avg) and their configured limits
// - Shared parameters defined in parameters.json
// - Computed variables from the rule definition
//
// Rules are evaluated in arbitrary order. If multiple rules match, only the first
// encountered match is applied (FIXME: this should handle multiple matches).
func (t *JobClassTagger) Match(job *schema.Job) {
r := repository.GetJobRepository()
jobstats, err := archive.GetStatistics(job)
metricsList := archive.GetMetricConfigSubCluster(job.Cluster, job.SubCluster)
jobStats, err := t.getStatistics(job)
metricsList := t.getMetricConfig(job.Cluster, job.SubCluster)
cclog.Infof("Enter match rule with %d rules for job %d", len(t.rules), job.JobID)
if err != nil {
cclog.Errorf("job classification failed for job %d: %#v", job.JobID, err)
@@ -251,7 +315,7 @@ func (t *JobClassTagger) Match(job *schema.Job) {
// add metrics to env
for _, m := range ri.metrics {
stats, ok := jobstats[m]
stats, ok := jobStats[m]
if !ok {
cclog.Errorf("job classification failed for job %d: missing metric '%s'", job.JobID, m)
return
@@ -302,8 +366,11 @@ func (t *JobClassTagger) Match(job *schema.Job) {
if match.(bool) {
cclog.Info("Rule matches!")
id := *job.ID
if !r.HasTag(id, t.tagType, tag) {
r.AddTagOrCreateDirect(id, t.tagType, tag)
if !t.repo.HasTag(id, t.tagType, tag) {
_, err := t.repo.AddTagOrCreateDirect(id, t.tagType, tag)
if err != nil {
return
}
}
// process hint template
@@ -314,7 +381,11 @@ func (t *JobClassTagger) Match(job *schema.Job) {
}
// FIXME: Handle case where multiple tags apply
r.UpdateMetadata(job, "message", msg.String())
// FIXME: Handle case where multiple tags apply
err = t.repo.UpdateMetadata(job, "message", msg.String())
if err != nil {
return
}
} else {
cclog.Info("Rule does not match!")
}
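The variable and rule expressions above are compiled into *vm.Program values and evaluated against the environment Match() builds. A minimal standalone sketch of that evaluation model, assuming the expr-lang/expr package that provides vm.Program here:

package main

import (
	"fmt"

	"github.com/expr-lang/expr"
)

func main() {
	// Environment mirrors what Match() builds: job properties plus metric statistics.
	env := map[string]any{
		"job":       map[string]any{"numNodes": 2, "duration": 3600},
		"flops_any": map[string]any{"min": 0.0, "max": 200.0, "avg": 150.0},
	}

	// A variable expression ("avg_flops": "flops_any.avg") is evaluated first ...
	prog, err := expr.Compile("flops_any.avg", expr.Env(env))
	if err != nil {
		panic(err)
	}
	avg, err := expr.Run(prog, env)
	if err != nil {
		panic(err)
	}
	env["avg_flops"] = avg

	// ... and its result becomes available to the rule expression.
	rule, err := expr.Compile("avg_flops > 100", expr.Env(env))
	if err != nil {
		panic(err)
	}
	match, err := expr.Run(rule, env)
	if err != nil {
		panic(err)
	}
	fmt.Println(match) // true (150 > 100)
}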

View File

@@ -0,0 +1,162 @@
package tagger
import (
"testing"
"github.com/ClusterCockpit/cc-lib/schema"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/mock"
)
// MockJobRepository is a mock implementation of the JobRepository interface
type MockJobRepository struct {
mock.Mock
}
func (m *MockJobRepository) HasTag(jobId int64, tagType string, tagName string) bool {
args := m.Called(jobId, tagType, tagName)
return args.Bool(0)
}
func (m *MockJobRepository) AddTagOrCreateDirect(jobId int64, tagType string, tagName string) (tagId int64, err error) {
args := m.Called(jobId, tagType, tagName)
return args.Get(0).(int64), args.Error(1)
}
func (m *MockJobRepository) UpdateMetadata(job *schema.Job, key, val string) (err error) {
args := m.Called(job, key, val)
return args.Error(0)
}
func TestPrepareRule(t *testing.T) {
tagger := &JobClassTagger{
rules: make(map[string]ruleInfo),
parameters: make(map[string]any),
}
// Valid rule JSON
validRule := []byte(`{
"name": "Test Rule",
"tag": "test_tag",
"parameters": [],
"metrics": ["flops_any"],
"requirements": ["job.numNodes > 1"],
"variables": [{"name": "avg_flops", "expr": "flops_any.avg"}],
"rule": "avg_flops > 100",
"hint": "High FLOPS"
}`)
tagger.prepareRule(validRule, "test_rule.json")
assert.Contains(t, tagger.rules, "test_tag")
rule := tagger.rules["test_tag"]
assert.Equal(t, 1, len(rule.metrics))
assert.Equal(t, 1, len(rule.requirements))
assert.Equal(t, 1, len(rule.variables))
assert.NotNil(t, rule.rule)
assert.NotNil(t, rule.hint)
}
func TestClassifyJobMatch(t *testing.T) {
mockRepo := new(MockJobRepository)
tagger := &JobClassTagger{
rules: make(map[string]ruleInfo),
parameters: make(map[string]any),
tagType: "jobClass",
repo: mockRepo,
getStatistics: func(job *schema.Job) (map[string]schema.JobStatistics, error) {
return map[string]schema.JobStatistics{
"flops_any": {Min: 0, Max: 200, Avg: 150},
}, nil
},
getMetricConfig: func(cluster, subCluster string) map[string]*schema.Metric {
return map[string]*schema.Metric{
"flops_any": {Peak: 1000, Normal: 100, Caution: 50, Alert: 10},
}
},
}
// Add a rule manually or via prepareRule
validRule := []byte(`{
"name": "Test Rule",
"tag": "high_flops",
"parameters": [],
"metrics": ["flops_any"],
"requirements": [],
"variables": [{"name": "avg_flops", "expr": "flops_any.avg"}],
"rule": "avg_flops > 100",
"hint": "High FLOPS: {{.avg_flops}}"
}`)
tagger.prepareRule(validRule, "test_rule.json")
jobID := int64(123)
job := &schema.Job{
ID: &jobID,
JobID: 123,
Cluster: "test_cluster",
SubCluster: "test_subcluster",
NumNodes: 2,
NumHWThreads: 4,
State: schema.JobStateCompleted,
}
// Expectation: Rule matches
// 1. Check if tag exists (return false)
mockRepo.On("HasTag", jobID, "jobClass", "high_flops").Return(false)
// 2. Add tag
mockRepo.On("AddTagOrCreateDirect", jobID, "jobClass", "high_flops").Return(int64(1), nil)
// 3. Update metadata
mockRepo.On("UpdateMetadata", job, "message", mock.Anything).Return(nil)
tagger.Match(job)
mockRepo.AssertExpectations(t)
}
func TestMatch_NoMatch(t *testing.T) {
mockRepo := new(MockJobRepository)
tagger := &JobClassTagger{
rules: make(map[string]ruleInfo),
parameters: make(map[string]any),
tagType: "jobClass",
repo: mockRepo,
getStatistics: func(job *schema.Job) (map[string]schema.JobStatistics, error) {
return map[string]schema.JobStatistics{
"flops_any": {Min: 0, Max: 50, Avg: 20}, // Avg 20 < 100
}, nil
},
getMetricConfig: func(cluster, subCluster string) map[string]*schema.Metric {
return map[string]*schema.Metric{
"flops_any": {Peak: 1000, Normal: 100, Caution: 50, Alert: 10},
}
},
}
validRule := []byte(`{
"name": "Test Rule",
"tag": "high_flops",
"parameters": [],
"metrics": ["flops_any"],
"requirements": [],
"variables": [{"name": "avg_flops", "expr": "flops_any.avg"}],
"rule": "avg_flops > 100",
"hint": "High FLOPS"
}`)
tagger.prepareRule(validRule, "test_rule.json")
jobID := int64(123)
job := &schema.Job{
ID: &jobID,
JobID: 123,
Cluster: "test_cluster",
SubCluster: "test_subcluster",
NumNodes: 2,
NumHWThreads: 4,
State: schema.JobStateCompleted,
}
// Expectation: Rule does NOT match, so no repo calls
tagger.Match(job)
mockRepo.AssertExpectations(t)
}

View File

@@ -2,6 +2,7 @@
// All rights reserved. This file is part of cc-backend.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package tagger
import (
@@ -28,9 +29,16 @@ type appInfo struct {
strings []string
}
// AppTagger detects applications by matching patterns in job scripts.
// It loads application patterns from embedded files and can dynamically reload
// configuration from a watched directory. When a job script matches a pattern,
// the corresponding application tag is automatically applied.
type AppTagger struct {
// apps maps application tags to their matching patterns
apps map[string]appInfo
// tagType is the type of tag ("app")
tagType string
// cfgPath is the path to watch for configuration changes
cfgPath string
}
@@ -45,10 +53,14 @@ func (t *AppTagger) scanApp(f fs.File, fns string) {
t.apps[ai.tag] = ai
}
// EventMatch checks if a filesystem event should trigger configuration reload.
// It returns true if the event path contains "apps".
func (t *AppTagger) EventMatch(s string) bool {
return strings.Contains(s, "apps")
}
// EventCallback is triggered when the configuration directory changes.
// It reloads all application pattern files from the watched directory.
// FIXME: Only process the file that caused the event
func (t *AppTagger) EventCallback() {
files, err := os.ReadDir(t.cfgPath)
@@ -67,6 +79,10 @@ func (t *AppTagger) EventCallback() {
}
}
// Register initializes the AppTagger by loading application patterns from embedded files.
// It also sets up a file watch on ./var/tagger/apps if it exists, allowing for
// dynamic configuration updates without restarting the application.
// Returns an error if the embedded application files cannot be read.
func (t *AppTagger) Register() error {
t.cfgPath = "./var/tagger/apps"
t.tagType = "app"
@@ -96,6 +112,11 @@ func (t *AppTagger) Register() error {
return nil
}
// Match attempts to detect the application used by a job by analyzing its job script.
// It fetches the job metadata, extracts the job script, and matches it against
// all configured application patterns using regular expressions.
// If a match is found, the corresponding application tag is added to the job.
// Only the first matching application is tagged.
func (t *AppTagger) Match(job *schema.Job) {
r := repository.GetJobRepository()
metadata, err := r.FetchMetadata(job)

View File

@@ -2,6 +2,11 @@
// All rights reserved. This file is part of cc-backend.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
// Package tagger provides automatic job tagging functionality for cc-backend.
// It supports detecting applications and classifying jobs based on configurable rules.
// Tags are automatically applied when jobs start or stop, or can be applied retroactively
// to existing jobs using RunTaggers.
package tagger
import (
@@ -12,8 +17,15 @@ import (
"github.com/ClusterCockpit/cc-lib/schema"
)
// Tagger is the interface that must be implemented by all tagging components.
// Taggers can be registered at job start or stop events to automatically apply tags.
type Tagger interface {
// Register initializes the tagger and loads any required configuration.
// It should be called once before the tagger is used.
Register() error
// Match evaluates the tagger's rules against a job and applies appropriate tags.
// It is called for each job that needs to be evaluated.
Match(job *schema.Job)
}
@@ -22,8 +34,12 @@ var (
jobTagger *JobTagger
)
// JobTagger coordinates multiple taggers that run at different job lifecycle events.
// It maintains separate lists of taggers that run when jobs start and when they stop.
type JobTagger struct {
// startTaggers are applied when a job starts (e.g., application detection)
startTaggers []Tagger
// stopTaggers are applied when a job completes (e.g., job classification)
stopTaggers []Tagger
}
@@ -42,6 +58,9 @@ func newTagger() {
}
}
// Init initializes the job tagger system and registers it with the job repository.
// This function is safe to call multiple times; initialization only occurs once.
// It should be called during application startup.
func Init() {
initOnce.Do(func() {
newTagger()
@@ -49,22 +68,30 @@ func Init() {
})
}
// JobStartCallback is called when a job starts.
// It runs all registered start taggers (e.g., application detection) on the job.
func (jt *JobTagger) JobStartCallback(job *schema.Job) {
for _, tagger := range jt.startTaggers {
tagger.Match(job)
}
}
// JobStopCallback is called when a job completes.
// It runs all registered stop taggers (e.g., job classification) on the job.
func (jt *JobTagger) JobStopCallback(job *schema.Job) {
for _, tagger := range jt.stopTaggers {
tagger.Match(job)
}
}
// RunTaggers applies all configured taggers to all existing jobs in the repository.
// This is useful for retroactively applying tags to jobs that were created before
// the tagger system was initialized or when new tagging rules are added.
// It fetches all jobs and runs both start and stop taggers on each one.
func RunTaggers() error {
newTagger()
r := repository.GetJobRepository()
jl, err := r.GetJobList()
jl, err := r.GetJobList(0, 0) // 0 limit means get all jobs (no pagination)
if err != nil {
cclog.Errorf("Error while getting job list %s", err)
return err
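A minimal usage sketch: retroactive tagging is triggered once from application code after the repository and archive have been initialized. The call site shown here is illustrative, not part of this diff:

if err := tagger.RunTaggers(); err != nil {
	cclog.Errorf("retroactive tagging failed: %v", err)
}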

View File

@@ -12,12 +12,40 @@ var configSchema = `
"kind": {
"description": "Backend type for job-archive",
"type": "string",
"enum": ["file", "s3"]
"enum": ["file", "s3", "sqlite"]
},
"path": {
"description": "Path to job archive for file backend",
"type": "string"
},
"dbPath": {
"description": "Path to SQLite database file for sqlite backend",
"type": "string"
},
"endpoint": {
"description": "S3 endpoint URL (for S3-compatible services like MinIO)",
"type": "string"
},
"accessKey": {
"description": "S3 access key ID",
"type": "string"
},
"secretKey": {
"description": "S3 secret access key",
"type": "string"
},
"bucket": {
"description": "S3 bucket name for job archive",
"type": "string"
},
"region": {
"description": "AWS region for S3 bucket",
"type": "string"
},
"usePathStyle": {
"description": "Use path-style S3 URLs (required for MinIO and some S3-compatible services)",
"type": "boolean"
},
"compression": {
"description": "Setup automatic compression for jobs older than number of days",
"type": "integer"

View File

@@ -3,7 +3,79 @@
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
// Package archive implements the job archive interface and various backend implementations
// Package archive implements the job archive interface and various backend implementations.
//
// The archive package provides a pluggable storage backend system for job metadata and performance data.
// It supports three backend types:
//
// - file: Filesystem-based storage with hierarchical directory structure
// - s3: AWS S3 and S3-compatible object storage (MinIO, localstack)
// - sqlite: Single-file SQLite database with BLOB storage
//
// # Backend Selection
//
// Choose a backend based on your deployment requirements:
//
// - File: Best for single-server deployments with local fast storage
// - S3: Best for distributed deployments requiring redundancy and multi-instance access
// - SQLite: Best for portable archives with SQL query capability and transactional integrity
//
// # Configuration
//
// The archive backend is configured via JSON in the application config file:
//
// {
// "archive": {
// "kind": "file", // or "s3" or "sqlite"
// "path": "/var/lib/archive" // for file backend
// }
// }
//
// For S3 backend:
//
// {
// "archive": {
// "kind": "s3",
// "bucket": "my-job-archive",
// "region": "us-east-1",
// "accessKey": "...",
// "secretKey": "..."
// }
// }
//
// For SQLite backend:
//
// {
// "archive": {
// "kind": "sqlite",
// "dbPath": "/var/lib/archive.db"
// }
// }
//
// # Usage
//
// The package is initialized once at application startup:
//
// err := archive.Init(rawConfig, false)
// if err != nil {
// log.Fatal(err)
// }
//
// After initialization, use the global functions to interact with the archive:
//
// // Check if a job exists
// exists := archive.GetHandle().Exists(job)
//
// // Load job metadata
// jobMeta, err := archive.GetHandle().LoadJobMeta(job)
//
// // Store job metadata
// err = archive.GetHandle().StoreJobMeta(job)
//
// # Thread Safety
//
// All backend implementations are safe for concurrent use. The package uses
// internal locking for operations that modify shared state.
package archive
import (
@@ -18,45 +90,88 @@ import (
"github.com/ClusterCockpit/cc-lib/schema"
)
const Version uint64 = 2
// Version is the current archive schema version.
// The archive backend must match this version for compatibility.
const Version uint64 = 3
// ArchiveBackend defines the interface that all archive storage backends must implement.
// Implementations include FsArchive (filesystem), S3Archive (object storage), and SqliteArchive (database).
//
// All methods are safe for concurrent use unless otherwise noted.
type ArchiveBackend interface {
// Init initializes the archive backend with the provided configuration.
// Returns the archive version found in the backend storage.
// Returns an error if the version is incompatible or initialization fails.
Init(rawConfig json.RawMessage) (uint64, error)
// Info prints archive statistics to stdout, including job counts,
// date ranges, and storage sizes per cluster.
Info()
// Exists checks if a job with the given ID, cluster, and start time
// exists in the archive.
Exists(job *schema.Job) bool
// LoadJobMeta loads job metadata from the archive.
// Returns the complete Job structure including resources, tags, and statistics.
LoadJobMeta(job *schema.Job) (*schema.Job, error)
// LoadJobData loads the complete time-series performance data for a job.
// Returns a map of metric names to their scoped data (node, socket, core, etc.).
LoadJobData(job *schema.Job) (schema.JobData, error)
// LoadJobStats loads pre-computed statistics from the job data.
// Returns scoped statistics (min, max, avg) for all metrics.
LoadJobStats(job *schema.Job) (schema.ScopedJobStats, error)
// LoadClusterCfg loads the cluster configuration.
// Returns the cluster topology, metrics, and hardware specifications.
LoadClusterCfg(name string) (*schema.Cluster, error)
// StoreJobMeta stores job metadata to the archive.
// Overwrites existing metadata for the same job ID, cluster, and start time.
StoreJobMeta(jobMeta *schema.Job) error
// ImportJob stores both job metadata and performance data to the archive.
// This is typically used during initial job archiving.
ImportJob(jobMeta *schema.Job, jobData *schema.JobData) error
// GetClusters returns a list of all cluster names found in the archive.
GetClusters() []string
// CleanUp removes the specified jobs from the archive.
// Used by retention policies to delete old jobs.
CleanUp(jobs []*schema.Job)
// Move relocates jobs to a different path within the archive.
// The implementation depends on the backend type.
Move(jobs []*schema.Job, path string)
// Clean removes jobs outside the specified time range.
// Jobs with start_time < before OR start_time > after are deleted.
// Set after=0 to only use the before parameter.
Clean(before int64, after int64)
// Compress compresses job data files to save storage space.
// For filesystem and SQLite backends, this applies gzip compression.
// For S3, this compresses and replaces objects.
Compress(jobs []*schema.Job)
// CompressLast returns the timestamp of the last compression run
// and updates it to the provided starttime.
CompressLast(starttime int64) int64
// Iter returns a channel that yields all jobs in the archive.
// If loadMetricData is true, includes performance data; otherwise only metadata.
// The channel is closed when iteration completes.
Iter(loadMetricData bool) <-chan JobContainer
}
// JobContainer combines job metadata and optional performance data.
// Used by Iter() to yield jobs during archive iteration.
type JobContainer struct {
Meta *schema.Job
Data *schema.JobData
Meta *schema.Job // Job metadata (always present)
Data *schema.JobData // Performance data (nil if not loaded)
}
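As a usage sketch of Iter and JobContainer (assuming an already initialized backend; not code from this diff):

// Count archived jobs per cluster without loading metric data.
counts := make(map[string]int)
for c := range archive.GetHandle().Iter(false) {
	counts[c.Meta.Cluster]++
}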
var (
@@ -67,6 +182,15 @@ var (
mutex sync.Mutex
)
// Init initializes the archive backend with the provided configuration.
// Must be called once at application startup before using any archive functions.
//
// Parameters:
// - rawConfig: JSON configuration for the archive backend
// - disableArchive: if true, disables archive functionality
//
// The configuration determines which backend is used (file, s3, or sqlite).
// Returns an error if initialization fails or version is incompatible.
func Init(rawConfig json.RawMessage, disableArchive bool) error {
var err error
@@ -86,8 +210,10 @@ func Init(rawConfig json.RawMessage, disableArchive bool) error {
switch cfg.Kind {
case "file":
ar = &FsArchive{}
// case "s3":
// ar = &S3Archive{}
case "s3":
ar = &S3Archive{}
case "sqlite":
ar = &SqliteArchive{}
default:
err = fmt.Errorf("ARCHIVE/ARCHIVE > unknown archive backend '%s'", cfg.Kind)
}
@@ -106,10 +232,59 @@ func Init(rawConfig json.RawMessage, disableArchive bool) error {
return err
}
// GetHandle returns the initialized archive backend instance.
// Must be called after Init().
func GetHandle() ArchiveBackend {
return ar
}
// InitBackend creates and initializes a new archive backend instance
// without affecting the global singleton. This is useful for archive migration
// tools that need to work with multiple archive backends simultaneously.
//
// Parameters:
// - rawConfig: JSON configuration for the archive backend
//
// Returns the initialized backend instance or an error if initialization fails.
// Does not validate the configuration against the schema.
func InitBackend(rawConfig json.RawMessage) (ArchiveBackend, error) {
var cfg struct {
Kind string `json:"kind"`
}
if err := json.Unmarshal(rawConfig, &cfg); err != nil {
cclog.Warn("Error while unmarshaling raw config json")
return nil, err
}
var backend ArchiveBackend
switch cfg.Kind {
case "file":
backend = &FsArchive{}
case "s3":
backend = &S3Archive{}
case "sqlite":
backend = &SqliteArchive{}
default:
return nil, fmt.Errorf("ARCHIVE/ARCHIVE > unknown archive backend '%s'", cfg.Kind)
}
_, err := backend.Init(rawConfig)
if err != nil {
return nil, fmt.Errorf("error while initializing archive backend: %w", err)
}
return backend, nil
}
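The intended migration use case can be sketched as follows, inside some migration function; the configuration values and error handling are illustrative only:

// Copy all jobs from a file archive into an S3 archive.
src, err := archive.InitBackend(json.RawMessage(`{"kind":"file","path":"./var/job-archive"}`))
if err != nil {
	return err
}
dst, err := archive.InitBackend(json.RawMessage(`{"kind":"s3","bucket":"my-job-archive","region":"us-east-1"}`))
if err != nil {
	return err
}
for c := range src.Iter(true) {
	if c.Data == nil {
		continue // metric data could not be loaded; skip or handle separately
	}
	if err := dst.ImportJob(c.Meta, c.Data); err != nil {
		return err
	}
}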
// LoadAveragesFromArchive loads average metric values for a job from the archive.
// This is a helper function that extracts average values from job statistics.
//
// Parameters:
// - job: Job to load averages for
// - metrics: List of metric names to retrieve
// - data: 2D slice where averages will be appended (one row per metric)
func LoadAveragesFromArchive(
job *schema.Job,
metrics []string,
@@ -132,6 +307,8 @@ func LoadAveragesFromArchive(
return nil
}
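A short usage sketch, assuming the data parameter is a [][]schema.Float with one row per metric as at the current call sites in cc-backend:

metrics := []string{"cpu_load", "mem_used"}
data := make([][]schema.Float, len(metrics))
if err := archive.LoadAveragesFromArchive(job, metrics, data); err != nil {
	return err
}
// data[0] now ends with the cpu_load average for this job, data[1] with mem_used.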
// LoadStatsFromArchive loads metric statistics for a job from the archive.
// Returns a map of metric names to their statistics (min, max, avg).
func LoadStatsFromArchive(
job *schema.Job,
metrics []string,
@@ -160,6 +337,8 @@ func LoadStatsFromArchive(
return data, nil
}
// LoadScopedStatsFromArchive loads scoped statistics for a job from the archive.
// Returns statistics organized by metric scope (node, socket, core, etc.).
func LoadScopedStatsFromArchive(
job *schema.Job,
metrics []string,
@@ -174,6 +353,8 @@ func LoadScopedStatsFromArchive(
return data, nil
}
// GetStatistics returns all metric statistics for a job.
// Returns a map of metric names to their job-level statistics.
func GetStatistics(job *schema.Job) (map[string]schema.JobStatistics, error) {
metaFile, err := ar.LoadJobMeta(job)
if err != nil {
@@ -184,8 +365,10 @@ func GetStatistics(job *schema.Job) (map[string]schema.JobStatistics, error) {
return metaFile.Statistics, nil
}
// UpdateMetadata checks if the job is archived, find its `meta.json` file and override the Metadata
// in that JSON file. If the job is not archived, nothing is done.
// UpdateMetadata updates the metadata map for an archived job.
// If the job is still running or archiving is disabled, this is a no-op.
//
// This function is safe for concurrent use (protected by mutex).
func UpdateMetadata(job *schema.Job, metadata map[string]string) error {
mutex.Lock()
defer mutex.Unlock()
@@ -205,8 +388,10 @@ func UpdateMetadata(job *schema.Job, metadata map[string]string) error {
return ar.StoreJobMeta(jobMeta)
}
// UpdateTags checks if the job is archived, find its `meta.json` file and override the tags list
// in that JSON file. If the job is not archived, nothing is done.
// UpdateTags updates the tag list for an archived job.
// If the job is still running or archiving is disabled, this is a no-op.
//
// This function is safe for concurrent use (protected by mutex).
func UpdateTags(job *schema.Job, tags []*schema.Tag) error {
mutex.Lock()
defer mutex.Unlock()

View File

@@ -28,20 +28,26 @@ import (
"github.com/santhosh-tekuri/jsonschema/v5"
)
// FsArchiveConfig holds the configuration for the filesystem archive backend.
type FsArchiveConfig struct {
Path string `json:"path"`
Path string `json:"path"` // Root directory path for the archive
}
// FsArchive implements ArchiveBackend using a hierarchical filesystem structure.
// Jobs are stored in directories organized by cluster, job ID, and start time.
//
// Directory structure: <path>/<cluster>/<jobid/1000>/<jobid%1000>/<starttime>/
type FsArchive struct {
path string
clusters []string
path string // Root path of the archive
clusters []string // List of discovered cluster names
}
// clusterInfo holds statistics about jobs in a cluster.
type clusterInfo struct {
numJobs int
dateFirst int64
dateLast int64
diskSize float64
numJobs int // Total number of jobs
dateFirst int64 // Unix timestamp of oldest job
dateLast int64 // Unix timestamp of newest job
diskSize float64 // Total disk usage in MB
}
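As a worked example of this layout: a job with ID 1403244 on cluster emmy started at Unix time 1608923076 is stored under <path>/emmy/1403/244/1608923076/, since 1403244/1000 = 1403 and 1403244%1000 = 244. The same split is exercised by the S3 key tests further below.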
func getDirectory(

View File

@@ -47,7 +47,7 @@ func TestInit(t *testing.T) {
if fsa.path != "testdata/archive" {
t.Fail()
}
if version != 2 {
if version != 3 {
t.Fail()
}
if len(fsa.clusters) != 3 || fsa.clusters[1] != "emmy" {

844
pkg/archive/s3Backend.go Normal file
View File

@@ -0,0 +1,844 @@
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved. This file is part of cc-backend.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package archive
import (
"bytes"
"compress/gzip"
"context"
"encoding/json"
"fmt"
"io"
"math"
"os"
"strconv"
"strings"
"text/tabwriter"
"time"
"github.com/ClusterCockpit/cc-backend/internal/config"
cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
"github.com/ClusterCockpit/cc-lib/schema"
"github.com/ClusterCockpit/cc-lib/util"
"github.com/aws/aws-sdk-go-v2/aws"
awsconfig "github.com/aws/aws-sdk-go-v2/config"
"github.com/aws/aws-sdk-go-v2/credentials"
"github.com/aws/aws-sdk-go-v2/service/s3"
)
// S3ArchiveConfig holds the configuration for the S3 archive backend.
type S3ArchiveConfig struct {
Endpoint string `json:"endpoint"` // S3 endpoint URL (optional, for MinIO/localstack)
AccessKey string `json:"accessKey"` // AWS access key ID
SecretKey string `json:"secretKey"` // AWS secret access key
Bucket string `json:"bucket"` // S3 bucket name
Region string `json:"region"` // AWS region
UsePathStyle bool `json:"usePathStyle"` // Use path-style URLs (required for MinIO)
}
// S3Archive implements ArchiveBackend using AWS S3 or S3-compatible object storage.
// Jobs are stored as objects with keys mirroring the filesystem structure.
//
// Object key structure: <cluster>/<jobid/1000>/<jobid%1000>/<starttime>/meta.json
type S3Archive struct {
client *s3.Client // AWS S3 client
bucket string // S3 bucket name
clusters []string // List of discovered cluster names
}
// getS3Key generates the S3 object key for a job file
func getS3Key(job *schema.Job, file string) string {
lvl1 := fmt.Sprintf("%d", job.JobID/1000)
lvl2 := fmt.Sprintf("%03d", job.JobID%1000)
startTime := strconv.FormatInt(job.StartTime, 10)
return fmt.Sprintf("%s/%s/%s/%s/%s", job.Cluster, lvl1, lvl2, startTime, file)
}
// getS3Directory generates the S3 key prefix for a job directory
func getS3Directory(job *schema.Job) string {
lvl1 := fmt.Sprintf("%d", job.JobID/1000)
lvl2 := fmt.Sprintf("%03d", job.JobID%1000)
startTime := strconv.FormatInt(job.StartTime, 10)
return fmt.Sprintf("%s/%s/%s/%s/", job.Cluster, lvl1, lvl2, startTime)
}
func (s3a *S3Archive) Init(rawConfig json.RawMessage) (uint64, error) {
var cfg S3ArchiveConfig
if err := json.Unmarshal(rawConfig, &cfg); err != nil {
cclog.Warnf("S3Archive Init() > Unmarshal error: %#v", err)
return 0, err
}
if cfg.Bucket == "" {
err := fmt.Errorf("S3Archive Init(): empty bucket name")
cclog.Errorf("S3Archive Init() > config error: %v", err)
return 0, err
}
if cfg.Region == "" {
cfg.Region = "us-east-1" // Default region
}
ctx := context.Background()
// Create custom AWS config
var awsCfg aws.Config
var err error
if cfg.AccessKey != "" && cfg.SecretKey != "" {
// Use static credentials
customResolver := aws.EndpointResolverWithOptionsFunc(func(service, region string, options ...interface{}) (aws.Endpoint, error) {
if cfg.Endpoint != "" {
return aws.Endpoint{
URL: cfg.Endpoint,
HostnameImmutable: cfg.UsePathStyle,
Source: aws.EndpointSourceCustom,
}, nil
}
return aws.Endpoint{}, &aws.EndpointNotFoundError{}
})
awsCfg, err = awsconfig.LoadDefaultConfig(ctx,
awsconfig.WithRegion(cfg.Region),
awsconfig.WithCredentialsProvider(credentials.NewStaticCredentialsProvider(
cfg.AccessKey,
cfg.SecretKey,
"",
)),
awsconfig.WithEndpointResolverWithOptions(customResolver),
)
} else {
// Use default credential chain
awsCfg, err = awsconfig.LoadDefaultConfig(ctx,
awsconfig.WithRegion(cfg.Region),
)
}
if err != nil {
cclog.Errorf("S3Archive Init() > failed to load AWS config: %v", err)
return 0, err
}
// Create S3 client with path-style option
s3a.client = s3.NewFromConfig(awsCfg, func(o *s3.Options) {
o.UsePathStyle = cfg.UsePathStyle
})
s3a.bucket = cfg.Bucket
// Check if bucket exists and is accessible
_, err = s3a.client.HeadBucket(ctx, &s3.HeadBucketInput{
Bucket: aws.String(s3a.bucket),
})
if err != nil {
cclog.Errorf("S3Archive Init() > bucket access error: %v", err)
return 0, fmt.Errorf("cannot access S3 bucket '%s': %w", s3a.bucket, err)
}
// Read version.txt from S3
versionKey := "version.txt"
result, err := s3a.client.GetObject(ctx, &s3.GetObjectInput{
Bucket: aws.String(s3a.bucket),
Key: aws.String(versionKey),
})
if err != nil {
cclog.Warnf("S3Archive Init() > cannot read version.txt: %v", err)
return 0, err
}
defer result.Body.Close()
versionBytes, err := io.ReadAll(result.Body)
if err != nil {
cclog.Errorf("S3Archive Init() > failed to read version.txt: %v", err)
return 0, err
}
version, err := strconv.ParseUint(strings.TrimSuffix(string(versionBytes), "\n"), 10, 64)
if err != nil {
cclog.Errorf("S3Archive Init() > version parse error: %v", err)
return 0, err
}
if version != Version {
return version, fmt.Errorf("unsupported version %d, need %d", version, Version)
}
// Discover clusters by listing top-level prefixes
s3a.clusters = []string{}
paginator := s3.NewListObjectsV2Paginator(s3a.client, &s3.ListObjectsV2Input{
Bucket: aws.String(s3a.bucket),
Delimiter: aws.String("/"),
})
for paginator.HasMorePages() {
page, err := paginator.NextPage(ctx)
if err != nil {
cclog.Errorf("S3Archive Init() > failed to list clusters: %v", err)
return 0, err
}
for _, prefix := range page.CommonPrefixes {
if prefix.Prefix != nil {
clusterName := strings.TrimSuffix(*prefix.Prefix, "/")
// Filter out non-cluster entries
if clusterName != "" && clusterName != "version.txt" {
s3a.clusters = append(s3a.clusters, clusterName)
}
}
}
}
cclog.Infof("S3Archive initialized with bucket '%s', found %d clusters", s3a.bucket, len(s3a.clusters))
return version, nil
}
func (s3a *S3Archive) Info() {
ctx := context.Background()
fmt.Printf("S3 Job archive bucket: %s\n", s3a.bucket)
ci := make(map[string]*clusterInfo)
for _, cluster := range s3a.clusters {
ci[cluster] = &clusterInfo{dateFirst: time.Now().Unix()}
// List all jobs for this cluster
prefix := cluster + "/"
paginator := s3.NewListObjectsV2Paginator(s3a.client, &s3.ListObjectsV2Input{
Bucket: aws.String(s3a.bucket),
Prefix: aws.String(prefix),
})
for paginator.HasMorePages() {
page, err := paginator.NextPage(ctx)
if err != nil {
cclog.Fatalf("S3Archive Info() > failed to list objects: %s", err.Error())
}
for _, obj := range page.Contents {
if obj.Key != nil && strings.HasSuffix(*obj.Key, "/meta.json") {
ci[cluster].numJobs++
// Extract starttime from key: cluster/lvl1/lvl2/starttime/meta.json
parts := strings.Split(*obj.Key, "/")
if len(parts) >= 4 {
startTime, err := strconv.ParseInt(parts[3], 10, 64)
if err == nil {
ci[cluster].dateFirst = util.Min(ci[cluster].dateFirst, startTime)
ci[cluster].dateLast = util.Max(ci[cluster].dateLast, startTime)
}
}
if obj.Size != nil {
ci[cluster].diskSize += float64(*obj.Size) / (1024 * 1024) // Convert to MB
}
}
}
}
}
cit := clusterInfo{dateFirst: time.Now().Unix()}
w := tabwriter.NewWriter(os.Stdout, 0, 0, 2, ' ', tabwriter.Debug)
fmt.Fprintln(w, "cluster\t#jobs\tfrom\tto\tsize (MB)")
for cluster, clusterInfo := range ci {
fmt.Fprintf(w, "%s\t%d\t%s\t%s\t%.2f\n", cluster,
clusterInfo.numJobs,
time.Unix(clusterInfo.dateFirst, 0),
time.Unix(clusterInfo.dateLast, 0),
clusterInfo.diskSize)
cit.numJobs += clusterInfo.numJobs
cit.dateFirst = util.Min(cit.dateFirst, clusterInfo.dateFirst)
cit.dateLast = util.Max(cit.dateLast, clusterInfo.dateLast)
cit.diskSize += clusterInfo.diskSize
}
fmt.Fprintf(w, "TOTAL\t%d\t%s\t%s\t%.2f\n",
cit.numJobs, time.Unix(cit.dateFirst, 0), time.Unix(cit.dateLast, 0), cit.diskSize)
w.Flush()
}
func (s3a *S3Archive) Exists(job *schema.Job) bool {
ctx := context.Background()
key := getS3Key(job, "meta.json")
_, err := s3a.client.HeadObject(ctx, &s3.HeadObjectInput{
Bucket: aws.String(s3a.bucket),
Key: aws.String(key),
})
return err == nil
}
func (s3a *S3Archive) LoadJobMeta(job *schema.Job) (*schema.Job, error) {
ctx := context.Background()
key := getS3Key(job, "meta.json")
result, err := s3a.client.GetObject(ctx, &s3.GetObjectInput{
Bucket: aws.String(s3a.bucket),
Key: aws.String(key),
})
if err != nil {
cclog.Errorf("S3Archive LoadJobMeta() > GetObject error: %v", err)
return nil, err
}
defer result.Body.Close()
b, err := io.ReadAll(result.Body)
if err != nil {
cclog.Errorf("S3Archive LoadJobMeta() > read error: %v", err)
return nil, err
}
if config.Keys.Validate {
if err := schema.Validate(schema.Meta, bytes.NewReader(b)); err != nil {
return nil, fmt.Errorf("validate job meta: %v", err)
}
}
return DecodeJobMeta(bytes.NewReader(b))
}
func (s3a *S3Archive) LoadJobData(job *schema.Job) (schema.JobData, error) {
ctx := context.Background()
// Try compressed file first
keyGz := getS3Key(job, "data.json.gz")
result, err := s3a.client.GetObject(ctx, &s3.GetObjectInput{
Bucket: aws.String(s3a.bucket),
Key: aws.String(keyGz),
})
if err != nil {
// Try uncompressed file
key := getS3Key(job, "data.json")
result, err = s3a.client.GetObject(ctx, &s3.GetObjectInput{
Bucket: aws.String(s3a.bucket),
Key: aws.String(key),
})
if err != nil {
cclog.Errorf("S3Archive LoadJobData() > GetObject error: %v", err)
return nil, err
}
defer result.Body.Close()
if config.Keys.Validate {
b, _ := io.ReadAll(result.Body)
if err := schema.Validate(schema.Data, bytes.NewReader(b)); err != nil {
return schema.JobData{}, fmt.Errorf("validate job data: %v", err)
}
return DecodeJobData(bytes.NewReader(b), key)
}
return DecodeJobData(result.Body, key)
}
defer result.Body.Close()
// Decompress
r, err := gzip.NewReader(result.Body)
if err != nil {
cclog.Errorf("S3Archive LoadJobData() > gzip error: %v", err)
return nil, err
}
defer r.Close()
if config.Keys.Validate {
b, _ := io.ReadAll(r)
if err := schema.Validate(schema.Data, bytes.NewReader(b)); err != nil {
return schema.JobData{}, fmt.Errorf("validate job data: %v", err)
}
return DecodeJobData(bytes.NewReader(b), keyGz)
}
return DecodeJobData(r, keyGz)
}
func (s3a *S3Archive) LoadJobStats(job *schema.Job) (schema.ScopedJobStats, error) {
ctx := context.Background()
// Try compressed file first
keyGz := getS3Key(job, "data.json.gz")
result, err := s3a.client.GetObject(ctx, &s3.GetObjectInput{
Bucket: aws.String(s3a.bucket),
Key: aws.String(keyGz),
})
if err != nil {
// Try uncompressed file
key := getS3Key(job, "data.json")
result, err = s3a.client.GetObject(ctx, &s3.GetObjectInput{
Bucket: aws.String(s3a.bucket),
Key: aws.String(key),
})
if err != nil {
cclog.Errorf("S3Archive LoadJobStats() > GetObject error: %v", err)
return nil, err
}
defer result.Body.Close()
if config.Keys.Validate {
b, _ := io.ReadAll(result.Body)
if err := schema.Validate(schema.Data, bytes.NewReader(b)); err != nil {
return nil, fmt.Errorf("validate job data: %v", err)
}
return DecodeJobStats(bytes.NewReader(b), key)
}
return DecodeJobStats(result.Body, key)
}
defer result.Body.Close()
// Decompress
r, err := gzip.NewReader(result.Body)
if err != nil {
cclog.Errorf("S3Archive LoadJobStats() > gzip error: %v", err)
return nil, err
}
defer r.Close()
if config.Keys.Validate {
b, _ := io.ReadAll(r)
if err := schema.Validate(schema.Data, bytes.NewReader(b)); err != nil {
return nil, fmt.Errorf("validate job data: %v", err)
}
return DecodeJobStats(bytes.NewReader(b), keyGz)
}
return DecodeJobStats(r, keyGz)
}
func (s3a *S3Archive) LoadClusterCfg(name string) (*schema.Cluster, error) {
ctx := context.Background()
key := fmt.Sprintf("%s/cluster.json", name)
result, err := s3a.client.GetObject(ctx, &s3.GetObjectInput{
Bucket: aws.String(s3a.bucket),
Key: aws.String(key),
})
if err != nil {
cclog.Errorf("S3Archive LoadClusterCfg() > GetObject error: %v", err)
return nil, err
}
defer result.Body.Close()
b, err := io.ReadAll(result.Body)
if err != nil {
cclog.Errorf("S3Archive LoadClusterCfg() > read error: %v", err)
return nil, err
}
if err := schema.Validate(schema.ClusterCfg, bytes.NewReader(b)); err != nil {
cclog.Warnf("Validate cluster config: %v\n", err)
return &schema.Cluster{}, fmt.Errorf("validate cluster config: %v", err)
}
return DecodeCluster(bytes.NewReader(b))
}
func (s3a *S3Archive) StoreJobMeta(job *schema.Job) error {
ctx := context.Background()
key := getS3Key(job, "meta.json")
var buf bytes.Buffer
if err := EncodeJobMeta(&buf, job); err != nil {
cclog.Error("S3Archive StoreJobMeta() > encoding error")
return err
}
_, err := s3a.client.PutObject(ctx, &s3.PutObjectInput{
Bucket: aws.String(s3a.bucket),
Key: aws.String(key),
Body: bytes.NewReader(buf.Bytes()),
})
if err != nil {
cclog.Errorf("S3Archive StoreJobMeta() > PutObject error: %v", err)
return err
}
return nil
}
func (s3a *S3Archive) ImportJob(jobMeta *schema.Job, jobData *schema.JobData) error {
ctx := context.Background()
// Upload meta.json
metaKey := getS3Key(jobMeta, "meta.json")
var metaBuf bytes.Buffer
if err := EncodeJobMeta(&metaBuf, jobMeta); err != nil {
cclog.Error("S3Archive ImportJob() > encoding meta error")
return err
}
_, err := s3a.client.PutObject(ctx, &s3.PutObjectInput{
Bucket: aws.String(s3a.bucket),
Key: aws.String(metaKey),
Body: bytes.NewReader(metaBuf.Bytes()),
})
if err != nil {
cclog.Errorf("S3Archive ImportJob() > PutObject meta error: %v", err)
return err
}
// Upload data.json
dataKey := getS3Key(jobMeta, "data.json")
var dataBuf bytes.Buffer
if err := EncodeJobData(&dataBuf, jobData); err != nil {
cclog.Error("S3Archive ImportJob() > encoding data error")
return err
}
_, err = s3a.client.PutObject(ctx, &s3.PutObjectInput{
Bucket: aws.String(s3a.bucket),
Key: aws.String(dataKey),
Body: bytes.NewReader(dataBuf.Bytes()),
})
if err != nil {
cclog.Errorf("S3Archive ImportJob() > PutObject data error: %v", err)
return err
}
return nil
}
func (s3a *S3Archive) GetClusters() []string {
return s3a.clusters
}
func (s3a *S3Archive) CleanUp(jobs []*schema.Job) {
ctx := context.Background()
start := time.Now()
for _, job := range jobs {
if job == nil {
cclog.Errorf("S3Archive CleanUp() error: job is nil")
continue
}
// Delete all files in the job directory
prefix := getS3Directory(job)
paginator := s3.NewListObjectsV2Paginator(s3a.client, &s3.ListObjectsV2Input{
Bucket: aws.String(s3a.bucket),
Prefix: aws.String(prefix),
})
for paginator.HasMorePages() {
page, err := paginator.NextPage(ctx)
if err != nil {
cclog.Errorf("S3Archive CleanUp() > list error: %v", err)
continue
}
for _, obj := range page.Contents {
if obj.Key != nil {
_, err := s3a.client.DeleteObject(ctx, &s3.DeleteObjectInput{
Bucket: aws.String(s3a.bucket),
Key: obj.Key,
})
if err != nil {
cclog.Errorf("S3Archive CleanUp() > delete error: %v", err)
}
}
}
}
}
cclog.Infof("Retention Service - Remove %d jobs from S3 in %s", len(jobs), time.Since(start))
}
func (s3a *S3Archive) Move(jobs []*schema.Job, targetPath string) {
ctx := context.Background()
for _, job := range jobs {
sourcePrefix := getS3Directory(job)
// List all objects in source
paginator := s3.NewListObjectsV2Paginator(s3a.client, &s3.ListObjectsV2Input{
Bucket: aws.String(s3a.bucket),
Prefix: aws.String(sourcePrefix),
})
for paginator.HasMorePages() {
page, err := paginator.NextPage(ctx)
if err != nil {
cclog.Errorf("S3Archive Move() > list error: %v", err)
continue
}
for _, obj := range page.Contents {
if obj.Key == nil {
continue
}
// Compute target key by replacing prefix
targetKey := strings.Replace(*obj.Key, sourcePrefix, targetPath+"/", 1)
// Copy object
_, err := s3a.client.CopyObject(ctx, &s3.CopyObjectInput{
Bucket: aws.String(s3a.bucket),
CopySource: aws.String(fmt.Sprintf("%s/%s", s3a.bucket, *obj.Key)),
Key: aws.String(targetKey),
})
if err != nil {
cclog.Errorf("S3Archive Move() > copy error: %v", err)
continue
}
// Delete source object
_, err = s3a.client.DeleteObject(ctx, &s3.DeleteObjectInput{
Bucket: aws.String(s3a.bucket),
Key: obj.Key,
})
if err != nil {
cclog.Errorf("S3Archive Move() > delete error: %v", err)
}
}
}
}
}
func (s3a *S3Archive) Clean(before int64, after int64) {
ctx := context.Background()
if after == 0 {
after = math.MaxInt64
}
for _, cluster := range s3a.clusters {
prefix := cluster + "/"
paginator := s3.NewListObjectsV2Paginator(s3a.client, &s3.ListObjectsV2Input{
Bucket: aws.String(s3a.bucket),
Prefix: aws.String(prefix),
})
for paginator.HasMorePages() {
page, err := paginator.NextPage(ctx)
if err != nil {
cclog.Fatalf("S3Archive Clean() > list error: %s", err.Error())
}
for _, obj := range page.Contents {
if obj.Key == nil || !strings.HasSuffix(*obj.Key, "/meta.json") {
continue
}
// Extract starttime from key: cluster/lvl1/lvl2/starttime/meta.json
parts := strings.Split(*obj.Key, "/")
if len(parts) < 4 {
continue
}
startTime, err := strconv.ParseInt(parts[3], 10, 64)
if err != nil {
cclog.Fatalf("S3Archive Clean() > cannot parse starttime: %s", err.Error())
}
if startTime < before || startTime > after {
// Delete entire job directory
jobPrefix := strings.Join(parts[:4], "/") + "/"
jobPaginator := s3.NewListObjectsV2Paginator(s3a.client, &s3.ListObjectsV2Input{
Bucket: aws.String(s3a.bucket),
Prefix: aws.String(jobPrefix),
})
for jobPaginator.HasMorePages() {
jobPage, err := jobPaginator.NextPage(ctx)
if err != nil {
cclog.Errorf("S3Archive Clean() > list job error: %v", err)
continue
}
for _, jobObj := range jobPage.Contents {
if jobObj.Key != nil {
_, err := s3a.client.DeleteObject(ctx, &s3.DeleteObjectInput{
Bucket: aws.String(s3a.bucket),
Key: jobObj.Key,
})
if err != nil {
cclog.Errorf("S3Archive Clean() > delete error: %v", err)
}
}
}
}
}
}
}
}
}
func (s3a *S3Archive) Compress(jobs []*schema.Job) {
ctx := context.Background()
var cnt int
start := time.Now()
for _, job := range jobs {
dataKey := getS3Key(job, "data.json")
// Check if uncompressed file exists and get its size
headResult, err := s3a.client.HeadObject(ctx, &s3.HeadObjectInput{
Bucket: aws.String(s3a.bucket),
Key: aws.String(dataKey),
})
if err != nil {
continue // File doesn't exist or error
}
if headResult.ContentLength == nil || *headResult.ContentLength < 2000 {
continue // Too small to compress
}
// Download the file
result, err := s3a.client.GetObject(ctx, &s3.GetObjectInput{
Bucket: aws.String(s3a.bucket),
Key: aws.String(dataKey),
})
if err != nil {
cclog.Errorf("S3Archive Compress() > GetObject error: %v", err)
continue
}
data, err := io.ReadAll(result.Body)
result.Body.Close()
if err != nil {
cclog.Errorf("S3Archive Compress() > read error: %v", err)
continue
}
// Compress the data
var compressedBuf bytes.Buffer
gzipWriter := gzip.NewWriter(&compressedBuf)
if _, err := gzipWriter.Write(data); err != nil {
cclog.Errorf("S3Archive Compress() > gzip write error: %v", err)
gzipWriter.Close()
continue
}
gzipWriter.Close()
// Upload compressed file
compressedKey := getS3Key(job, "data.json.gz")
_, err = s3a.client.PutObject(ctx, &s3.PutObjectInput{
Bucket: aws.String(s3a.bucket),
Key: aws.String(compressedKey),
Body: bytes.NewReader(compressedBuf.Bytes()),
})
if err != nil {
cclog.Errorf("S3Archive Compress() > PutObject error: %v", err)
continue
}
// Delete uncompressed file
_, err = s3a.client.DeleteObject(ctx, &s3.DeleteObjectInput{
Bucket: aws.String(s3a.bucket),
Key: aws.String(dataKey),
})
if err != nil {
cclog.Errorf("S3Archive Compress() > delete error: %v", err)
}
cnt++
}
cclog.Infof("Compression Service - %d files in S3 took %s", cnt, time.Since(start))
}
func (s3a *S3Archive) CompressLast(starttime int64) int64 {
ctx := context.Background()
compressKey := "compress.txt"
// Try to read existing compress.txt
result, err := s3a.client.GetObject(ctx, &s3.GetObjectInput{
Bucket: aws.String(s3a.bucket),
Key: aws.String(compressKey),
})
var last int64
if err == nil {
b, _ := io.ReadAll(result.Body)
result.Body.Close()
last, err = strconv.ParseInt(strings.TrimSuffix(string(b), "\n"), 10, 64)
if err != nil {
cclog.Errorf("S3Archive CompressLast() > parse error: %v", err)
last = starttime
}
} else {
last = starttime
}
cclog.Infof("S3Archive CompressLast() - start %d last %d", starttime, last)
// Write new timestamp
newValue := fmt.Sprintf("%d", starttime)
_, err = s3a.client.PutObject(ctx, &s3.PutObjectInput{
Bucket: aws.String(s3a.bucket),
Key: aws.String(compressKey),
Body: strings.NewReader(newValue),
})
if err != nil {
cclog.Errorf("S3Archive CompressLast() > PutObject error: %v", err)
}
return last
}
func (s3a *S3Archive) Iter(loadMetricData bool) <-chan JobContainer {
ch := make(chan JobContainer)
go func() {
ctx := context.Background()
defer close(ch)
for _, cluster := range s3a.clusters {
prefix := cluster + "/"
paginator := s3.NewListObjectsV2Paginator(s3a.client, &s3.ListObjectsV2Input{
Bucket: aws.String(s3a.bucket),
Prefix: aws.String(prefix),
})
for paginator.HasMorePages() {
page, err := paginator.NextPage(ctx)
if err != nil {
cclog.Fatalf("S3Archive Iter() > list error: %s", err.Error())
}
for _, obj := range page.Contents {
if obj.Key == nil || !strings.HasSuffix(*obj.Key, "/meta.json") {
continue
}
// Load job metadata
result, err := s3a.client.GetObject(ctx, &s3.GetObjectInput{
Bucket: aws.String(s3a.bucket),
Key: obj.Key,
})
if err != nil {
cclog.Errorf("S3Archive Iter() > GetObject meta error: %v", err)
continue
}
b, err := io.ReadAll(result.Body)
result.Body.Close()
if err != nil {
cclog.Errorf("S3Archive Iter() > read meta error: %v", err)
continue
}
job, err := DecodeJobMeta(bytes.NewReader(b))
if err != nil {
cclog.Errorf("S3Archive Iter() > decode meta error: %v", err)
continue
}
if loadMetricData {
jobData, err := s3a.LoadJobData(job)
if err != nil {
cclog.Errorf("S3Archive Iter() > load data error: %v", err)
ch <- JobContainer{Meta: job, Data: nil}
} else {
ch <- JobContainer{Meta: job, Data: &jobData}
}
} else {
ch <- JobContainer{Meta: job, Data: nil}
}
}
}
}
}()
return ch
}

View File

@@ -0,0 +1,293 @@
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package archive
import (
"bytes"
"context"
"encoding/json"
"fmt"
"io"
"strings"
"testing"
"github.com/ClusterCockpit/cc-lib/schema"
"github.com/aws/aws-sdk-go-v2/aws"
"github.com/aws/aws-sdk-go-v2/service/s3"
"github.com/aws/aws-sdk-go-v2/service/s3/types"
)
// MockS3Client is a mock implementation of the S3 client for testing
type MockS3Client struct {
objects map[string][]byte
}
func NewMockS3Client() *MockS3Client {
return &MockS3Client{
objects: make(map[string][]byte),
}
}
func (m *MockS3Client) HeadBucket(ctx context.Context, params *s3.HeadBucketInput, optFns ...func(*s3.Options)) (*s3.HeadBucketOutput, error) {
// Always succeed for mock
return &s3.HeadBucketOutput{}, nil
}
func (m *MockS3Client) GetObject(ctx context.Context, params *s3.GetObjectInput, optFns ...func(*s3.Options)) (*s3.GetObjectOutput, error) {
key := aws.ToString(params.Key)
data, exists := m.objects[key]
if !exists {
return nil, fmt.Errorf("NoSuchKey: object not found")
}
contentLength := int64(len(data))
return &s3.GetObjectOutput{
Body: io.NopCloser(bytes.NewReader(data)),
ContentLength: &contentLength,
}, nil
}
func (m *MockS3Client) PutObject(ctx context.Context, params *s3.PutObjectInput, optFns ...func(*s3.Options)) (*s3.PutObjectOutput, error) {
key := aws.ToString(params.Key)
data, err := io.ReadAll(params.Body)
if err != nil {
return nil, err
}
m.objects[key] = data
return &s3.PutObjectOutput{}, nil
}
func (m *MockS3Client) HeadObject(ctx context.Context, params *s3.HeadObjectInput, optFns ...func(*s3.Options)) (*s3.HeadObjectOutput, error) {
key := aws.ToString(params.Key)
data, exists := m.objects[key]
if !exists {
return nil, fmt.Errorf("NotFound")
}
contentLength := int64(len(data))
return &s3.HeadObjectOutput{
ContentLength: &contentLength,
}, nil
}
func (m *MockS3Client) DeleteObject(ctx context.Context, params *s3.DeleteObjectInput, optFns ...func(*s3.Options)) (*s3.DeleteObjectOutput, error) {
key := aws.ToString(params.Key)
delete(m.objects, key)
return &s3.DeleteObjectOutput{}, nil
}
func (m *MockS3Client) CopyObject(ctx context.Context, params *s3.CopyObjectInput, optFns ...func(*s3.Options)) (*s3.CopyObjectOutput, error) {
// Parse source bucket/key from CopySource
source := aws.ToString(params.CopySource)
parts := strings.SplitN(source, "/", 2)
if len(parts) < 2 {
return nil, fmt.Errorf("invalid CopySource")
}
sourceKey := parts[1]
data, exists := m.objects[sourceKey]
if !exists {
return nil, fmt.Errorf("source not found")
}
destKey := aws.ToString(params.Key)
m.objects[destKey] = data
return &s3.CopyObjectOutput{}, nil
}
func (m *MockS3Client) ListObjectsV2(ctx context.Context, params *s3.ListObjectsV2Input, optFns ...func(*s3.Options)) (*s3.ListObjectsV2Output, error) {
prefix := aws.ToString(params.Prefix)
delimiter := aws.ToString(params.Delimiter)
var contents []types.Object
commonPrefixes := make(map[string]bool)
for key, data := range m.objects {
if !strings.HasPrefix(key, prefix) {
continue
}
if delimiter != "" {
// Check if there's a delimiter after the prefix
remainder := strings.TrimPrefix(key, prefix)
delimIdx := strings.Index(remainder, delimiter)
if delimIdx >= 0 {
// This is a "directory" - add to common prefixes
commonPrefix := prefix + remainder[:delimIdx+1]
commonPrefixes[commonPrefix] = true
continue
}
}
size := int64(len(data))
contents = append(contents, types.Object{
Key: aws.String(key),
Size: &size,
})
}
var prefixList []types.CommonPrefix
for p := range commonPrefixes {
prefixList = append(prefixList, types.CommonPrefix{
Prefix: aws.String(p),
})
}
return &s3.ListObjectsV2Output{
Contents: contents,
CommonPrefixes: prefixList,
}, nil
}
// Test helper to create a mock S3 archive with test data
func setupMockS3Archive(t *testing.T) *MockS3Client {
mock := NewMockS3Client()
// Add version.txt
mock.objects["version.txt"] = []byte("2\n")
// Add a test cluster directory
mock.objects["emmy/cluster.json"] = []byte(`{
"name": "emmy",
"metricConfig": [],
"subClusters": [
{
"name": "main",
"processorType": "Intel Xeon",
"socketsPerNode": 2,
"coresPerSocket": 4,
"threadsPerCore": 2,
"flopRateScalar": 16,
"flopRateSimd": 32,
"memoryBandwidth": 100
}
]
}`)
// Add a test job
mock.objects["emmy/1403/244/1608923076/meta.json"] = []byte(`{
"jobId": 1403244,
"cluster": "emmy",
"startTime": 1608923076,
"numNodes": 1,
"resources": [{"hostname": "node001"}]
}`)
mock.objects["emmy/1403/244/1608923076/data.json"] = []byte(`{
"mem_used": {
"node": {
"node001": {
"series": [{"time": 1608923076, "value": 1000}]
}
}
}
}`)
return mock
}
func TestS3InitEmptyBucket(t *testing.T) {
var s3a S3Archive
_, err := s3a.Init(json.RawMessage(`{"kind":"s3"}`))
if err == nil {
t.Fatal("expected error for empty bucket")
}
}
func TestS3InitInvalidConfig(t *testing.T) {
var s3a S3Archive
_, err := s3a.Init(json.RawMessage(`"bucket":"test-bucket"`))
if err == nil {
t.Fatal("expected error for invalid config")
}
}
// Note: TestS3Init would require actual S3 connection or more complex mocking
// For now, we document that Init() should be tested manually with MinIO
func TestGetS3Key(t *testing.T) {
job := &schema.Job{
JobID: 1403244,
Cluster: "emmy",
StartTime: 1608923076,
}
key := getS3Key(job, "meta.json")
expected := "emmy/1403/244/1608923076/meta.json"
if key != expected {
t.Errorf("expected key %s, got %s", expected, key)
}
}
func TestGetS3Directory(t *testing.T) {
job := &schema.Job{
JobID: 1403244,
Cluster: "emmy",
StartTime: 1608923076,
}
dir := getS3Directory(job)
expected := "emmy/1403/244/1608923076/"
if dir != expected {
t.Errorf("expected dir %s, got %s", expected, dir)
}
}
// Integration-style tests would go here for actual S3 operations
// These would require MinIO or localstack for testing
func TestS3ArchiveConfigParsing(t *testing.T) {
rawConfig := json.RawMessage(`{
"endpoint": "http://localhost:9000",
"accessKey": "minioadmin",
"secretKey": "minioadmin",
"bucket": "test-bucket",
"region": "us-east-1",
"usePathStyle": true
}`)
var cfg S3ArchiveConfig
err := json.Unmarshal(rawConfig, &cfg)
if err != nil {
t.Fatalf("failed to parse config: %v", err)
}
if cfg.Bucket != "test-bucket" {
t.Errorf("expected bucket 'test-bucket', got '%s'", cfg.Bucket)
}
if cfg.Region != "us-east-1" {
t.Errorf("expected region 'us-east-1', got '%s'", cfg.Region)
}
if !cfg.UsePathStyle {
t.Error("expected usePathStyle to be true")
}
}
func TestS3KeyGeneration(t *testing.T) {
tests := []struct {
jobID int64
cluster string
startTime int64
file string
expected string
}{
{1403244, "emmy", 1608923076, "meta.json", "emmy/1403/244/1608923076/meta.json"},
{1404397, "emmy", 1609300556, "data.json.gz", "emmy/1404/397/1609300556/data.json.gz"},
{42, "fritz", 1234567890, "meta.json", "fritz/0/042/1234567890/meta.json"},
}
for _, tt := range tests {
job := &schema.Job{
JobID: tt.jobID,
Cluster: tt.cluster,
StartTime: tt.startTime,
}
key := getS3Key(job, tt.file)
if key != tt.expected {
t.Errorf("for job %d: expected %s, got %s", tt.jobID, tt.expected, key)
}
}
}

View File

@@ -0,0 +1,584 @@
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved. This file is part of cc-backend.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package archive
import (
"bytes"
"compress/gzip"
"database/sql"
"encoding/json"
"fmt"
"io"
"math"
"os"
"strconv"
"text/tabwriter"
"time"
"github.com/ClusterCockpit/cc-backend/internal/config"
cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
"github.com/ClusterCockpit/cc-lib/schema"
"github.com/ClusterCockpit/cc-lib/util"
_ "github.com/mattn/go-sqlite3"
)
// SqliteArchiveConfig holds the configuration for the SQLite archive backend.
type SqliteArchiveConfig struct {
DBPath string `json:"dbPath"` // Path to SQLite database file
}
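// An archive configuration selecting this backend could look like the
// following (a sketch; the database path is only an example):
//
//	{"kind": "sqlite", "dbPath": "./var/job-archive.db"}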
// SqliteArchive implements ArchiveBackend using a SQLite database with BLOB storage.
// Job metadata and data are stored as JSON BLOBs with indexes for fast queries.
//
// Uses WAL (Write-Ahead Logging) mode for better concurrency and a 64MB cache.
type SqliteArchive struct {
db *sql.DB // SQLite database connection
clusters []string // List of discovered cluster names
}
// sqliteSchema defines the database schema for SQLite archive backend.
// Jobs table: stores job metadata and data as BLOBs with compression flag
// Clusters table: stores cluster configurations
// Metadata table: stores version and other key-value pairs
const sqliteSchema = `
CREATE TABLE IF NOT EXISTS jobs (
id INTEGER PRIMARY KEY AUTOINCREMENT,
job_id INTEGER NOT NULL,
cluster TEXT NOT NULL,
start_time INTEGER NOT NULL,
meta_json BLOB NOT NULL,
data_json BLOB,
data_compressed BOOLEAN DEFAULT 0,
created_at INTEGER NOT NULL,
updated_at INTEGER NOT NULL,
UNIQUE(job_id, cluster, start_time)
);
CREATE INDEX IF NOT EXISTS idx_jobs_cluster ON jobs(cluster);
CREATE INDEX IF NOT EXISTS idx_jobs_start_time ON jobs(start_time);
CREATE INDEX IF NOT EXISTS idx_jobs_lookup ON jobs(cluster, job_id, start_time);
CREATE TABLE IF NOT EXISTS clusters (
name TEXT PRIMARY KEY,
config_json BLOB NOT NULL,
updated_at INTEGER NOT NULL
);
CREATE TABLE IF NOT EXISTS metadata (
key TEXT PRIMARY KEY,
value TEXT NOT NULL
);
`
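// Init opens (or creates) the SQLite database at the configured path, applies
// performance pragmas, creates the schema if necessary, verifies the archive
// version stored in the metadata table, and discovers all clusters present.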
func (sa *SqliteArchive) Init(rawConfig json.RawMessage) (uint64, error) {
var cfg SqliteArchiveConfig
if err := json.Unmarshal(rawConfig, &cfg); err != nil {
cclog.Warnf("SqliteArchive Init() > Unmarshal error: %#v", err)
return 0, err
}
if cfg.DBPath == "" {
err := fmt.Errorf("SqliteArchive Init(): empty database path")
cclog.Errorf("SqliteArchive Init() > config error: %v", err)
return 0, err
}
// Open SQLite database
db, err := sql.Open("sqlite3", cfg.DBPath)
if err != nil {
cclog.Errorf("SqliteArchive Init() > failed to open database: %v", err)
return 0, err
}
sa.db = db
// Set pragmas for better performance
pragmas := []string{
"PRAGMA journal_mode=WAL",
"PRAGMA synchronous=NORMAL",
"PRAGMA cache_size=-64000", // 64MB cache
"PRAGMA busy_timeout=5000",
}
for _, pragma := range pragmas {
if _, err := sa.db.Exec(pragma); err != nil {
cclog.Warnf("SqliteArchive Init() > pragma failed: %v", err)
}
}
// Create schema
if _, err := sa.db.Exec(sqliteSchema); err != nil {
cclog.Errorf("SqliteArchive Init() > schema creation failed: %v", err)
return 0, err
}
// Check/set version
var versionStr string
err = sa.db.QueryRow("SELECT value FROM metadata WHERE key = 'version'").Scan(&versionStr)
if err == sql.ErrNoRows {
// First time initialization, set version
_, err = sa.db.Exec("INSERT INTO metadata (key, value) VALUES ('version', ?)", fmt.Sprintf("%d", Version))
if err != nil {
cclog.Errorf("SqliteArchive Init() > failed to set version: %v", err)
return 0, err
}
versionStr = fmt.Sprintf("%d", Version)
} else if err != nil {
cclog.Errorf("SqliteArchive Init() > failed to read version: %v", err)
return 0, err
}
version, err := strconv.ParseUint(versionStr, 10, 64)
if err != nil {
cclog.Errorf("SqliteArchive Init() > version parse error: %v", err)
return 0, err
}
if version != Version {
return version, fmt.Errorf("unsupported version %d, need %d", version, Version)
}
// Discover clusters
sa.clusters = []string{}
rows, err := sa.db.Query("SELECT DISTINCT cluster FROM jobs ORDER BY cluster")
if err != nil {
cclog.Errorf("SqliteArchive Init() > failed to query clusters: %v", err)
return 0, err
}
defer rows.Close()
for rows.Next() {
var cluster string
if err := rows.Scan(&cluster); err != nil {
cclog.Errorf("SqliteArchive Init() > failed to scan cluster: %v", err)
continue
}
sa.clusters = append(sa.clusters, cluster)
}
cclog.Infof("SqliteArchive initialized with database '%s', found %d clusters", cfg.DBPath, len(sa.clusters))
return version, nil
}
func (sa *SqliteArchive) Info() {
fmt.Printf("SQLite Job archive database\n")
ci := make(map[string]*clusterInfo)
rows, err := sa.db.Query(`
SELECT cluster, COUNT(*), MIN(start_time), MAX(start_time),
SUM(LENGTH(meta_json) + COALESCE(LENGTH(data_json), 0))
FROM jobs
GROUP BY cluster
`)
if err != nil {
cclog.Fatalf("SqliteArchive Info() > query failed: %s", err.Error())
}
defer rows.Close()
for rows.Next() {
var cluster string
var numJobs int
var dateFirst, dateLast int64
var diskSize int64
if err := rows.Scan(&cluster, &numJobs, &dateFirst, &dateLast, &diskSize); err != nil {
cclog.Errorf("SqliteArchive Info() > scan failed: %v", err)
continue
}
ci[cluster] = &clusterInfo{
numJobs: numJobs,
dateFirst: dateFirst,
dateLast: dateLast,
diskSize: float64(diskSize) / (1024 * 1024), // Convert to MB
}
}
cit := clusterInfo{dateFirst: time.Now().Unix()}
w := tabwriter.NewWriter(os.Stdout, 0, 0, 2, ' ', tabwriter.Debug)
fmt.Fprintln(w, "cluster\t#jobs\tfrom\tto\tsize (MB)")
for cluster, clusterInfo := range ci {
fmt.Fprintf(w, "%s\t%d\t%s\t%s\t%.2f\n", cluster,
clusterInfo.numJobs,
time.Unix(clusterInfo.dateFirst, 0),
time.Unix(clusterInfo.dateLast, 0),
clusterInfo.diskSize)
cit.numJobs += clusterInfo.numJobs
cit.dateFirst = util.Min(cit.dateFirst, clusterInfo.dateFirst)
cit.dateLast = util.Max(cit.dateLast, clusterInfo.dateLast)
cit.diskSize += clusterInfo.diskSize
}
fmt.Fprintf(w, "TOTAL\t%d\t%s\t%s\t%.2f\n",
cit.numJobs, time.Unix(cit.dateFirst, 0), time.Unix(cit.dateLast, 0), cit.diskSize)
w.Flush()
}
func (sa *SqliteArchive) Exists(job *schema.Job) bool {
var count int
err := sa.db.QueryRow("SELECT COUNT(*) FROM jobs WHERE job_id = ? AND cluster = ? AND start_time = ?",
job.JobID, job.Cluster, job.StartTime).Scan(&count)
return err == nil && count > 0
}
func (sa *SqliteArchive) LoadJobMeta(job *schema.Job) (*schema.Job, error) {
var metaBlob []byte
err := sa.db.QueryRow("SELECT meta_json FROM jobs WHERE job_id = ? AND cluster = ? AND start_time = ?",
job.JobID, job.Cluster, job.StartTime).Scan(&metaBlob)
if err != nil {
cclog.Errorf("SqliteArchive LoadJobMeta() > query error: %v", err)
return nil, err
}
if config.Keys.Validate {
if err := schema.Validate(schema.Meta, bytes.NewReader(metaBlob)); err != nil {
return nil, fmt.Errorf("validate job meta: %v", err)
}
}
return DecodeJobMeta(bytes.NewReader(metaBlob))
}
func (sa *SqliteArchive) LoadJobData(job *schema.Job) (schema.JobData, error) {
var dataBlob []byte
var compressed bool
err := sa.db.QueryRow("SELECT data_json, data_compressed FROM jobs WHERE job_id = ? AND cluster = ? AND start_time = ?",
job.JobID, job.Cluster, job.StartTime).Scan(&dataBlob, &compressed)
if err != nil {
cclog.Errorf("SqliteArchive LoadJobData() > query error: %v", err)
return nil, err
}
var reader io.Reader = bytes.NewReader(dataBlob)
if compressed {
gzipReader, err := gzip.NewReader(reader)
if err != nil {
cclog.Errorf("SqliteArchive LoadJobData() > gzip error: %v", err)
return nil, err
}
defer gzipReader.Close()
reader = gzipReader
}
	if config.Keys.Validate {
		data, err := io.ReadAll(reader)
		if err != nil {
			return nil, fmt.Errorf("read job data: %v", err)
		}
		if err := schema.Validate(schema.Data, bytes.NewReader(data)); err != nil {
			return schema.JobData{}, fmt.Errorf("validate job data: %v", err)
		}
		return DecodeJobData(bytes.NewReader(data), "sqlite")
	}
	return DecodeJobData(reader, "sqlite")
}
return DecodeJobData(reader, "sqlite")
}
func (sa *SqliteArchive) LoadJobStats(job *schema.Job) (schema.ScopedJobStats, error) {
var dataBlob []byte
var compressed bool
err := sa.db.QueryRow("SELECT data_json, data_compressed FROM jobs WHERE job_id = ? AND cluster = ? AND start_time = ?",
job.JobID, job.Cluster, job.StartTime).Scan(&dataBlob, &compressed)
if err != nil {
cclog.Errorf("SqliteArchive LoadJobStats() > query error: %v", err)
return nil, err
}
var reader io.Reader = bytes.NewReader(dataBlob)
if compressed {
gzipReader, err := gzip.NewReader(reader)
if err != nil {
cclog.Errorf("SqliteArchive LoadJobStats() > gzip error: %v", err)
return nil, err
}
defer gzipReader.Close()
reader = gzipReader
}
	if config.Keys.Validate {
		data, err := io.ReadAll(reader)
		if err != nil {
			return nil, fmt.Errorf("read job data: %v", err)
		}
		if err := schema.Validate(schema.Data, bytes.NewReader(data)); err != nil {
			return nil, fmt.Errorf("validate job data: %v", err)
		}
		return DecodeJobStats(bytes.NewReader(data), "sqlite")
	}
	return DecodeJobStats(reader, "sqlite")
}
return DecodeJobStats(reader, "sqlite")
}
func (sa *SqliteArchive) LoadClusterCfg(name string) (*schema.Cluster, error) {
var configBlob []byte
err := sa.db.QueryRow("SELECT config_json FROM clusters WHERE name = ?", name).Scan(&configBlob)
if err != nil {
cclog.Errorf("SqliteArchive LoadClusterCfg() > query error: %v", err)
return nil, err
}
if err := schema.Validate(schema.ClusterCfg, bytes.NewReader(configBlob)); err != nil {
cclog.Warnf("Validate cluster config: %v\n", err)
return &schema.Cluster{}, fmt.Errorf("validate cluster config: %v", err)
}
return DecodeCluster(bytes.NewReader(configBlob))
}
func (sa *SqliteArchive) StoreJobMeta(job *schema.Job) error {
var metaBuf bytes.Buffer
if err := EncodeJobMeta(&metaBuf, job); err != nil {
cclog.Error("SqliteArchive StoreJobMeta() > encoding error")
return err
}
now := time.Now().Unix()
_, err := sa.db.Exec(`
INSERT INTO jobs (job_id, cluster, start_time, meta_json, created_at, updated_at)
VALUES (?, ?, ?, ?, ?, ?)
ON CONFLICT(job_id, cluster, start_time) DO UPDATE SET
meta_json = excluded.meta_json,
updated_at = excluded.updated_at
`, job.JobID, job.Cluster, job.StartTime, metaBuf.Bytes(), now, now)
if err != nil {
cclog.Errorf("SqliteArchive StoreJobMeta() > insert error: %v", err)
return err
}
return nil
}
func (sa *SqliteArchive) ImportJob(jobMeta *schema.Job, jobData *schema.JobData) error {
var metaBuf, dataBuf bytes.Buffer
if err := EncodeJobMeta(&metaBuf, jobMeta); err != nil {
cclog.Error("SqliteArchive ImportJob() > encoding meta error")
return err
}
if err := EncodeJobData(&dataBuf, jobData); err != nil {
cclog.Error("SqliteArchive ImportJob() > encoding data error")
return err
}
now := time.Now().Unix()
_, err := sa.db.Exec(`
INSERT INTO jobs (job_id, cluster, start_time, meta_json, data_json, data_compressed, created_at, updated_at)
VALUES (?, ?, ?, ?, ?, 0, ?, ?)
ON CONFLICT(job_id, cluster, start_time) DO UPDATE SET
meta_json = excluded.meta_json,
data_json = excluded.data_json,
data_compressed = excluded.data_compressed,
updated_at = excluded.updated_at
`, jobMeta.JobID, jobMeta.Cluster, jobMeta.StartTime, metaBuf.Bytes(), dataBuf.Bytes(), now, now)
if err != nil {
cclog.Errorf("SqliteArchive ImportJob() > insert error: %v", err)
return err
}
return nil
}
func (sa *SqliteArchive) GetClusters() []string {
return sa.clusters
}
func (sa *SqliteArchive) CleanUp(jobs []*schema.Job) {
start := time.Now()
count := 0
tx, err := sa.db.Begin()
if err != nil {
cclog.Errorf("SqliteArchive CleanUp() > transaction error: %v", err)
return
}
defer tx.Rollback()
stmt, err := tx.Prepare("DELETE FROM jobs WHERE job_id = ? AND cluster = ? AND start_time = ?")
if err != nil {
cclog.Errorf("SqliteArchive CleanUp() > prepare error: %v", err)
return
}
defer stmt.Close()
for _, job := range jobs {
if job == nil {
cclog.Errorf("SqliteArchive CleanUp() error: job is nil")
continue
}
if _, err := stmt.Exec(job.JobID, job.Cluster, job.StartTime); err != nil {
cclog.Errorf("SqliteArchive CleanUp() > delete error: %v", err)
} else {
count++
}
}
if err := tx.Commit(); err != nil {
cclog.Errorf("SqliteArchive CleanUp() > commit error: %v", err)
return
}
cclog.Infof("Retention Service - Remove %d jobs from SQLite in %s", count, time.Since(start))
}
func (sa *SqliteArchive) Move(jobs []*schema.Job, targetPath string) {
	// Moving jobs to a filesystem path has no equivalent for a database-backed
	// archive, so this operation intentionally does nothing beyond logging.
	cclog.Warn("SqliteArchive Move() is not implemented - filesystem moves do not apply to a database backend")
}
func (sa *SqliteArchive) Clean(before int64, after int64) {
if after == 0 {
after = math.MaxInt64
}
result, err := sa.db.Exec("DELETE FROM jobs WHERE start_time < ? OR start_time > ?", before, after)
if err != nil {
cclog.Fatalf("SqliteArchive Clean() > delete error: %s", err.Error())
}
rowsAffected, _ := result.RowsAffected()
cclog.Infof("SqliteArchive Clean() removed %d jobs", rowsAffected)
}
func (sa *SqliteArchive) Compress(jobs []*schema.Job) {
var cnt int
start := time.Now()
tx, err := sa.db.Begin()
if err != nil {
cclog.Errorf("SqliteArchive Compress() > transaction error: %v", err)
return
}
defer tx.Rollback()
stmt, err := tx.Prepare("UPDATE jobs SET data_json = ?, data_compressed = 1 WHERE job_id = ? AND cluster = ? AND start_time = ?")
if err != nil {
cclog.Errorf("SqliteArchive Compress() > prepare error: %v", err)
return
}
defer stmt.Close()
for _, job := range jobs {
var dataBlob []byte
var compressed bool
err := sa.db.QueryRow("SELECT data_json, data_compressed FROM jobs WHERE job_id = ? AND cluster = ? AND start_time = ?",
job.JobID, job.Cluster, job.StartTime).Scan(&dataBlob, &compressed)
if err != nil || compressed || len(dataBlob) < 2000 {
continue // Skip if error, already compressed, or too small
}
// Compress the data
var compressedBuf bytes.Buffer
gzipWriter := gzip.NewWriter(&compressedBuf)
if _, err := gzipWriter.Write(dataBlob); err != nil {
cclog.Errorf("SqliteArchive Compress() > gzip error: %v", err)
gzipWriter.Close()
continue
}
gzipWriter.Close()
if _, err := stmt.Exec(compressedBuf.Bytes(), job.JobID, job.Cluster, job.StartTime); err != nil {
cclog.Errorf("SqliteArchive Compress() > update error: %v", err)
} else {
cnt++
}
}
if err := tx.Commit(); err != nil {
cclog.Errorf("SqliteArchive Compress() > commit error: %v", err)
return
}
cclog.Infof("Compression Service - %d jobs in SQLite took %s", cnt, time.Since(start))
}
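// CompressLast returns the timestamp recorded by the previous compression run
// (falling back to starttime if none is stored) and persists starttime as the
// new 'compress_last' value in the metadata table.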
func (sa *SqliteArchive) CompressLast(starttime int64) int64 {
var lastStr string
err := sa.db.QueryRow("SELECT value FROM metadata WHERE key = 'compress_last'").Scan(&lastStr)
var last int64
if err == sql.ErrNoRows {
last = starttime
} else if err != nil {
cclog.Errorf("SqliteArchive CompressLast() > query error: %v", err)
last = starttime
} else {
last, err = strconv.ParseInt(lastStr, 10, 64)
if err != nil {
cclog.Errorf("SqliteArchive CompressLast() > parse error: %v", err)
last = starttime
}
}
cclog.Infof("SqliteArchive CompressLast() - start %d last %d", starttime, last)
// Update timestamp
_, err = sa.db.Exec(`
INSERT INTO metadata (key, value) VALUES ('compress_last', ?)
ON CONFLICT(key) DO UPDATE SET value = excluded.value
`, fmt.Sprintf("%d", starttime))
if err != nil {
cclog.Errorf("SqliteArchive CompressLast() > update error: %v", err)
}
return last
}
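// Iter streams all archived jobs, ordered by cluster and start time, over the
// returned channel. If loadMetricData is set, the stored metric data is
// decoded (and decompressed when necessary) and attached to each container.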
func (sa *SqliteArchive) Iter(loadMetricData bool) <-chan JobContainer {
ch := make(chan JobContainer)
go func() {
defer close(ch)
rows, err := sa.db.Query("SELECT job_id, cluster, start_time, meta_json, data_json, data_compressed FROM jobs ORDER BY cluster, start_time")
if err != nil {
cclog.Fatalf("SqliteArchive Iter() > query error: %s", err.Error())
}
defer rows.Close()
for rows.Next() {
var jobID int64
var cluster string
var startTime int64
var metaBlob []byte
var dataBlob []byte
var compressed bool
if err := rows.Scan(&jobID, &cluster, &startTime, &metaBlob, &dataBlob, &compressed); err != nil {
cclog.Errorf("SqliteArchive Iter() > scan error: %v", err)
continue
}
job, err := DecodeJobMeta(bytes.NewReader(metaBlob))
if err != nil {
cclog.Errorf("SqliteArchive Iter() > decode meta error: %v", err)
continue
}
			if loadMetricData && dataBlob != nil {
				var reader io.Reader = bytes.NewReader(dataBlob)
				var gzipReader *gzip.Reader
				if compressed {
					var err error
					gzipReader, err = gzip.NewReader(reader)
					if err != nil {
						cclog.Errorf("SqliteArchive Iter() > gzip error: %v", err)
						ch <- JobContainer{Meta: job, Data: nil}
						continue
					}
					reader = gzipReader
				}
				jobData, err := DecodeJobData(reader, "sqlite")
				// Close the gzip reader immediately instead of deferring, so readers
				// do not accumulate for the lifetime of the iteration goroutine.
				if gzipReader != nil {
					gzipReader.Close()
				}
				if err != nil {
					cclog.Errorf("SqliteArchive Iter() > decode data error: %v", err)
					ch <- JobContainer{Meta: job, Data: nil}
				} else {
					ch <- JobContainer{Meta: job, Data: &jobData}
				}
			} else {
				ch <- JobContainer{Meta: job, Data: nil}
			}
}
}()
return ch
}

View File

@@ -0,0 +1,313 @@
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package archive
import (
"encoding/json"
"os"
"testing"
"github.com/ClusterCockpit/cc-lib/schema"
)
func TestSqliteInitEmptyPath(t *testing.T) {
var sa SqliteArchive
_, err := sa.Init(json.RawMessage(`{"kind":"sqlite"}`))
if err == nil {
t.Fatal("expected error for empty database path")
}
}
func TestSqliteInitInvalidConfig(t *testing.T) {
var sa SqliteArchive
_, err := sa.Init(json.RawMessage(`"dbPath":"/tmp/test.db"`))
if err == nil {
t.Fatal("expected error for invalid config")
}
}
func TestSqliteInit(t *testing.T) {
tmpfile := t.TempDir() + "/test.db"
defer os.Remove(tmpfile)
var sa SqliteArchive
version, err := sa.Init(json.RawMessage(`{"dbPath":"` + tmpfile + `"}`))
if err != nil {
t.Fatalf("init failed: %v", err)
}
if version != Version {
t.Errorf("expected version %d, got %d", Version, version)
}
if sa.db == nil {
t.Fatal("database not initialized")
}
sa.db.Close()
}
func TestSqliteStoreAndLoadJobMeta(t *testing.T) {
tmpfile := t.TempDir() + "/test.db"
defer os.Remove(tmpfile)
var sa SqliteArchive
_, err := sa.Init(json.RawMessage(`{"dbPath":"` + tmpfile + `"}`))
if err != nil {
t.Fatalf("init failed: %v", err)
}
defer sa.db.Close()
job := &schema.Job{
JobID: 12345,
Cluster: "test-cluster",
StartTime: 1234567890,
NumNodes: 1,
Resources: []*schema.Resource{{Hostname: "node001"}},
}
// Store job metadata
if err := sa.StoreJobMeta(job); err != nil {
t.Fatalf("store failed: %v", err)
}
// Check if exists
if !sa.Exists(job) {
t.Fatal("job should exist")
}
// Load job metadata
loaded, err := sa.LoadJobMeta(job)
if err != nil {
t.Fatalf("load failed: %v", err)
}
if loaded.JobID != job.JobID {
t.Errorf("expected JobID %d, got %d", job.JobID, loaded.JobID)
}
if loaded.Cluster != job.Cluster {
t.Errorf("expected Cluster %s, got %s", job.Cluster, loaded.Cluster)
}
if loaded.StartTime != job.StartTime {
t.Errorf("expected StartTime %d, got %d", job.StartTime, loaded.StartTime)
}
}
func TestSqliteImportJob(t *testing.T) {
tmpfile := t.TempDir() + "/test.db"
defer os.Remove(tmpfile)
var sa SqliteArchive
_, err := sa.Init(json.RawMessage(`{"dbPath":"` + tmpfile + `"}`))
if err != nil {
t.Fatalf("init failed: %v", err)
}
defer sa.db.Close()
	// Importing real metric data requires fixture files; the cross-backend
	// import path is exercised by the archive-manager integration tests, so
	// this test only checks that the backend initializes cleanly.
	t.Log("ImportJob covered by archive-manager integration tests")
}
func TestSqliteGetClusters(t *testing.T) {
tmpfile := t.TempDir() + "/test.db"
defer os.Remove(tmpfile)
var sa SqliteArchive
_, err := sa.Init(json.RawMessage(`{"dbPath":"` + tmpfile + `"}`))
if err != nil {
t.Fatalf("init failed: %v", err)
}
defer sa.db.Close()
// Add jobs from different clusters
job1 := &schema.Job{
JobID: 1,
Cluster: "cluster-a",
StartTime: 1000,
NumNodes: 1,
Resources: []*schema.Resource{{Hostname: "node001"}},
}
job2 := &schema.Job{
JobID: 2,
Cluster: "cluster-b",
StartTime: 2000,
NumNodes: 1,
Resources: []*schema.Resource{{Hostname: "node002"}},
}
sa.StoreJobMeta(job1)
sa.StoreJobMeta(job2)
// Reinitialize to refresh cluster list
sa.db.Close()
_, err = sa.Init(json.RawMessage(`{"dbPath":"` + tmpfile + `"}`))
if err != nil {
t.Fatalf("reinit failed: %v", err)
}
defer sa.db.Close()
clusters := sa.GetClusters()
if len(clusters) != 2 {
t.Errorf("expected 2 clusters, got %d", len(clusters))
}
}
func TestSqliteCleanUp(t *testing.T) {
tmpfile := t.TempDir() + "/test.db"
defer os.Remove(tmpfile)
var sa SqliteArchive
_, err := sa.Init(json.RawMessage(`{"dbPath":"` + tmpfile + `"}`))
if err != nil {
t.Fatalf("init failed: %v", err)
}
defer sa.db.Close()
job := &schema.Job{
JobID: 999,
Cluster: "test",
StartTime: 5000,
NumNodes: 1,
Resources: []*schema.Resource{{Hostname: "node001"}},
}
sa.StoreJobMeta(job)
// Verify exists
if !sa.Exists(job) {
t.Fatal("job should exist")
}
// Clean up
sa.CleanUp([]*schema.Job{job})
// Verify deleted
if sa.Exists(job) {
t.Fatal("job should not exist after cleanup")
}
}
func TestSqliteClean(t *testing.T) {
tmpfile := t.TempDir() + "/test.db"
defer os.Remove(tmpfile)
var sa SqliteArchive
_, err := sa.Init(json.RawMessage(`{"dbPath":"` + tmpfile + `"}`))
if err != nil {
t.Fatalf("init failed: %v", err)
}
defer sa.db.Close()
// Add jobs with different start times
oldJob := &schema.Job{
JobID: 1,
Cluster: "test",
StartTime: 1000,
NumNodes: 1,
Resources: []*schema.Resource{{Hostname: "node001"}},
}
newJob := &schema.Job{
JobID: 2,
Cluster: "test",
StartTime: 9000,
NumNodes: 1,
Resources: []*schema.Resource{{Hostname: "node002"}},
}
sa.StoreJobMeta(oldJob)
sa.StoreJobMeta(newJob)
// Clean jobs before 5000
sa.Clean(5000, 0)
// Old job should be deleted
if sa.Exists(oldJob) {
t.Error("old job should be deleted")
}
// New job should still exist
if !sa.Exists(newJob) {
t.Error("new job should still exist")
}
}
func TestSqliteIter(t *testing.T) {
tmpfile := t.TempDir() + "/test.db"
defer os.Remove(tmpfile)
var sa SqliteArchive
_, err := sa.Init(json.RawMessage(`{"dbPath":"` + tmpfile + `"}`))
if err != nil {
t.Fatalf("init failed: %v", err)
}
defer sa.db.Close()
// Add multiple jobs
for i := 1; i <= 3; i++ {
job := &schema.Job{
JobID: int64(i),
Cluster: "test",
StartTime: int64(i * 1000),
NumNodes: 1,
Resources: []*schema.Resource{{Hostname: "node001"}},
}
sa.StoreJobMeta(job)
}
// Iterate
count := 0
for container := range sa.Iter(false) {
if container.Meta == nil {
t.Error("expected non-nil meta")
}
count++
}
if count != 3 {
t.Errorf("expected 3 jobs, got %d", count)
}
}
func TestSqliteCompress(t *testing.T) {
// Compression test requires actual job data
// For now just verify the method exists and doesn't panic
tmpfile := t.TempDir() + "/test.db"
defer os.Remove(tmpfile)
var sa SqliteArchive
_, err := sa.Init(json.RawMessage(`{"dbPath":"` + tmpfile + `"}`))
if err != nil {
t.Fatalf("init failed: %v", err)
}
defer sa.db.Close()
job := &schema.Job{
JobID: 777,
Cluster: "test",
StartTime: 7777,
NumNodes: 1,
Resources: []*schema.Resource{{Hostname: "node001"}},
}
sa.StoreJobMeta(job)
// Compress should not panic even with missing data
sa.Compress([]*schema.Job{job})
t.Log("Compression method verified")
}
func TestSqliteConfigParsing(t *testing.T) {
rawConfig := json.RawMessage(`{"dbPath": "/tmp/test.db"}`)
var cfg SqliteArchiveConfig
err := json.Unmarshal(rawConfig, &cfg)
if err != nil {
t.Fatalf("failed to parse config: %v", err)
}
if cfg.DBPath != "/tmp/test.db" {
t.Errorf("expected dbPath '/tmp/test.db', got '%s'", cfg.DBPath)
}
}

View File

@@ -1 +1 @@
2
3

Binary file not shown.

View File

@@ -0,0 +1,335 @@
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved. This file is part of cc-backend.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package main
import (
"encoding/json"
"fmt"
"os"
"path/filepath"
"testing"
"github.com/ClusterCockpit/cc-backend/pkg/archive"
"github.com/ClusterCockpit/cc-lib/schema"
"github.com/ClusterCockpit/cc-lib/util"
)
// TestImportFileToSqlite tests importing jobs from file backend to SQLite backend
func TestImportFileToSqlite(t *testing.T) {
// Create temporary directories
tmpdir := t.TempDir()
srcArchive := filepath.Join(tmpdir, "src-archive")
dstDb := filepath.Join(tmpdir, "dst-archive.db")
// Copy test data to source archive
testDataPath := "../../pkg/archive/testdata/archive"
if _, err := os.Stat(testDataPath); os.IsNotExist(err) {
t.Skip("Test data not found, skipping integration test")
}
if err := util.CopyDir(testDataPath, srcArchive); err != nil {
t.Fatalf("Failed to copy test data: %s", err.Error())
}
// Initialize source backend (file)
srcConfig := fmt.Sprintf(`{"kind":"file","path":"%s"}`, srcArchive)
srcBackend, err := archive.InitBackend(json.RawMessage(srcConfig))
if err != nil {
t.Fatalf("Failed to initialize source backend: %s", err.Error())
}
// Initialize destination backend (sqlite)
dstConfig := fmt.Sprintf(`{"kind":"sqlite","dbPath":"%s"}`, dstDb)
dstBackend, err := archive.InitBackend(json.RawMessage(dstConfig))
if err != nil {
t.Fatalf("Failed to initialize destination backend: %s", err.Error())
}
// Perform import
imported, failed, err := importArchive(srcBackend, dstBackend)
if err != nil {
t.Errorf("Import failed: %s", err.Error())
}
if imported == 0 {
t.Error("No jobs were imported")
}
if failed > 0 {
t.Errorf("%d jobs failed to import", failed)
}
t.Logf("Successfully imported %d jobs", imported)
// Verify jobs exist in destination
// Count jobs in source
srcCount := 0
for range srcBackend.Iter(false) {
srcCount++
}
// Count jobs in destination
dstCount := 0
for range dstBackend.Iter(false) {
dstCount++
}
if srcCount != dstCount {
t.Errorf("Job count mismatch: source has %d jobs, destination has %d jobs", srcCount, dstCount)
}
}
// TestImportFileToFile tests importing jobs from one file backend to another
func TestImportFileToFile(t *testing.T) {
// Create temporary directories
tmpdir := t.TempDir()
srcArchive := filepath.Join(tmpdir, "src-archive")
dstArchive := filepath.Join(tmpdir, "dst-archive")
// Copy test data to source archive
testDataPath := "../../pkg/archive/testdata/archive"
if _, err := os.Stat(testDataPath); os.IsNotExist(err) {
t.Skip("Test data not found, skipping integration test")
}
if err := util.CopyDir(testDataPath, srcArchive); err != nil {
t.Fatalf("Failed to copy test data: %s", err.Error())
}
// Create destination archive directory
if err := os.MkdirAll(dstArchive, 0755); err != nil {
t.Fatalf("Failed to create destination directory: %s", err.Error())
}
// Initialize source backend
srcConfig := fmt.Sprintf(`{"kind":"file","path":"%s"}`, srcArchive)
srcBackend, err := archive.InitBackend(json.RawMessage(srcConfig))
if err != nil {
t.Fatalf("Failed to initialize source backend: %s", err.Error())
}
// Initialize destination backend
dstConfig := fmt.Sprintf(`{"kind":"file","path":"%s"}`, dstArchive)
dstBackend, err := archive.InitBackend(json.RawMessage(dstConfig))
if err != nil {
t.Fatalf("Failed to initialize destination backend: %s", err.Error())
}
// Perform import
imported, failed, err := importArchive(srcBackend, dstBackend)
if err != nil {
t.Errorf("Import failed: %s", err.Error())
}
if imported == 0 {
t.Error("No jobs were imported")
}
if failed > 0 {
t.Errorf("%d jobs failed to import", failed)
}
t.Logf("Successfully imported %d jobs", imported)
}
// TestImportDataIntegrity verifies that job metadata and data are correctly imported
func TestImportDataIntegrity(t *testing.T) {
// Create temporary directories
tmpdir := t.TempDir()
srcArchive := filepath.Join(tmpdir, "src-archive")
dstDb := filepath.Join(tmpdir, "dst-archive.db")
// Copy test data to source archive
testDataPath := "../../pkg/archive/testdata/archive"
if _, err := os.Stat(testDataPath); os.IsNotExist(err) {
t.Skip("Test data not found, skipping integration test")
}
if err := util.CopyDir(testDataPath, srcArchive); err != nil {
t.Fatalf("Failed to copy test data: %s", err.Error())
}
// Initialize backends
srcConfig := fmt.Sprintf(`{"kind":"file","path":"%s"}`, srcArchive)
srcBackend, err := archive.InitBackend(json.RawMessage(srcConfig))
if err != nil {
t.Fatalf("Failed to initialize source backend: %s", err.Error())
}
dstConfig := fmt.Sprintf(`{"kind":"sqlite","dbPath":"%s"}`, dstDb)
dstBackend, err := archive.InitBackend(json.RawMessage(dstConfig))
if err != nil {
t.Fatalf("Failed to initialize destination backend: %s", err.Error())
}
// Perform import
_, _, err = importArchive(srcBackend, dstBackend)
if err != nil {
t.Errorf("Import failed: %s", err.Error())
}
// Verify data integrity for each job
verifiedJobs := 0
for srcJob := range srcBackend.Iter(false) {
if srcJob.Meta == nil {
continue
}
// Load job from destination
dstJobMeta, err := dstBackend.LoadJobMeta(srcJob.Meta)
if err != nil {
t.Errorf("Failed to load job %d from destination: %s", srcJob.Meta.JobID, err.Error())
continue
}
// Verify basic metadata
if dstJobMeta.JobID != srcJob.Meta.JobID {
t.Errorf("JobID mismatch: expected %d, got %d", srcJob.Meta.JobID, dstJobMeta.JobID)
}
if dstJobMeta.Cluster != srcJob.Meta.Cluster {
t.Errorf("Cluster mismatch for job %d: expected %s, got %s",
srcJob.Meta.JobID, srcJob.Meta.Cluster, dstJobMeta.Cluster)
}
if dstJobMeta.StartTime != srcJob.Meta.StartTime {
t.Errorf("StartTime mismatch for job %d: expected %d, got %d",
srcJob.Meta.JobID, srcJob.Meta.StartTime, dstJobMeta.StartTime)
}
// Load and verify job data
srcData, err := srcBackend.LoadJobData(srcJob.Meta)
if err != nil {
t.Errorf("Failed to load job data from source: %s", err.Error())
continue
}
dstData, err := dstBackend.LoadJobData(srcJob.Meta)
if err != nil {
t.Errorf("Failed to load job data from destination: %s", err.Error())
continue
}
// Verify metric data exists
if len(srcData) != len(dstData) {
t.Errorf("Metric count mismatch for job %d: expected %d, got %d",
srcJob.Meta.JobID, len(srcData), len(dstData))
}
verifiedJobs++
}
if verifiedJobs == 0 {
t.Error("No jobs were verified")
}
t.Logf("Successfully verified %d jobs", verifiedJobs)
}
// TestImportEmptyArchive tests importing from an empty archive
func TestImportEmptyArchive(t *testing.T) {
tmpdir := t.TempDir()
srcArchive := filepath.Join(tmpdir, "empty-archive")
dstDb := filepath.Join(tmpdir, "dst-archive.db")
// Create empty source archive
if err := os.MkdirAll(srcArchive, 0755); err != nil {
t.Fatalf("Failed to create source directory: %s", err.Error())
}
// Write version file
versionFile := filepath.Join(srcArchive, "version.txt")
if err := os.WriteFile(versionFile, []byte("3"), 0644); err != nil {
t.Fatalf("Failed to write version file: %s", err.Error())
}
// Initialize backends
srcConfig := fmt.Sprintf(`{"kind":"file","path":"%s"}`, srcArchive)
srcBackend, err := archive.InitBackend(json.RawMessage(srcConfig))
if err != nil {
t.Fatalf("Failed to initialize source backend: %s", err.Error())
}
dstConfig := fmt.Sprintf(`{"kind":"sqlite","dbPath":"%s"}`, dstDb)
dstBackend, err := archive.InitBackend(json.RawMessage(dstConfig))
if err != nil {
t.Fatalf("Failed to initialize destination backend: %s", err.Error())
}
// Perform import
imported, failed, err := importArchive(srcBackend, dstBackend)
if err != nil {
t.Errorf("Import from empty archive should not fail: %s", err.Error())
}
if imported != 0 {
t.Errorf("Expected 0 imported jobs, got %d", imported)
}
if failed != 0 {
t.Errorf("Expected 0 failed jobs, got %d", failed)
}
}
// TestImportDuplicateJobs tests that duplicate jobs are skipped
func TestImportDuplicateJobs(t *testing.T) {
tmpdir := t.TempDir()
srcArchive := filepath.Join(tmpdir, "src-archive")
dstDb := filepath.Join(tmpdir, "dst-archive.db")
// Copy test data
testDataPath := "../../pkg/archive/testdata/archive"
if _, err := os.Stat(testDataPath); os.IsNotExist(err) {
t.Skip("Test data not found, skipping integration test")
}
if err := util.CopyDir(testDataPath, srcArchive); err != nil {
t.Fatalf("Failed to copy test data: %s", err.Error())
}
// Initialize backends
srcConfig := fmt.Sprintf(`{"kind":"file","path":"%s"}`, srcArchive)
srcBackend, err := archive.InitBackend(json.RawMessage(srcConfig))
if err != nil {
t.Fatalf("Failed to initialize source backend: %s", err.Error())
}
dstConfig := fmt.Sprintf(`{"kind":"sqlite","dbPath":"%s"}`, dstDb)
dstBackend, err := archive.InitBackend(json.RawMessage(dstConfig))
if err != nil {
t.Fatalf("Failed to initialize destination backend: %s", err.Error())
}
// First import
imported1, _, err := importArchive(srcBackend, dstBackend)
if err != nil {
t.Fatalf("First import failed: %s", err.Error())
}
// Second import (should skip all jobs)
imported2, _, err := importArchive(srcBackend, dstBackend)
if err != nil {
t.Errorf("Second import failed: %s", err.Error())
}
if imported2 != 0 {
t.Errorf("Second import should skip all jobs, but imported %d", imported2)
}
t.Logf("First import: %d jobs, Second import: %d jobs (all skipped as expected)", imported1, imported2)
}
// TestJobStub is a helper test to verify that the job stub used in tests matches the schema
func TestJobStub(t *testing.T) {
job := &schema.Job{
JobID: 123,
Cluster: "test-cluster",
StartTime: 1234567890,
}
if job.JobID != 123 {
t.Errorf("Expected JobID 123, got %d", job.JobID)
}
}

View File

@@ -9,6 +9,8 @@ import (
"flag"
"fmt"
"os"
"sync"
"sync/atomic"
"time"
"github.com/ClusterCockpit/cc-backend/internal/config"
@@ -31,9 +33,106 @@ func parseDate(in string) int64 {
return 0
}
// importArchive imports all jobs from a source archive backend to a destination archive backend.
// It uses parallel processing with a worker pool to improve performance.
// Returns the number of successfully imported jobs, failed jobs, and any error encountered.
func importArchive(srcBackend, dstBackend archive.ArchiveBackend) (int, int, error) {
cclog.Info("Starting parallel archive import...")
// Use atomic counters for thread-safe updates
var imported int32
var failed int32
var skipped int32
// Number of parallel workers
numWorkers := 4
cclog.Infof("Using %d parallel workers", numWorkers)
// Create channels for job distribution
jobs := make(chan archive.JobContainer, numWorkers*2)
// WaitGroup to track worker completion
var wg sync.WaitGroup
// Start worker goroutines
for i := 0; i < numWorkers; i++ {
wg.Add(1)
go func(workerID int) {
defer wg.Done()
for job := range jobs {
// Validate job metadata
if job.Meta == nil {
cclog.Warn("Skipping job with nil metadata")
atomic.AddInt32(&failed, 1)
continue
}
// Validate job data
if job.Data == nil {
cclog.Warnf("Job %d from cluster %s has no metric data, skipping",
job.Meta.JobID, job.Meta.Cluster)
atomic.AddInt32(&failed, 1)
continue
}
// Check if job already exists in destination
if dstBackend.Exists(job.Meta) {
cclog.Debugf("Job %d (cluster: %s, start: %d) already exists in destination, skipping",
job.Meta.JobID, job.Meta.Cluster, job.Meta.StartTime)
atomic.AddInt32(&skipped, 1)
continue
}
// Import job to destination
if err := dstBackend.ImportJob(job.Meta, job.Data); err != nil {
cclog.Errorf("Failed to import job %d from cluster %s: %s",
job.Meta.JobID, job.Meta.Cluster, err.Error())
atomic.AddInt32(&failed, 1)
continue
}
// Successfully imported
newCount := atomic.AddInt32(&imported, 1)
if newCount%100 == 0 {
cclog.Infof("Progress: %d jobs imported, %d skipped, %d failed",
newCount, atomic.LoadInt32(&skipped), atomic.LoadInt32(&failed))
}
}
}(i)
}
// Feed jobs to workers
go func() {
for job := range srcBackend.Iter(true) {
jobs <- job
}
close(jobs)
}()
// Wait for all workers to complete
wg.Wait()
finalImported := int(atomic.LoadInt32(&imported))
finalFailed := int(atomic.LoadInt32(&failed))
finalSkipped := int(atomic.LoadInt32(&skipped))
cclog.Infof("Import completed: %d jobs imported, %d skipped, %d failed",
finalImported, finalSkipped, finalFailed)
if finalFailed > 0 {
return finalImported, finalFailed, fmt.Errorf("%d jobs failed to import", finalFailed)
}
return finalImported, finalFailed, nil
}
func main() {
var srcPath, flagConfigFile, flagLogLevel, flagRemoveCluster, flagRemoveAfter, flagRemoveBefore string
var flagLogDateTime, flagValidate bool
var flagSrcConfig, flagDstConfig string
var flagLogDateTime, flagValidate, flagImport bool
flag.StringVar(&srcPath, "s", "./var/job-archive", "Specify the source job archive path. Default is ./var/job-archive")
flag.BoolVar(&flagLogDateTime, "logdate", false, "Set this flag to add date and time to log messages")
@@ -43,14 +142,54 @@ func main() {
flag.StringVar(&flagRemoveBefore, "remove-before", "", "Remove all jobs with start time before date (Format: 2006-Jan-04)")
flag.StringVar(&flagRemoveAfter, "remove-after", "", "Remove all jobs with start time after date (Format: 2006-Jan-04)")
flag.BoolVar(&flagValidate, "validate", false, "Set this flag to validate a job archive against the json schema")
flag.BoolVar(&flagImport, "import", false, "Import jobs from source archive to destination archive")
flag.StringVar(&flagSrcConfig, "src-config", "", "Source archive backend configuration (JSON), e.g. '{\"kind\":\"file\",\"path\":\"./archive\"}'")
flag.StringVar(&flagDstConfig, "dst-config", "", "Destination archive backend configuration (JSON), e.g. '{\"kind\":\"sqlite\",\"dbPath\":\"./archive.db\"}'")
flag.Parse()
archiveCfg := fmt.Sprintf("{\"kind\": \"file\",\"path\": \"%s\"}", srcPath)
cclog.Init(flagLogLevel, flagLogDateTime)
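	// Example invocation of the import mode (backend configurations are
	// illustrative):
	//
	//   ./archive-manager -import \
	//     -src-config '{"kind":"file","path":"./var/job-archive"}' \
	//     -dst-config '{"kind":"sqlite","dbPath":"./var/job-archive.db"}'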
// Handle import mode
if flagImport {
if flagSrcConfig == "" || flagDstConfig == "" {
cclog.Fatal("Both --src-config and --dst-config must be specified for import mode")
}
cclog.Info("Import mode: initializing source and destination backends...")
// Initialize source backend
srcBackend, err := archive.InitBackend(json.RawMessage(flagSrcConfig))
if err != nil {
cclog.Fatalf("Failed to initialize source backend: %s", err.Error())
}
cclog.Info("Source backend initialized successfully")
// Initialize destination backend
dstBackend, err := archive.InitBackend(json.RawMessage(flagDstConfig))
if err != nil {
cclog.Fatalf("Failed to initialize destination backend: %s", err.Error())
}
cclog.Info("Destination backend initialized successfully")
// Perform import
imported, failed, err := importArchive(srcBackend, dstBackend)
if err != nil {
cclog.Errorf("Import completed with errors: %s", err.Error())
if failed > 0 {
os.Exit(1)
}
}
cclog.Infof("Import finished successfully: %d jobs imported", imported)
os.Exit(0)
}
ccconf.Init(flagConfigFile)
// Load and check main configuration
if cfg := ccconf.GetPackageConfig("main"); cfg != nil {
if clustercfg := ccconf.GetPackageConfig("clusters"); clustercfg != nil {

View File

@@ -0,0 +1,133 @@
# Archive Migration Tool
## Overview
The `archive-migration` tool migrates job archives from old schema versions to the current schema version. It handles schema changes such as the `exclusive` → `shared` field transformation and adds or removes fields as needed.
## Features
- **Parallel Processing**: Uses worker pool for fast migration
- **Dry-Run Mode**: Preview changes without modifying files
- **Safe Transformations**: Applies well-defined schema transformations
- **Progress Reporting**: Shows real-time migration progress
- **Error Handling**: Continues on individual failures, reports at end
## Schema Transformations
### Exclusive → Shared
Converts the old `exclusive` integer field to the new `shared` string field:
- `0` → `"multi_user"`
- `1` → `"none"`
- `2` → `"single_user"`
### Missing Fields
Adds fields required by current schema:
- `submitTime`: Defaults to `startTime` if missing
- `energy`: Defaults to `0.0`
- `requestedMemory`: Defaults to `0`
- `shared`: Defaults to `"none"` if still missing after transformation
### Deprecated Fields
Removes fields no longer in schema:
- `mem_used_max`, `flops_any_avg`, `mem_bw_avg`
- `load_avg`, `net_bw_avg`, `net_data_vol_total`
- `file_bw_avg`, `file_data_vol_total`
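For illustration, a minimal `meta.json` fragment before and after migration could look like this (the values are made up and only the affected fields are shown):

Before:

```json
{
  "jobId": 1403244,
  "startTime": 1608923076,
  "exclusive": 1,
  "flops_any_avg": 42.5
}
```

After:

```json
{
  "jobId": 1403244,
  "startTime": 1608923076,
  "submitTime": 1608923076,
  "shared": "none",
  "energy": 0,
  "requestedMemory": 0
}
```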
## Usage
### Build
```bash
cd tools/archive-migration
go build
```
### Dry Run (Preview Changes)
```bash
./archive-migration --archive /path/to/archive --dry-run
```
### Migrate Archive
```bash
# IMPORTANT: Backup your archive first!
cp -r /path/to/archive /path/to/archive-backup
# Run migration
./archive-migration --archive /path/to/archive
```
### Command-Line Options
- `--archive <path>`: Path to job archive (required)
- `--dry-run`: Preview changes without modifying files
- `--workers <n>`: Number of parallel workers (default: 4)
- `--loglevel <level>`: Logging level: debug, info, warn, err, fatal, crit (default: info)
- `--logdate`: Add timestamps to log messages
## Examples
```bash
# Preview what would change
./archive-migration --archive ./var/job-archive --dry-run
# Migrate with verbose logging
./archive-migration --archive ./var/job-archive --loglevel debug
# Migrate with 8 workers for faster processing
./archive-migration --archive ./var/job-archive --workers 8
```
## Safety
> [!CAUTION]
> **Always backup your archive before running migration!**
The tool modifies `meta.json` files in place. While transformations are designed to be safe, unexpected issues could occur. Follow these safety practices:
1. **Always run with `--dry-run` first** to preview changes
2. **Backup your archive** before migration
3. **Test on a copy** of your archive first
4. **Verify results** after migration
## Verification
After migration, verify the archive:
```bash
# Use archive-manager to check the archive
cd ../archive-manager
./archive-manager -s /path/to/migrated-archive
# Or validate specific jobs
./archive-manager -s /path/to/migrated-archive --validate
```
## Troubleshooting
### Migration Failures
If individual jobs fail to migrate:
- Check the error messages for specific files
- Examine the failing `meta.json` files manually
- Fix invalid JSON or unexpected field types
- Re-run migration (already-migrated jobs will be processed again)
### Performance
For large archives:
- Increase `--workers` for more parallelism
- Use `--loglevel warn` to reduce log output
- Monitor disk I/O if migration is slow
## Technical Details
The migration process:
1. Walks archive directory recursively
2. Finds all `meta.json` files
3. Distributes jobs to worker pool
4. For each job:
- Reads JSON file
- Applies transformations in order
- Writes back migrated data (if not dry-run)
5. Reports statistics and errors
Transformations are idempotent - running migration multiple times is safe (though not recommended for performance).

Binary file not shown.

View File

@@ -0,0 +1,67 @@
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved. This file is part of cc-backend.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package main
import (
"flag"
"fmt"
"os"
cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
)
func main() {
var archivePath string
var dryRun bool
var numWorkers int
var flagLogLevel string
var flagLogDateTime bool
flag.StringVar(&archivePath, "archive", "", "Path to job archive to migrate (required)")
flag.BoolVar(&dryRun, "dry-run", false, "Preview changes without modifying files")
flag.IntVar(&numWorkers, "workers", 4, "Number of parallel workers")
flag.StringVar(&flagLogLevel, "loglevel", "info", "Sets the logging level: `[debug,info,warn (default),err,fatal,crit]`")
flag.BoolVar(&flagLogDateTime, "logdate", false, "Add date and time to log messages")
flag.Parse()
// Initialize logger
cclog.Init(flagLogLevel, flagLogDateTime)
// Validate inputs
if archivePath == "" {
fmt.Fprintf(os.Stderr, "Error: --archive flag is required\n\n")
flag.Usage()
os.Exit(1)
}
// Check if archive path exists
if _, err := os.Stat(archivePath); os.IsNotExist(err) {
cclog.Fatalf("Archive path does not exist: %s", archivePath)
}
// Display warning for non-dry-run mode
if !dryRun {
cclog.Warn("WARNING: This will modify files in the archive!")
cclog.Warn("It is strongly recommended to backup your archive first.")
cclog.Warn("Run with --dry-run first to preview changes.")
cclog.Info("")
}
// Run migration
migrated, failed, err := migrateArchive(archivePath, dryRun, numWorkers)
if err != nil {
cclog.Errorf("Migration completed with errors: %s", err.Error())
if failed > 0 {
os.Exit(1)
}
}
if dryRun {
cclog.Infof("Dry run completed: %d jobs would be migrated", migrated)
} else {
cclog.Infof("Migration completed successfully: %d jobs migrated", migrated)
}
}

View File

@@ -0,0 +1,232 @@
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved. This file is part of cc-backend.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package main
import (
"encoding/json"
"fmt"
"os"
"path/filepath"
"sync"
"sync/atomic"
cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
)
// transformExclusiveToShared converts the old 'exclusive' field to the new 'shared' field
// Mapping: 0 -> "multi_user", 1 -> "none", 2 -> "single_user"
func transformExclusiveToShared(jobData map[string]interface{}) error {
// Check if 'exclusive' field exists
if exclusive, ok := jobData["exclusive"]; ok {
var exclusiveVal int
// Handle both int and float64 (JSON unmarshaling can produce float64)
switch v := exclusive.(type) {
case float64:
exclusiveVal = int(v)
case int:
exclusiveVal = v
default:
return fmt.Errorf("exclusive field has unexpected type: %T", exclusive)
}
// Map exclusive to shared
var shared string
switch exclusiveVal {
case 0:
shared = "multi_user"
case 1:
shared = "none"
case 2:
shared = "single_user"
default:
return fmt.Errorf("invalid exclusive value: %d", exclusiveVal)
}
// Add shared field and remove exclusive
jobData["shared"] = shared
delete(jobData, "exclusive")
cclog.Debugf("Transformed exclusive=%d to shared=%s", exclusiveVal, shared)
}
return nil
}
// addMissingFields adds fields that are required in the current schema but might be missing in old archives
func addMissingFields(jobData map[string]interface{}) error {
// Add submitTime if missing (default to startTime)
if _, ok := jobData["submitTime"]; !ok {
if startTime, ok := jobData["startTime"]; ok {
jobData["submitTime"] = startTime
cclog.Debug("Added submitTime (defaulted to startTime)")
}
}
// Add energy if missing (default to 0.0)
if _, ok := jobData["energy"]; !ok {
jobData["energy"] = 0.0
}
// Add requestedMemory if missing (default to 0)
if _, ok := jobData["requestedMemory"]; !ok {
jobData["requestedMemory"] = 0
}
// Ensure shared field exists (if still missing, default to "none")
if _, ok := jobData["shared"]; !ok {
jobData["shared"] = "none"
cclog.Debug("Added default shared field: none")
}
return nil
}
// removeDeprecatedFields removes fields that are no longer in the current schema
func removeDeprecatedFields(jobData map[string]interface{}) error {
// List of deprecated fields to remove
deprecatedFields := []string{
"mem_used_max",
"flops_any_avg",
"mem_bw_avg",
"load_avg",
"net_bw_avg",
"net_data_vol_total",
"file_bw_avg",
"file_data_vol_total",
}
for _, field := range deprecatedFields {
if _, ok := jobData[field]; ok {
delete(jobData, field)
cclog.Debugf("Removed deprecated field: %s", field)
}
}
return nil
}
// migrateJobMetadata applies all transformations to a job metadata map
func migrateJobMetadata(jobData map[string]interface{}) error {
// Apply transformations in order
if err := transformExclusiveToShared(jobData); err != nil {
return fmt.Errorf("transformExclusiveToShared failed: %w", err)
}
if err := addMissingFields(jobData); err != nil {
return fmt.Errorf("addMissingFields failed: %w", err)
}
if err := removeDeprecatedFields(jobData); err != nil {
return fmt.Errorf("removeDeprecatedFields failed: %w", err)
}
return nil
}
// processJob reads, migrates, and writes a job metadata file
func processJob(metaPath string, dryRun bool) error {
// Read the meta.json file
data, err := os.ReadFile(metaPath)
if err != nil {
return fmt.Errorf("failed to read %s: %w", metaPath, err)
}
// Parse JSON
var jobData map[string]interface{}
if err := json.Unmarshal(data, &jobData); err != nil {
return fmt.Errorf("failed to parse JSON from %s: %w", metaPath, err)
}
// Apply migrations
if err := migrateJobMetadata(jobData); err != nil {
return fmt.Errorf("migration failed for %s: %w", metaPath, err)
}
// If dry-run, just report what would change
if dryRun {
cclog.Infof("Would migrate: %s", metaPath)
return nil
}
// Write back the migrated data
migratedData, err := json.MarshalIndent(jobData, "", " ")
if err != nil {
return fmt.Errorf("failed to marshal migrated data: %w", err)
}
if err := os.WriteFile(metaPath, migratedData, 0644); err != nil {
return fmt.Errorf("failed to write %s: %w", metaPath, err)
}
return nil
}
// migrateArchive walks through an archive directory and migrates all meta.json files
func migrateArchive(archivePath string, dryRun bool, numWorkers int) (int, int, error) {
cclog.Infof("Starting archive migration at %s", archivePath)
if dryRun {
cclog.Info("DRY RUN MODE - no files will be modified")
}
var migrated int32
var failed int32
// Channel for job paths
	jobs := make(chan string, numWorkers*2)
var wg sync.WaitGroup
// Start worker goroutines
for i := 0; i < numWorkers; i++ {
wg.Add(1)
go func(workerID int) {
defer wg.Done()
for metaPath := range jobs {
if err := processJob(metaPath, dryRun); err != nil {
cclog.Errorf("Failed to migrate %s: %s", metaPath, err.Error())
atomic.AddInt32(&failed, 1)
continue
}
newCount := atomic.AddInt32(&migrated, 1)
if newCount%100 == 0 {
cclog.Infof("Progress: %d jobs migrated, %d failed", newCount, atomic.LoadInt32(&failed))
}
}
}(i)
}
// Walk the archive directory and find all meta.json files
	go func() {
		err := filepath.Walk(archivePath, func(path string, info os.FileInfo, err error) error {
			if err != nil {
				cclog.Errorf("Error accessing path %s: %s", path, err.Error())
				return nil // Continue walking
			}
			if !info.IsDir() && info.Name() == "meta.json" {
				jobs <- path
			}
			return nil
		})
		if err != nil {
			cclog.Errorf("Error walking archive directory %s: %s", archivePath, err.Error())
		}
		close(jobs)
	}()
// Wait for all workers to complete
wg.Wait()
finalMigrated := int(atomic.LoadInt32(&migrated))
finalFailed := int(atomic.LoadInt32(&failed))
cclog.Infof("Migration completed: %d jobs migrated, %d failed", finalMigrated, finalFailed)
if finalFailed > 0 {
return finalMigrated, finalFailed, fmt.Errorf("%d jobs failed to migrate", finalFailed)
}
return finalMigrated, finalFailed, nil
}

tools/grepCCLog.pl Executable file
View File

@@ -0,0 +1,74 @@
#!/usr/bin/env perl
my $filename = $ARGV[0];
my $Tday = $ARGV[1];
open FILE,"<$filename";
my %startedJob;
my %stoppedJob;
foreach ( <FILE> ) {
if ( /Oct ([0-9]+) .*new job \(id: ([0-9]+)\): cluster=([a-z]+), jobId=([0-9]+), user=([a-z0-9]+),/ ) {
my $day = $1;
my $id = $2;
my $cluster = $3;
my $jobId = $4;
my $user = $5;
if ( $cluster eq 'woody' && $day eq $Tday ) {
$startedJob{$id} = {
'day' => $day,
'cluster' => $cluster,
'jobId' => $jobId,
'user' => $user
};
}
}
if ( /Oct ([0-9]+) .*archiving job... \(dbid: ([0-9]+)\): cluster=([a-z]+), jobId=([0-9]+), user=([a-z0-9]+),/ ) {
my $day = $1;
my $id = $2;
my $cluster = $3;
my $jobId = $4;
my $user = $5;
if ( $cluster eq 'woody' ) {
$stoppedJob{$id} = {
'day' => $day,
'cluster' => $cluster,
'jobId' => $jobId,
'user' => $user
};
}
}
}
close FILE;
my $started = 0;
my $count = 0;
my %users;
foreach my $key (keys %startedJob) {
$started++;
if ( not exists $stoppedJob{$key} ) {
$count++;
if ( not exists $users{$startedJob{$key}->{'user'}} ) {
$users{$startedJob{$key}->{'user'}} = 1;
} else {
$users{$startedJob{$key}->{'user'}}++;
}
print <<END;
======
jobID: $startedJob{$key}->{'jobId'} User: $startedJob{$key}->{'user'}
======
END
}
}
foreach my $key ( keys %users ) {
print "$key => $users{$key}\n";
}
print "Not stopped: $count of $started\n";