Merge branch 'hotfix' of github.com:ClusterCockpit/cc-backend into hotfix

2026-06-17 17:07:29 +02:00 · 2026-03-11 05:06:26 +01:00
parent 5d3d77620e 5c72664162
commit 1cf99206a9
317 changed files with 32717 additions and 15040 deletions
--- a/internal/repository/dbConnection.go
+++ b/internal/repository/dbConnection.go
@@ -12,7 +12,7 @@ import (
 	"sync"
 	"time"

-	cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
+	cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger"
 	"github.com/jmoiron/sqlx"
 	"github.com/mattn/go-sqlite3"
 	"github.com/qustavo/sqlhooks/v2"
@@ -51,7 +51,7 @@ func setupSqlite(db *sql.DB) error {
 	return nil
 }

-func Connect(driver string, db string) {
+func Connect(db string) {
 	var err error
 	var dbHandle *sqlx.DB

@@ -64,39 +64,31 @@ func Connect(driver string, db string) {
 			ConnectionMaxIdleTime: repoConfig.ConnectionMaxIdleTime,
 		}

-		switch driver {
-		case "sqlite3":
-			// TODO: Have separate DB handles for Writes and Reads
-			// Optimize SQLite connection: https://kerkour.com/sqlite-for-servers
-			connectionURLParams := make(url.Values)
-			connectionURLParams.Add("_txlock", "immediate")
-			connectionURLParams.Add("_journal_mode", "WAL")
-			connectionURLParams.Add("_busy_timeout", "5000")
-			connectionURLParams.Add("_synchronous", "NORMAL")
-			connectionURLParams.Add("_cache_size", "1000000000")
-			connectionURLParams.Add("_foreign_keys", "true")
-			opts.URL = fmt.Sprintf("file:%s?%s", opts.URL, connectionURLParams.Encode())
+		// TODO: Have separate DB handles for Writes and Reads
+		// Optimize SQLite connection: https://kerkour.com/sqlite-for-servers
+		connectionURLParams := make(url.Values)
+		connectionURLParams.Add("_txlock", "immediate")
+		connectionURLParams.Add("_journal_mode", "WAL")
+		connectionURLParams.Add("_busy_timeout", "5000")
+		connectionURLParams.Add("_synchronous", "NORMAL")
+		connectionURLParams.Add("_cache_size", "1000000000")
+		connectionURLParams.Add("_foreign_keys", "true")
+		opts.URL = fmt.Sprintf("file:%s?%s", opts.URL, connectionURLParams.Encode())

-			if cclog.Loglevel() == "debug" {
-				sql.Register("sqlite3WithHooks", sqlhooks.Wrap(&sqlite3.SQLiteDriver{}, &Hooks{}))
-				dbHandle, err = sqlx.Open("sqlite3WithHooks", opts.URL)
-			} else {
-				dbHandle, err = sqlx.Open("sqlite3", opts.URL)
-			}
-
-			err = setupSqlite(dbHandle.DB)
-			if err != nil {
-				cclog.Abortf("Failed sqlite db setup.\nError: %s\n", err.Error())
-			}
-		case "mysql":
-			opts.URL += "?multiStatements=true"
-			dbHandle, err = sqlx.Open("mysql", opts.URL)
-		default:
-			cclog.Abortf("DB Connection: Unsupported database driver '%s'.\n", driver)
+		if cclog.Loglevel() == "debug" {
+			sql.Register("sqlite3WithHooks", sqlhooks.Wrap(&sqlite3.SQLiteDriver{}, &Hooks{}))
+			dbHandle, err = sqlx.Open("sqlite3WithHooks", opts.URL)
+		} else {
+			dbHandle, err = sqlx.Open("sqlite3", opts.URL)
 		}

 		if err != nil {
-			cclog.Abortf("DB Connection: Could not connect to '%s' database with sqlx.Open().\nError: %s\n", driver, err.Error())
+			cclog.Abortf("DB Connection: Could not connect to SQLite database with sqlx.Open().\nError: %s\n", err.Error())
+		}
+
+		err = setupSqlite(dbHandle.DB)
+		if err != nil {
+			cclog.Abortf("Failed sqlite db setup.\nError: %s\n", err.Error())
 		}

 		dbHandle.SetMaxOpenConns(opts.MaxOpenConnections)
@@ -104,8 +96,8 @@ func Connect(driver string, db string) {
 		dbHandle.SetConnMaxLifetime(opts.ConnectionMaxLifetime)
 		dbHandle.SetConnMaxIdleTime(opts.ConnectionMaxIdleTime)

-		dbConnInstance = &DBConnection{DB: dbHandle, Driver: driver}
-		err = checkDBVersion(driver, dbHandle.DB)
+		dbConnInstance = &DBConnection{DB: dbHandle}
+		err = checkDBVersion(dbHandle.DB)
 		if err != nil {
 			cclog.Abortf("DB Connection: Failed DB version check.\nError: %s\n", err.Error())
 		}
@@ -119,3 +111,26 @@ func GetConnection() *DBConnection {

 	return dbConnInstance
 }
+
+// ResetConnection closes the current database connection and resets the connection state.
+// This function is intended for testing purposes only to allow test isolation.
+func ResetConnection() error {
+	if dbConnInstance != nil && dbConnInstance.DB != nil {
+		if err := dbConnInstance.DB.Close(); err != nil {
+			return fmt.Errorf("failed to close database connection: %w", err)
+		}
+	}
+
+	dbConnInstance = nil
+	dbConnOnce = sync.Once{}
+	jobRepoInstance = nil
+	jobRepoOnce = sync.Once{}
+	nodeRepoInstance = nil
+	nodeRepoOnce = sync.Once{}
+	userRepoInstance = nil
+	userRepoOnce = sync.Once{}
+	userCfgRepoInstance = nil
+	userCfgRepoOnce = sync.Once{}
+
+	return nil
+}
--- a/internal/repository/hooks.go
+++ b/internal/repository/hooks.go
@@ -2,13 +2,14 @@
 // All rights reserved. This file is part of cc-backend.
 // Use of this source code is governed by a MIT-style
 // license that can be found in the LICENSE file.
+
 package repository

 import (
 	"context"
 	"time"

-	cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
+	cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger"
 )

 // Hooks satisfies the sqlhook.Hooks interface
--- a/internal/repository/hooks_test.go
+++ b/internal/repository/hooks_test.go
@@ -0,0 +1,274 @@
+// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
+// All rights reserved. This file is part of cc-backend.
+// Use of this source code is governed by a MIT-style
+// license that can be found in the LICENSE file.
+package repository
+
+import (
+	"context"
+	"testing"
+	"time"
+
+	"github.com/ClusterCockpit/cc-lib/v2/schema"
+	_ "github.com/mattn/go-sqlite3"
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+)
+
+// MockJobHook is a test double that records every callback it receives so
+// tests can assert which hooks fired and with which jobs.
+type MockJobHook struct {
+	startJobs   []*schema.Job // jobs passed to JobStartCallback, in call order
+	stopJobs    []*schema.Job // jobs passed to JobStopCallback, in call order
+	startCalled bool          // set once JobStartCallback ran at least once
+	stopCalled  bool          // set once JobStopCallback ran at least once
+}
+
+// JobStartCallback records the started job and marks the start hook as called.
+func (m *MockJobHook) JobStartCallback(job *schema.Job) {
+	m.startJobs, m.startCalled = append(m.startJobs, job), true
+}
+
+// JobStopCallback records the stopped job and marks the stop hook as called.
+func (m *MockJobHook) JobStopCallback(job *schema.Job) {
+	m.stopJobs, m.stopCalled = append(m.stopJobs, job), true
+}
+
+// TestRegisterJobHook checks how RegisterJobHook fills the package-level hooks
+// slice: hooks are expected in registration order, and registering nil is
+// expected to leave the slice without entries.
+//
+// Each subtest starts from hooks == nil and clears the slice again on exit so
+// no registration leaks into other tests.
+func TestRegisterJobHook(t *testing.T) {
+	t.Run("register single hook", func(t *testing.T) {
+		hooks = nil
+		defer func() { hooks = nil }()
+
+		only := &MockJobHook{}
+		RegisterJobHook(only)
+
+		assert.NotNil(t, hooks)
+		assert.Len(t, hooks, 1)
+		assert.Equal(t, only, hooks[0])
+	})
+
+	t.Run("register multiple hooks", func(t *testing.T) {
+		hooks = nil
+		defer func() { hooks = nil }()
+
+		first, second := &MockJobHook{}, &MockJobHook{}
+		RegisterJobHook(first)
+		RegisterJobHook(second)
+
+		assert.Len(t, hooks, 2)
+		assert.Equal(t, first, hooks[0])
+		assert.Equal(t, second, hooks[1])
+	})
+
+	t.Run("register nil hook does not add to hooks", func(t *testing.T) {
+		hooks = nil
+		defer func() { hooks = nil }()
+
+		RegisterJobHook(nil)
+
+		// A nil slice has length zero, so a single length assertion covers
+		// both "still nil" and "allocated but empty".
+		assert.Len(t, hooks, 0, "Nil hook should not be added")
+	})
+}
+
+// TestCallJobStartHooks verifies that CallJobStartHooks forwards every job to
+// every registered hook, and that it copes with no hooks and with no jobs.
+//
+// Each subtest resets the package-level hooks slice before and after running.
+func TestCallJobStartHooks(t *testing.T) {
+	t.Run("call start hooks with single job", func(t *testing.T) {
+		hooks = nil
+		defer func() { hooks = nil }()
+
+		recorder := &MockJobHook{}
+		RegisterJobHook(recorder)
+
+		started := []*schema.Job{
+			{JobID: 123, User: "testuser", Cluster: "testcluster"},
+		}
+		CallJobStartHooks(started)
+
+		assert.True(t, recorder.startCalled)
+		assert.False(t, recorder.stopCalled)
+		assert.Len(t, recorder.startJobs, 1)
+		assert.Equal(t, int64(123), recorder.startJobs[0].JobID)
+	})
+
+	t.Run("call start hooks with multiple jobs", func(t *testing.T) {
+		hooks = nil
+		defer func() { hooks = nil }()
+
+		recorder := &MockJobHook{}
+		RegisterJobHook(recorder)
+
+		CallJobStartHooks([]*schema.Job{
+			{JobID: 1, User: "user1", Cluster: "cluster1"},
+			{JobID: 2, User: "user2", Cluster: "cluster2"},
+			{JobID: 3, User: "user3", Cluster: "cluster3"},
+		})
+
+		assert.True(t, recorder.startCalled)
+		assert.Len(t, recorder.startJobs, 3)
+		// Jobs must arrive in the order they were passed in.
+		for idx, wantID := range []int64{1, 2, 3} {
+			assert.Equal(t, wantID, recorder.startJobs[idx].JobID)
+		}
+	})
+
+	t.Run("call start hooks with multiple registered hooks", func(t *testing.T) {
+		hooks = nil
+		defer func() { hooks = nil }()
+
+		first, second := &MockJobHook{}, &MockJobHook{}
+		RegisterJobHook(first)
+		RegisterJobHook(second)
+
+		started := &schema.Job{JobID: 456, User: "testuser", Cluster: "testcluster"}
+		CallJobStartHooks([]*schema.Job{started})
+
+		for _, recorder := range []*MockJobHook{first, second} {
+			assert.True(t, recorder.startCalled)
+			assert.Len(t, recorder.startJobs, 1)
+		}
+	})
+
+	t.Run("call start hooks with nil hooks", func(t *testing.T) {
+		hooks = nil
+		defer func() { hooks = nil }()
+
+		// Nothing to assert: the call only has to return without panicking.
+		CallJobStartHooks([]*schema.Job{
+			{JobID: 789, User: "testuser", Cluster: "testcluster"},
+		})
+	})
+
+	t.Run("call start hooks with empty job list", func(t *testing.T) {
+		hooks = nil
+		defer func() { hooks = nil }()
+
+		recorder := &MockJobHook{}
+		RegisterJobHook(recorder)
+
+		CallJobStartHooks([]*schema.Job{})
+
+		assert.False(t, recorder.startCalled)
+		assert.Len(t, recorder.startJobs, 0)
+	})
+}
+
+// TestCallJobStopHooks verifies that CallJobStopHooks hands the stopped job to
+// every registered hook and that it can be called with no hooks registered.
+//
+// Each subtest resets the package-level hooks slice before and after running.
+func TestCallJobStopHooks(t *testing.T) {
+	t.Run("call stop hooks with single job", func(t *testing.T) {
+		hooks = nil
+		defer func() { hooks = nil }()
+
+		recorder := &MockJobHook{}
+		RegisterJobHook(recorder)
+
+		CallJobStopHooks(&schema.Job{JobID: 123, User: "testuser", Cluster: "testcluster"})
+
+		assert.True(t, recorder.stopCalled)
+		assert.False(t, recorder.startCalled)
+		assert.Len(t, recorder.stopJobs, 1)
+		assert.Equal(t, int64(123), recorder.stopJobs[0].JobID)
+	})
+
+	t.Run("call stop hooks with multiple registered hooks", func(t *testing.T) {
+		hooks = nil
+		defer func() { hooks = nil }()
+
+		first, second := &MockJobHook{}, &MockJobHook{}
+		RegisterJobHook(first)
+		RegisterJobHook(second)
+
+		CallJobStopHooks(&schema.Job{JobID: 456, User: "testuser", Cluster: "testcluster"})
+
+		for _, recorder := range []*MockJobHook{first, second} {
+			assert.True(t, recorder.stopCalled)
+			assert.Len(t, recorder.stopJobs, 1)
+		}
+	})
+
+	t.Run("call stop hooks with nil hooks", func(t *testing.T) {
+		hooks = nil
+		defer func() { hooks = nil }()
+
+		// Nothing to assert: the call only has to return without panicking.
+		CallJobStopHooks(&schema.Job{JobID: 789, User: "testuser", Cluster: "testcluster"})
+	})
+}
+
+func TestSQLHooks(t *testing.T) {
+	_ = setup(t)
+
+	t.Run("hooks log queries in debug mode", func(t *testing.T) {
+		h := &Hooks{}
+
+		ctx := context.Background()
+		query := "SELECT * FROM job WHERE job_id = ?"
+		args := []any{123}
+
+		ctxWithTime, err := h.Before(ctx, query, args...)
+		require.NoError(t, err)
+		assert.NotNil(t, ctxWithTime)
+
+		beginTime := ctxWithTime.Value("begin")
+		require.NotNil(t, beginTime)
+		_, ok := beginTime.(time.Time)
+		assert.True(t, ok, "Begin time should be time.Time")
+
+		time.Sleep(10 * time.Millisecond)
+
+		ctxAfter, err := h.After(ctxWithTime, query, args...)
+		require.NoError(t, err)
+		assert.NotNil(t, ctxAfter)
+	})
+}
+
+// TestHookIntegration runs one job through the start and the stop hook of a
+// single registered MockJobHook and checks both callbacks saw the same job.
+func TestHookIntegration(t *testing.T) {
+	t.Run("hooks are called during job lifecycle", func(t *testing.T) {
+		hooks = nil
+		defer func() { hooks = nil }()
+
+		recorder := &MockJobHook{}
+		RegisterJobHook(recorder)
+
+		lifecycleJob := &schema.Job{
+			JobID:   999,
+			User:    "integrationuser",
+			Cluster: "integrationcluster",
+		}
+
+		CallJobStartHooks([]*schema.Job{lifecycleJob})
+		assert.True(t, recorder.startCalled)
+		assert.Len(t, recorder.startJobs, 1)
+
+		CallJobStopHooks(lifecycleJob)
+		assert.True(t, recorder.stopCalled)
+		assert.Len(t, recorder.stopJobs, 1)
+
+		assert.Equal(t, recorder.startJobs[0].JobID, recorder.stopJobs[0].JobID)
+	})
+}
--- a/internal/repository/job.go
+++ b/internal/repository/job.go
--- a/internal/repository/jobCreate.go
+++ b/internal/repository/jobCreate.go
@@ -2,14 +2,15 @@
 // All rights reserved. This file is part of cc-backend.
 // Use of this source code is governed by a MIT-style
 // license that can be found in the LICENSE file.
+
 package repository

 import (
 	"encoding/json"
 	"fmt"

-	cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
-	"github.com/ClusterCockpit/cc-lib/schema"
+	cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger"
+	"github.com/ClusterCockpit/cc-lib/v2/schema"
 	sq "github.com/Masterminds/squirrel"
 )

@@ -29,6 +30,27 @@ const NamedJobInsert string = `INSERT INTO job (
  :shared, :monitoring_status, :smt, :job_state, :start_time, :duration, :walltime, :footprint,  :energy, :energy_footprint, :resources, :meta_data
 );`

+// InsertJobDirect runs NamedJobInsert for job, writing straight into the job
+// table instead of job_cache, and returns the row ID reported for the insert.
+// Choose it when the returned ID is needed for follow-up work on the job table
+// (for example tagging) or when importing jobs that have already finished.
+//
+// The repository mutex is held for the duration of the call. On failure a
+// warning is logged and the underlying error is returned with an ID of 0.
+func (r *JobRepository) InsertJobDirect(job *schema.Job) (int64, error) {
+	r.Mutex.Lock()
+	defer r.Mutex.Unlock()
+
+	result, execErr := r.DB.NamedExec(NamedJobInsert, job)
+	if execErr != nil {
+		cclog.Warn("Error while NamedJobInsert (direct)")
+		return 0, execErr
+	}
+
+	insertedID, idErr := result.LastInsertId()
+	if idErr != nil {
+		cclog.Warn("Error while getting last insert ID (direct)")
+		return 0, idErr
+	}
+
+	return insertedID, nil
+}
+
 func (r *JobRepository) InsertJob(job *schema.Job) (int64, error) {
 	r.Mutex.Lock()
 	defer r.Mutex.Unlock()
@@ -70,8 +92,9 @@ func (r *JobRepository) SyncJobs() ([]*schema.Job, error) {
 		jobs = append(jobs, job)
 	}

+	// Use INSERT OR IGNORE to skip jobs already transferred by the stop path
 	_, err = r.DB.Exec(
-		"INSERT INTO job (job_id, cluster, subcluster, start_time, hpc_user, project, cluster_partition, array_job_id, num_nodes, num_hwthreads, num_acc, shared, monitoring_status, smt, job_state, duration, walltime, footprint, energy, energy_footprint, resources, meta_data) SELECT job_id, cluster, subcluster, start_time, hpc_user, project, cluster_partition, array_job_id, num_nodes, num_hwthreads, num_acc, shared, monitoring_status, smt, job_state, duration, walltime, footprint, energy, energy_footprint, resources, meta_data FROM job_cache")
+		"INSERT OR IGNORE INTO job (job_id, cluster, subcluster, start_time, hpc_user, project, cluster_partition, array_job_id, num_nodes, num_hwthreads, num_acc, shared, monitoring_status, smt, job_state, duration, walltime, footprint, energy, energy_footprint, resources, meta_data) SELECT job_id, cluster, subcluster, start_time, hpc_user, project, cluster_partition, array_job_id, num_nodes, num_hwthreads, num_acc, shared, monitoring_status, smt, job_state, duration, walltime, footprint, energy, energy_footprint, resources, meta_data FROM job_cache")
 	if err != nil {
 		cclog.Warnf("Error while Job sync: %v", err)
 		return nil, err
@@ -83,9 +106,48 @@ func (r *JobRepository) SyncJobs() ([]*schema.Job, error) {
 		return nil, err
 	}

+	// Resolve correct job.id from the job table. The IDs read from job_cache
+	// are from a different auto-increment sequence and must not be used to
+	// query the job table.
+	for _, job := range jobs {
+		var newID int64
+		if err := sq.Select("job.id").From("job").
+			Where("job.job_id = ? AND job.cluster = ? AND job.start_time = ?",
+				job.JobID, job.Cluster, job.StartTime).
+			RunWith(r.stmtCache).QueryRow().Scan(&newID); err != nil {
+			cclog.Warnf("SyncJobs: could not resolve job table id for job %d on %s: %v",
+				job.JobID, job.Cluster, err)
+			continue
+		}
+		job.ID = &newID
+	}
+
 	return jobs, nil
 }

+// TransferCachedJobToMain moves a job from job_cache to the job table.
+// Caller must hold r.Mutex. Returns the new job table ID.
+//
+// The copy into job and the delete from job_cache run in one transaction, so a
+// failure in either step leaves both tables unchanged instead of leaving the
+// job present in both. An error is returned when no job_cache row has the
+// given cacheID; without that check LastInsertId would report the ID of an
+// unrelated earlier insert.
+func (r *JobRepository) TransferCachedJobToMain(cacheID int64) (int64, error) {
+	tx, err := r.DB.Begin()
+	if err != nil {
+		return 0, fmt.Errorf("starting transaction for transfer of cached job %d failed: %w", cacheID, err)
+	}
+	// Undo partial work on every early return. After a successful Commit the
+	// rollback only reports that the transaction is already done, which is
+	// deliberately ignored.
+	defer func() { _ = tx.Rollback() }()
+
+	res, err := tx.Exec(
+		"INSERT INTO job (job_id, cluster, subcluster, start_time, hpc_user, project, cluster_partition, array_job_id, num_nodes, num_hwthreads, num_acc, shared, monitoring_status, smt, job_state, duration, walltime, footprint, energy, energy_footprint, resources, meta_data) SELECT job_id, cluster, subcluster, start_time, hpc_user, project, cluster_partition, array_job_id, num_nodes, num_hwthreads, num_acc, shared, monitoring_status, smt, job_state, duration, walltime, footprint, energy, energy_footprint, resources, meta_data FROM job_cache WHERE id = ?",
+		cacheID)
+	if err != nil {
+		return 0, fmt.Errorf("transferring cached job %d to main table failed: %w", cacheID, err)
+	}
+
+	// INSERT ... SELECT succeeds with zero rows when cacheID does not exist.
+	if copied, rowsErr := res.RowsAffected(); rowsErr == nil && copied == 0 {
+		return 0, fmt.Errorf("transferring cached job %d to main table failed: no such row in job_cache", cacheID)
+	}
+
+	newID, err := res.LastInsertId()
+	if err != nil {
+		return 0, fmt.Errorf("getting new job ID after transfer failed: %w", err)
+	}
+
+	if _, err = tx.Exec("DELETE FROM job_cache WHERE id = ?", cacheID); err != nil {
+		return 0, fmt.Errorf("deleting cached job %d after transfer failed: %w", cacheID, err)
+	}
+
+	if err = tx.Commit(); err != nil {
+		return 0, fmt.Errorf("committing transfer of cached job %d failed: %w", cacheID, err)
+	}
+
+	return newID, nil
+}
+
 // Start inserts a new job in the table, returning the unique job ID.
 // Statistics are not transfered!
 func (r *JobRepository) Start(job *schema.Job) (id int64, err error) {
@@ -107,41 +169,46 @@ func (r *JobRepository) Start(job *schema.Job) (id int64, err error) {
 	return r.InsertJob(job)
 }

+// StartDirect JSON-encodes the job's Footprint, Resources and MetaData into
+// the matching Raw* fields and then inserts the job through InsertJobDirect,
+// i.e. into the job table rather than job_cache. Use it when the returned ID
+// is needed right away for job table operations such as adding tags.
+//
+// It returns -1 and a wrapped error if any of the three encodings fails;
+// otherwise it returns whatever InsertJobDirect returns.
+func (r *JobRepository) StartDirect(job *schema.Job) (id int64, err error) {
+	if job.RawFootprint, err = json.Marshal(job.Footprint); err != nil {
+		return -1, fmt.Errorf("REPOSITORY/JOB > encoding footprint field failed: %w", err)
+	}
+
+	if job.RawResources, err = json.Marshal(job.Resources); err != nil {
+		return -1, fmt.Errorf("REPOSITORY/JOB > encoding resources field failed: %w", err)
+	}
+
+	if job.RawMetaData, err = json.Marshal(job.MetaData); err != nil {
+		return -1, fmt.Errorf("REPOSITORY/JOB > encoding metaData field failed: %w", err)
+	}
+
+	return r.InsertJobDirect(job)
+}
+
 // Stop updates the job with the database id jobId using the provided arguments.
 func (r *JobRepository) Stop(
-	jobId int64,
+	jobID int64,
 	duration int32,
 	state schema.JobState,
 	monitoringStatus int32,
 ) (err error) {
 	// Invalidate cache entries as job state is changing
-	r.cache.Del(fmt.Sprintf("metadata:%d", jobId))
-	r.cache.Del(fmt.Sprintf("energyFootprint:%d", jobId))
+	r.cache.Del(fmt.Sprintf("metadata:%d", jobID))
+	r.cache.Del(fmt.Sprintf("energyFootprint:%d", jobID))

 	stmt := sq.Update("job").
 		Set("job_state", state).
 		Set("duration", duration).
 		Set("monitoring_status", monitoringStatus).
-		Where("job.id = ?", jobId)
+		Where("job.id = ?", jobID)

 	_, err = stmt.RunWith(r.stmtCache).Exec()
 	return err
 }

-func (r *JobRepository) StopCached(
-	jobId int64,
-	duration int32,
-	state schema.JobState,
-	monitoringStatus int32,
-) (err error) {
-	// Note: StopCached updates job_cache table, not the main job table
-	// Cache invalidation happens when job is synced to main table
-	stmt := sq.Update("job_cache").
-		Set("job_state", state).
-		Set("duration", duration).
-		Set("monitoring_status", monitoringStatus).
-		Where("job_cache.id = ?", jobId)
-
-	_, err = stmt.RunWith(r.stmtCache).Exec()
-	return err
-}
--- a/internal/repository/jobCreate_test.go
+++ b/internal/repository/jobCreate_test.go
@@ -0,0 +1,607 @@
+// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
+// All rights reserved. This file is part of cc-backend.
+// Use of this source code is governed by a MIT-style
+// license that can be found in the LICENSE file.
+package repository
+
+import (
+	"encoding/json"
+	"testing"
+
+	"github.com/ClusterCockpit/cc-lib/v2/schema"
+	_ "github.com/mattn/go-sqlite3"
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+)
+
+// createTestJob returns a running single-node job fixture for the given job ID
+// and cluster. All other fields carry fixed values, including one resource
+// entry, a populated footprint map and a populated metadata map.
+func createTestJob(jobID int64, cluster string) *schema.Job {
+	nodes := []*schema.Resource{
+		{Hostname: "node01", HWThreads: []int{0, 1, 2, 3}},
+	}
+
+	footprint := map[string]float64{
+		"cpu_load":      50.0,
+		"mem_used":      8000.0,
+		"flops_any":     0.5,
+		"mem_bw":        10.0,
+		"net_bw":        2.0,
+		"file_bw":       1.0,
+		"cpu_used":      2.0,
+		"cpu_load_core": 12.5,
+	}
+
+	meta := map[string]string{
+		"jobName":     "test_job",
+		"queue":       "normal",
+		"qosName":     "default",
+		"accountName": "testaccount",
+	}
+
+	fixture := &schema.Job{
+		// Identity
+		JobID:      jobID,
+		Cluster:    cluster,
+		SubCluster: "main",
+		Partition:  "batch",
+		User:       "testuser",
+		Project:    "testproject",
+
+		// Allocation
+		NumNodes:     1,
+		NumHWThreads: 4,
+		NumAcc:       0,
+		SMT:          1,
+		Shared:       "none",
+
+		// State and timing
+		State:            schema.JobStateRunning,
+		MonitoringStatus: schema.MonitoringStatusRunningOrArchiving,
+		StartTime:        1234567890,
+		Duration:         0,
+		Walltime:         3600,
+
+		// Structured payloads
+		Resources: nodes,
+		Footprint: footprint,
+		MetaData:  meta,
+	}
+
+	return fixture
+}
+
+// TestInsertJob checks that InsertJob writes the job into job_cache and that
+// the stored column values match the inserted job.
+func TestInsertJob(t *testing.T) {
+	r := setup(t)
+
+	// encodeRaw fills the Raw* fields from their structured counterparts.
+	// Marshal errors are ignored here, as the fixture values are plain data.
+	encodeRaw := func(j *schema.Job) {
+		j.RawResources, _ = json.Marshal(j.Resources)
+		j.RawFootprint, _ = json.Marshal(j.Footprint)
+		j.RawMetaData, _ = json.Marshal(j.MetaData)
+	}
+
+	t.Run("successful insertion", func(t *testing.T) {
+		job := createTestJob(999001, "testcluster")
+		encodeRaw(job)
+
+		insertID, err := r.InsertJob(job)
+		require.NoError(t, err, "InsertJob should succeed")
+		assert.Greater(t, insertID, int64(0), "Should return valid insert ID")
+
+		// Verify job was inserted into job_cache
+		var rows int
+		lookup := r.DB.QueryRow("SELECT COUNT(*) FROM job_cache WHERE job_id = ? AND cluster = ?",
+			job.JobID, job.Cluster)
+		require.NoError(t, lookup.Scan(&rows))
+		assert.Equal(t, 1, rows, "Job should be in job_cache table")
+
+		// Clean up
+		_, err = r.DB.Exec("DELETE FROM job_cache WHERE job_id = ? AND cluster = ?", job.JobID, job.Cluster)
+		require.NoError(t, err)
+	})
+
+	t.Run("insertion with all fields", func(t *testing.T) {
+		job := createTestJob(999002, "testcluster")
+		job.ArrayJobID = 5000
+		job.Energy = 1500.5
+		encodeRaw(job)
+
+		insertID, err := r.InsertJob(job)
+		require.NoError(t, err)
+		assert.Greater(t, insertID, int64(0))
+
+		// Verify all fields were stored correctly
+		var stored schema.Job
+		row := r.DB.QueryRow(`SELECT job_id, hpc_user, project, cluster, array_job_id, energy 
+			FROM job_cache WHERE id = ?`, insertID)
+		err = row.Scan(
+			&stored.JobID, &stored.User, &stored.Project,
+			&stored.Cluster, &stored.ArrayJobID, &stored.Energy)
+		require.NoError(t, err)
+
+		assert.Equal(t, job.JobID, stored.JobID)
+		assert.Equal(t, job.User, stored.User)
+		assert.Equal(t, job.Project, stored.Project)
+		assert.Equal(t, job.Cluster, stored.Cluster)
+		assert.Equal(t, job.ArrayJobID, stored.ArrayJobID)
+		assert.Equal(t, job.Energy, stored.Energy)
+
+		// Clean up
+		_, err = r.DB.Exec("DELETE FROM job_cache WHERE id = ?", insertID)
+		require.NoError(t, err)
+	})
+}
+
+// TestStart checks that Start inserts the job into job_cache with its
+// resources, footprint and metadata stored as JSON, including the cases of an
+// empty footprint map and nil metadata.
+func TestStart(t *testing.T) {
+	r := setup(t)
+
+	t.Run("successful job start with JSON encoding", func(t *testing.T) {
+		job := createTestJob(999003, "testcluster")
+
+		cacheID, err := r.Start(job)
+		require.NoError(t, err, "Start should succeed")
+		assert.Greater(t, cacheID, int64(0), "Should return valid insert ID")
+
+		// Verify job was inserted and JSON fields were encoded
+		var resourcesJSON, footprintJSON, metaJSON []byte
+		row := r.DB.QueryRow(`SELECT resources, footprint, meta_data FROM job_cache WHERE id = ?`, cacheID)
+		require.NoError(t, row.Scan(&resourcesJSON, &footprintJSON, &metaJSON))
+
+		// Verify resources JSON
+		var gotResources []*schema.Resource
+		require.NoError(t, json.Unmarshal(resourcesJSON, &gotResources), "Resources should be valid JSON")
+		assert.Len(t, gotResources, 1)
+		assert.Equal(t, "node01", gotResources[0].Hostname)
+
+		// Verify footprint JSON
+		var gotFootprint map[string]float64
+		require.NoError(t, json.Unmarshal(footprintJSON, &gotFootprint), "Footprint should be valid JSON")
+		assert.Equal(t, 50.0, gotFootprint["cpu_load"])
+		assert.Equal(t, 8000.0, gotFootprint["mem_used"])
+
+		// Verify metadata JSON
+		var gotMeta map[string]string
+		require.NoError(t, json.Unmarshal(metaJSON, &gotMeta), "MetaData should be valid JSON")
+		assert.Equal(t, "test_job", gotMeta["jobName"])
+
+		// Clean up
+		_, err = r.DB.Exec("DELETE FROM job_cache WHERE id = ?", cacheID)
+		require.NoError(t, err)
+	})
+
+	t.Run("job start with empty footprint", func(t *testing.T) {
+		job := createTestJob(999004, "testcluster")
+		job.Footprint = map[string]float64{}
+
+		cacheID, err := r.Start(job)
+		require.NoError(t, err)
+		assert.Greater(t, cacheID, int64(0))
+
+		// Verify empty footprint was encoded as empty JSON object
+		var footprintJSON []byte
+		row := r.DB.QueryRow(`SELECT footprint FROM job_cache WHERE id = ?`, cacheID)
+		require.NoError(t, row.Scan(&footprintJSON))
+		assert.Equal(t, []byte("{}"), footprintJSON)
+
+		// Clean up
+		_, err = r.DB.Exec("DELETE FROM job_cache WHERE id = ?", cacheID)
+		require.NoError(t, err)
+	})
+
+	t.Run("job start with nil metadata", func(t *testing.T) {
+		job := createTestJob(999005, "testcluster")
+		job.MetaData = nil
+
+		cacheID, err := r.Start(job)
+		require.NoError(t, err)
+		assert.Greater(t, cacheID, int64(0))
+
+		// Clean up
+		_, err = r.DB.Exec("DELETE FROM job_cache WHERE id = ?", cacheID)
+		require.NoError(t, err)
+	})
+}
+
+// TestStop exercises Stop against rows in the job table.
+//
+// Start places new jobs in job_cache, while Stop updates the job table, so
+// every subtest first moves its row across and looks up the ID it received in
+// the job table before calling Stop. Each subtest deletes its row afterwards.
+func TestStop(t *testing.T) {
+	r := setup(t)
+
+	// moveToJobTable copies the job_cache row cacheID into the job table
+	// (leaving out id so the job table assigns its own), deletes the cache
+	// row, and returns the ID of the new job table row. It simulates what
+	// SyncJobs does for a single job.
+	moveToJobTable := func(t *testing.T, job *schema.Job, cacheID int64) int64 {
+		t.Helper()
+
+		_, err := r.DB.Exec(`INSERT INTO job (job_id, cluster, subcluster, submit_time, start_time, hpc_user, project, 
+			cluster_partition, array_job_id, duration, walltime, job_state, meta_data, resources, num_nodes, 
+			num_hwthreads, num_acc, smt, shared, monitoring_status, energy, energy_footprint, footprint) 
+			SELECT job_id, cluster, subcluster, submit_time, start_time, hpc_user, project, 
+			cluster_partition, array_job_id, duration, walltime, job_state, meta_data, resources, num_nodes, 
+			num_hwthreads, num_acc, smt, shared, monitoring_status, energy, energy_footprint, footprint 
+			FROM job_cache WHERE id = ?`, cacheID)
+		require.NoError(t, err)
+
+		_, err = r.DB.Exec("DELETE FROM job_cache WHERE id = ?", cacheID)
+		require.NoError(t, err)
+
+		// The job table has its own ID sequence, so resolve the new ID by
+		// the job's identifying columns.
+		var jobTableID int64
+		err = r.DB.QueryRow("SELECT id FROM job WHERE job_id = ? AND cluster = ? AND start_time = ?",
+			job.JobID, job.Cluster, job.StartTime).Scan(&jobTableID)
+		require.NoError(t, err)
+
+		return jobTableID
+	}
+
+	// deleteJob removes the row created by a subtest from the job table.
+	deleteJob := func(t *testing.T, id int64) {
+		t.Helper()
+
+		_, err := r.DB.Exec("DELETE FROM job WHERE id = ?", id)
+		require.NoError(t, err)
+	}
+
+	t.Run("successful job stop", func(t *testing.T) {
+		job := createTestJob(999106, "testcluster")
+		cacheID, err := r.Start(job)
+		require.NoError(t, err)
+
+		id := moveToJobTable(t, job, cacheID)
+
+		// Stop the job
+		duration := int32(3600)
+		state := schema.JobStateCompleted
+		monitoringStatus := int32(schema.MonitoringStatusArchivingSuccessful)
+
+		err = r.Stop(id, duration, state, monitoringStatus)
+		require.NoError(t, err, "Stop should succeed")
+
+		// Verify job was updated
+		var retrievedDuration int32
+		var retrievedState string
+		var retrievedMonStatus int32
+		err = r.DB.QueryRow(`SELECT duration, job_state, monitoring_status FROM job WHERE id = ?`, id).Scan(
+			&retrievedDuration, &retrievedState, &retrievedMonStatus)
+		require.NoError(t, err)
+		assert.Equal(t, duration, retrievedDuration)
+		assert.Equal(t, string(state), retrievedState)
+		assert.Equal(t, monitoringStatus, retrievedMonStatus)
+
+		deleteJob(t, id)
+	})
+
+	t.Run("stop updates job state transitions", func(t *testing.T) {
+		job := createTestJob(999107, "testcluster")
+		cacheID, err := r.Start(job)
+		require.NoError(t, err)
+
+		id := moveToJobTable(t, job, cacheID)
+
+		// Stop the job with different duration
+		err = r.Stop(id, 7200, schema.JobStateCompleted, int32(schema.MonitoringStatusArchivingSuccessful))
+		require.NoError(t, err)
+
+		// Verify the duration was updated correctly
+		var duration int32
+		err = r.DB.QueryRow(`SELECT duration FROM job WHERE id = ?`, id).Scan(&duration)
+		require.NoError(t, err)
+		assert.Equal(t, int32(7200), duration, "Duration should be updated to 7200")
+
+		deleteJob(t, id)
+	})
+
+	t.Run("stop with different states", func(t *testing.T) {
+		testCases := []struct {
+			name             string
+			jobID            int64
+			state            schema.JobState
+			monitoringStatus int32
+		}{
+			{"completed", 999108, schema.JobStateCompleted, int32(schema.MonitoringStatusArchivingSuccessful)},
+			{"failed", 999118, schema.JobStateFailed, int32(schema.MonitoringStatusArchivingSuccessful)},
+			{"cancelled", 999119, schema.JobStateCancelled, int32(schema.MonitoringStatusArchivingSuccessful)},
+			{"timeout", 999120, schema.JobStateTimeout, int32(schema.MonitoringStatusArchivingSuccessful)},
+		}
+
+		for _, tc := range testCases {
+			t.Run(tc.name, func(t *testing.T) {
+				job := createTestJob(tc.jobID, "testcluster")
+				cacheID, err := r.Start(job)
+				require.NoError(t, err)
+
+				id := moveToJobTable(t, job, cacheID)
+
+				// Stop with specific state
+				err = r.Stop(id, 1800, tc.state, tc.monitoringStatus)
+				require.NoError(t, err)
+
+				// Verify state was set correctly
+				var retrievedState string
+				err = r.DB.QueryRow(`SELECT job_state FROM job WHERE id = ?`, id).Scan(&retrievedState)
+				require.NoError(t, err)
+				assert.Equal(t, string(tc.state), retrievedState)
+
+				deleteJob(t, id)
+			})
+		}
+	})
+}
+
+// TestTransferCachedJobToMain checks that TransferCachedJobToMain creates the
+// job in the job table, removes it from job_cache, and keeps its job_id and
+// cluster values. The repository mutex is taken around each call because the
+// function expects its caller to hold it.
+func TestTransferCachedJobToMain(t *testing.T) {
+	r := setup(t)
+
+	t.Run("successful transfer from cache to main", func(t *testing.T) {
+		// Insert a job in job_cache
+		cached := createTestJob(999009, "testcluster")
+		cacheID, err := r.Start(cached)
+		require.NoError(t, err)
+
+		// Transfer the cached job to the main table
+		r.Mutex.Lock()
+		mainID, err := r.TransferCachedJobToMain(cacheID)
+		r.Mutex.Unlock()
+		require.NoError(t, err, "TransferCachedJobToMain should succeed")
+		assert.NotEqual(t, cacheID, mainID, "New ID should differ from cache ID")
+
+		var rows int
+
+		// Verify job exists in job table
+		inMain := r.DB.QueryRow(`SELECT COUNT(*) FROM job WHERE id = ?`, mainID)
+		require.NoError(t, inMain.Scan(&rows))
+		assert.Equal(t, 1, rows, "Job should exist in main table")
+
+		// Verify job was removed from job_cache
+		inCache := r.DB.QueryRow(`SELECT COUNT(*) FROM job_cache WHERE id = ?`, cacheID)
+		require.NoError(t, inCache.Scan(&rows))
+		assert.Equal(t, 0, rows, "Job should be removed from cache")
+
+		// Clean up
+		_, err = r.DB.Exec("DELETE FROM job WHERE id = ?", mainID)
+		require.NoError(t, err)
+	})
+
+	t.Run("transfer preserves job data", func(t *testing.T) {
+		// Insert a job in job_cache
+		cached := createTestJob(999010, "testcluster")
+		cacheID, err := r.Start(cached)
+		require.NoError(t, err)
+
+		// Transfer the cached job
+		r.Mutex.Lock()
+		mainID, err := r.TransferCachedJobToMain(cacheID)
+		r.Mutex.Unlock()
+		require.NoError(t, err)
+
+		// Verify the transferred job has the correct data
+		var (
+			gotJobID   int64
+			gotCluster string
+		)
+		row := r.DB.QueryRow(`SELECT job_id, cluster FROM job WHERE id = ?`, mainID)
+		require.NoError(t, row.Scan(&gotJobID, &gotCluster))
+		assert.Equal(t, cached.JobID, gotJobID)
+		assert.Equal(t, cached.Cluster, gotCluster)
+
+		// Clean up
+		_, err = r.DB.Exec("DELETE FROM job WHERE id = ?", mainID)
+		require.NoError(t, err)
+	})
+}
+
+// TestSyncJobs verifies that SyncJobs moves every job_cache row into the job
+// table, keeps the stored values intact, and returns jobs carrying job-table ids.
+func TestSyncJobs(t *testing.T) {
+	r := setup(t)
+
+	// scheduleCleanup removes the given test jobs from both tables when the
+	// subtest ends. t.Cleanup also runs after a require.* failure, so a broken
+	// subtest cannot leave rows behind that skew the counts of the next one.
+	scheduleCleanup := func(t *testing.T, cluster string, jobIDs ...int64) {
+		t.Cleanup(func() {
+			for _, jobID := range jobIDs {
+				for _, table := range []string{"job_cache", "job"} {
+					_, err := r.DB.Exec("DELETE FROM "+table+" WHERE job_id = ? AND cluster = ?", jobID, cluster)
+					assert.NoError(t, err)
+				}
+			}
+		})
+	}
+
+	t.Run("sync jobs from cache to main table", func(t *testing.T) {
+		// Ensure cache is empty first
+		_, err := r.DB.Exec("DELETE FROM job_cache")
+		require.NoError(t, err)
+
+		// Insert multiple jobs in job_cache
+		job1 := createTestJob(999011, "testcluster")
+		job2 := createTestJob(999012, "testcluster")
+		job3 := createTestJob(999013, "testcluster")
+		scheduleCleanup(t, job1.Cluster, job1.JobID, job2.JobID, job3.JobID)
+
+		_, err = r.Start(job1)
+		require.NoError(t, err)
+		_, err = r.Start(job2)
+		require.NoError(t, err)
+		_, err = r.Start(job3)
+		require.NoError(t, err)
+
+		// Verify jobs are in job_cache
+		var cacheCount int
+		err = r.DB.QueryRow("SELECT COUNT(*) FROM job_cache WHERE job_id IN (?, ?, ?)",
+			job1.JobID, job2.JobID, job3.JobID).Scan(&cacheCount)
+		require.NoError(t, err)
+		assert.Equal(t, 3, cacheCount, "All jobs should be in job_cache")
+
+		// Sync jobs
+		jobs, err := r.SyncJobs()
+		require.NoError(t, err, "SyncJobs should succeed")
+		assert.Equal(t, 3, len(jobs), "Should return 3 synced jobs")
+
+		// Verify jobs were moved to job table
+		var jobCount int
+		err = r.DB.QueryRow("SELECT COUNT(*) FROM job WHERE job_id IN (?, ?, ?)",
+			job1.JobID, job2.JobID, job3.JobID).Scan(&jobCount)
+		require.NoError(t, err)
+		assert.Equal(t, 3, jobCount, "All jobs should be in job table")
+
+		// Verify job_cache was cleared
+		err = r.DB.QueryRow("SELECT COUNT(*) FROM job_cache WHERE job_id IN (?, ?, ?)",
+			job1.JobID, job2.JobID, job3.JobID).Scan(&cacheCount)
+		require.NoError(t, err)
+		assert.Equal(t, 0, cacheCount, "job_cache should be empty after sync")
+	})
+
+	t.Run("sync preserves job data", func(t *testing.T) {
+		// Ensure cache is empty first
+		_, err := r.DB.Exec("DELETE FROM job_cache")
+		require.NoError(t, err)
+
+		// Insert a job with specific data
+		job := createTestJob(999014, "testcluster")
+		job.ArrayJobID = 7777
+		job.Energy = 2500.75
+		job.Duration = 1800
+		scheduleCleanup(t, job.Cluster, job.JobID)
+
+		id, err := r.Start(job)
+		require.NoError(t, err)
+
+		// Update some fields to simulate job progress
+		result, err := r.DB.Exec(`UPDATE job_cache SET duration = ?, energy = ? WHERE id = ?`,
+			3600, 3000.5, id)
+		require.NoError(t, err)
+		rowsAffected, err := result.RowsAffected()
+		require.NoError(t, err)
+		require.Equal(t, int64(1), rowsAffected, "UPDATE should affect exactly 1 row")
+
+		// Verify the update worked
+		var checkDuration int32
+		var checkEnergy float64
+		err = r.DB.QueryRow(`SELECT duration, energy FROM job_cache WHERE id = ?`, id).Scan(&checkDuration, &checkEnergy)
+		require.NoError(t, err)
+		require.Equal(t, int32(3600), checkDuration, "Duration should be updated to 3600 before sync")
+		require.Equal(t, 3000.5, checkEnergy, "Energy should be updated to 3000.5 before sync")
+
+		// Sync jobs
+		jobs, err := r.SyncJobs()
+		require.NoError(t, err)
+		require.Equal(t, 1, len(jobs), "Should return exactly 1 synced job")
+
+		// Verify in database
+		var dbJob schema.Job
+		err = r.DB.QueryRow(`SELECT job_id, hpc_user, project, cluster, array_job_id, duration, energy 
+			FROM job WHERE job_id = ? AND cluster = ?`, job.JobID, job.Cluster).Scan(
+			&dbJob.JobID, &dbJob.User, &dbJob.Project, &dbJob.Cluster,
+			&dbJob.ArrayJobID, &dbJob.Duration, &dbJob.Energy)
+		require.NoError(t, err)
+		assert.Equal(t, job.JobID, dbJob.JobID)
+		// array_job_id is the one distinctive value not rewritten by the UPDATE
+		// above, so it checks that untouched columns survive the sync as well.
+		assert.Equal(t, job.ArrayJobID, dbJob.ArrayJobID)
+		assert.Equal(t, int32(3600), dbJob.Duration)
+		assert.Equal(t, 3000.5, dbJob.Energy)
+	})
+
+	t.Run("sync returns job table IDs not cache IDs", func(t *testing.T) {
+		// Ensure cache is empty first
+		_, err := r.DB.Exec("DELETE FROM job_cache")
+		require.NoError(t, err)
+
+		// Insert a job into job_cache
+		job := createTestJob(999015, "testcluster")
+		scheduleCleanup(t, job.Cluster, job.JobID)
+		cacheID, err := r.Start(job)
+		require.NoError(t, err)
+
+		// Sync jobs
+		jobs, err := r.SyncJobs()
+		require.NoError(t, err)
+		require.Equal(t, 1, len(jobs))
+		// Guard the dereference below: a missing ID must fail the test, not panic.
+		require.NotNil(t, jobs[0].ID, "synced job should carry a database ID")
+
+		// The returned ID must refer to the job table, not job_cache
+		var jobTableID int64
+		err = r.DB.QueryRow("SELECT id FROM job WHERE job_id = ? AND cluster = ? AND start_time = ?",
+			jobs[0].JobID, jobs[0].Cluster, jobs[0].StartTime).Scan(&jobTableID)
+		require.NoError(t, err)
+		assert.Equal(t, jobTableID, *jobs[0].ID,
+			"returned ID should match the job table row, not the cache ID (%d)", cacheID)
+	})
+
+	t.Run("sync with empty cache returns empty list", func(t *testing.T) {
+		// Ensure cache is empty
+		_, err := r.DB.Exec("DELETE FROM job_cache")
+		require.NoError(t, err)
+
+		// Sync should return empty list
+		jobs, err := r.SyncJobs()
+		require.NoError(t, err)
+		assert.Equal(t, 0, len(jobs), "Should return empty list when cache is empty")
+	})
+}
+
+// TestInsertJobDirect verifies that InsertJobDirect writes straight into the
+// job table (bypassing job_cache) and that the returned id is usable as a
+// foreign key for tag operations.
+func TestInsertJobDirect(t *testing.T) {
+	r := setup(t)
+
+	t.Run("inserts into job table not cache", func(t *testing.T) {
+		job := createTestJob(999020, "testcluster")
+
+		// The test marshals the raw JSON columns itself before InsertJobDirect;
+		// a marshalling failure must stop the test instead of inserting nil blobs.
+		var err error
+		job.RawResources, err = json.Marshal(job.Resources)
+		require.NoError(t, err)
+		job.RawFootprint, err = json.Marshal(job.Footprint)
+		require.NoError(t, err)
+		job.RawMetaData, err = json.Marshal(job.MetaData)
+		require.NoError(t, err)
+
+		id, err := r.InsertJobDirect(job)
+		require.NoError(t, err, "InsertJobDirect should succeed")
+		// Registered right after the insert so the row is removed even when a
+		// later require.* call aborts the subtest.
+		t.Cleanup(func() {
+			_, err := r.DB.Exec("DELETE FROM job WHERE id = ?", id)
+			assert.NoError(t, err)
+		})
+		assert.Greater(t, id, int64(0), "Should return valid insert ID")
+
+		// Verify job is in job table
+		var count int
+		err = r.DB.QueryRow("SELECT COUNT(*) FROM job WHERE id = ?", id).Scan(&count)
+		require.NoError(t, err)
+		assert.Equal(t, 1, count, "Job should be in job table")
+
+		// Verify job is NOT in job_cache
+		err = r.DB.QueryRow("SELECT COUNT(*) FROM job_cache WHERE job_id = ? AND cluster = ?",
+			job.JobID, job.Cluster).Scan(&count)
+		require.NoError(t, err)
+		assert.Equal(t, 0, count, "Job should NOT be in job_cache")
+	})
+
+	t.Run("returned ID works for tag operations", func(t *testing.T) {
+		job := createTestJob(999021, "testcluster")
+
+		var err error
+		job.RawResources, err = json.Marshal(job.Resources)
+		require.NoError(t, err)
+		job.RawFootprint, err = json.Marshal(job.Footprint)
+		require.NoError(t, err)
+		job.RawMetaData, err = json.Marshal(job.MetaData)
+		require.NoError(t, err)
+
+		id, err := r.InsertJobDirect(job)
+		require.NoError(t, err)
+		// Remove the jobtag link before the job row it references.
+		t.Cleanup(func() {
+			_, err := r.DB.Exec("DELETE FROM jobtag WHERE job_id = ?", id)
+			assert.NoError(t, err)
+			_, err = r.DB.Exec("DELETE FROM job WHERE id = ?", id)
+			assert.NoError(t, err)
+		})
+
+		// Adding a tag using the returned ID should succeed (FK constraint on jobtag)
+		err = r.ImportTag(id, "test_type", "test_name", "global")
+		require.NoError(t, err, "ImportTag should succeed with direct insert ID")
+	})
+}
+
+// TestStartDirect verifies that StartDirect inserts the job into the job table
+// and stores the resources column as decodable JSON.
+func TestStartDirect(t *testing.T) {
+	r := setup(t)
+
+	t.Run("inserts into job table with JSON encoding", func(t *testing.T) {
+		job := createTestJob(999022, "testcluster")
+
+		id, err := r.StartDirect(job)
+		require.NoError(t, err, "StartDirect should succeed")
+		// Registered right after the insert so the row is removed even when a
+		// later require.* call aborts the subtest.
+		t.Cleanup(func() {
+			_, err := r.DB.Exec("DELETE FROM job WHERE id = ?", id)
+			assert.NoError(t, err)
+		})
+		assert.Greater(t, id, int64(0))
+
+		// Verify job is in job table with encoded JSON
+		var rawResources []byte
+		err = r.DB.QueryRow("SELECT resources FROM job WHERE id = ?", id).Scan(&rawResources)
+		require.NoError(t, err)
+
+		var resources []*schema.Resource
+		err = json.Unmarshal(rawResources, &resources)
+		require.NoError(t, err, "Resources should be valid JSON")
+		// An empty or "null" array decodes without error; stop here so the
+		// index below reports a test failure rather than panicking.
+		require.NotEmpty(t, resources, "Resources should contain at least one entry")
+		assert.Equal(t, "node01", resources[0].Hostname)
+	})
+}
--- a/internal/repository/jobFind.go
+++ b/internal/repository/jobFind.go
@@ -2,6 +2,7 @@
 // All rights reserved. This file is part of cc-backend.
 // Use of this source code is governed by a MIT-style
 // license that can be found in the LICENSE file.
+
 package repository

 import (
@@ -11,8 +12,8 @@ import (
 	"time"

 	"github.com/ClusterCockpit/cc-backend/internal/graph/model"
-	cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
-	"github.com/ClusterCockpit/cc-lib/schema"
+	cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger"
+	"github.com/ClusterCockpit/cc-lib/v2/schema"
 	sq "github.com/Masterminds/squirrel"
 )

@@ -22,13 +23,17 @@ import (
 // It returns a pointer to a schema.Job data structure and an error variable.
 // To check if no job was found test err == sql.ErrNoRows
 func (r *JobRepository) Find(
-	jobId *int64,
+	jobID *int64,
 	cluster *string,
 	startTime *int64,
 ) (*schema.Job, error) {
+	if jobID == nil {
+		return nil, fmt.Errorf("jobID cannot be nil")
+	}
+
 	start := time.Now()
 	q := sq.Select(jobColumns...).From("job").
-		Where("job.job_id = ?", *jobId)
+		Where("job.job_id = ?", *jobID)

 	if cluster != nil {
 		q = q.Where("job.cluster = ?", *cluster)
@@ -37,19 +42,29 @@ func (r *JobRepository) Find(
 		q = q.Where("job.start_time = ?", *startTime)
 	}

-	q = q.OrderBy("job.id DESC") // always use newest matching job by db id if more than one match
+	q = q.OrderBy("job.id DESC").Limit(1) // always use newest matching job by db id if more than one match

 	cclog.Debugf("Timer Find %s", time.Since(start))
 	return scanJob(q.RunWith(r.stmtCache).QueryRow())
 }

+// FindCached executes a SQL query to find a specific batch job in the job_cache table.
+// The job is queried using the batch job id, and optionally filtered by cluster name
+// and start time (UNIX epoch time seconds). job_cache holds newly started jobs until
+// they are transferred to the job table, so jobs that were already synced are not found here.
+// It returns a pointer to a schema.Job data structure and an error variable.
+// To check if no job was found test err == sql.ErrNoRows
 func (r *JobRepository) FindCached(
-	jobId *int64,
+	jobID *int64,
 	cluster *string,
 	startTime *int64,
 ) (*schema.Job, error) {
+	if jobID == nil {
+		return nil, fmt.Errorf("jobID cannot be nil")
+	}
+
 	q := sq.Select(jobCacheColumns...).From("job_cache").
-		Where("job_cache.job_id = ?", *jobId)
+		Where("job_cache.job_id = ?", *jobID)

 	if cluster != nil {
 		q = q.Where("job_cache.cluster = ?", *cluster)
@@ -58,24 +73,28 @@ func (r *JobRepository) FindCached(
 		q = q.Where("job_cache.start_time = ?", *startTime)
 	}

-	q = q.OrderBy("job_cache.id DESC") // always use newest matching job by db id if more than one match
+	q = q.OrderBy("job_cache.id DESC").Limit(1) // always use newest matching job by db id if more than one match

 	return scanJob(q.RunWith(r.stmtCache).QueryRow())
 }

-// Find executes a SQL query to find a specific batch job.
-// The job is queried using the batch job id, the cluster name,
-// and the start time of the job in UNIX epoch time seconds.
-// It returns a pointer to a schema.Job data structure and an error variable.
-// To check if no job was found test err == sql.ErrNoRows
+// FindAll executes a SQL query to find all batch jobs matching the given criteria.
+// Jobs are queried using the batch job id, and optionally filtered by cluster name
+// and start time (UNIX epoch time seconds).
+// It returns a slice of pointers to schema.Job data structures and an error variable.
+// An empty slice is returned if no matching jobs are found.
 func (r *JobRepository) FindAll(
-	jobId *int64,
+	jobID *int64,
 	cluster *string,
 	startTime *int64,
 ) ([]*schema.Job, error) {
+	if jobID == nil {
+		return nil, fmt.Errorf("jobID cannot be nil")
+	}
+
 	start := time.Now()
 	q := sq.Select(jobColumns...).From("job").
-		Where("job.job_id = ?", *jobId)
+		Where("job.job_id = ?", *jobID)

 	if cluster != nil {
 		q = q.Where("job.cluster = ?", *cluster)
@@ -86,8 +105,8 @@ func (r *JobRepository) FindAll(

 	rows, err := q.RunWith(r.stmtCache).Query()
 	if err != nil {
-		cclog.Error("Error while running query")
-		return nil, err
+		cclog.Errorf("Error while running FindAll query for jobID=%d: %v", *jobID, err)
+		return nil, fmt.Errorf("failed to execute FindAll query: %w", err)
 	}
 	defer rows.Close()

@@ -95,8 +114,8 @@ func (r *JobRepository) FindAll(
 	for rows.Next() {
 		job, err := scanJob(rows)
 		if err != nil {
-			cclog.Warn("Error while scanning rows")
-			return nil, err
+			cclog.Warnf("Error while scanning rows in FindAll: %v", err)
+			return nil, fmt.Errorf("failed to scan job row: %w", err)
 		}
 		jobs = append(jobs, job)
 	}
@@ -119,8 +138,8 @@ func (r *JobRepository) GetJobList(limit int, offset int) ([]int64, error) {

 	rows, err := query.RunWith(r.stmtCache).Query()
 	if err != nil {
-		cclog.Error("Error while running query")
-		return nil, err
+		cclog.Errorf("Error while running GetJobList query (limit=%d, offset=%d): %v", limit, offset, err)
+		return nil, fmt.Errorf("failed to execute GetJobList query: %w", err)
 	}
 	defer rows.Close()

@@ -129,23 +148,23 @@ func (r *JobRepository) GetJobList(limit int, offset int) ([]int64, error) {
 		var id int64
 		err := rows.Scan(&id)
 		if err != nil {
-			cclog.Warn("Error while scanning rows")
-			return nil, err
+			cclog.Warnf("Error while scanning rows in GetJobList: %v", err)
+			return nil, fmt.Errorf("failed to scan job ID: %w", err)
 		}
 		jl = append(jl, id)
 	}

-	cclog.Infof("Return job count %d", len(jl))
+	cclog.Debugf("JobRepository.GetJobList(): Return job count %d", len(jl))
 	return jl, nil
 }

-// FindById executes a SQL query to find a specific batch job.
+// FindByID executes a SQL query to find a specific batch job.
 // The job is queried using the database id.
 // It returns a pointer to a schema.Job data structure and an error variable.
 // To check if no job was found test err == sql.ErrNoRows
-func (r *JobRepository) FindById(ctx context.Context, jobId int64) (*schema.Job, error) {
+func (r *JobRepository) FindByID(ctx context.Context, jobID int64) (*schema.Job, error) {
 	q := sq.Select(jobColumns...).
-		From("job").Where("job.id = ?", jobId)
+		From("job").Where("job.id = ?", jobID)

 	q, qerr := SecurityCheck(ctx, q)
 	if qerr != nil {
@@ -155,14 +174,14 @@ func (r *JobRepository) FindById(ctx context.Context, jobId int64) (*schema.Job,
 	return scanJob(q.RunWith(r.stmtCache).QueryRow())
 }

-// FindByIdWithUser executes a SQL query to find a specific batch job.
+// FindByIDWithUser executes a SQL query to find a specific batch job.
 // The job is queried using the database id. The user is passed directly,
 // instead as part of the context.
 // It returns a pointer to a schema.Job data structure and an error variable.
 // To check if no job was found test err == sql.ErrNoRows
-func (r *JobRepository) FindByIdWithUser(user *schema.User, jobId int64) (*schema.Job, error) {
+func (r *JobRepository) FindByIDWithUser(user *schema.User, jobID int64) (*schema.Job, error) {
 	q := sq.Select(jobColumns...).
-		From("job").Where("job.id = ?", jobId)
+		From("job").Where("job.id = ?", jobID)

 	q, qerr := SecurityCheckWithUser(user, q)
 	if qerr != nil {
@@ -172,24 +191,24 @@ func (r *JobRepository) FindByIdWithUser(user *schema.User, jobId int64) (*schem
 	return scanJob(q.RunWith(r.stmtCache).QueryRow())
 }

-// FindByIdDirect executes a SQL query to find a specific batch job.
+// FindByIDDirect executes a SQL query to find a specific batch job.
 // The job is queried using the database id.
 // It returns a pointer to a schema.Job data structure and an error variable.
 // To check if no job was found test err == sql.ErrNoRows
-func (r *JobRepository) FindByIdDirect(jobId int64) (*schema.Job, error) {
+func (r *JobRepository) FindByIDDirect(jobID int64) (*schema.Job, error) {
 	q := sq.Select(jobColumns...).
-		From("job").Where("job.id = ?", jobId)
+		From("job").Where("job.id = ?", jobID)
 	return scanJob(q.RunWith(r.stmtCache).QueryRow())
 }

-// FindByJobId executes a SQL query to find a specific batch job.
+// FindByJobID executes a SQL query to find a specific batch job.
 // The job is queried using the slurm id and the clustername.
 // It returns a pointer to a schema.Job data structure and an error variable.
 // To check if no job was found test err == sql.ErrNoRows
-func (r *JobRepository) FindByJobId(ctx context.Context, jobId int64, startTime int64, cluster string) (*schema.Job, error) {
+func (r *JobRepository) FindByJobID(ctx context.Context, jobID int64, startTime int64, cluster string) (*schema.Job, error) {
 	q := sq.Select(jobColumns...).
 		From("job").
-		Where("job.job_id = ?", jobId).
+		Where("job.job_id = ?", jobID).
 		Where("job.cluster = ?", cluster).
 		Where("job.start_time = ?", startTime)

@@ -201,19 +220,22 @@ func (r *JobRepository) FindByJobId(ctx context.Context, jobId int64, startTime
 	return scanJob(q.RunWith(r.stmtCache).QueryRow())
 }

-// IsJobOwner executes a SQL query to find a specific batch job.
-// The job is queried using the slurm id,a username and the cluster.
-// It returns a bool.
-// If job was found, user is owner: test err != sql.ErrNoRows
-func (r *JobRepository) IsJobOwner(jobId int64, startTime int64, user string, cluster string) bool {
+// IsJobOwner reports whether the specified user owns the batch job identified by
+// jobID, startTime, and cluster. It returns true only if a matching row exists.
+// This method does not return errors: non-existent jobs, jobs owned by other
+// users, and query failures all yield false; query failures are also logged.
+func (r *JobRepository) IsJobOwner(jobID int64, startTime int64, user string, cluster string) bool {
 	q := sq.Select("id").
 		From("job").
-		Where("job.job_id = ?", jobId).
+		Where("job.job_id = ?", jobID).
 		Where("job.hpc_user = ?", user).
 		Where("job.cluster = ?", cluster).
 		Where("job.start_time = ?", startTime)
 
-	_, err := scanJob(q.RunWith(r.stmtCache).QueryRow())
-	return err != sql.ErrNoRows
+	// Only the id column is selected, so scan it directly. Scanning this row
+	// with scanJob (which reads a full job row) fails on every match with a
+	// column-count error, which would be logged as "unexpected" for each owner.
+	var id int64
+	err := q.RunWith(r.stmtCache).QueryRow().Scan(&id)
+	if err != nil && err != sql.ErrNoRows {
+		cclog.Warnf("IsJobOwner: unexpected error for jobID=%d, user=%s, cluster=%s: %v", jobID, user, cluster, err)
+	}
+	// Fail closed: ownership is confirmed only by a successfully scanned row.
+	return err == nil
 }

@@ -231,6 +253,11 @@ func (r *JobRepository) FindConcurrentJobs(
 	}

 	query = query.Where("cluster = ?", job.Cluster)
+
+	if len(job.Resources) == 0 {
+		return nil, fmt.Errorf("job has no resources defined")
+	}
+
 	var startTime int64
 	var stopTime int64

@@ -243,25 +270,28 @@ func (r *JobRepository) FindConcurrentJobs(
 		stopTime = startTime + int64(job.Duration)
 	}

-	// Add 200s overlap for jobs start time at the end
-	startTimeTail := startTime + 10
-	stopTimeTail := stopTime - 200
-	startTimeFront := startTime + 200
+	// Time buffer constant for finding overlapping jobs
+	// overlapBufferEnd: 200s buffer at job end to account for scheduling/cleanup overlap
+	const overlapBufferEnd = 200

-	queryRunning := query.Where("job.job_state = ?").Where("(job.start_time BETWEEN ? AND ? OR job.start_time < ?)",
-		"running", startTimeTail, stopTimeTail, startTime)
+	stopTimeTail := stopTime - overlapBufferEnd
+	startTimeFront := startTime + overlapBufferEnd
+
+	queryRunning := query.Where("job.job_state = ?", "running").
+		Where("job.start_time <= ?", stopTimeTail)
 	// Get At Least One Exact Hostname Match from JSON Resources Array in Database
 	queryRunning = queryRunning.Where("EXISTS (SELECT 1 FROM json_each(job.resources) WHERE json_extract(value, '$.hostname') = ?)", hostname)

-	query = query.Where("job.job_state != ?").Where("((job.start_time BETWEEN ? AND ?) OR (job.start_time + job.duration) BETWEEN ? AND ? OR (job.start_time < ?) AND (job.start_time + job.duration) > ?)",
-		"running", startTimeTail, stopTimeTail, startTimeFront, stopTimeTail, startTime, stopTime)
+	query = query.Where("job.job_state != ?", "running").
+		Where("job.start_time < ?", stopTimeTail).
+		Where("(job.start_time + job.duration) > ?", startTimeFront)
 	// Get At Least One Exact Hostname Match from JSON Resources Array in Database
 	query = query.Where("EXISTS (SELECT 1 FROM json_each(job.resources) WHERE json_extract(value, '$.hostname') = ?)", hostname)

 	rows, err := query.RunWith(r.stmtCache).Query()
 	if err != nil {
-		cclog.Errorf("Error while running query: %v", err)
-		return nil, err
+		cclog.Errorf("Error while running concurrent jobs query: %v", err)
+		return nil, fmt.Errorf("failed to execute concurrent jobs query: %w", err)
 	}
 	defer rows.Close()

@@ -269,44 +299,44 @@ func (r *JobRepository) FindConcurrentJobs(
 	queryString := fmt.Sprintf("cluster=%s", job.Cluster)

 	for rows.Next() {
-		var id, jobId, startTime sql.NullInt64
+		var id, jobID, startTime sql.NullInt64

-		if err = rows.Scan(&id, &jobId, &startTime); err != nil {
-			cclog.Warn("Error while scanning rows")
-			return nil, err
+		if err = rows.Scan(&id, &jobID, &startTime); err != nil {
+			cclog.Warnf("Error while scanning concurrent job rows: %v", err)
+			return nil, fmt.Errorf("failed to scan concurrent job row: %w", err)
 		}

 		if id.Valid {
-			queryString += fmt.Sprintf("&jobId=%d", int(jobId.Int64))
+			queryString += fmt.Sprintf("&jobId=%d", int(jobID.Int64))
 			items = append(items,
 				&model.JobLink{
 					ID:    fmt.Sprint(id.Int64),
-					JobID: int(jobId.Int64),
+					JobID: int(jobID.Int64),
 				})
 		}
 	}

 	rows, err = queryRunning.RunWith(r.stmtCache).Query()
 	if err != nil {
-		cclog.Errorf("Error while running query: %v", err)
-		return nil, err
+		cclog.Errorf("Error while running concurrent running jobs query: %v", err)
+		return nil, fmt.Errorf("failed to execute concurrent running jobs query: %w", err)
 	}
 	defer rows.Close()

 	for rows.Next() {
-		var id, jobId, startTime sql.NullInt64
+		var id, jobID, startTime sql.NullInt64

-		if err := rows.Scan(&id, &jobId, &startTime); err != nil {
-			cclog.Warn("Error while scanning rows")
-			return nil, err
+		if err := rows.Scan(&id, &jobID, &startTime); err != nil {
+			cclog.Warnf("Error while scanning running concurrent job rows: %v", err)
+			return nil, fmt.Errorf("failed to scan running concurrent job row: %w", err)
 		}

 		if id.Valid {
-			queryString += fmt.Sprintf("&jobId=%d", int(jobId.Int64))
+			queryString += fmt.Sprintf("&jobId=%d", int(jobID.Int64))
 			items = append(items,
 				&model.JobLink{
 					ID:    fmt.Sprint(id.Int64),
-					JobID: int(jobId.Int64),
+					JobID: int(jobID.Int64),
 				})
 		}
 	}
--- a/internal/repository/jobHooks.go
+++ b/internal/repository/jobHooks.go
@@ -2,16 +2,45 @@
 // All rights reserved. This file is part of cc-backend.
 // Use of this source code is governed by a MIT-style
 // license that can be found in the LICENSE file.
+
 package repository

 import (
 	"sync"

-	"github.com/ClusterCockpit/cc-lib/schema"
+	"github.com/ClusterCockpit/cc-lib/v2/schema"
 )

+// JobHook interface allows external components to hook into job lifecycle events.
+// Implementations can perform actions when jobs start or stop, such as tagging,
+// logging, notifications, or triggering external workflows.
+//
+// Example implementation:
+//
+//	type MyJobTagger struct{}
+//
+//	func (t *MyJobTagger) JobStartCallback(job *schema.Job) {
+//	    if job.NumNodes > 100 {
+//	        // Tag large jobs automatically
+//	    }
+//	}
+//
+//	func (t *MyJobTagger) JobStopCallback(job *schema.Job) {
+//	    if job.State == schema.JobStateFailed {
+//	        // Log or alert on failed jobs
+//	    }
+//	}
+//
+// Register hooks during application initialization:
+//
+//	repository.RegisterJobHook(&MyJobTagger{})
 type JobHook interface {
+	// JobStartCallback is invoked when one or more jobs start.
+	// This is called synchronously, so implementations should be fast.
 	JobStartCallback(job *schema.Job)
+
+	// JobStopCallback is invoked when a job completes.
+	// This is called synchronously, so implementations should be fast.
 	JobStopCallback(job *schema.Job)
 }

@@ -20,7 +49,13 @@ var (
 	hooks    []JobHook
 )

-func RegisterJobJook(hook JobHook) {
+// RegisterJobHook registers a JobHook to receive job lifecycle callbacks.
+// Multiple hooks can be registered and will be called in registration order.
+// This function is safe to call multiple times and is typically called during
+// application initialization.
+//
+// Nil hooks are silently ignored to simplify conditional registration.
+func RegisterJobHook(hook JobHook) {
 	initOnce.Do(func() {
 		hooks = make([]JobHook, 0)
 	})
@@ -30,6 +65,12 @@ func RegisterJobJook(hook JobHook) {
 	}
 }

+// CallJobStartHooks invokes all registered JobHook.JobStartCallback methods
+// for each job in the provided slice. This is called internally by the repository
+// when jobs are started (e.g., via StartJob or batch job imports).
+//
+// Hooks are called synchronously in registration order. If a hook panics,
+// the panic will propagate to the caller.
 func CallJobStartHooks(jobs []*schema.Job) {
 	if hooks == nil {
 		return
@@ -44,6 +85,12 @@ func CallJobStartHooks(jobs []*schema.Job) {
 	}
 }

+// CallJobStopHooks invokes all registered JobHook.JobStopCallback methods
+// for the provided job. This is called internally by the repository when a
+// job completes (e.g., via StopJob or job state updates).
+//
+// Hooks are called synchronously in registration order. If a hook panics,
+// the panic will propagate to the caller.
 func CallJobStopHooks(job *schema.Job) {
 	if hooks == nil {
 		return
--- a/internal/repository/jobQuery.go
+++ b/internal/repository/jobQuery.go
@@ -2,6 +2,10 @@
 // All rights reserved. This file is part of cc-backend.
 // Use of this source code is governed by a MIT-style
 // license that can be found in the LICENSE file.
+
+// Package repository provides job query functionality with filtering, pagination,
+// and security controls. This file contains the main query builders and security
+// checks for job retrieval operations.
 package repository

 import (
@@ -14,11 +18,27 @@ import (

 	"github.com/ClusterCockpit/cc-backend/internal/config"
 	"github.com/ClusterCockpit/cc-backend/internal/graph/model"
-	cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
-	"github.com/ClusterCockpit/cc-lib/schema"
+	cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger"
+	"github.com/ClusterCockpit/cc-lib/v2/schema"
 	sq "github.com/Masterminds/squirrel"
 )

+const (
+	// Default initial capacity for job result slices
+	defaultJobsCapacity = 50
+)
+
+// QueryJobs retrieves jobs from the database with optional filtering, pagination,
+// and sorting. Security controls are automatically applied based on the user context.
+//
+// Parameters:
+//   - ctx: Context containing user authentication information
+//   - filters: Optional job filters (cluster, state, user, time ranges, etc.)
+//   - page: Optional pagination parameters (page number and items per page)
+//   - order: Optional sorting specification (column or footprint field)
+//
+// Returns a slice of jobs matching the criteria, or an error if the query fails.
+// The function enforces role-based access control through SecurityCheck.
 func (r *JobRepository) QueryJobs(
 	ctx context.Context,
 	filters []*model.JobFilter,
@@ -33,26 +53,24 @@ func (r *JobRepository) QueryJobs(
 	if order != nil {
 		field := toSnakeCase(order.Field)
 		if order.Type == "col" {
-			// "col": Fixed column name query
 			switch order.Order {
 			case model.SortDirectionEnumAsc:
 				query = query.OrderBy(fmt.Sprintf("job.%s ASC", field))
 			case model.SortDirectionEnumDesc:
 				query = query.OrderBy(fmt.Sprintf("job.%s DESC", field))
 			default:
-				return nil, errors.New("REPOSITORY/QUERY > invalid sorting order for column")
+				return nil, errors.New("invalid sorting order for column")
 			}
 		} else {
-			// "foot": Order by footprint JSON field values
-			// Verify and Search Only in Valid Jsons
-			query = query.Where("JSON_VALID(meta_data)")
+			// Order by footprint JSON field values
+			query = query.Where("JSON_VALID(footprint)")
 			switch order.Order {
 			case model.SortDirectionEnumAsc:
 				query = query.OrderBy(fmt.Sprintf("JSON_EXTRACT(footprint, \"$.%s\") ASC", field))
 			case model.SortDirectionEnumDesc:
 				query = query.OrderBy(fmt.Sprintf("JSON_EXTRACT(footprint, \"$.%s\") DESC", field))
 			default:
-				return nil, errors.New("REPOSITORY/QUERY > invalid sorting order for footprint")
+				return nil, errors.New("invalid sorting order for footprint")
 			}
 		}
 	}
@@ -69,29 +87,35 @@ func (r *JobRepository) QueryJobs(
 	rows, err := query.RunWith(r.stmtCache).Query()
 	if err != nil {
 		queryString, queryVars, _ := query.ToSql()
-		cclog.Errorf("Error while running query '%s' %v: %v", queryString, queryVars, err)
-		return nil, err
+		return nil, fmt.Errorf("query failed [%s] %v: %w", queryString, queryVars, err)
 	}
+	defer rows.Close()

-	jobs := make([]*schema.Job, 0, 50)
+	jobs := make([]*schema.Job, 0, defaultJobsCapacity)
 	for rows.Next() {
 		job, err := scanJob(rows)
 		if err != nil {
-			rows.Close()
-			cclog.Warn("Error while scanning rows (Jobs)")
-			return nil, err
+			cclog.Warnf("Error scanning job row: %v", err)
+			return nil, fmt.Errorf("failed to scan job row: %w", err)
 		}
 		jobs = append(jobs, job)
 	}

+	if err := rows.Err(); err != nil {
+		return nil, fmt.Errorf("error iterating job rows: %w", err)
+	}
+
 	return jobs, nil
 }

+// CountJobs returns the total number of jobs matching the given filters.
+// Security controls are automatically applied based on the user context.
+// Uses DISTINCT count to handle tag filters correctly (jobs may appear multiple
+// times when joined with the tag table).
 func (r *JobRepository) CountJobs(
 	ctx context.Context,
 	filters []*model.JobFilter,
 ) (int, error) {
-	// DISTICT count for tags filters, does not affect other queries
 	query, qerr := SecurityCheck(ctx, sq.Select("count(DISTINCT job.id)").From("job"))
 	if qerr != nil {
 		return 0, qerr
@@ -103,12 +127,22 @@ func (r *JobRepository) CountJobs(

 	var count int
 	if err := query.RunWith(r.DB).Scan(&count); err != nil {
-		return 0, err
+		return 0, fmt.Errorf("failed to count jobs: %w", err)
 	}

 	return count, nil
 }

+// SecurityCheckWithUser applies role-based access control filters to a job query
+// based on the provided user's roles and permissions.
+//
+// Access rules by role:
+//   - API role (exclusive): Full access to all jobs
+//   - Admin/Support roles: Full access to all jobs
+//   - Manager role: Access to jobs in managed projects plus own jobs
+//   - User role: Access only to own jobs
+//
+// Returns an error if the user is nil or has no recognized roles.
 func SecurityCheckWithUser(user *schema.User, query sq.SelectBuilder) (sq.SelectBuilder, error) {
 	if user == nil {
 		var qnil sq.SelectBuilder
@@ -116,84 +150,68 @@ func SecurityCheckWithUser(user *schema.User, query sq.SelectBuilder) (sq.Select
 	}

 	switch {
-	case len(user.Roles) == 1 && user.HasRole(schema.RoleApi): // API-User : All jobs
+	case len(user.Roles) == 1 && user.HasRole(schema.RoleAPI):
 		return query, nil
-	case user.HasAnyRole([]schema.Role{schema.RoleAdmin, schema.RoleSupport}): // Admin & Support : All jobs
+	case user.HasAnyRole([]schema.Role{schema.RoleAdmin, schema.RoleSupport}):
 		return query, nil
-	case user.HasRole(schema.RoleManager): // Manager : Add filter for managed projects' jobs only + personal jobs
+	case user.HasRole(schema.RoleManager):
 		if len(user.Projects) != 0 {
 			return query.Where(sq.Or{sq.Eq{"job.project": user.Projects}, sq.Eq{"job.hpc_user": user.Username}}), nil
-		} else {
-			cclog.Debugf("Manager-User '%s' has no defined projects to lookup! Query only personal jobs ...", user.Username)
-			return query.Where("job.hpc_user = ?", user.Username), nil
 		}
-	case user.HasRole(schema.RoleUser): // User : Only personal jobs
+		cclog.Debugf("Manager '%s' has no assigned projects, restricting to personal jobs", user.Username)
 		return query.Where("job.hpc_user = ?", user.Username), nil
-	default: // No known Role, return error
+	case user.HasRole(schema.RoleUser):
+		return query.Where("job.hpc_user = ?", user.Username), nil
+	default:
 		var qnil sq.SelectBuilder
 		return qnil, fmt.Errorf("user has no or unknown roles")
 	}
 }

+// SecurityCheck extracts the user from the context and applies role-based access
+// control filters to the query. This is a convenience wrapper around SecurityCheckWithUser.
 func SecurityCheck(ctx context.Context, query sq.SelectBuilder) (sq.SelectBuilder, error) {
 	user := GetUserFromContext(ctx)
-
 	return SecurityCheckWithUser(user, query)
 }

-// Build a sq.SelectBuilder out of a schema.JobFilter.
+// BuildWhereClause constructs SQL WHERE conditions from a JobFilter and applies
+// them to the query. Supports filtering by job properties (cluster, state, user),
+// time ranges, resource usage, tags, and JSON field searches in meta_data,
+// footprint, and resources columns.
 func BuildWhereClause(filter *model.JobFilter, query sq.SelectBuilder) sq.SelectBuilder {
-	if filter.Tags != nil {
-		// This is an OR-Logic query: Returns all distinct jobs with at least one of the requested tags; TODO: AND-Logic query?
-		query = query.Join("jobtag ON jobtag.job_id = job.id").Where(sq.Eq{"jobtag.tag_id": filter.Tags}).Distinct()
-	}
+	// Primary Key
 	if filter.DbID != nil {
 		dbIDs := make([]string, len(filter.DbID))
 		copy(dbIDs, filter.DbID)
 		query = query.Where(sq.Eq{"job.id": dbIDs})
 	}
-	if filter.JobID != nil {
-		query = buildStringCondition("job.job_id", filter.JobID, query)
-	}
-	if filter.ArrayJobID != nil {
-		query = query.Where("job.array_job_id = ?", *filter.ArrayJobID)
-	}
-	if filter.User != nil {
-		query = buildStringCondition("job.hpc_user", filter.User, query)
-	}
-	if filter.Project != nil {
-		query = buildStringCondition("job.project", filter.Project, query)
-	}
-	if filter.JobName != nil {
-		query = buildMetaJsonCondition("jobName", filter.JobName, query)
-	}
+	// Explicit indices
 	if filter.Cluster != nil {
 		query = buildStringCondition("job.cluster", filter.Cluster, query)
 	}
+	if filter.SubCluster != nil {
+		query = buildStringCondition("job.subcluster", filter.SubCluster, query)
+	}
 	if filter.Partition != nil {
 		query = buildStringCondition("job.cluster_partition", filter.Partition, query)
 	}
-	if filter.StartTime != nil {
-		query = buildTimeCondition("job.start_time", filter.StartTime, query)
-	}
-	if filter.Duration != nil {
-		query = buildIntCondition("job.duration", filter.Duration, query)
-	}
-	if filter.MinRunningFor != nil {
-		now := time.Now().Unix() // There does not seam to be a portable way to get the current unix timestamp accross different DBs.
-		query = query.Where("(job.job_state != 'running' OR (? - job.start_time) > ?)", now, *filter.MinRunningFor)
-	}
-	if filter.Shared != nil {
-		query = query.Where("job.shared = ?", *filter.Shared)
-	}
 	if filter.State != nil {
 		states := make([]string, len(filter.State))
 		for i, val := range filter.State {
 			states[i] = string(val)
 		}
-
 		query = query.Where(sq.Eq{"job.job_state": states})
 	}
+	if filter.Shared != nil {
+		query = query.Where("job.shared = ?", *filter.Shared)
+	}
+	if filter.Project != nil {
+		query = buildStringCondition("job.project", filter.Project, query)
+	}
+	if filter.User != nil {
+		query = buildStringCondition("job.hpc_user", filter.User, query)
+	}
 	if filter.NumNodes != nil {
 		query = buildIntCondition("job.num_nodes", filter.NumNodes, query)
 	}
@@ -203,33 +221,95 @@ func BuildWhereClause(filter *model.JobFilter, query sq.SelectBuilder) sq.Select
 	if filter.NumHWThreads != nil {
 		query = buildIntCondition("job.num_hwthreads", filter.NumHWThreads, query)
 	}
-	if filter.Node != nil {
-		query = buildResourceJsonCondition("hostname", filter.Node, query)
+	if filter.ArrayJobID != nil {
+		query = query.Where("job.array_job_id = ?", *filter.ArrayJobID)
+	}
+	if filter.StartTime != nil {
+		query = buildTimeCondition("job.start_time", filter.StartTime, query)
+	}
+	if filter.Duration != nil {
+		query = buildIntCondition("job.duration", filter.Duration, query)
 	}
 	if filter.Energy != nil {
 		query = buildFloatCondition("job.energy", filter.Energy, query)
 	}
+	// Indices on Tag Table
+	if filter.Tags != nil {
+		// This is an OR-Logic query: Returns all distinct jobs with at least one of the requested tags; TODO: AND-Logic query?
+		query = query.Join("jobtag ON jobtag.job_id = job.id").Where(sq.Eq{"jobtag.tag_id": filter.Tags}).Distinct()
+	}
+	// No explicit Indices
+	if filter.JobID != nil {
+		query = buildStringCondition("job.job_id", filter.JobID, query)
+	}
+	// Queries Within JSONs
 	if filter.MetricStats != nil {
 		for _, ms := range filter.MetricStats {
-			query = buildFloatJsonCondition(ms.MetricName, ms.Range, query)
+			query = buildFloatJSONCondition(ms.MetricName, ms.Range, query)
 		}
 	}
+	if filter.Node != nil {
+		query = buildResourceJSONCondition("hostname", filter.Node, query)
+	}
+	if filter.JobName != nil {
+		query = buildMetaJSONCondition("jobName", filter.JobName, query)
+	}
+	if filter.Schedule != nil {
+		interactiveJobname := "interactive"
+		switch *filter.Schedule {
+		case "interactive":
+			iFilter := model.StringInput{Eq: &interactiveJobname}
+			query = buildMetaJSONCondition("jobName", &iFilter, query)
+		case "batch":
+			sFilter := model.StringInput{Neq: &interactiveJobname}
+			query = buildMetaJSONCondition("jobName", &sFilter, query)
+		}
+	}
+
+	// Configurable Filter to exclude recently started jobs, see config.go: ShortRunningJobsDuration
+	if filter.MinRunningFor != nil {
+		now := time.Now().Unix()
+		// Only match jobs whose start timestamp lies more than MinRunningFor seconds in the past.
+		// Note: a job that completed within the configured timeframe still shows up once its start_time satisfies this condition!
+		query = query.Where(sq.Lt{"job.start_time": (now - int64(*filter.MinRunningFor))})
+	}
 	return query
 }

+// buildIntCondition creates clauses for integer range filters, using BETWEEN only if both bounds are set (From == 1 and To == 0 are treated as "no bound").
 func buildIntCondition(field string, cond *config.IntRange, query sq.SelectBuilder) sq.SelectBuilder {
-	return query.Where(field+" BETWEEN ? AND ?", cond.From, cond.To)
+	if cond.From != 1 && cond.To != 0 {
+		return query.Where(field+" BETWEEN ? AND ?", cond.From, cond.To)
+	} else if cond.From != 1 && cond.To == 0 {
+		return query.Where(field+" >= ?", cond.From)
+	} else if cond.From == 1 && cond.To != 0 {
+		return query.Where(field+" <= ?", cond.To)
+	} else {
+		return query
+	}
 }

+// buildFloatCondition creates clauses for float range filters, using BETWEEN only if both bounds are set (From == 1.0 and To == 0.0 are treated as "no bound").
 func buildFloatCondition(field string, cond *model.FloatRange, query sq.SelectBuilder) sq.SelectBuilder {
-	return query.Where(field+" BETWEEN ? AND ?", cond.From, cond.To)
+	if cond.From != 1.0 && cond.To != 0.0 {
+		return query.Where(field+" BETWEEN ? AND ?", cond.From, cond.To)
+	} else if cond.From != 1.0 && cond.To == 0.0 {
+		return query.Where(field+" >= ?", cond.From)
+	} else if cond.From == 1.0 && cond.To != 0.0 {
+		return query.Where(field+" <= ?", cond.To)
+	} else {
+		return query
+	}
 }

+// buildTimeCondition creates time range filters supporting absolute timestamps,
+// relative time ranges (last6h, last24h, last7d, last30d), or open-ended ranges.
+// Reminder: BETWEEN queries are slower and don't use indices as frequently: only use BETWEEN if both conditions are required.
 func buildTimeCondition(field string, cond *config.TimeRange, query sq.SelectBuilder) sq.SelectBuilder {
 	if cond.From != nil && cond.To != nil {
 		return query.Where(field+" BETWEEN ? AND ?", cond.From.Unix(), cond.To.Unix())
 	} else if cond.From != nil {
-		return query.Where("? <= "+field, cond.From.Unix())
+		return query.Where(field+" >= ?", cond.From.Unix())
 	} else if cond.To != nil {
 		return query.Where(field+" <= ?", cond.To.Unix())
 	} else if cond.Range != "" {
@@ -248,18 +328,28 @@ func buildTimeCondition(field string, cond *config.TimeRange, query sq.SelectBui
 			cclog.Debugf("No known named timeRange: startTime.range = %s", cond.Range)
 			return query
 		}
-		return query.Where(field+" BETWEEN ? AND ?", then, now)
+		return query.Where(field+" >= ?", then)
 	} else {
 		return query
 	}
 }

-func buildFloatJsonCondition(condName string, condRange *model.FloatRange, query sq.SelectBuilder) sq.SelectBuilder {
-	// Verify and Search Only in Valid Jsons
+// buildFloatJSONCondition creates a filter on a numeric field within the footprint JSON column, using BETWEEN only if both bounds are set (From == 1.0 and To == 0.0 are treated as "no bound").
+func buildFloatJSONCondition(jsonField string, cond *model.FloatRange, query sq.SelectBuilder) sq.SelectBuilder {
 	query = query.Where("JSON_VALID(footprint)")
-	return query.Where("JSON_EXTRACT(footprint, \"$."+condName+"\") BETWEEN ? AND ?", condRange.From, condRange.To)
+	if cond.From != 1.0 && cond.To != 0.0 {
+		return query.Where("JSON_EXTRACT(footprint, \"$."+jsonField+"\") BETWEEN ? AND ?", cond.From, cond.To)
+	} else if cond.From != 1.0 && cond.To == 0.0 {
+		return query.Where("JSON_EXTRACT(footprint, \"$."+jsonField+"\") >= ?", cond.From)
+	} else if cond.From == 1.0 && cond.To != 0.0 {
+		return query.Where("JSON_EXTRACT(footprint, \"$."+jsonField+"\") <= ?", cond.To)
+	} else {
+		return query
+	}
 }

+// buildStringCondition creates filters for string fields supporting equality,
+// inequality, prefix, suffix, substring, and IN list matching.
 func buildStringCondition(field string, cond *model.StringInput, query sq.SelectBuilder) sq.SelectBuilder {
 	if cond.Eq != nil {
 		return query.Where(field+" = ?", *cond.Eq)
@@ -284,10 +374,9 @@ func buildStringCondition(field string, cond *model.StringInput, query sq.Select
 	return query
 }

-func buildMetaJsonCondition(jsonField string, cond *model.StringInput, query sq.SelectBuilder) sq.SelectBuilder {
-	// Verify and Search Only in Valid Jsons
+// buildMetaJSONCondition creates filters on fields within the meta_data JSON column.
+func buildMetaJSONCondition(jsonField string, cond *model.StringInput, query sq.SelectBuilder) sq.SelectBuilder {
 	query = query.Where("JSON_VALID(meta_data)")
-	// add "AND" Sql query Block for field match
 	if cond.Eq != nil {
 		return query.Where("JSON_EXTRACT(meta_data, \"$."+jsonField+"\") = ?", *cond.Eq)
 	}
@@ -306,10 +395,10 @@ func buildMetaJsonCondition(jsonField string, cond *model.StringInput, query sq.
 	return query
 }

-func buildResourceJsonCondition(jsonField string, cond *model.StringInput, query sq.SelectBuilder) sq.SelectBuilder {
-	// Verify and Search Only in Valid Jsons
+// buildResourceJSONCondition creates filters on fields within the resources JSON array column.
+// Uses json_each to search within array elements.
+func buildResourceJSONCondition(jsonField string, cond *model.StringInput, query sq.SelectBuilder) sq.SelectBuilder {
 	query = query.Where("JSON_VALID(resources)")
-	// add "AND" Sql query Block for field match
 	if cond.Eq != nil {
 		return query.Where("EXISTS (SELECT 1 FROM json_each(job.resources) WHERE json_extract(value, \"$."+jsonField+"\") = ?)", *cond.Eq)
 	}
@@ -333,15 +422,16 @@ var (
 	matchAllCap   = regexp.MustCompile("([a-z0-9])([A-Z])")
 )

+// toSnakeCase converts camelCase strings to snake_case for SQL column names.
+// Includes security checks to prevent SQL injection attempts.
+// Panics if potentially dangerous characters are detected.
 func toSnakeCase(str string) string {
 	for _, c := range str {
-		if c == '\'' || c == '\\' {
-			cclog.Panic("toSnakeCase() attack vector!")
+		if c == '\'' || c == '\\' || c == '"' || c == ';' || c == '-' || c == ' ' {
+			cclog.Panicf("toSnakeCase: potentially dangerous character detected in input: %q", str)
 		}
 	}

-	str = strings.ReplaceAll(str, "'", "")
-	str = strings.ReplaceAll(str, "\\", "")
 	snake := matchFirstCap.ReplaceAllString(str, "${1}_${2}")
 	snake = matchAllCap.ReplaceAllString(snake, "${1}_${2}")
 	return strings.ToLower(snake)
--- a/internal/repository/job_test.go
+++ b/internal/repository/job_test.go
@@ -10,7 +10,7 @@ import (
 	"testing"
 	"time"

-	"github.com/ClusterCockpit/cc-lib/schema"
+	"github.com/ClusterCockpit/cc-lib/v2/schema"
 	_ "github.com/mattn/go-sqlite3"
 )

@@ -33,7 +33,7 @@ func TestFind(t *testing.T) {
 func TestFindById(t *testing.T) {
 	r := setup(t)

-	job, err := r.FindById(getContext(t), 338)
+	job, err := r.FindByID(getContext(t), 338)
 	if err != nil {
 		t.Fatal(err)
 	}
@@ -78,7 +78,7 @@ func TestFindJobsBetween(t *testing.T) {

 	// 1. Find a job to use (Find all jobs)
 	// We use a large time range to ensure we get something if it exists
-	jobs, err := r.FindJobsBetween(0, 9999999999, false)
+	jobs, err := r.FindJobsBetween(0, 9999999999, "none")
 	if err != nil {
 		t.Fatal(err)
 	}
@@ -88,21 +88,21 @@ func TestFindJobsBetween(t *testing.T) {

 	targetJob := jobs[0]

-	// 2. Create a tag
-	tagName := fmt.Sprintf("testtag_%d", time.Now().UnixNano())
-	tagId, err := r.CreateTag("testtype", tagName, "global")
+	// 2. Create an auto-tagger tag (type "app")
+	appTagName := fmt.Sprintf("apptag_%d", time.Now().UnixNano())
+	appTagID, err := r.CreateTag("app", appTagName, "global")
 	if err != nil {
 		t.Fatal(err)
 	}

-	// 3. Link Tag (Manually to avoid archive dependency side-effects in unit test)
-	_, err = r.DB.Exec("INSERT INTO jobtag (job_id, tag_id) VALUES (?, ?)", *targetJob.ID, tagId)
+	// 3. Link auto-tagger tag to job
+	_, err = r.DB.Exec("INSERT INTO jobtag (job_id, tag_id) VALUES (?, ?)", *targetJob.ID, appTagID)
 	if err != nil {
 		t.Fatal(err)
 	}

-	// 4. Search with omitTagged = false (Should find the job)
-	jobsFound, err := r.FindJobsBetween(0, 9999999999, false)
+	// 4. Search with omitTagged = "none" (Should find the job)
+	jobsFound, err := r.FindJobsBetween(0, 9999999999, "none")
 	if err != nil {
 		t.Fatal(err)
 	}
@@ -115,18 +115,58 @@ func TestFindJobsBetween(t *testing.T) {
 		}
 	}
 	if !found {
-		t.Errorf("Target job %d should be found when omitTagged=false", *targetJob.ID)
+		t.Errorf("Target job %d should be found when omitTagged=none", *targetJob.ID)
 	}

-	// 5. Search with omitTagged = true (Should NOT find the job)
-	jobsFiltered, err := r.FindJobsBetween(0, 9999999999, true)
+	// 5. Search with omitTagged = "all" (Should NOT find the job — it has a tag)
+	jobsFiltered, err := r.FindJobsBetween(0, 9999999999, "all")
 	if err != nil {
 		t.Fatal(err)
 	}

 	for _, j := range jobsFiltered {
 		if *j.ID == *targetJob.ID {
-			t.Errorf("Target job %d should NOT be found when omitTagged=true", *targetJob.ID)
+			t.Errorf("Target job %d should NOT be found when omitTagged=all", *targetJob.ID)
+		}
+	}
+
+	// 6. Search with omitTagged = "user": auto-tagger tag ("app") should NOT exclude the job
+	jobsUserFilter, err := r.FindJobsBetween(0, 9999999999, "user")
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	found = false
+	for _, j := range jobsUserFilter {
+		if *j.ID == *targetJob.ID {
+			found = true
+			break
+		}
+	}
+	if !found {
+		t.Errorf("Target job %d should be found when omitTagged=user (only has auto-tagger tag)", *targetJob.ID)
+	}
+
+	// 7. Add a user-created tag (type "testtype") to the same job
+	userTagName := fmt.Sprintf("usertag_%d", time.Now().UnixNano())
+	userTagID, err := r.CreateTag("testtype", userTagName, "global")
+	if err != nil {
+		t.Fatal(err)
+	}
+	_, err = r.DB.Exec("INSERT INTO jobtag (job_id, tag_id) VALUES (?, ?)", *targetJob.ID, userTagID)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	// 8. Now omitTagged = "user" should exclude the job (has a user-created tag)
+	jobsUserFilter2, err := r.FindJobsBetween(0, 9999999999, "user")
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	for _, j := range jobsUserFilter2 {
+		if *j.ID == *targetJob.ID {
+			t.Errorf("Target job %d should NOT be found when omitTagged=user (has user-created tag)", *targetJob.ID)
 		}
 	}
 }
--- a/internal/repository/migration.go
+++ b/internal/repository/migration.go
@@ -10,52 +10,48 @@ import (
 	"embed"
 	"fmt"

-	cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
+	cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger"
 	"github.com/golang-migrate/migrate/v4"
-	"github.com/golang-migrate/migrate/v4/database/mysql"
 	"github.com/golang-migrate/migrate/v4/database/sqlite3"
 	"github.com/golang-migrate/migrate/v4/source/iofs"
 )

+// Version is the current database schema version required by this version of cc-backend.
+// When the database schema changes, this version is incremented and a new migration file
+// is added to internal/repository/migrations/sqlite3/.
+//
+// Version history:
+//   - Version 10: Current version
+//
+// Migration files are embedded at build time from the migrations directory.
 const Version uint = 10

 //go:embed migrations/*
 var migrationFiles embed.FS

-func checkDBVersion(backend string, db *sql.DB) error {
-	var m *migrate.Migrate
+// checkDBVersion verifies that the database schema version matches the expected version.
+// This is called automatically during Connect() to ensure schema compatibility.
+//
+// Returns an error if:
+//   - Database version is older than expected (needs migration)
+//   - Database version is newer than expected (needs app upgrade)
+//   - Database is in a dirty state (failed migration)
+//
+// A "dirty" database indicates a migration was started but not completed successfully.
+// This requires manual intervention to fix the database and force the version.
+func checkDBVersion(db *sql.DB) error {
+	driver, err := sqlite3.WithInstance(db, &sqlite3.Config{})
+	if err != nil {
+		return err
+	}
+	d, err := iofs.New(migrationFiles, "migrations/sqlite3")
+	if err != nil {
+		return err
+	}

-	switch backend {
-	case "sqlite3":
-		driver, err := sqlite3.WithInstance(db, &sqlite3.Config{})
-		if err != nil {
-			return err
-		}
-		d, err := iofs.New(migrationFiles, "migrations/sqlite3")
-		if err != nil {
-			return err
-		}
-
-		m, err = migrate.NewWithInstance("iofs", d, "sqlite3", driver)
-		if err != nil {
-			return err
-		}
-	case "mysql":
-		driver, err := mysql.WithInstance(db, &mysql.Config{})
-		if err != nil {
-			return err
-		}
-		d, err := iofs.New(migrationFiles, "migrations/mysql")
-		if err != nil {
-			return err
-		}
-
-		m, err = migrate.NewWithInstance("iofs", d, "mysql", driver)
-		if err != nil {
-			return err
-		}
-	default:
-		cclog.Abortf("Migration: Unsupported database backend '%s'.\n", backend)
+	m, err := migrate.NewWithInstance("iofs", d, "sqlite3", driver)
+	if err != nil {
+		return err
 	}

 	v, dirty, err := m.Version()
@@ -80,37 +76,41 @@ func checkDBVersion(backend string, db *sql.DB) error {
 	return nil
 }

-func getMigrateInstance(backend string, db string) (m *migrate.Migrate, err error) {
-	switch backend {
-	case "sqlite3":
-		d, err := iofs.New(migrationFiles, "migrations/sqlite3")
-		if err != nil {
-			cclog.Fatal(err)
-		}
+// getMigrateInstance creates a new migration instance for the given database file.
+// This is used internally by MigrateDB, RevertDB, and ForceDB.
+func getMigrateInstance(db string) (m *migrate.Migrate, err error) {
+	d, err := iofs.New(migrationFiles, "migrations/sqlite3")
+	if err != nil {
+		return nil, err
+	}

-		m, err = migrate.NewWithSourceInstance("iofs", d, fmt.Sprintf("sqlite3://%s?_foreign_keys=on", db))
-		if err != nil {
-			return m, err
-		}
-	case "mysql":
-		d, err := iofs.New(migrationFiles, "migrations/mysql")
-		if err != nil {
-			return m, err
-		}
-
-		m, err = migrate.NewWithSourceInstance("iofs", d, fmt.Sprintf("mysql://%s?multiStatements=true", db))
-		if err != nil {
-			return m, err
-		}
-	default:
-		cclog.Abortf("Migration: Unsupported database backend '%s'.\n", backend)
+	m, err = migrate.NewWithSourceInstance("iofs", d, fmt.Sprintf("sqlite3://%s?_foreign_keys=on", db))
+	if err != nil {
+		return nil, err
 	}

 	return m, nil
 }

-func MigrateDB(backend string, db string) error {
-	m, err := getMigrateInstance(backend, db)
+// MigrateDB applies all pending database migrations to bring the schema up to date.
+// This should be run with the -migrate-db flag before starting the application
+// after upgrading to a new version that requires schema changes.
+//
+// Process:
+//  1. Checks current database version
+//  2. Applies all migrations from current version to target Version
+//  3. Updates schema_migrations table to track applied migrations
+//
+// Important:
+//   - Always backup your database before running migrations
+//   - Migrations are irreversible without manual intervention
+//   - If a migration fails, the database is marked "dirty" and requires manual fix
+//
+// Usage:
+//
+//	cc-backend -migrate-db
+func MigrateDB(db string) error {
+	m, err := getMigrateInstance(db)
 	if err != nil {
 		return err
 	}
@@ -118,7 +118,7 @@ func MigrateDB(backend string, db string) error {
 	v, dirty, err := m.Version()
 	if err != nil {
 		if err == migrate.ErrNilVersion {
-			cclog.Warn("Legacy database without version or missing database file!")
+			cclog.Info("Legacy database without version or missing database file!")
 		} else {
 			return err
 		}
@@ -144,8 +144,19 @@ func MigrateDB(backend string, db string) error {
 	return nil
 }

-func RevertDB(backend string, db string) error {
-	m, err := getMigrateInstance(backend, db)
+// RevertDB rolls back the database schema to the previous version (Version - 1).
+// This is primarily used for testing or emergency rollback scenarios.
+//
+// Warning:
+//   - This may cause data loss if newer schema added columns/tables
+//   - Always backup before reverting
+//   - Not all migrations are safely reversible
+//
+// Usage:
+//
+//	cc-backend -revert-db
+func RevertDB(db string) error {
+	m, err := getMigrateInstance(db)
 	if err != nil {
 		return err
 	}
@@ -162,8 +173,23 @@ func RevertDB(backend string, db string) error {
 	return nil
 }

-func ForceDB(backend string, db string) error {
-	m, err := getMigrateInstance(backend, db)
+// ForceDB forces the database schema version to the current Version without running migrations.
+// This is only used to recover from failed migrations that left the database in a "dirty" state.
+//
+// When to use:
+//   - After manually fixing a failed migration
+//   - When you've manually applied schema changes and need to update the version marker
+//
+// Warning:
+//   - This does NOT apply any schema changes
+//   - Only use after manually verifying the schema is correct
+//   - Improper use can cause schema/version mismatch
+//
+// Usage:
+//
+//	cc-backend -force-db
+func ForceDB(db string) error {
+	m, err := getMigrateInstance(db)
 	if err != nil {
 		return err
 	}
--- a/internal/repository/migrations/mysql/01_init-schema.down.sql
+++ b/internal/repository/migrations/mysql/01_init-schema.down.sql
@@ -1,5 +0,0 @@
-DROP TABLE IF EXISTS job;
-DROP TABLE IF EXISTS tags;
-DROP TABLE IF EXISTS jobtag;
-DROP TABLE IF EXISTS configuration;
-DROP TABLE IF EXISTS user;
--- a/internal/repository/migrations/mysql/01_init-schema.up.sql
+++ b/internal/repository/migrations/mysql/01_init-schema.up.sql
@@ -1,66 +0,0 @@
-CREATE TABLE IF NOT EXISTS job (
-    id                INTEGER AUTO_INCREMENT PRIMARY KEY ,
-    job_id            BIGINT NOT NULL,
-    cluster           VARCHAR(255) NOT NULL,
-    subcluster        VARCHAR(255) NOT NULL,
-    start_time        BIGINT NOT NULL, -- Unix timestamp
-
-    user              VARCHAR(255) NOT NULL,
-    project           VARCHAR(255) NOT NULL,
-    `partition`       VARCHAR(255) NOT NULL,
-    array_job_id      BIGINT NOT NULL,
-    duration          INT NOT NULL DEFAULT 0,
-    walltime          INT NOT NULL DEFAULT 0,
-    job_state         VARCHAR(255) NOT NULL 
-    CHECK(job_state IN ('running', 'completed', 'failed', 'cancelled',
-            'stopped', 'timeout', 'preempted', 'out_of_memory')),
-    meta_data         TEXT,          -- JSON
-    resources         TEXT NOT NULL, -- JSON
-
-    num_nodes         INT NOT NULL,
-    num_hwthreads     INT NOT NULL,
-    num_acc           INT NOT NULL,
-    smt               TINYINT NOT NULL DEFAULT 1 CHECK(smt               IN (0, 1   )),
-    exclusive         TINYINT NOT NULL DEFAULT 1 CHECK(exclusive         IN (0, 1, 2)),
-    monitoring_status TINYINT NOT NULL DEFAULT 1 CHECK(monitoring_status IN (0, 1, 2, 3)),
-
-    mem_used_max        REAL NOT NULL DEFAULT 0.0,
-    flops_any_avg       REAL NOT NULL DEFAULT 0.0,
-    mem_bw_avg          REAL NOT NULL DEFAULT 0.0,
-    load_avg            REAL NOT NULL DEFAULT 0.0,
-    net_bw_avg          REAL NOT NULL DEFAULT 0.0,
-    net_data_vol_total  REAL NOT NULL DEFAULT 0.0,
-    file_bw_avg         REAL NOT NULL DEFAULT 0.0,
-    file_data_vol_total REAL NOT NULL DEFAULT 0.0,
-    UNIQUE (job_id, cluster, start_time)
-    );
-
-CREATE TABLE IF NOT EXISTS tag (
-    id       INTEGER PRIMARY KEY,
-    tag_type VARCHAR(255) NOT NULL,
-    tag_name VARCHAR(255) NOT NULL,
-    UNIQUE (tag_type, tag_name));
-
-CREATE TABLE IF NOT EXISTS jobtag (
-    job_id INTEGER,
-    tag_id INTEGER,
-    PRIMARY KEY (job_id, tag_id),
-    FOREIGN KEY (job_id) REFERENCES job (id) ON DELETE CASCADE,
-    FOREIGN KEY (tag_id) REFERENCES tag (id) ON DELETE CASCADE);
-
-CREATE TABLE IF NOT EXISTS user (
-	username varchar(255) PRIMARY KEY NOT NULL,
-	password varchar(255) DEFAULT NULL,
-	ldap     tinyint      NOT NULL DEFAULT 0, /* col called "ldap" for historic reasons, fills the "AuthSource" */
-	name     varchar(255) DEFAULT NULL,
-	roles    varchar(255) NOT NULL DEFAULT "[]",
-	email    varchar(255) DEFAULT NULL);
-
-CREATE TABLE IF NOT EXISTS configuration (
-	username varchar(255),
-	confkey  varchar(255),
-	value    varchar(255),
-	PRIMARY KEY (username, confkey),
-	FOREIGN KEY (username) REFERENCES user (username) ON DELETE CASCADE ON UPDATE NO ACTION);
-
-
--- a/internal/repository/migrations/mysql/02_add-index.down.sql
+++ b/internal/repository/migrations/mysql/02_add-index.down.sql
@@ -1,8 +0,0 @@
-DROP INDEX IF EXISTS job_stats;
-DROP INDEX IF EXISTS job_by_user;
-DROP INDEX IF EXISTS job_by_starttime;
-DROP INDEX IF EXISTS job_by_job_id;
-DROP INDEX IF EXISTS job_list;
-DROP INDEX IF EXISTS job_list_user;
-DROP INDEX IF EXISTS job_list_users;
-DROP INDEX IF EXISTS job_list_users_start;
--- a/internal/repository/migrations/mysql/02_add-index.up.sql
+++ b/internal/repository/migrations/mysql/02_add-index.up.sql
@@ -1,8 +0,0 @@
-CREATE INDEX IF NOT EXISTS job_stats        ON job (cluster,subcluster,user);
-CREATE INDEX IF NOT EXISTS job_by_user      ON job (user);
-CREATE INDEX IF NOT EXISTS job_by_starttime ON job (start_time);
-CREATE INDEX IF NOT EXISTS job_by_job_id    ON job (job_id);
-CREATE INDEX IF NOT EXISTS job_list         ON job (cluster, job_state);
-CREATE INDEX IF NOT EXISTS job_list_user    ON job (user, cluster, job_state);
-CREATE INDEX IF NOT EXISTS job_list_users   ON job (user, job_state);
-CREATE INDEX IF NOT EXISTS job_list_users_start ON job (start_time, user, job_state);
--- a/internal/repository/migrations/mysql/03_add-userprojects.down.sql
+++ b/internal/repository/migrations/mysql/03_add-userprojects.down.sql
@@ -1 +0,0 @@
-ALTER TABLE user DROP COLUMN projects;
--- a/internal/repository/migrations/mysql/03_add-userprojects.up.sql
+++ b/internal/repository/migrations/mysql/03_add-userprojects.up.sql
@@ -1 +0,0 @@
-ALTER TABLE user ADD COLUMN projects varchar(255) NOT NULL DEFAULT "[]";
--- a/internal/repository/migrations/mysql/04_alter-table-job.down.sql
+++ b/internal/repository/migrations/mysql/04_alter-table-job.down.sql
@@ -1,5 +0,0 @@
-ALTER TABLE job
-    MODIFY `partition` VARCHAR(255) NOT NULL,
-    MODIFY array_job_id BIGINT NOT NULL,
-    MODIFY num_hwthreads INT NOT NULL,
-    MODIFY num_acc INT NOT NULL;
--- a/internal/repository/migrations/mysql/04_alter-table-job.up.sql
+++ b/internal/repository/migrations/mysql/04_alter-table-job.up.sql
@@ -1,5 +0,0 @@
-ALTER TABLE job
-    MODIFY `partition` VARCHAR(255),
-    MODIFY array_job_id BIGINT,
-    MODIFY num_hwthreads INT,
-    MODIFY num_acc INT;
--- a/internal/repository/migrations/mysql/05_extend-tags.down.sql
+++ b/internal/repository/migrations/mysql/05_extend-tags.down.sql
@@ -1,2 +0,0 @@
-ALTER TABLE tag DROP COLUMN insert_time;
-ALTER TABLE jobtag DROP COLUMN insert_time;
--- a/internal/repository/migrations/mysql/05_extend-tags.up.sql
+++ b/internal/repository/migrations/mysql/05_extend-tags.up.sql
@@ -1,2 +0,0 @@
-ALTER TABLE tag ADD COLUMN insert_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP;
-ALTER TABLE jobtag ADD COLUMN insert_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP;
--- a/internal/repository/migrations/mysql/06_change-config.down.sql
+++ b/internal/repository/migrations/mysql/06_change-config.down.sql
@@ -1 +0,0 @@
-ALTER TABLE configuration MODIFY value VARCHAR(255);
--- a/internal/repository/migrations/mysql/06_change-config.up.sql
+++ b/internal/repository/migrations/mysql/06_change-config.up.sql
@@ -1 +0,0 @@
-ALTER TABLE configuration MODIFY value TEXT;
--- a/internal/repository/migrations/mysql/07_fix-tag-id.down.sql
+++ b/internal/repository/migrations/mysql/07_fix-tag-id.down.sql
@@ -1,3 +0,0 @@
-SET FOREIGN_KEY_CHECKS = 0;
-ALTER TABLE tag MODIFY id INTEGER;
-SET FOREIGN_KEY_CHECKS = 1;
--- a/internal/repository/migrations/mysql/07_fix-tag-id.up.sql
+++ b/internal/repository/migrations/mysql/07_fix-tag-id.up.sql
@@ -1,3 +0,0 @@
-SET FOREIGN_KEY_CHECKS = 0;
-ALTER TABLE tag MODIFY id INTEGER AUTO_INCREMENT;
-SET FOREIGN_KEY_CHECKS = 1;
--- a/internal/repository/migrations/mysql/08_add-footprint.down.sql
+++ b/internal/repository/migrations/mysql/08_add-footprint.down.sql
@@ -1,83 +0,0 @@
-ALTER TABLE job DROP energy;
-ALTER TABLE job DROP energy_footprint;
-ALTER TABLE job ADD COLUMN flops_any_avg;
-ALTER TABLE job ADD COLUMN mem_bw_avg;
-ALTER TABLE job ADD COLUMN mem_used_max;
-ALTER TABLE job ADD COLUMN load_avg;
-ALTER TABLE job ADD COLUMN net_bw_avg;
-ALTER TABLE job ADD COLUMN net_data_vol_total;
-ALTER TABLE job ADD COLUMN file_bw_avg;
-ALTER TABLE job ADD COLUMN file_data_vol_total;
-
-UPDATE job SET flops_any_avg = json_extract(footprint, '$.flops_any_avg');
-UPDATE job SET mem_bw_avg = json_extract(footprint, '$.mem_bw_avg');
-UPDATE job SET mem_used_max = json_extract(footprint, '$.mem_used_max');
-UPDATE job SET load_avg = json_extract(footprint, '$.cpu_load_avg');
-UPDATE job SET net_bw_avg = json_extract(footprint, '$.net_bw_avg');
-UPDATE job SET net_data_vol_total = json_extract(footprint, '$.net_data_vol_total');
-UPDATE job SET file_bw_avg = json_extract(footprint, '$.file_bw_avg');
-UPDATE job SET file_data_vol_total = json_extract(footprint, '$.file_data_vol_total');
-
-ALTER TABLE job DROP footprint;
-- Do not use reserved keywords anymore
-RENAME TABLE hpc_user TO `user`;
-ALTER TABLE job RENAME COLUMN hpc_user TO `user`;
-ALTER TABLE job RENAME COLUMN cluster_partition TO `partition`;
-
-DROP INDEX IF EXISTS jobs_cluster;
-DROP INDEX IF EXISTS jobs_cluster_user;
-DROP INDEX IF EXISTS jobs_cluster_project;
-DROP INDEX IF EXISTS jobs_cluster_subcluster;
-DROP INDEX IF EXISTS jobs_cluster_starttime;
-DROP INDEX IF EXISTS jobs_cluster_duration;
-DROP INDEX IF EXISTS jobs_cluster_numnodes;
-
-DROP INDEX IF EXISTS jobs_cluster_partition;
-DROP INDEX IF EXISTS jobs_cluster_partition_starttime;
-DROP INDEX IF EXISTS jobs_cluster_partition_duration;
-DROP INDEX IF EXISTS jobs_cluster_partition_numnodes;
-
-DROP INDEX IF EXISTS jobs_cluster_partition_jobstate;
-DROP INDEX IF EXISTS jobs_cluster_partition_jobstate_user;
-DROP INDEX IF EXISTS jobs_cluster_partition_jobstate_project;
-DROP INDEX IF EXISTS jobs_cluster_partition_jobstate_starttime;
-DROP INDEX IF EXISTS jobs_cluster_partition_jobstate_duration;
-DROP INDEX IF EXISTS jobs_cluster_partition_jobstate_numnodes;
-
-DROP INDEX IF EXISTS jobs_cluster_jobstate;
-DROP INDEX IF EXISTS jobs_cluster_jobstate_user;
-DROP INDEX IF EXISTS jobs_cluster_jobstate_project;
-
-DROP INDEX IF EXISTS jobs_cluster_jobstate_starttime;
-DROP INDEX IF EXISTS jobs_cluster_jobstate_duration;
-DROP INDEX IF EXISTS jobs_cluster_jobstate_numnodes;
-
-DROP INDEX IF EXISTS jobs_user;
-DROP INDEX IF EXISTS jobs_user_starttime;
-DROP INDEX IF EXISTS jobs_user_duration;
-DROP INDEX IF EXISTS jobs_user_numnodes;
-
-DROP INDEX IF EXISTS jobs_project;
-DROP INDEX IF EXISTS jobs_project_user;
-DROP INDEX IF EXISTS jobs_project_starttime;
-DROP INDEX IF EXISTS jobs_project_duration;
-DROP INDEX IF EXISTS jobs_project_numnodes;
-
-DROP INDEX IF EXISTS jobs_jobstate;
-DROP INDEX IF EXISTS jobs_jobstate_user;
-DROP INDEX IF EXISTS jobs_jobstate_project;
-DROP INDEX IF EXISTS jobs_jobstate_starttime;
-DROP INDEX IF EXISTS jobs_jobstate_duration;
-DROP INDEX IF EXISTS jobs_jobstate_numnodes;
-
-DROP INDEX IF EXISTS jobs_arrayjobid_starttime;
-DROP INDEX IF EXISTS jobs_cluster_arrayjobid_starttime;
-
-DROP INDEX IF EXISTS jobs_starttime;
-DROP INDEX IF EXISTS jobs_duration;
-DROP INDEX IF EXISTS jobs_numnodes;
-
-DROP INDEX IF EXISTS jobs_duration_starttime;
-DROP INDEX IF EXISTS jobs_numnodes_starttime;
-DROP INDEX IF EXISTS jobs_numacc_starttime;
-DROP INDEX IF EXISTS jobs_energy_starttime;
--- a/internal/repository/migrations/mysql/08_add-footprint.up.sql
+++ b/internal/repository/migrations/mysql/08_add-footprint.up.sql
@@ -1,123 +0,0 @@
-DROP INDEX IF EXISTS job_stats ON job;
-DROP INDEX IF EXISTS job_by_user ON job;
-DROP INDEX IF EXISTS job_by_starttime ON job;
-DROP INDEX IF EXISTS job_by_job_id ON job;
-DROP INDEX IF EXISTS job_list ON job;
-DROP INDEX IF EXISTS job_list_user ON job;
-DROP INDEX IF EXISTS job_list_users ON job;
-DROP INDEX IF EXISTS job_list_users_start ON job;
-
-ALTER TABLE job ADD COLUMN energy REAL NOT NULL DEFAULT 0.0;
-ALTER TABLE job ADD COLUMN energy_footprint JSON;
-
-ALTER TABLE job ADD COLUMN footprint JSON;
-ALTER TABLE tag ADD COLUMN tag_scope TEXT NOT NULL DEFAULT 'global';
-
-- Do not use reserved keywords anymore
-RENAME TABLE `user` TO hpc_user;
-ALTER TABLE job RENAME COLUMN `user` TO hpc_user;
-ALTER TABLE job RENAME COLUMN `partition` TO cluster_partition;
-
-ALTER TABLE job MODIFY COLUMN cluster VARCHAR(50);
-ALTER TABLE job MODIFY COLUMN hpc_user VARCHAR(50);
-ALTER TABLE job MODIFY COLUMN subcluster VARCHAR(50);
-ALTER TABLE job MODIFY COLUMN project VARCHAR(50);
-ALTER TABLE job MODIFY COLUMN cluster_partition VARCHAR(50);
-ALTER TABLE job MODIFY COLUMN job_state VARCHAR(25);
-
-UPDATE job SET footprint = '{"flops_any_avg": 0.0}';
-UPDATE job SET footprint = json_replace(footprint, '$.flops_any_avg', job.flops_any_avg);
-UPDATE job SET footprint = json_insert(footprint, '$.mem_bw_avg', job.mem_bw_avg);
-UPDATE job SET footprint = json_insert(footprint, '$.mem_used_max', job.mem_used_max);
-UPDATE job SET footprint = json_insert(footprint, '$.cpu_load_avg', job.load_avg);
-UPDATE job SET footprint = json_insert(footprint, '$.net_bw_avg', job.net_bw_avg) WHERE job.net_bw_avg != 0;
-UPDATE job SET footprint = json_insert(footprint, '$.net_data_vol_total', job.net_data_vol_total) WHERE job.net_data_vol_total != 0;
-UPDATE job SET footprint = json_insert(footprint, '$.file_bw_avg', job.file_bw_avg) WHERE job.file_bw_avg != 0;
-UPDATE job SET footprint = json_insert(footprint, '$.file_data_vol_total', job.file_data_vol_total) WHERE job.file_data_vol_total != 0;
-
-ALTER TABLE job DROP flops_any_avg;
-ALTER TABLE job DROP mem_bw_avg;
-ALTER TABLE job DROP mem_used_max;
-ALTER TABLE job DROP load_avg;
-ALTER TABLE job DROP net_bw_avg;
-ALTER TABLE job DROP net_data_vol_total;
-ALTER TABLE job DROP file_bw_avg;
-ALTER TABLE job DROP file_data_vol_total;
-
-- Indices for: Single filters, combined filters, sorting, sorting with filters
-- Cluster Filter
-CREATE INDEX IF NOT EXISTS jobs_cluster ON job (cluster);
-CREATE INDEX IF NOT EXISTS jobs_cluster_user ON job (cluster, hpc_user);
-CREATE INDEX IF NOT EXISTS jobs_cluster_project ON job (cluster, project);
-CREATE INDEX IF NOT EXISTS jobs_cluster_subcluster ON job (cluster, subcluster);
-- Cluster Filter Sorting
-CREATE INDEX IF NOT EXISTS jobs_cluster_starttime ON job (cluster, start_time);
-CREATE INDEX IF NOT EXISTS jobs_cluster_duration ON job (cluster, duration);
-CREATE INDEX IF NOT EXISTS jobs_cluster_numnodes ON job (cluster, num_nodes);
-
-- Cluster+Partition Filter
-CREATE INDEX IF NOT EXISTS jobs_cluster_partition ON job (cluster, cluster_partition);
-- Cluster+Partition Filter Sorting
-CREATE INDEX IF NOT EXISTS jobs_cluster_partition_starttime ON job (cluster, cluster_partition, start_time);
-CREATE INDEX IF NOT EXISTS jobs_cluster_partition_duration ON job (cluster, cluster_partition, duration);
-CREATE INDEX IF NOT EXISTS jobs_cluster_partition_numnodes ON job (cluster, cluster_partition, num_nodes);
-
-- Cluster+Partition+Jobstate Filter
-CREATE INDEX IF NOT EXISTS jobs_cluster_partition_jobstate ON job (cluster, cluster_partition, job_state);
-CREATE INDEX IF NOT EXISTS jobs_cluster_partition_jobstate_user ON job (cluster, cluster_partition, job_state, hpc_user);
-CREATE INDEX IF NOT EXISTS jobs_cluster_partition_jobstate_project ON job (cluster, cluster_partition, job_state, project);
-- Cluster+Partition+Jobstate Filter Sorting
-CREATE INDEX IF NOT EXISTS jobs_cluster_partition_jobstate_starttime ON job (cluster, cluster_partition, job_state, start_time);
-CREATE INDEX IF NOT EXISTS jobs_cluster_partition_jobstate_duration ON job (cluster, cluster_partition, job_state, duration);
-CREATE INDEX IF NOT EXISTS jobs_cluster_partition_jobstate_numnodes ON job (cluster, cluster_partition, job_state, num_nodes);
-
-- Cluster+JobState Filter
-CREATE INDEX IF NOT EXISTS jobs_cluster_jobstate ON job (cluster, job_state);
-CREATE INDEX IF NOT EXISTS jobs_cluster_jobstate_user ON job (cluster, job_state, hpc_user);
-CREATE INDEX IF NOT EXISTS jobs_cluster_jobstate_project ON job (cluster, job_state, project);
-- Cluster+JobState Filter Sorting
-CREATE INDEX IF NOT EXISTS jobs_cluster_jobstate_starttime ON job (cluster, job_state, start_time);
-CREATE INDEX IF NOT EXISTS jobs_cluster_jobstate_duration ON job (cluster, job_state, duration);
-CREATE INDEX IF NOT EXISTS jobs_cluster_jobstate_numnodes ON job (cluster, job_state, num_nodes);
-
-- User Filter
-CREATE INDEX IF NOT EXISTS jobs_user ON job (hpc_user);
-- User Filter Sorting
-CREATE INDEX IF NOT EXISTS jobs_user_starttime ON job (hpc_user, start_time);
-CREATE INDEX IF NOT EXISTS jobs_user_duration ON job (hpc_user, duration);
-CREATE INDEX IF NOT EXISTS jobs_user_numnodes ON job (hpc_user, num_nodes);
-
-- Project Filter
-CREATE INDEX IF NOT EXISTS jobs_project ON job (project);
-CREATE INDEX IF NOT EXISTS jobs_project_user ON job (project, hpc_user);
-- Project Filter Sorting
-CREATE INDEX IF NOT EXISTS jobs_project_starttime ON job (project, start_time);
-CREATE INDEX IF NOT EXISTS jobs_project_duration ON job (project, duration);
-CREATE INDEX IF NOT EXISTS jobs_project_numnodes ON job (project, num_nodes);
-
-- JobState Filter
-CREATE INDEX IF NOT EXISTS jobs_jobstate ON job (job_state);
-CREATE INDEX IF NOT EXISTS jobs_jobstate_user ON job (job_state, hpc_user);
-CREATE INDEX IF NOT EXISTS jobs_jobstate_project ON job (job_state, project);
-CREATE INDEX IF NOT EXISTS jobs_jobstate_cluster ON job (job_state, cluster);
-- JobState Filter Sorting
-CREATE INDEX IF NOT EXISTS jobs_jobstate_starttime ON job (job_state, start_time);
-CREATE INDEX IF NOT EXISTS jobs_jobstate_duration ON job (job_state, duration);
-CREATE INDEX IF NOT EXISTS jobs_jobstate_numnodes ON job (job_state, num_nodes);
-
-- ArrayJob Filter
-CREATE INDEX IF NOT EXISTS jobs_arrayjobid_starttime ON job (array_job_id, start_time);
-CREATE INDEX IF NOT EXISTS jobs_cluster_arrayjobid_starttime ON job (cluster, array_job_id, start_time);
-
-- Sorting without active filters
-CREATE INDEX IF NOT EXISTS jobs_starttime ON job (start_time);
-CREATE INDEX IF NOT EXISTS jobs_duration ON job (duration);
-CREATE INDEX IF NOT EXISTS jobs_numnodes ON job (num_nodes);
-
-- Single filters with default starttime sorting
-CREATE INDEX IF NOT EXISTS jobs_duration_starttime ON job (duration, start_time);
-CREATE INDEX IF NOT EXISTS jobs_numnodes_starttime ON job (num_nodes, start_time);
-CREATE INDEX IF NOT EXISTS jobs_numacc_starttime ON job (num_acc, start_time);
-CREATE INDEX IF NOT EXISTS jobs_energy_starttime ON job (energy, start_time);
-
-- Optimize DB index usage
--- a/internal/repository/migrations/sqlite3/09_add-job-cache.up.sql
+++ b/internal/repository/migrations/sqlite3/09_add-job-cache.up.sql
@@ -118,104 +118,116 @@ DROP TABLE lookup_exclusive;
 DROP TABLE job; -- Deletes All Existing 'job' Indices; Recreate after Renaming
 ALTER TABLE job_new RENAME TO job;

-- Recreate Indices from 08_add-footprint, include new submit_time indices
+-- Recreate Indices from 08_add-footprint; include new 'shared' column
 -- Cluster Filter
-CREATE INDEX IF NOT EXISTS jobs_cluster ON job (cluster);
 CREATE INDEX IF NOT EXISTS jobs_cluster_user ON job (cluster, hpc_user);
 CREATE INDEX IF NOT EXISTS jobs_cluster_project ON job (cluster, project);
 CREATE INDEX IF NOT EXISTS jobs_cluster_subcluster ON job (cluster, subcluster);
 -- Cluster Filter Sorting
-CREATE INDEX IF NOT EXISTS jobs_cluster_starttime ON job (cluster, start_time);
-CREATE INDEX IF NOT EXISTS jobs_cluster_submittime ON job (cluster, submit_time);
-CREATE INDEX IF NOT EXISTS jobs_cluster_duration ON job (cluster, duration);
 CREATE INDEX IF NOT EXISTS jobs_cluster_numnodes ON job (cluster, num_nodes);
 CREATE INDEX IF NOT EXISTS jobs_cluster_numhwthreads ON job (cluster, num_hwthreads);
 CREATE INDEX IF NOT EXISTS jobs_cluster_numacc ON job (cluster, num_acc);
 CREATE INDEX IF NOT EXISTS jobs_cluster_energy ON job (cluster, energy);

+-- Cluster Time Filter Sorting
+CREATE INDEX IF NOT EXISTS jobs_cluster_duration_starttime ON job (cluster, duration, start_time);
+CREATE INDEX IF NOT EXISTS jobs_cluster_starttime_duration ON job (cluster, start_time, duration);
+
 -- Cluster+Partition Filter
-CREATE INDEX IF NOT EXISTS jobs_cluster_partition ON job (cluster, cluster_partition);
+CREATE INDEX IF NOT EXISTS jobs_cluster_partition_user ON job (cluster, cluster_partition, hpc_user);
+CREATE INDEX IF NOT EXISTS jobs_cluster_partition_project ON job (cluster, cluster_partition, project);
+CREATE INDEX IF NOT EXISTS jobs_cluster_partition_jobstate ON job (cluster, cluster_partition, job_state);
+CREATE INDEX IF NOT EXISTS jobs_cluster_partition_shared ON job (cluster, cluster_partition, shared);
+
 -- Cluster+Partition Filter Sorting
-CREATE INDEX IF NOT EXISTS jobs_cluster_partition_starttime ON job (cluster, cluster_partition, start_time);
-CREATE INDEX IF NOT EXISTS jobs_cluster_partition_submittime ON job (cluster, cluster_partition, submit_time);
-CREATE INDEX IF NOT EXISTS jobs_cluster_partition_duration ON job (cluster, cluster_partition, duration);
 CREATE INDEX IF NOT EXISTS jobs_cluster_partition_numnodes ON job (cluster, cluster_partition, num_nodes);
 CREATE INDEX IF NOT EXISTS jobs_cluster_partition_numhwthreads ON job (cluster, cluster_partition, num_hwthreads);
 CREATE INDEX IF NOT EXISTS jobs_cluster_partition_numacc ON job (cluster, cluster_partition, num_acc);
 CREATE INDEX IF NOT EXISTS jobs_cluster_partition_energy ON job (cluster, cluster_partition, energy);

-- Cluster+Partition+Jobstate Filter
-CREATE INDEX IF NOT EXISTS jobs_cluster_partition_jobstate ON job (cluster, cluster_partition, job_state);
-CREATE INDEX IF NOT EXISTS jobs_cluster_partition_jobstate_user ON job (cluster, cluster_partition, job_state, hpc_user);
-CREATE INDEX IF NOT EXISTS jobs_cluster_partition_jobstate_project ON job (cluster, cluster_partition, job_state, project);
-- Cluster+Partition+Jobstate Filter Sorting
-CREATE INDEX IF NOT EXISTS jobs_cluster_partition_jobstate_starttime ON job (cluster, cluster_partition, job_state, start_time);
-CREATE INDEX IF NOT EXISTS jobs_cluster_partition_jobstate_submittime ON job (cluster, cluster_partition, job_state, submit_time);
-CREATE INDEX IF NOT EXISTS jobs_cluster_partition_jobstate_duration ON job (cluster, cluster_partition, job_state, duration);
-CREATE INDEX IF NOT EXISTS jobs_cluster_partition_jobstate_numnodes ON job (cluster, cluster_partition, job_state, num_nodes);
-CREATE INDEX IF NOT EXISTS jobs_cluster_partition_jobstate_numhwthreads ON job (cluster, cluster_partition, job_state, num_hwthreads);
-CREATE INDEX IF NOT EXISTS jobs_cluster_partition_jobstate_numacc ON job (cluster, cluster_partition, job_state, num_acc);
-CREATE INDEX IF NOT EXISTS jobs_cluster_partition_jobstate_energy ON job (cluster, cluster_partition, job_state, energy);
+-- Cluster+Partition Time Filter Sorting
+CREATE INDEX IF NOT EXISTS jobs_cluster_partition_duration_starttime ON job (cluster, cluster_partition, duration, start_time);
+CREATE INDEX IF NOT EXISTS jobs_cluster_partition_starttime_duration ON job (cluster, cluster_partition, start_time, duration);

 -- Cluster+JobState Filter
-CREATE INDEX IF NOT EXISTS jobs_cluster_jobstate ON job (cluster, job_state);
 CREATE INDEX IF NOT EXISTS jobs_cluster_jobstate_user ON job (cluster, job_state, hpc_user);
 CREATE INDEX IF NOT EXISTS jobs_cluster_jobstate_project ON job (cluster, job_state, project);
 -- Cluster+JobState Filter Sorting
-CREATE INDEX IF NOT EXISTS jobs_cluster_jobstate_starttime ON job (cluster, job_state, start_time);
-CREATE INDEX IF NOT EXISTS jobs_cluster_jobstate_submittime ON job (cluster, job_state, submit_time);
-CREATE INDEX IF NOT EXISTS jobs_cluster_jobstate_duration ON job (cluster, job_state, duration);
 CREATE INDEX IF NOT EXISTS jobs_cluster_jobstate_numnodes ON job (cluster, job_state, num_nodes);
 CREATE INDEX IF NOT EXISTS jobs_cluster_jobstate_numhwthreads ON job (cluster, job_state, num_hwthreads);
 CREATE INDEX IF NOT EXISTS jobs_cluster_jobstate_numacc ON job (cluster, job_state, num_acc);
 CREATE INDEX IF NOT EXISTS jobs_cluster_jobstate_energy ON job (cluster, job_state, energy);

+-- Cluster+JobState Time Filter Sorting
+CREATE INDEX IF NOT EXISTS jobs_cluster_jobstate_starttime_duration ON job (cluster, job_state, start_time, duration);
+CREATE INDEX IF NOT EXISTS jobs_cluster_jobstate_duration_starttime ON job (cluster, job_state, duration, start_time);
+
+-- Cluster+Shared Filter
+CREATE INDEX IF NOT EXISTS jobs_cluster_shared_user ON job (cluster, shared, hpc_user);
+CREATE INDEX IF NOT EXISTS jobs_cluster_shared_project ON job (cluster, shared, project);
+-- Cluster+Shared Filter Sorting
+CREATE INDEX IF NOT EXISTS jobs_cluster_shared_numnodes ON job (cluster, shared, num_nodes);
+CREATE INDEX IF NOT EXISTS jobs_cluster_shared_numhwthreads ON job (cluster, shared, num_hwthreads);
+CREATE INDEX IF NOT EXISTS jobs_cluster_shared_numacc ON job (cluster, shared, num_acc);
+CREATE INDEX IF NOT EXISTS jobs_cluster_shared_energy ON job (cluster, shared, energy);
+
+-- Cluster+Shared Time Filter Sorting
+CREATE INDEX IF NOT EXISTS jobs_cluster_shared_starttime_duration ON job (cluster, shared, start_time, duration);
+CREATE INDEX IF NOT EXISTS jobs_cluster_shared_duration_starttime ON job (cluster, shared, duration, start_time);
+
 -- User Filter
-CREATE INDEX IF NOT EXISTS jobs_user ON job (hpc_user);
 -- User Filter Sorting
-CREATE INDEX IF NOT EXISTS jobs_user_starttime ON job (hpc_user, start_time);
-CREATE INDEX IF NOT EXISTS jobs_user_duration ON job (hpc_user, duration);
 CREATE INDEX IF NOT EXISTS jobs_user_numnodes ON job (hpc_user, num_nodes);
 CREATE INDEX IF NOT EXISTS jobs_user_numhwthreads ON job (hpc_user, num_hwthreads);
 CREATE INDEX IF NOT EXISTS jobs_user_numacc ON job (hpc_user, num_acc);
 CREATE INDEX IF NOT EXISTS jobs_user_energy ON job (hpc_user, energy);

+-- User Time Filter Sorting
+CREATE INDEX IF NOT EXISTS jobs_user_starttime_duration ON job (hpc_user, start_time, duration);
+CREATE INDEX IF NOT EXISTS jobs_user_duration_starttime ON job (hpc_user, duration, start_time);
+
 -- Project Filter
-CREATE INDEX IF NOT EXISTS jobs_project ON job (project);
 CREATE INDEX IF NOT EXISTS jobs_project_user ON job (project, hpc_user);
 -- Project Filter Sorting
-CREATE INDEX IF NOT EXISTS jobs_project_starttime ON job (project, start_time);
-CREATE INDEX IF NOT EXISTS jobs_project_duration ON job (project, duration);
 CREATE INDEX IF NOT EXISTS jobs_project_numnodes ON job (project, num_nodes);
 CREATE INDEX IF NOT EXISTS jobs_project_numhwthreads ON job (project, num_hwthreads);
 CREATE INDEX IF NOT EXISTS jobs_project_numacc ON job (project, num_acc);
 CREATE INDEX IF NOT EXISTS jobs_project_energy ON job (project, energy);

+-- Project Time Filter Sorting
+CREATE INDEX IF NOT EXISTS jobs_project_starttime_duration ON job (project, start_time, duration);
+CREATE INDEX IF NOT EXISTS jobs_project_duration_starttime ON job (project, duration, start_time);
+
 -- JobState Filter
-CREATE INDEX IF NOT EXISTS jobs_jobstate ON job (job_state);
 CREATE INDEX IF NOT EXISTS jobs_jobstate_user ON job (job_state, hpc_user);
 CREATE INDEX IF NOT EXISTS jobs_jobstate_project ON job (job_state, project);
-CREATE INDEX IF NOT EXISTS jobs_jobstate_cluster ON job (job_state, cluster);
 -- JobState Filter Sorting
-CREATE INDEX IF NOT EXISTS jobs_jobstate_starttime ON job (job_state, start_time);
-CREATE INDEX IF NOT EXISTS jobs_jobstate_duration ON job (job_state, duration);
 CREATE INDEX IF NOT EXISTS jobs_jobstate_numnodes ON job (job_state, num_nodes);
 CREATE INDEX IF NOT EXISTS jobs_jobstate_numhwthreads ON job (job_state, num_hwthreads);
 CREATE INDEX IF NOT EXISTS jobs_jobstate_numacc ON job (job_state, num_acc);
 CREATE INDEX IF NOT EXISTS jobs_jobstate_energy ON job (job_state, energy);

+-- JobState Time Filter Sorting
+CREATE INDEX IF NOT EXISTS jobs_jobstate_starttime_duration ON job (job_state, start_time, duration);
+CREATE INDEX IF NOT EXISTS jobs_jobstate_duration_starttime ON job (job_state, duration, start_time);
+
+-- Shared Filter
+CREATE INDEX IF NOT EXISTS jobs_shared_user ON job (shared, hpc_user);
+CREATE INDEX IF NOT EXISTS jobs_shared_project ON job (shared, project);
+-- Shared Filter Sorting
+CREATE INDEX IF NOT EXISTS jobs_shared_numnodes ON job (shared, num_nodes);
+CREATE INDEX IF NOT EXISTS jobs_shared_numhwthreads ON job (shared, num_hwthreads);
+CREATE INDEX IF NOT EXISTS jobs_shared_numacc ON job (shared, num_acc);
+CREATE INDEX IF NOT EXISTS jobs_shared_energy ON job (shared, energy);
+
+-- Shared Time Filter Sorting
+CREATE INDEX IF NOT EXISTS jobs_shared_starttime_duration ON job (shared, start_time, duration);
+CREATE INDEX IF NOT EXISTS jobs_shared_duration_starttime ON job (shared, duration, start_time);
+
 -- ArrayJob Filter
 CREATE INDEX IF NOT EXISTS jobs_arrayjobid_starttime ON job (array_job_id, start_time);
 CREATE INDEX IF NOT EXISTS jobs_cluster_arrayjobid_starttime ON job (cluster, array_job_id, start_time);

-- Sorting without active filters
-CREATE INDEX IF NOT EXISTS jobs_starttime ON job (start_time);
-CREATE INDEX IF NOT EXISTS jobs_duration ON job (duration);
-CREATE INDEX IF NOT EXISTS jobs_numnodes ON job (num_nodes);
-CREATE INDEX IF NOT EXISTS jobs_numhwthreads ON job (num_hwthreads);
-CREATE INDEX IF NOT EXISTS jobs_numacc ON job (num_acc);
-CREATE INDEX IF NOT EXISTS jobs_energy ON job (energy);
-
 -- Single filters with default starttime sorting
 CREATE INDEX IF NOT EXISTS jobs_duration_starttime ON job (duration, start_time);
 CREATE INDEX IF NOT EXISTS jobs_numnodes_starttime ON job (num_nodes, start_time);
@@ -223,6 +235,22 @@ CREATE INDEX IF NOT EXISTS jobs_numhwthreads_starttime ON job (num_hwthreads, st
 CREATE INDEX IF NOT EXISTS jobs_numacc_starttime ON job (num_acc, start_time);
 CREATE INDEX IF NOT EXISTS jobs_energy_starttime ON job (energy, start_time);

+-- Single filters with duration sorting
+CREATE INDEX IF NOT EXISTS jobs_starttime_duration ON job (start_time, duration);
+CREATE INDEX IF NOT EXISTS jobs_numnodes_duration ON job (num_nodes, duration);
+CREATE INDEX IF NOT EXISTS jobs_numhwthreads_duration ON job (num_hwthreads, duration);
+CREATE INDEX IF NOT EXISTS jobs_numacc_duration ON job (num_acc, duration);
+CREATE INDEX IF NOT EXISTS jobs_energy_duration ON job (energy, duration);
+
+-- Backup Indices For High Variety Columns
+CREATE INDEX IF NOT EXISTS jobs_starttime ON job (start_time);
+CREATE INDEX IF NOT EXISTS jobs_duration ON job (duration);
+
+-- Notes:
+-- Cluster+Partition+Jobstate Filter: Tested -> Full array of combinations not required
+-- Cluster+JobState+Shared Filter: Tested -> No further timing improvement
+-- JobState+Shared Filter: Tested -> No further timing improvement
+
 -- Optimize DB index usage
 PRAGMA optimize;

--- a/internal/repository/migrations/sqlite3/10_node-table.up.sql
+++ b/internal/repository/migrations/sqlite3/10_node-table.up.sql
@@ -23,6 +23,7 @@ CREATE TABLE "node_state" (
    CHECK (health_state IN (
        'full', 'partial', 'failed'
    )),
+    health_metrics TEXT,          -- JSON array of strings
    node_id INTEGER,
    FOREIGN KEY (node_id) REFERENCES node (id)
 );
@@ -33,12 +34,11 @@ CREATE INDEX IF NOT EXISTS nodes_cluster_subcluster ON node (cluster, subcluster

 -- Add NEW Indices For New Node_State Table Fields
 CREATE INDEX IF NOT EXISTS nodestates_timestamp ON node_state (time_stamp);
-CREATE INDEX IF NOT EXISTS nodestates_state ON node_state (node_state);
-CREATE INDEX IF NOT EXISTS nodestates_health ON node_state (health_state);
 CREATE INDEX IF NOT EXISTS nodestates_state_timestamp ON node_state (node_state, time_stamp);
 CREATE INDEX IF NOT EXISTS nodestates_health_timestamp ON node_state (health_state, time_stamp);
 CREATE INDEX IF NOT EXISTS nodestates_nodeid_state ON node_state (node_id, node_state);
 CREATE INDEX IF NOT EXISTS nodestates_nodeid_health ON node_state (node_id, health_state);
+CREATE INDEX IF NOT EXISTS nodestates_nodeid_timestamp ON node_state (node_id, time_stamp DESC);

 -- Add NEW Indices For Increased Amounts of Tags
 CREATE INDEX IF NOT EXISTS tags_jobid ON jobtag (job_id);
--- a/internal/repository/node.go
+++ b/internal/repository/node.go
@@ -10,14 +10,17 @@ import (
 	"database/sql"
 	"encoding/json"
 	"fmt"
+	"slices"
+	"sort"
+	"strings"
 	"sync"
 	"time"

 	"github.com/ClusterCockpit/cc-backend/internal/graph/model"
 	"github.com/ClusterCockpit/cc-backend/pkg/archive"
-	cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
-	"github.com/ClusterCockpit/cc-lib/lrucache"
-	"github.com/ClusterCockpit/cc-lib/schema"
+	cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger"
+	"github.com/ClusterCockpit/cc-lib/v2/lrucache"
+	"github.com/ClusterCockpit/cc-lib/v2/schema"
 	sq "github.com/Masterminds/squirrel"
 	"github.com/jmoiron/sqlx"
 )
@@ -49,6 +52,38 @@ func GetNodeRepository() *NodeRepository {
 	return nodeRepoInstance
 }

+// latestStateCondition returns a squirrel expression that keeps only the newest
+// node_state row per node: the correlated subquery orders by time_stamp DESC, LIMIT 1.
+// The outer query must join node and node_state, since node.id is referenced here.
+func latestStateCondition() sq.Sqlizer {
+	return sq.Expr(
+		"node_state.id = (SELECT ns2.id FROM node_state ns2 WHERE ns2.node_id = node.id ORDER BY ns2.time_stamp DESC LIMIT 1)",
+	)
+}
+
+// applyNodeFilters extends a query joining node and node_state with every non-nil NodeFilter field:
+// string fields go through buildStringCondition, state fields are equality-matched on node_state.
+func applyNodeFilters(query sq.SelectBuilder, filters []*model.NodeFilter) sq.SelectBuilder {
+	for _, f := range filters {
+		if f.Cluster != nil {
+			query = buildStringCondition("node.cluster", f.Cluster, query)
+		}
+		if f.SubCluster != nil {
+			query = buildStringCondition("node.subcluster", f.SubCluster, query)
+		}
+		if f.Hostname != nil {
+			query = buildStringCondition("node.hostname", f.Hostname, query)
+		}
+		if f.SchedulerState != nil {
+			query = query.Where("node_state.node_state = ?", f.SchedulerState)
+		}
+		if f.HealthState != nil {
+			query = query.Where("node_state.health_state = ?", f.HealthState)
+		}
+	}
+	return query
+}
+
 func (r *NodeRepository) FetchMetadata(hostname string, cluster string) (map[string]string, error) {
 	start := time.Now()

@@ -79,17 +114,16 @@ func (r *NodeRepository) FetchMetadata(hostname string, cluster string) (map[str

 func (r *NodeRepository) GetNode(hostname string, cluster string, withMeta bool) (*schema.Node, error) {
 	node := &schema.Node{}
-	var timestamp int
-	if err := sq.Select("node.hostname", "node.cluster", "node.subcluster", "node_state.node_state",
-		"node_state.health_state", "MAX(node_state.time_stamp) as time").
-		From("node_state").
-		Join("node ON node_state.node_id = node.id").
+	if err := sq.Select("node.hostname", "node.cluster", "node.subcluster",
+		"node_state.node_state", "node_state.health_state").
+		From("node").
+		Join("node_state ON node_state.node_id = node.id").
+		Where(latestStateCondition()).
 		Where("node.hostname = ?", hostname).
 		Where("node.cluster = ?", cluster).
-		GroupBy("node_state.node_id").
 		RunWith(r.DB).
-		QueryRow().Scan(&node.Hostname, &node.Cluster, &node.SubCluster, &node.NodeState, &node.HealthState, &timestamp); err != nil {
-		cclog.Warnf("Error while querying node '%s' at time '%d' from database: %v", hostname, timestamp, err)
+		QueryRow().Scan(&node.Hostname, &node.Cluster, &node.SubCluster, &node.NodeState, &node.HealthState); err != nil {
+		cclog.Warnf("Error while querying node '%s' from database: %v", hostname, err)
 		return nil, err
 	}

@@ -106,31 +140,28 @@ func (r *NodeRepository) GetNode(hostname string, cluster string, withMeta bool)
 	return node, nil
 }

-func (r *NodeRepository) GetNodeById(id int64, withMeta bool) (*schema.Node, error) {
+func (r *NodeRepository) GetNodeByID(id int64, withMeta bool) (*schema.Node, error) {
 	node := &schema.Node{}
-	var timestamp int
-	if err := sq.Select("node.hostname", "node.cluster", "node.subcluster", "node_state.node_state",
-		"node_state.health_state", "MAX(node_state.time_stamp) as time").
-		From("node_state").
-		Join("node ON node_state.node_id = node.id").
+	if err := sq.Select("node.hostname", "node.cluster", "node.subcluster",
+		"node_state.node_state", "node_state.health_state").
+		From("node").
+		Join("node_state ON node_state.node_id = node.id").
+		Where(latestStateCondition()).
 		Where("node.id = ?", id).
-		GroupBy("node_state.node_id").
 		RunWith(r.DB).
-		QueryRow().Scan(&node.Hostname, &node.Cluster, &node.SubCluster, &node.NodeState, &node.HealthState, &timestamp); err != nil {
-		cclog.Warnf("Error while querying node ID '%d' at time '%d' from database: %v", id, timestamp, err)
+		QueryRow().Scan(&node.Hostname, &node.Cluster, &node.SubCluster, &node.NodeState, &node.HealthState); err != nil {
+		cclog.Warnf("Error while querying node ID '%d' from database: %v", id, err)
 		return nil, err
 	}

-	// NEEDS METADATA BY ID
-	// if withMeta {
-	// 	var err error
-	// 	var meta map[string]string
-	// 	if meta, err = r.FetchMetadata(hostname, cluster); err != nil {
-	// 		cclog.Warnf("Error while fetching metadata for node '%s'", hostname)
-	// 		return nil, err
-	// 	}
-	// 	node.MetaData = meta
-	// }
+	if withMeta {
+		meta, metaErr := r.FetchMetadata(node.Hostname, node.Cluster)
+		if metaErr != nil {
+			cclog.Warnf("Error while fetching metadata for node ID '%d': %v", id, metaErr)
+			return nil, metaErr
+		}
+		node.MetaData = meta
+	}

 	return node, nil
 }
@@ -166,9 +197,10 @@ func (r *NodeRepository) AddNode(node *schema.NodeDB) (int64, error) {
 }

 const NamedNodeStateInsert string = `
-INSERT INTO node_state (time_stamp, node_state, health_state, cpus_allocated,
-	memory_allocated, gpus_allocated, jobs_running, node_id)
-	VALUES (:time_stamp, :node_state, :health_state, :cpus_allocated, :memory_allocated, :gpus_allocated, :jobs_running, :node_id);`
+INSERT INTO node_state (time_stamp, node_state, health_state, health_metrics,
+	cpus_allocated, memory_allocated, gpus_allocated, jobs_running, node_id)
+	VALUES (:time_stamp, :node_state, :health_state, :health_metrics,
+	:cpus_allocated, :memory_allocated, :gpus_allocated, :jobs_running, :node_id);`

 // TODO: Add real Monitoring Health State

@@ -194,8 +226,7 @@ func (r *NodeRepository) UpdateNodeState(hostname string, cluster string, nodeSt
 				return err
 			}

-			cclog.Infof("Added node '%s' to database", hostname)
-			return nil
+			cclog.Debugf("Added node '%s' to database", hostname)
 		} else {
 			cclog.Warnf("Error while querying node '%v' from database", id)
 			return err
@@ -209,7 +240,7 @@ func (r *NodeRepository) UpdateNodeState(hostname string, cluster string, nodeSt
 		cclog.Errorf("Error while adding node state for '%v' to database", hostname)
 		return err
 	}
-	cclog.Infof("Updated node state for '%s' in database", hostname)
+	cclog.Debugf("Updated node state for '%s' in database", hostname)
 	return nil
 }

@@ -222,6 +253,77 @@ func (r *NodeRepository) UpdateNodeState(hostname string, cluster string, nodeSt
 // 	return nil
 // }

+// NodeStateWithNode combines a node state row with denormalized node info.
+type NodeStateWithNode struct {
+	ID              int64  `db:"id"`
+	TimeStamp       int64  `db:"time_stamp"`
+	NodeState       string `db:"node_state"`
+	HealthState     string `db:"health_state"`
+	HealthMetrics   string `db:"health_metrics"`
+	CpusAllocated   int    `db:"cpus_allocated"`
+	MemoryAllocated int64  `db:"memory_allocated"`
+	GpusAllocated   int    `db:"gpus_allocated"`
+	JobsRunning     int    `db:"jobs_running"`
+	Hostname        string `db:"hostname"`
+	Cluster         string `db:"cluster"`
+	SubCluster      string `db:"subcluster"`
+}
+
+// FindNodeStatesBefore returns node_state rows with time_stamp < cutoff, excluding each
+// node's latest row (by time_stamp), joined with node info for denormalized archiving.
+func (r *NodeRepository) FindNodeStatesBefore(cutoff int64) ([]NodeStateWithNode, error) {
+	rows, err := sq.Select(
+		"node_state.id", "node_state.time_stamp", "node_state.node_state",
+		"node_state.health_state", "node_state.health_metrics",
+		"node_state.cpus_allocated", "node_state.memory_allocated",
+		"node_state.gpus_allocated", "node_state.jobs_running",
+		"node.hostname", "node.cluster", "node.subcluster",
+	).
+		From("node_state").
+		Join("node ON node_state.node_id = node.id").
+		Where(sq.Lt{"node_state.time_stamp": cutoff}).
+		Where("node_state.id NOT IN (SELECT ns2.id FROM node_state ns2 WHERE ns2.time_stamp = (SELECT MAX(ns3.time_stamp) FROM node_state ns3 WHERE ns3.node_id = ns2.node_id))").
+		OrderBy("node.cluster ASC", "node.subcluster ASC", "node.hostname ASC", "node_state.time_stamp ASC").
+		RunWith(r.DB).Query()
+	if err != nil {
+		return nil, err
+	}
+	defer rows.Close()
+
+	var result []NodeStateWithNode
+	for rows.Next() {
+		var ns NodeStateWithNode
+		var healthMetrics sql.NullString
+		if err := rows.Scan(&ns.ID, &ns.TimeStamp, &ns.NodeState,
+			&ns.HealthState, &healthMetrics,
+			&ns.CpusAllocated, &ns.MemoryAllocated,
+			&ns.GpusAllocated, &ns.JobsRunning,
+			&ns.Hostname, &ns.Cluster, &ns.SubCluster); err != nil {
+			return nil, err
+		}
+		ns.HealthMetrics = healthMetrics.String
+		result = append(result, ns)
+	}
+	return result, nil
+}
+
+// DeleteNodeStatesBefore removes node_state rows with time_stamp < cutoff,
+// but always preserves the row with the latest timestamp per node_id.
+func (r *NodeRepository) DeleteNodeStatesBefore(cutoff int64) (int64, error) {
+	res, err := r.DB.Exec(
+		`DELETE FROM node_state WHERE time_stamp < ?
+		 AND id NOT IN (
+		   SELECT id FROM node_state ns2
+		   WHERE ns2.time_stamp = (SELECT MAX(ns3.time_stamp) FROM node_state ns3 WHERE ns3.node_id = ns2.node_id)
+		 )`,
+		cutoff,
+	)
+	if err != nil {
+		return 0, err
+	}
+	return res.RowsAffected()
+}
+
 func (r *NodeRepository) DeleteNode(id int64) error {
 	_, err := r.DB.Exec(`DELETE FROM node WHERE node.id = ?`, id)
 	if err != nil {
@@ -241,38 +343,17 @@ func (r *NodeRepository) QueryNodes(
 	order *model.OrderByInput, // Currently unused!
 ) ([]*schema.Node, error) {
 	query, qerr := AccessCheck(ctx,
-		sq.Select("hostname", "cluster", "subcluster", "node_state", "health_state", "MAX(time_stamp) as time").
+		sq.Select("node.hostname", "node.cluster", "node.subcluster",
+			"node_state.node_state", "node_state.health_state").
 			From("node").
-			Join("node_state ON node_state.node_id = node.id"))
+			Join("node_state ON node_state.node_id = node.id").
+			Where(latestStateCondition()))
 	if qerr != nil {
 		return nil, qerr
 	}

-	for _, f := range filters {
-		if f.Cluster != nil {
-			query = buildStringCondition("cluster", f.Cluster, query)
-		}
-		if f.Subcluster != nil {
-			query = buildStringCondition("subcluster", f.Subcluster, query)
-		}
-		if f.Hostname != nil {
-			query = buildStringCondition("hostname", f.Hostname, query)
-		}
-		if f.SchedulerState != nil {
-			query = query.Where("node_state = ?", f.SchedulerState)
-			// Requires Additional time_stamp Filter: Else the last (past!) time_stamp with queried state will be returned
-			now := time.Now().Unix()
-			query = query.Where(sq.Gt{"time_stamp": (now - 60)})
-		}
-		if f.HealthState != nil {
-			query = query.Where("health_state = ?", f.HealthState)
-			// Requires Additional time_stamp Filter: Else the last (past!) time_stamp with queried state will be returned
-			now := time.Now().Unix()
-			query = query.Where(sq.Gt{"time_stamp": (now - 60)})
-		}
-	}
-
-	query = query.GroupBy("node_id").OrderBy("hostname ASC")
+	query = applyNodeFilters(query, filters)
+	query = query.OrderBy("node.hostname ASC")

 	if page != nil && page.ItemsPerPage != -1 {
 		limit := uint64(page.ItemsPerPage)
@@ -290,11 +371,10 @@ func (r *NodeRepository) QueryNodes(
 	nodes := make([]*schema.Node, 0)
 	for rows.Next() {
 		node := schema.Node{}
-		var timestamp int
 		if err := rows.Scan(&node.Hostname, &node.Cluster, &node.SubCluster,
-			&node.NodeState, &node.HealthState, &timestamp); err != nil {
+			&node.NodeState, &node.HealthState); err != nil {
 			rows.Close()
-			cclog.Warnf("Error while scanning rows (QueryNodes) at time '%d'", timestamp)
+			cclog.Warn("Error while scanning rows (QueryNodes)")
 			return nil, err
 		}
 		nodes = append(nodes, &node)
@@ -386,73 +466,115 @@ func (r *NodeRepository) QueryNodesWithMeta(
 	return nodes, nil
 }

-// CountNodes returns the total matched nodes based on a node filter. It always operates
-// on the last state (largest timestamp).
-func (r *NodeRepository) CountNodes(
+// QueryNodesWithMeta returns a list of nodes based on a node filter. It always operates on the
+// last state (largest timestamp) and also returns both optional JSON columns (meta_data, health_metrics).
+func (r *NodeRepository) QueryNodesWithMeta(
 	ctx context.Context,
 	filters []*model.NodeFilter,
-) (int, error) {
+	page *model.PageRequest,
+	order *model.OrderByInput, // Currently unused!
+) ([]*schema.Node, error) {
 	query, qerr := AccessCheck(ctx,
-		sq.Select("time_stamp", "count(*) as countRes").
+		sq.Select("node.hostname", "node.cluster", "node.subcluster",
+			"node_state.node_state", "node_state.health_state",
+			"node.meta_data", "node_state.health_metrics").
 			From("node").
-			Join("node_state ON node_state.node_id = node.id"))
+			Join("node_state ON node_state.node_id = node.id").
+			Where(latestStateCondition()))
 	if qerr != nil {
-		return 0, qerr
+		return nil, qerr
 	}

-	for _, f := range filters {
-		if f.Cluster != nil {
-			query = buildStringCondition("cluster", f.Cluster, query)
-		}
-		if f.Subcluster != nil {
-			query = buildStringCondition("subcluster", f.Subcluster, query)
-		}
-		if f.Hostname != nil {
-			query = buildStringCondition("hostname", f.Hostname, query)
-		}
-		if f.SchedulerState != nil {
-			query = query.Where("node_state = ?", f.SchedulerState)
-			// Requires Additional time_stamp Filter: Else the last (past!) time_stamp with queried state will be returned
-			now := time.Now().Unix()
-			query = query.Where(sq.Gt{"time_stamp": (now - 60)})
-		}
-		if f.HealthState != nil {
-			query = query.Where("health_state = ?", f.HealthState)
-			// Requires Additional time_stamp Filter: Else the last (past!) time_stamp with queried state will be returned
-			now := time.Now().Unix()
-			query = query.Where(sq.Gt{"time_stamp": (now - 60)})
-		}
-	}
+	query = applyNodeFilters(query, filters)
+	query = query.OrderBy("node.hostname ASC")

-	query = query.GroupBy("time_stamp").OrderBy("time_stamp DESC").Limit(1)
+	if page != nil && page.ItemsPerPage != -1 {
+		limit := uint64(page.ItemsPerPage)
+		query = query.Offset((uint64(page.Page) - 1) * limit).Limit(limit)
+	}

 	rows, err := query.RunWith(r.stmtCache).Query()
 	if err != nil {
+		queryString, queryVars, _ := query.ToSql()
+		cclog.Errorf("Error while running query '%s' %v: %v", queryString, queryVars, err)
+		return nil, err
+	}
+
+	nodes := make([]*schema.Node, 0)
+	for rows.Next() {
+		node := schema.Node{}
+		RawMetaData := make([]byte, 0)
+		RawMetricHealth := make([]byte, 0)
+
+		if err := rows.Scan(&node.Hostname, &node.Cluster, &node.SubCluster,
+			&node.NodeState, &node.HealthState, &RawMetaData, &RawMetricHealth); err != nil {
+			rows.Close()
+			cclog.Warn("Error while scanning rows (QueryNodes)")
+			return nil, err
+		}
+
+		if len(RawMetaData) == 0 {
+			node.MetaData = nil
+		} else {
+			metaData := make(map[string]string)
+			if err := json.Unmarshal(RawMetaData, &metaData); err != nil {
+				cclog.Warn("Error while unmarshaling raw metadata json")
+				return nil, err
+			}
+			node.MetaData = metaData
+		}
+
+		if len(RawMetricHealth) == 0 {
+			node.HealthData = nil
+		} else {
+			healthData := make(map[string][]string)
+			if err := json.Unmarshal(RawMetricHealth, &healthData); err != nil {
+				cclog.Warn("Error while unmarshaling raw healthdata json")
+				return nil, err
+			}
+			node.HealthData = healthData
+		}
+
+		nodes = append(nodes, &node)
+	}
+
+	return nodes, nil
+}
+
+// CountNodes returns the total matched nodes based on a node filter. It always operates
+// on the last state (largest timestamp) per node.
+func (r *NodeRepository) CountNodes(
+	ctx context.Context,
+	filters []*model.NodeFilter,
+) (int, error) {
+	query, qerr := AccessCheck(ctx,
+		sq.Select("COUNT(*)").
+			From("node").
+			Join("node_state ON node_state.node_id = node.id").
+			Where(latestStateCondition()))
+	if qerr != nil {
+		return 0, qerr
+	}
+
+	query = applyNodeFilters(query, filters)
+
+	var count int
+	if err := query.RunWith(r.stmtCache).QueryRow().Scan(&count); err != nil {
 		queryString, queryVars, _ := query.ToSql()
 		cclog.Errorf("Error while running query '%s' %v: %v", queryString, queryVars, err)
 		return 0, err
 	}

-	var totalNodes int
-	for rows.Next() {
-		var timestamp int
-		if err := rows.Scan(&timestamp, &totalNodes); err != nil {
-			rows.Close()
-			cclog.Warnf("Error while scanning rows (CountNodes) at time '%d'", timestamp)
-			return 0, err
-		}
-	}
-
-	return totalNodes, nil
+	return count, nil
 }

 func (r *NodeRepository) ListNodes(cluster string) ([]*schema.Node, error) {
-	q := sq.Select("node.hostname", "node.cluster", "node.subcluster", "node_state.node_state",
-		"node_state.health_state", "MAX(node_state.time_stamp) as time").
+	q := sq.Select("node.hostname", "node.cluster", "node.subcluster",
+		"node_state.node_state", "node_state.health_state").
 		From("node").
 		Join("node_state ON node_state.node_id = node.id").
+		Where(latestStateCondition()).
 		Where("node.cluster = ?", cluster).
-		GroupBy("node_state.node_id").
 		OrderBy("node.hostname ASC")

 	rows, err := q.RunWith(r.DB).Query()
@@ -464,10 +586,9 @@ func (r *NodeRepository) ListNodes(cluster string) ([]*schema.Node, error) {
 	defer rows.Close()
 	for rows.Next() {
 		node := &schema.Node{}
-		var timestamp int
 		if err := rows.Scan(&node.Hostname, &node.Cluster,
-			&node.SubCluster, &node.NodeState, &node.HealthState, &timestamp); err != nil {
-			cclog.Warnf("Error while scanning node list (ListNodes) at time '%d'", timestamp)
+			&node.SubCluster, &node.NodeState, &node.HealthState); err != nil {
+			cclog.Warn("Error while scanning node list (ListNodes)")
 			return nil, err
 		}

@@ -478,11 +599,11 @@ func (r *NodeRepository) ListNodes(cluster string) ([]*schema.Node, error) {
 }

 func (r *NodeRepository) MapNodes(cluster string) (map[string]string, error) {
-	q := sq.Select("node.hostname", "node_state.node_state", "MAX(node_state.time_stamp) as time").
+	q := sq.Select("node.hostname", "node_state.node_state").
 		From("node").
 		Join("node_state ON node_state.node_id = node.id").
+		Where(latestStateCondition()).
 		Where("node.cluster = ?", cluster).
-		GroupBy("node_state.node_id").
 		OrderBy("node.hostname ASC")

 	rows, err := q.RunWith(r.DB).Query()
@@ -495,9 +616,8 @@ func (r *NodeRepository) MapNodes(cluster string) (map[string]string, error) {
 	defer rows.Close()
 	for rows.Next() {
 		var hostname, nodestate string
-		var timestamp int
-		if err := rows.Scan(&hostname, &nodestate, &timestamp); err != nil {
-			cclog.Warnf("Error while scanning node list (MapNodes) at time '%d'", timestamp)
+		if err := rows.Scan(&hostname, &nodestate); err != nil {
+			cclog.Warn("Error while scanning node list (MapNodes)")
 			return nil, err
 		}

@@ -509,37 +629,15 @@ func (r *NodeRepository) MapNodes(cluster string) (map[string]string, error) {

 func (r *NodeRepository) CountStates(ctx context.Context, filters []*model.NodeFilter, column string) ([]*model.NodeStates, error) {
 	query, qerr := AccessCheck(ctx,
-		sq.Select(column, "COUNT(*) as count").
+		sq.Select(column).
 			From("node").
 			Join("node_state ON node_state.node_id = node.id").
-			Where(latestStateCondition()).
-			GroupBy(column))
+			Where(latestStateCondition()))
 	if qerr != nil {
 		return nil, qerr
 	}

-	query = query.Join("node_state ON node_state.node_id = node.id")
-
-	for _, f := range filters {
-		if f.Hostname != nil {
-			query = buildStringCondition("hostname", f.Hostname, query)
-		}
-		if f.Cluster != nil {
-			query = buildStringCondition("cluster", f.Cluster, query)
-		}
-		if f.Subcluster != nil {
-			query = buildStringCondition("subcluster", f.Subcluster, query)
-		}
-		if f.SchedulerState != nil {
-			query = query.Where("node_state = ?", f.SchedulerState)
-		}
-		if f.HealthState != nil {
-			query = query.Where("health_state = ?", f.HealthState)
-		}
-	}
-
-	// Add Group and Order
-	query = query.GroupBy("hostname").OrderBy("hostname DESC")
+	query = applyNodeFilters(query, filters)

 	rows, err := query.RunWith(r.stmtCache).Query()
 	if err != nil {
@@ -549,6 +647,18 @@ func (r *NodeRepository) CountStates(ctx context.Context, filters []*model.NodeF
 	}
 	defer rows.Close()

+	stateMap := map[string]int{}
+	for rows.Next() {
+		var state string
+		if err := rows.Scan(&state); err != nil {
+			rows.Close()
+			cclog.Warn("Error while scanning rows (CountStates)")
+			return nil, err
+		}
+
+		stateMap[state] += 1
+	}
+
 	nodes := make([]*model.NodeStates, 0)
 	for rows.Next() {
 		var state string
@@ -587,8 +697,8 @@ func (r *NodeRepository) CountStatesTimed(ctx context.Context, filters []*model.
 		if f.Cluster != nil {
 			query = buildStringCondition("cluster", f.Cluster, query)
 		}
-		if f.Subcluster != nil {
-			query = buildStringCondition("subcluster", f.Subcluster, query)
+		if f.SubCluster != nil {
+			query = buildStringCondition("subcluster", f.SubCluster, query)
 		}
 		if f.SchedulerState != nil {
 			query = query.Where("node_state = ?", f.SchedulerState)
@@ -640,6 +750,132 @@ func (r *NodeRepository) CountStatesTimed(ctx context.Context, filters []*model.
 	return timedStates, nil
 }

+func (r *NodeRepository) GetNodesForList(
+	ctx context.Context,
+	cluster string,
+	subCluster string,
+	stateFilter string,
+	nodeFilter string,
+	page *model.PageRequest,
+) ([]string, map[string]string, int, bool, error) {
+	// Init Return Vars
+	nodes := make([]string, 0)
+	stateMap := make(map[string]string)
+	countNodes := 0
+	hasNextPage := false
+
+	// Build Filters
+	queryFilters := make([]*model.NodeFilter, 0)
+	if cluster != "" {
+		queryFilters = append(queryFilters, &model.NodeFilter{Cluster: &model.StringInput{Eq: &cluster}})
+	}
+	if subCluster != "" {
+		queryFilters = append(queryFilters, &model.NodeFilter{SubCluster: &model.StringInput{Eq: &subCluster}})
+	}
+	if nodeFilter != "" && stateFilter != "notindb" {
+		queryFilters = append(queryFilters, &model.NodeFilter{Hostname: &model.StringInput{Contains: &nodeFilter}})
+	}
+	if stateFilter != "all" && stateFilter != "notindb" {
+		queryState := schema.SchedulerState(stateFilter)
+		queryFilters = append(queryFilters, &model.NodeFilter{SchedulerState: &queryState})
+	}
+	// if healthFilter != "all" {
+	// 	filters = append(filters, &model.NodeFilter{HealthState: &healthFilter})
+	// }
+
+	// Special Case: Disable paging for the "notindb" (missing nodes) filter; save ItemsPerPage so it can be restored later
+	var backupItems int
+	if stateFilter == "notindb" {
+		backupItems = page.ItemsPerPage
+		page.ItemsPerPage = -1
+	}
+
+	// Query Nodes From DB
+	rawNodes, serr := r.QueryNodes(ctx, queryFilters, page, nil) // Order not Used
+	if serr != nil {
+		cclog.Warn("error while loading node database data (Resolver.NodeMetricsList)")
+		return nil, nil, 0, false, serr
+	}
+
+	// Intermediate Node Result Info
+	for _, node := range rawNodes {
+		if node == nil {
+			continue
+		}
+		nodes = append(nodes, node.Hostname)
+		stateMap[node.Hostname] = string(node.NodeState)
+	}
+
+	// Special Case: Find nodes listed in the cluster topology (archive.NodeLists) but missing from the DB node table
+	if stateFilter == "notindb" {
+		// Reapply Original Paging
+		page.ItemsPerPage = backupItems
+		// Get Nodes From Topology
+		var topoNodes []string
+		if subCluster != "" {
+			scNodes := archive.NodeLists[cluster][subCluster]
+			topoNodes = scNodes.PrintList()
+		} else {
+			subClusterNodeLists := archive.NodeLists[cluster]
+			for _, nodeList := range subClusterNodeLists {
+				topoNodes = append(topoNodes, nodeList.PrintList()...)
+			}
+		}
+		// Compare to all nodes from cluster/subcluster in DB
+		var missingNodes []string
+		for _, scanNode := range topoNodes {
+			if !slices.Contains(nodes, scanNode) {
+				missingNodes = append(missingNodes, scanNode)
+			}
+		}
+		// Filter nodes by name
+		if nodeFilter != "" {
+			filteredNodesByName := []string{}
+			for _, missingNode := range missingNodes {
+				if strings.Contains(missingNode, nodeFilter) {
+					filteredNodesByName = append(filteredNodesByName, missingNode)
+				}
+			}
+			missingNodes = filteredNodesByName
+		}
+		// Sort Missing Nodes Alphanumerically
+		slices.Sort(missingNodes)
+		// Total Missing
+		countNodes = len(missingNodes)
+		// Apply paging
+		if countNodes > page.ItemsPerPage {
+			start := (page.Page - 1) * page.ItemsPerPage
+			end := start + page.ItemsPerPage
+			if end > countNodes {
+				end = countNodes
+				hasNextPage = false
+			} else {
+				hasNextPage = true
+			}
+			nodes = missingNodes[start:end]
+		} else {
+			nodes = missingNodes
+		}
+
+	} else {
+		// DB Nodes: Count and derive hasNextPage from count
+		var cerr error
+		countNodes, cerr = r.CountNodes(ctx, queryFilters)
+		if cerr != nil {
+			cclog.Warn("error while counting node database data (Resolver.NodeMetricsList)")
+			return nil, nil, 0, false, cerr
+		}
+		hasNextPage = page.Page*page.ItemsPerPage < countNodes
+	}
+
+	// Fallback for non-init'd node table in DB; Ignores stateFilter
+	if stateFilter == "all" && countNodes == 0 {
+		nodes, countNodes, hasNextPage = getNodesFromTopol(cluster, subCluster, nodeFilter, page)
+	}
+
+	return nodes, stateMap, countNodes, hasNextPage, nil
+}
+
 func AccessCheck(ctx context.Context, query sq.SelectBuilder) (sq.SelectBuilder, error) {
 	user := GetUserFromContext(ctx)
 	return AccessCheckWithUser(user, query)
@@ -661,3 +897,51 @@ func AccessCheckWithUser(user *schema.User, query sq.SelectBuilder) (sq.SelectBu
 		return qnil, fmt.Errorf("user has no or unknown roles")
 	}
 }
+
+func getNodesFromTopol(cluster string, subCluster string, nodeFilter string, page *model.PageRequest) ([]string, int, bool) {
+	// 0) Init additional vars
+	hasNextPage := false
+	totalNodes := 0
+
+	// 1) Get list of all nodes
+	var topolNodes []string
+	if subCluster != "" {
+		scNodes := archive.NodeLists[cluster][subCluster]
+		topolNodes = scNodes.PrintList()
+	} else {
+		subClusterNodeLists := archive.NodeLists[cluster]
+		for _, nodeList := range subClusterNodeLists {
+			topolNodes = append(topolNodes, nodeList.PrintList()...)
+		}
+	}
+
+	// 2) Filter nodes
+	if nodeFilter != "" {
+		filteredNodes := []string{}
+		for _, node := range topolNodes {
+			if strings.Contains(node, nodeFilter) {
+				filteredNodes = append(filteredNodes, node)
+			}
+		}
+		topolNodes = filteredNodes
+	}
+
+	// 2.1) Count total nodes && Sort nodes -> Sorting invalidated after ccms return ...
+	totalNodes = len(topolNodes)
+	sort.Strings(topolNodes)
+
+	// 3) Apply paging
+	if len(topolNodes) > page.ItemsPerPage {
+		start := (page.Page - 1) * page.ItemsPerPage
+		end := start + page.ItemsPerPage
+		if end >= len(topolNodes) {
+			end = len(topolNodes)
+			hasNextPage = false
+		} else {
+			hasNextPage = true
+		}
+		topolNodes = topolNodes[start:end]
+	}
+
+	return topolNodes, totalNodes, hasNextPage
+}
--- a/internal/repository/node_test.go
+++ b/internal/repository/node_test.go
@@ -15,9 +15,9 @@ import (

 	"github.com/ClusterCockpit/cc-backend/internal/config"
 	"github.com/ClusterCockpit/cc-backend/pkg/archive"
-	ccconf "github.com/ClusterCockpit/cc-lib/ccConfig"
-	cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
-	"github.com/ClusterCockpit/cc-lib/schema"
+	ccconf "github.com/ClusterCockpit/cc-lib/v2/ccConfig"
+	cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger"
+	"github.com/ClusterCockpit/cc-lib/v2/schema"
 	_ "github.com/mattn/go-sqlite3"
 )

@@ -26,7 +26,7 @@ func nodeTestSetup(t *testing.T) {
 		"main": {
 	"addr":            "0.0.0.0:8080",
 	"validate": false,
-  "apiAllowedIPs": [
+  "api-allowed-ips": [
    "*"
  ]
 	},
@@ -38,18 +38,7 @@ func nodeTestSetup(t *testing.T) {
  "jwts": {
      "max-age": "2m"
  }
-	},
-	"clusters": [
-	{
-	   "name": "testcluster",
-	   "metricDataRepository": {"kind": "test", "url": "bla:8081"},
-	   "filterRanges": {
-		"numNodes": { "from": 1, "to": 64 },
-		"duration": { "from": 0, "to": 86400 },
-		"startTime": { "from": "2022-01-01T00:00:00Z", "to": null }
-	   }
 	}
-	]
 }`
 	const testclusterJSON = `{
        "name": "testcluster",
@@ -130,7 +119,7 @@ func nodeTestSetup(t *testing.T) {
 	}

 	dbfilepath := filepath.Join(tmpdir, "test.db")
-	err := MigrateDB("sqlite3", dbfilepath)
+	err := MigrateDB(dbfilepath)
 	if err != nil {
 		t.Fatal(err)
 	}
@@ -144,19 +133,22 @@ func nodeTestSetup(t *testing.T) {

 	// Load and check main configuration
 	if cfg := ccconf.GetPackageConfig("main"); cfg != nil {
-		if clustercfg := ccconf.GetPackageConfig("clusters"); clustercfg != nil {
-			config.Init(cfg, clustercfg)
-		} else {
-			cclog.Abort("Cluster configuration must be present")
-		}
+		config.Init(cfg)
 	} else {
 		cclog.Abort("Main configuration must be present")
 	}
 	archiveCfg := fmt.Sprintf("{\"kind\": \"file\",\"path\": \"%s\"}", jobarchive)

-	Connect("sqlite3", dbfilepath)
+	if err := ResetConnection(); err != nil {
+		t.Fatal(err)
+	}
+	t.Cleanup(func() {
+		ResetConnection()
+	})

-	if err := archive.Init(json.RawMessage(archiveCfg), config.Keys.DisableArchive); err != nil {
+	Connect(dbfilepath)
+
+	if err := archive.Init(json.RawMessage(archiveCfg)); err != nil {
 		t.Fatal(err)
 	}
 }
@@ -164,8 +156,12 @@ func nodeTestSetup(t *testing.T) {
 func TestUpdateNodeState(t *testing.T) {
 	nodeTestSetup(t)

+	repo := GetNodeRepository()
+	now := time.Now().Unix()
+
 	nodeState := schema.NodeStateDB{
-		TimeStamp: time.Now().Unix(), NodeState: "allocated",
+		TimeStamp:       now,
+		NodeState:       "allocated",
 		CpusAllocated:   72,
 		MemoryAllocated: 480,
 		GpusAllocated:   0,
@@ -173,18 +169,152 @@ func TestUpdateNodeState(t *testing.T) {
 		JobsRunning:     1,
 	}

-	repo := GetNodeRepository()
 	err := repo.UpdateNodeState("host124", "testcluster", &nodeState)
 	if err != nil {
-		return
+		t.Fatal(err)
 	}

 	node, err := repo.GetNode("host124", "testcluster", false)
 	if err != nil {
-		return
+		t.Fatal(err)
 	}

 	if node.NodeState != "allocated" {
 		t.Errorf("wrong node state\ngot: %s \nwant: allocated ", node.NodeState)
 	}
+
+	t.Run("FindBeforeEmpty", func(t *testing.T) {
+		// Only the current-timestamp row exists, so nothing should be found before now
+		rows, err := repo.FindNodeStatesBefore(now)
+		if err != nil {
+			t.Fatal(err)
+		}
+		if len(rows) != 0 {
+			t.Errorf("expected 0 rows, got %d", len(rows))
+		}
+	})
+
+	t.Run("DeleteOldRows", func(t *testing.T) {
+		// Insert 2 more old rows for host124
+		for i, ts := range []int64{now - 7200, now - 3600} {
+			ns := schema.NodeStateDB{
+				TimeStamp:       ts,
+				NodeState:       "allocated",
+				HealthState:     schema.MonitoringStateFull,
+				CpusAllocated:   72,
+				MemoryAllocated: 480,
+				JobsRunning:     i,
+			}
+			if err := repo.UpdateNodeState("host124", "testcluster", &ns); err != nil {
+				t.Fatal(err)
+			}
+		}
+
+		// Delete rows older than 30 minutes
+		cutoff := now - 1800
+		cnt, err := repo.DeleteNodeStatesBefore(cutoff)
+		if err != nil {
+			t.Fatal(err)
+		}
+
+		// Should delete the 2 old rows
+		if cnt != 2 {
+			t.Errorf("expected 2 deleted rows, got %d", cnt)
+		}
+
+		// Latest row should still exist
+		node, err := repo.GetNode("host124", "testcluster", false)
+		if err != nil {
+			t.Fatal(err)
+		}
+		if node.NodeState != "allocated" {
+			t.Errorf("expected node state 'allocated', got %s", node.NodeState)
+		}
+	})
+
+	t.Run("PreservesLatestPerNode", func(t *testing.T) {
+		// Insert a single old row for host125 — it's the latest per node so it must survive
+		ns := schema.NodeStateDB{
+			TimeStamp:       now - 7200,
+			NodeState:       "idle",
+			HealthState:     schema.MonitoringStateFull,
+			CpusAllocated:   0,
+			MemoryAllocated: 0,
+			JobsRunning:     0,
+		}
+		if err := repo.UpdateNodeState("host125", "testcluster", &ns); err != nil {
+			t.Fatal(err)
+		}
+
+		// Delete everything older than now — the latest per node should be preserved
+		_, err := repo.DeleteNodeStatesBefore(now)
+		if err != nil {
+			t.Fatal(err)
+		}
+
+		// The latest row for host125 must still exist
+		node, err := repo.GetNode("host125", "testcluster", false)
+		if err != nil {
+			t.Fatal(err)
+		}
+		if node.NodeState != "idle" {
+			t.Errorf("expected node state 'idle', got %s", node.NodeState)
+		}
+
+		// Verify exactly 1 row remains for host125
+		var countAfter int
+		if err := repo.DB.QueryRow(
+			"SELECT COUNT(*) FROM node_state WHERE node_id = (SELECT id FROM node WHERE hostname = 'host125')").
+			Scan(&countAfter); err != nil {
+			t.Fatal(err)
+		}
+		if countAfter != 1 {
+			t.Errorf("expected 1 row remaining for host125, got %d", countAfter)
+		}
+	})
+
+	t.Run("FindBeforeWithJoin", func(t *testing.T) {
+		// Insert old and current rows for host123
+		for _, ts := range []int64{now - 7200, now} {
+			ns := schema.NodeStateDB{
+				TimeStamp:       ts,
+				NodeState:       "allocated",
+				HealthState:     schema.MonitoringStateFull,
+				CpusAllocated:   8,
+				MemoryAllocated: 1024,
+				GpusAllocated:   1,
+				JobsRunning:     1,
+			}
+			if err := repo.UpdateNodeState("host123", "testcluster", &ns); err != nil {
+				t.Fatal(err)
+			}
+		}
+
+		// Find rows older than 30 minutes, excluding latest per node
+		cutoff := now - 1800
+		rows, err := repo.FindNodeStatesBefore(cutoff)
+		if err != nil {
+			t.Fatal(err)
+		}
+
+		// Should find the old host123 row
+		found := false
+		for _, row := range rows {
+			if row.Hostname == "host123" && row.TimeStamp == now-7200 {
+				found = true
+				if row.Cluster != "testcluster" {
+					t.Errorf("expected cluster 'testcluster', got %s", row.Cluster)
+				}
+				if row.SubCluster != "sc1" {
+					t.Errorf("expected subcluster 'sc1', got %s", row.SubCluster)
+				}
+				if row.CpusAllocated != 8 {
+					t.Errorf("expected cpus_allocated 8, got %d", row.CpusAllocated)
+				}
+			}
+		}
+		if !found {
+			t.Errorf("expected to find old host123 row among %d results", len(rows))
+		}
+	})
 }
--- a/internal/repository/repository_test.go
+++ b/internal/repository/repository_test.go
@@ -6,11 +6,13 @@ package repository

 import (
 	"context"
+	"os"
+	"path/filepath"
 	"testing"

 	"github.com/ClusterCockpit/cc-backend/internal/graph/model"
-	cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
-	"github.com/ClusterCockpit/cc-lib/schema"
+	cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger"
+	"github.com/ClusterCockpit/cc-lib/v2/schema"
 	_ "github.com/mattn/go-sqlite3"
 )

@@ -46,7 +48,7 @@ func BenchmarkSelect1(b *testing.B) {
 }

 func BenchmarkDB_FindJobById(b *testing.B) {
-	var jobId int64 = 1677322
+	var jobID int64 = 1677322

 	b.Run("FindJobById", func(b *testing.B) {
 		db := setup(b)
@@ -55,7 +57,7 @@ func BenchmarkDB_FindJobById(b *testing.B) {

 		b.RunParallel(func(pb *testing.PB) {
 			for pb.Next() {
-				_, err := db.FindById(getContext(b), jobId)
+				_, err := db.FindByID(getContext(b), jobID)
 				noErr(b, err)
 			}
 		})
@@ -63,7 +65,7 @@ func BenchmarkDB_FindJobById(b *testing.B) {
 }

 func BenchmarkDB_FindJob(b *testing.B) {
-	var jobId int64 = 107266
+	var jobID int64 = 107266
 	var startTime int64 = 1657557241
 	cluster := "fritz"

@@ -74,7 +76,7 @@ func BenchmarkDB_FindJob(b *testing.B) {

 		b.RunParallel(func(pb *testing.PB) {
 			for pb.Next() {
-				_, err := db.Find(&jobId, &cluster, &startTime)
+				_, err := db.Find(&jobID, &cluster, &startTime)
 				noErr(b, err)
 			}
 		})
@@ -148,10 +150,24 @@ func getContext(tb testing.TB) context.Context {
 func setup(tb testing.TB) *JobRepository {
 	tb.Helper()
 	cclog.Init("warn", true)
-	dbfile := "testdata/job.db"
-	err := MigrateDB("sqlite3", dbfile)
+
+	// Copy test DB to a temp file for test isolation
+	srcData, err := os.ReadFile("testdata/job.db")
 	noErr(tb, err)
-	Connect("sqlite3", dbfile)
+	dbfile := filepath.Join(tb.TempDir(), "job.db")
+	err = os.WriteFile(dbfile, srcData, 0o644)
+	noErr(tb, err)
+
+	// Reset singletons so Connect uses the new temp DB
+	err = ResetConnection()
+	noErr(tb, err)
+	tb.Cleanup(func() {
+		ResetConnection()
+	})
+
+	err = MigrateDB(dbfile)
+	noErr(tb, err)
+	Connect(dbfile)
 	return GetJobRepository()
 }

--- a/internal/repository/stats.go
+++ b/internal/repository/stats.go
@@ -2,6 +2,44 @@
 // All rights reserved. This file is part of cc-backend.
 // Use of this source code is governed by a MIT-style
 // license that can be found in the LICENSE file.
+
+// This file contains job statistics and histogram generation functionality for the JobRepository.
+//
+// # Job Statistics
+//
+// The statistics methods provide aggregated metrics about jobs including total jobs, users,
+// walltime, and resource usage (nodes, cores, accelerators). Statistics can be computed:
+//   - Overall (JobsStats): Single aggregate across all matching jobs
+//   - Grouped (JobsStatsGrouped): Aggregated by user, project, cluster, or subcluster
+//   - Counts (JobCountGrouped, AddJobCount): Simple job counts with optional filtering
+//
+// All statistics methods support filtering via JobFilter and respect security contexts.
+//
+// # Histograms
+//
+// Histogram methods generate distribution data for visualization:
+//   - Duration, nodes, cores, accelerators (AddHistograms)
+//   - Job metrics like CPU load, memory usage (AddMetricHistograms)
+//
+// Histograms use intelligent binning:
+//   - Duration: Variable bin sizes (1m, 10m, 1h, 6h, 12h, 24h) with zero-padding
+//   - Resources: Natural value-based bins
+//   - Metrics: Normalized to peak values with configurable bin counts
+//
+// # Running vs. Completed Jobs
+//
+// Statistics handle running jobs specially:
+//   - Duration calculated as (now - start_time) for running jobs
+//   - Metric histograms for running jobs load data from metric backend instead of footprint
+//   - Job state filtering distinguishes running/completed jobs
+//
+// # Performance Considerations
+//
+// - All queries use prepared statements via stmtCache
+// - Complex aggregations use SQL for efficiency
+// - Histogram pre-initialization ensures consistent bin ranges
+// - Metric histogram queries limited to 5000 jobs for running job analysis
+
 package repository

 import (
@@ -12,14 +50,16 @@ import (

 	"github.com/ClusterCockpit/cc-backend/internal/config"
 	"github.com/ClusterCockpit/cc-backend/internal/graph/model"
-	"github.com/ClusterCockpit/cc-backend/internal/metricDataDispatcher"
+	"github.com/ClusterCockpit/cc-backend/internal/metricdispatch"
 	"github.com/ClusterCockpit/cc-backend/pkg/archive"
-	cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
-	"github.com/ClusterCockpit/cc-lib/schema"
+	cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger"
+	"github.com/ClusterCockpit/cc-lib/v2/schema"
 	sq "github.com/Masterminds/squirrel"
 )

-// GraphQL validation should make sure that no unkown values can be specified.
+// groupBy2column maps GraphQL Aggregate enum values to their corresponding database column names.
+// Used by JobsStatsGrouped and JobCountGrouped to translate user-facing grouping dimensions
+// into SQL GROUP BY clauses. GraphQL validation ensures only valid enum values are accepted.
 var groupBy2column = map[model.Aggregate]string{
 	model.AggregateUser:       "job.hpc_user",
 	model.AggregateProject:    "job.project",
@@ -27,6 +67,9 @@ var groupBy2column = map[model.Aggregate]string{
 	model.AggregateSubcluster: "job.subcluster",
 }

+// sortBy2column maps GraphQL SortByAggregate enum values to their corresponding computed column names.
+// Used by JobsStatsGrouped to translate sort preferences into SQL ORDER BY clauses.
+// Column names match the AS aliases used in buildStatsQuery.
 var sortBy2column = map[model.SortByAggregate]string{
 	model.SortByAggregateTotaljobs:      "totalJobs",
 	model.SortByAggregateTotalusers:     "totalUsers",
@@ -39,6 +82,21 @@ var sortBy2column = map[model.SortByAggregate]string{
 	model.SortByAggregateTotalacchours:  "totalAccHours",
 }

+// buildCountQuery constructs a SQL query to count jobs with optional grouping and filtering.
+//
+// Parameters:
+//   - filter: Job filters to apply (cluster, user, time range, etc.)
+//   - kind: Special filter - "running" for running jobs only, "short" for jobs under threshold
+//   - col: Column name to GROUP BY; empty string for total count without grouping
+//
+// Returns a SelectBuilder that produces either:
+//   - Single count: COUNT(job.id) when col is empty
+//   - Grouped counts: col, COUNT(job.id) when col is specified
+//
+// The kind parameter enables counting specific job categories:
+//   - "running": Only jobs with job_state = 'running'
+//   - "short": Only jobs with duration < ShortRunningJobsDuration config value
+//   - empty: All jobs matching filters
 func (r *JobRepository) buildCountQuery(
 	filter []*model.JobFilter,
 	kind string,
@@ -47,10 +105,8 @@ func (r *JobRepository) buildCountQuery(
 	var query sq.SelectBuilder

 	if col != "" {
-		// Scan columns: id, cnt
 		query = sq.Select(col, "COUNT(job.id)").From("job").GroupBy(col)
 	} else {
-		// Scan columns:  cnt
 		query = sq.Select("COUNT(job.id)").From("job")
 	}

@@ -68,42 +124,58 @@ func (r *JobRepository) buildCountQuery(
 	return query
 }

+// buildStatsQuery constructs a SQL query to compute comprehensive job statistics with optional grouping.
+//
+// Parameters:
+//   - filter: Job filters to apply (cluster, user, time range, etc.)
+//   - col: Column name to GROUP BY; empty string for overall statistics without grouping
+//
+// Returns a SelectBuilder that produces comprehensive statistics:
+//   - totalJobs: Count of jobs
+//   - totalUsers: COUNT(DISTINCT job.hpc_user); at most 1 per group when grouping by user
+//   - totalWalltime: Sum of job durations in hours
+//   - totalNodes: Sum of nodes used across all jobs
+//   - totalNodeHours: Sum of (duration × num_nodes) in hours
+//   - totalCores: Sum of hardware threads used across all jobs
+//   - totalCoreHours: Sum of (duration × num_hwthreads) in hours
+//   - totalAccs: Sum of accelerators used across all jobs
+//   - totalAccHours: Sum of (duration × num_acc) in hours
+//
+// Special handling:
+//   - Running jobs: Duration calculated as (now - start_time) instead of stored duration
+//   - Grouped queries: Also select grouping column and user's display name from hpc_user table
+//   - All time values converted from seconds to hours (÷ 3600) and rounded
 func (r *JobRepository) buildStatsQuery(
 	filter []*model.JobFilter,
 	col string,
 ) sq.SelectBuilder {
 	var query sq.SelectBuilder
-	castType := r.getCastType()
-
-	// fmt.Sprintf(`CAST(ROUND((CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END) / 3600) as %s) as value`, time.Now().Unix(), castType)

 	if col != "" {
-		// Scan columns: id, name, totalJobs, totalUsers, totalWalltime, totalNodes, totalNodeHours, totalCores, totalCoreHours, totalAccs, totalAccHours
 		query = sq.Select(
 			col,
 			"name",
 			"COUNT(job.id) as totalJobs",
 			"COUNT(DISTINCT job.hpc_user) AS totalUsers",
-			fmt.Sprintf(`CAST(ROUND(SUM((CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END)) / 3600) as %s) as totalWalltime`, time.Now().Unix(), castType),
-			fmt.Sprintf(`CAST(SUM(job.num_nodes) as %s) as totalNodes`, castType),
-			fmt.Sprintf(`CAST(ROUND(SUM((CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END) * job.num_nodes) / 3600) as %s) as totalNodeHours`, time.Now().Unix(), castType),
-			fmt.Sprintf(`CAST(SUM(job.num_hwthreads) as %s) as totalCores`, castType),
-			fmt.Sprintf(`CAST(ROUND(SUM((CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END) * job.num_hwthreads) / 3600) as %s) as totalCoreHours`, time.Now().Unix(), castType),
-			fmt.Sprintf(`CAST(SUM(job.num_acc) as %s) as totalAccs`, castType),
-			fmt.Sprintf(`CAST(ROUND(SUM((CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END) * job.num_acc) / 3600) as %s) as totalAccHours`, time.Now().Unix(), castType),
+			fmt.Sprintf(`CAST(ROUND(SUM((CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END)) / 3600) as int) as totalWalltime`, time.Now().Unix()),
+			`CAST(SUM(job.num_nodes) as int) as totalNodes`,
+			fmt.Sprintf(`CAST(ROUND(SUM((CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END) * job.num_nodes) / 3600) as int) as totalNodeHours`, time.Now().Unix()),
+			`CAST(SUM(job.num_hwthreads) as int) as totalCores`,
+			fmt.Sprintf(`CAST(ROUND(SUM((CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END) * job.num_hwthreads) / 3600) as int) as totalCoreHours`, time.Now().Unix()),
+			`CAST(SUM(job.num_acc) as int) as totalAccs`,
+			fmt.Sprintf(`CAST(ROUND(SUM((CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END) * job.num_acc) / 3600) as int) as totalAccHours`, time.Now().Unix()),
 		).From("job").LeftJoin("hpc_user ON hpc_user.username = job.hpc_user").GroupBy(col)
 	} else {
-		// Scan columns: totalJobs, totalUsers, totalWalltime, totalNodes, totalNodeHours, totalCores, totalCoreHours, totalAccs, totalAccHours
 		query = sq.Select(
 			"COUNT(job.id) as totalJobs",
 			"COUNT(DISTINCT job.hpc_user) AS totalUsers",
-			fmt.Sprintf(`CAST(ROUND(SUM((CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END)) / 3600) as %s)`, time.Now().Unix(), castType),
-			fmt.Sprintf(`CAST(SUM(job.num_nodes) as %s)`, castType),
-			fmt.Sprintf(`CAST(ROUND(SUM((CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END) * job.num_nodes) / 3600) as %s)`, time.Now().Unix(), castType),
-			fmt.Sprintf(`CAST(SUM(job.num_hwthreads) as %s)`, castType),
-			fmt.Sprintf(`CAST(ROUND(SUM((CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END) * job.num_hwthreads) / 3600) as %s)`, time.Now().Unix(), castType),
-			fmt.Sprintf(`CAST(SUM(job.num_acc) as %s)`, castType),
-			fmt.Sprintf(`CAST(ROUND(SUM((CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END) * job.num_acc) / 3600) as %s)`, time.Now().Unix(), castType),
+			fmt.Sprintf(`CAST(ROUND(SUM((CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END)) / 3600) as int)`, time.Now().Unix()),
+			`CAST(SUM(job.num_nodes) as int)`,
+			fmt.Sprintf(`CAST(ROUND(SUM((CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END) * job.num_nodes) / 3600) as int)`, time.Now().Unix()),
+			`CAST(SUM(job.num_hwthreads) as int)`,
+			fmt.Sprintf(`CAST(ROUND(SUM((CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END) * job.num_hwthreads) / 3600) as int)`, time.Now().Unix()),
+			`CAST(SUM(job.num_acc) as int)`,
+			fmt.Sprintf(`CAST(ROUND(SUM((CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END) * job.num_acc) / 3600) as int)`, time.Now().Unix()),
 		).From("job")
 	}

@@ -114,21 +186,25 @@ func (r *JobRepository) buildStatsQuery(
 	return query
 }

-func (r *JobRepository) getCastType() string {
-	var castType string
-
-	switch r.driver {
-	case "sqlite3":
-		castType = "int"
-	case "mysql":
-		castType = "unsigned"
-	default:
-		castType = ""
-	}
-
-	return castType
-}
-
+// JobsStatsGrouped computes comprehensive job statistics grouped by a dimension (user, project, cluster, or subcluster).
+//
+// This is the primary method for generating aggregated statistics views in the UI, providing
+// metrics like total jobs, walltime, and resource usage broken down by the specified grouping.
+//
+// Parameters:
+//   - ctx: Context for security checks and cancellation
+//   - filter: Filters to apply (time range, cluster, job state, etc.)
+//   - page: Optional pagination (ItemsPerPage: -1 disables pagination)
+//   - sortBy: Optional sort column (totalJobs, totalWalltime, totalCoreHours, etc.)
+//   - groupBy: Required grouping dimension (User, Project, Cluster, or SubCluster)
+//
+// Returns a slice of JobsStatistics, one per group, with:
+//   - ID: The group identifier (username, project name, cluster name, etc.)
+//   - Name: Display name (for users, from hpc_user.name; empty for other groups)
+//   - Statistics: totalJobs, totalUsers, totalWalltime, resource usage metrics
+//
+// Security: Respects user roles via SecurityCheck - users see only their own data unless admin/support.
+// Performance: Results are sorted in SQL and pagination applied before scanning rows.
 func (r *JobRepository) JobsStatsGrouped(
 	ctx context.Context,
 	filter []*model.JobFilter,
@@ -253,6 +329,21 @@ func (r *JobRepository) JobsStatsGrouped(
 	return stats, nil
 }

+// JobsStats computes overall job statistics across all matching jobs without grouping.
+//
+// This method provides a single aggregate view of job metrics, useful for dashboard
+// summaries and overall system utilization reports.
+//
+// Parameters:
+//   - ctx: Context for security checks and cancellation
+//   - filter: Filters to apply (time range, cluster, job state, etc.)
+//
+// Returns a single-element slice containing aggregate statistics:
+//   - totalJobs, totalUsers, totalWalltime
+//   - totalNodeHours, totalCoreHours, totalAccHours
+//
+// Unlike JobsStatsGrouped, this returns overall totals without breaking down by dimension.
+// Security checks are applied via SecurityCheck to respect user access levels.
 func (r *JobRepository) JobsStats(
 	ctx context.Context,
 	filter []*model.JobFilter,
@@ -300,6 +391,15 @@ func (r *JobRepository) JobsStats(
 	return stats, nil
 }

+// LoadJobStat retrieves a specific statistic for a metric from a job's statistics.
+// Returns 0.0 if the metric is not found or statType is invalid.
+//
+// Parameters:
+//   - job: Job struct with populated Statistics field
+//   - metric: Name of the metric to query (e.g., "cpu_load", "mem_used")
+//   - statType: Type of statistic: "avg", "min", or "max"
+//
+// Returns the requested statistic value or 0.0 if not found.
 func LoadJobStat(job *schema.Job, metric string, statType string) float64 {
 	if stats, ok := job.Statistics[metric]; ok {
 		switch statType {
@@ -317,6 +417,17 @@ func LoadJobStat(job *schema.Job, metric string, statType string) float64 {
 	return 0.0
 }

+// JobCountGrouped counts jobs grouped by a dimension without computing detailed statistics.
+//
+// This is a lightweight alternative to JobsStatsGrouped when only job counts are needed,
+// avoiding the overhead of calculating walltime and resource usage metrics.
+//
+// Parameters:
+//   - ctx: Context for security checks
+//   - filter: Filters to apply
+//   - groupBy: Grouping dimension (User, Project, Cluster, or SubCluster)
+//
+// Returns JobsStatistics with only ID and TotalJobs populated for each group.
 func (r *JobRepository) JobCountGrouped(
 	ctx context.Context,
 	filter []*model.JobFilter,
@@ -362,6 +473,20 @@ func (r *JobRepository) JobCountGrouped(
 	return stats, nil
 }

+// AddJobCountGrouped augments existing statistics with additional job counts by category.
+//
+// This method enriches JobsStatistics returned by JobsStatsGrouped or JobCountGrouped
+// with counts of running or short-running (based on ShortRunningJobsDuration) jobs, matched by group ID.
+//
+// Parameters:
+//   - ctx: Context for security checks
+//   - filter: Filters to apply
+//   - groupBy: Grouping dimension (must match the dimension used for stats parameter)
+//   - stats: Existing statistics to augment (modified in-place by ID matching)
+//   - kind: "running" to add RunningJobs count, "short" to add ShortJobs count
+//
+// Returns the same stats slice with RunningJobs or ShortJobs fields populated per group.
+// Groups without matching jobs will have 0 for the added field.
 func (r *JobRepository) AddJobCountGrouped(
 	ctx context.Context,
 	filter []*model.JobFilter,
@@ -416,6 +541,18 @@ func (r *JobRepository) AddJobCountGrouped(
 	return stats, nil
 }

+// AddJobCount augments existing overall statistics with additional job counts by category.
+//
+// Similar to AddJobCountGrouped but for ungrouped statistics. Applies the same count
+// to all statistics entries (typically just one).
+//
+// Parameters:
+//   - ctx: Context for security checks
+//   - filter: Filters to apply
+//   - stats: Existing statistics to augment (modified in-place)
+//   - kind: "running" to add RunningJobs count, "short" to add ShortJobs count
+//
+// Returns the same stats slice with RunningJobs or ShortJobs fields set to the total count.
 func (r *JobRepository) AddJobCount(
 	ctx context.Context,
 	filter []*model.JobFilter,
@@ -451,6 +588,26 @@ func (r *JobRepository) AddJobCount(
 	return stats, nil
 }

+// AddHistograms augments statistics with distribution histograms for job properties.
+//
+// Generates histogram data for visualization of job duration, node count, core count,
+// and accelerator count distributions. Duration histogram uses intelligent binning based
+// on the requested resolution.
+//
+// Parameters:
+//   - ctx: Context for security checks
+//   - filter: Filters to apply to jobs included in histograms
+//   - stat: Statistics struct to augment (modified in-place)
+//   - durationBins: Bin size selector - "1m", "10m", "1h", "6h" or "12h"; any other value (e.g. "24h") falls back to the default of 1-hour bins
+//
+// Populates these fields in stat:
+//   - HistDuration: Job duration distribution (zero-padded bins)
+//   - HistNumNodes: Node count distribution
+//   - HistNumCores: Core (hwthread) count distribution
+//   - HistNumAccs: Accelerator count distribution
+//
+// Duration bins are pre-initialized with zeros to ensure consistent ranges for visualization.
+// Bin size determines both the width and maximum duration displayed (e.g., "1h" = 48 bins × 1h = 48h max).
 func (r *JobRepository) AddHistograms(
 	ctx context.Context,
 	filter []*model.JobFilter,
@@ -461,20 +618,20 @@ func (r *JobRepository) AddHistograms(

 	var targetBinCount int
 	var targetBinSize int
-	switch {
-	case *durationBins == "1m": // 1 Minute Bins + Max 60 Bins -> Max 60 Minutes
+	switch *durationBins {
+	case "1m": // 1 Minute Bins + Max 60 Bins -> Max 60 Minutes
 		targetBinCount = 60
 		targetBinSize = 60
-	case *durationBins == "10m": // 10 Minute Bins + Max 72 Bins -> Max 12 Hours
+	case "10m": // 10 Minute Bins + Max 72 Bins -> Max 12 Hours
 		targetBinCount = 72
 		targetBinSize = 600
-	case *durationBins == "1h": // 1 Hour Bins + Max 48 Bins -> Max 48 Hours
+	case "1h": // 1 Hour Bins + Max 48 Bins -> Max 48 Hours
 		targetBinCount = 48
 		targetBinSize = 3600
-	case *durationBins == "6h": // 6 Hour Bins + Max 12 Bins -> Max 3 Days
+	case "6h": // 6 Hour Bins + Max 12 Bins -> Max 3 Days
 		targetBinCount = 12
 		targetBinSize = 21600
-	case *durationBins == "12h": // 12 hour Bins + Max 14 Bins -> Max 7 Days
+	case "12h": // 12 hour Bins + Max 14 Bins -> Max 7 Days
 		targetBinCount = 14
 		targetBinSize = 43200
 	default: // 24h
@@ -482,10 +639,9 @@ func (r *JobRepository) AddHistograms(
 		targetBinSize = 3600
 	}

-	castType := r.getCastType()
 	var err error
 	// Return X-Values always as seconds, will be formatted into minutes and hours in frontend
-	value := fmt.Sprintf(`CAST(ROUND(((CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END) / %d) + 1) as %s) as value`, time.Now().Unix(), targetBinSize, castType)
+	value := fmt.Sprintf(`CAST(ROUND(((CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END) / %d) + 1) as int) as value`, time.Now().Unix(), targetBinSize)
 	stat.HistDuration, err = r.jobsDurationStatisticsHistogram(ctx, value, filter, targetBinSize, &targetBinCount)
 	if err != nil {
 		cclog.Warn("Error while loading job statistics histogram: job duration")
@@ -514,7 +670,30 @@ func (r *JobRepository) AddHistograms(
 	return stat, nil
 }

-// Requires thresholds for metric from config for cluster? Of all clusters and use largest? split to 10 + 1 for artifacts?
+// AddMetricHistograms augments statistics with distribution histograms for job metrics.
+//
+// Generates histogram data for metrics like CPU load, memory usage, etc. Handles running
+// and completed jobs differently: running jobs load data from metric backend, completed jobs
+// use footprint data from database.
+//
+// Parameters:
+//   - ctx: Context for security checks
+//   - filter: Filters to apply (MUST contain State filter for running jobs)
+//   - metrics: List of metric names to histogram (e.g., ["cpu_load", "mem_used"])
+//   - stat: Statistics struct to augment (modified in-place)
+//   - targetBinCount: Number of histogram bins (default: 10)
+//
+// Populates HistMetrics field in stat with MetricHistoPoints for each metric.
+//
+// Binning algorithm:
+//   - Values normalized to metric's peak value from cluster configuration
+//   - Bins evenly distributed from 0 to peak
+//   - Pre-initialized with zeros for consistent visualization
+//
+// Limitations:
+//   - Running jobs: Limited to 5000 jobs for performance
+//   - Requires valid cluster configuration with metric peak values
+//   - Uses footprint statistic (avg/max/min) configured per metric
 func (r *JobRepository) AddMetricHistograms(
 	ctx context.Context,
 	filter []*model.JobFilter,
@@ -549,7 +728,16 @@ func (r *JobRepository) AddMetricHistograms(
 	return stat, nil
 }

-// `value` must be the column grouped by, but renamed to "value"
+// jobsStatisticsHistogram generates a simple histogram by grouping on a column value.
+//
+// Used for histograms where the column value directly represents the bin (e.g., node count, core count).
+// Unlike duration/metric histograms, this doesn't pre-initialize bins with zeros.
+//
+// Parameters:
+//   - value: SQL expression that produces the histogram value, aliased as "value"
+//   - filters: Job filters to apply
+//
+// Returns histogram points with Value (from column) and Count (number of jobs).
 func (r *JobRepository) jobsStatisticsHistogram(
 	ctx context.Context,
 	value string,
@@ -594,6 +782,26 @@ func (r *JobRepository) jobsStatisticsHistogram(
 	return points, nil
 }

+// jobsDurationStatisticsHistogram generates a duration histogram with pre-initialized bins.
+//
+// Bins are zero-padded to provide consistent ranges for visualization, unlike simple
+// histograms which only return bins with data. The value parameter should compute
+// the bin number from job duration.
+//
+// Parameters:
+//   - value: SQL expression computing bin number from duration, aliased as "value"
+//   - filters: Job filters to apply
+//   - binSizeSeconds: Width of each bin in seconds
+//   - targetBinCount: Number of bins to pre-initialize
+//
+// Returns histogram points with Value (bin_number × binSizeSeconds) and Count.
+// All bins from 1 to targetBinCount are returned, with Count=0 for empty bins.
+//
+// Algorithm:
+//  1. Pre-initialize targetBinCount bins with zero counts
+//  2. Query database for actual counts per bin
+//  3. Match query results to pre-initialized bins by value
+//  4. Bins without matches remain at zero
 func (r *JobRepository) jobsDurationStatisticsHistogram(
 	ctx context.Context,
 	value string,
@@ -609,7 +817,8 @@ func (r *JobRepository) jobsDurationStatisticsHistogram(
 		return nil, qerr
 	}

-	// Setup Array
+	// Each bin represents a duration range: bin N = [(N-1)*binSizeSeconds, N*binSizeSeconds)
+	// Example: binSizeSeconds=3600 (1 hour): bin 1 = 0-1h (Value 3600), bin 2 = 1-2h (Value 7200), etc.
 	points := make([]*model.HistoPoint, 0)
 	for i := 1; i <= *targetBinCount; i++ {
 		point := model.HistoPoint{Value: i * binSizeSeconds, Count: 0}
@@ -627,7 +836,8 @@ func (r *JobRepository) jobsDurationStatisticsHistogram(
 	}
 	defer rows.Close()

-	// Fill Array at matching $Value
+	// Match query results to pre-initialized bins.
+	// point.Value from query is the bin number; multiply by binSizeSeconds to match bin.Value.
 	for rows.Next() {
 		point := model.HistoPoint{}
 		if err := rows.Scan(&point.Value, &point.Count); err != nil {
@@ -637,9 +847,6 @@ func (r *JobRepository) jobsDurationStatisticsHistogram(

 		for _, e := range points {
 			if e.Value == (point.Value * binSizeSeconds) {
-				// Note:
-				//  Matching on unmodified integer value (and multiplying point.Value by binSizeSeconds after match)
-				//  causes frontend to loop into highest targetBinCount, due to zoom condition instantly being fullfilled (cause unknown)
 				e.Count = point.Count
 				break
 			}
@@ -654,18 +861,43 @@ func (r *JobRepository) jobsDurationStatisticsHistogram(
 	return points, nil
 }

+// jobsMetricStatisticsHistogram generates a metric histogram using footprint data from completed jobs.
+//
+// Values are normalized to the metric's peak value and distributed into bins. The algorithm
+// is based on SQL histogram generation techniques, extracting metric values from JSON footprint
+// and computing bin assignments in SQL.
+//
+// Parameters:
+//   - metric: Metric name (e.g., "cpu_load", "mem_used")
+//   - filters: Job filters to apply
+//   - bins: Number of bins to generate
+//
+// Returns MetricHistoPoints with metric name, unit, footprint stat type, and binned data.
+//
+// Algorithm:
+//  1. Determine peak value from cluster configuration (filtered cluster or max across all)
+//  2. Generate SQL that extracts footprint value, normalizes to [0,1], multiplies by bin count
+//  3. Pre-initialize bins with min/max ranges based on peak value
+//  4. Query database for counts per bin
+//  5. Match results to pre-initialized bins
+//
+// Special handling: Values exactly equal to peak are forced into the last bin by multiplying
+// peak by 0.999999999 to avoid creating an extra bin.
 func (r *JobRepository) jobsMetricStatisticsHistogram(
 	ctx context.Context,
 	metric string,
 	filters []*model.JobFilter,
 	bins *int,
 ) (*model.MetricHistoPoints, error) {
-	// Get specific Peak or largest Peak
+	// Peak value defines the upper bound for binning: values are distributed across
+	// bins from 0 to peak. First try to get peak from filtered cluster, otherwise
+	// scan all clusters to find the maximum peak value.
 	var metricConfig *schema.MetricConfig
 	var peak float64
 	var unit string
 	var footprintStat string

+	// Try to get metric config from filtered cluster
 	for _, f := range filters {
 		if f.Cluster != nil {
 			metricConfig = archive.GetMetricConfig(*f.Cluster.Eq, metric)
@@ -676,6 +908,8 @@ func (r *JobRepository) jobsMetricStatisticsHistogram(
 		}
 	}

+	// If no cluster filter or peak not found, find largest peak across all clusters
+	// This ensures histogram can accommodate all possible values
 	if peak == 0.0 {
 		for _, c := range archive.Clusters {
 			for _, m := range c.MetricConfig {
@@ -694,11 +928,14 @@ func (r *JobRepository) jobsMetricStatisticsHistogram(
 		}
 	}

-	// cclog.Debugf("Metric %s, Peak %f, Unit %s", metric, peak, unit)
-	// Make bins, see https://jereze.com/code/sql-histogram/ (Modified here)
+	// Construct SQL histogram bins using normalized values.
+	// Algorithm based on: https://jereze.com/code/sql-histogram/ (modified)
 	start := time.Now()

-	// Find Jobs' Value Bin Number: Divide Value by Peak, Multiply by RequestedBins, then CAST to INT: Gets Bin-Number of Job
+	// Bin calculation formula:
+	//   bin_number = CAST( (value / peak) * num_bins AS INTEGER ) + 1
+	// Special case: value == peak would create bin N+1, so we test for equality
+	// and multiply peak by 0.999999999 to force it into bin N.
 	binQuery := fmt.Sprintf(`CAST(
 		((case when json_extract(footprint, "$.%s") = %f then %f*0.999999999 else json_extract(footprint, "$.%s") end) / %f)
 		* %v as INTEGER )`,
@@ -707,24 +944,19 @@ func (r *JobRepository) jobsMetricStatisticsHistogram(
 	mainQuery := sq.Select(
 		fmt.Sprintf(`%s + 1 as bin`, binQuery),
 		`count(*) as count`,
-		// For Debug: // fmt.Sprintf(`CAST((%f / %d) as INTEGER ) * %s as min`, peak, *bins, binQuery),
-		// For Debug: // fmt.Sprintf(`CAST((%f / %d) as INTEGER ) * (%s + 1) as max`, peak, *bins, binQuery),
 	).From("job").Where(
 		"JSON_VALID(footprint)",
 	).Where(fmt.Sprintf(`json_extract(footprint, "$.%s") is not null and json_extract(footprint, "$.%s") <= %f`, (metric + "_" + footprintStat), (metric + "_" + footprintStat), peak))

-	// Only accessible Jobs...
 	mainQuery, qerr := SecurityCheck(ctx, mainQuery)
 	if qerr != nil {
 		return nil, qerr
 	}

-	// Filters...
 	for _, f := range filters {
 		mainQuery = BuildWhereClause(f, mainQuery)
 	}

-	// Finalize query with Grouping and Ordering
 	mainQuery = mainQuery.GroupBy("bin").OrderBy("bin")

 	rows, err := mainQuery.RunWith(r.DB).Query()
@@ -734,7 +966,8 @@ func (r *JobRepository) jobsMetricStatisticsHistogram(
 	}
 	defer rows.Close()

-	// Setup Return Array With Bin-Numbers for Match and Min/Max based on Peak
+	// Pre-initialize bins with calculated min/max ranges.
+	// Example: peak=1000, bins=10 -> bin 1=[0,100), bin 2=[100,200), ..., bin 10=[900,1000]
 	points := make([]*model.MetricHistoPoint, 0)
 	binStep := int(peak) / *bins
 	for i := 1; i <= *bins; i++ {
@@ -744,26 +977,18 @@ func (r *JobRepository) jobsMetricStatisticsHistogram(
 		points = append(points, &epoint)
 	}

-	for rows.Next() { // Fill Count if Bin-No. Matches (Not every Bin exists in DB!)
+	// Match query results to pre-initialized bins.
+	for rows.Next() {
 		rpoint := model.MetricHistoPoint{}
-		if err := rows.Scan(&rpoint.Bin, &rpoint.Count); err != nil { // Required for Debug: &rpoint.Min, &rpoint.Max
+		if err := rows.Scan(&rpoint.Bin, &rpoint.Count); err != nil {
 			cclog.Warnf("Error while scanning rows for %s", metric)
-			return nil, err // FIXME: Totally bricks cc-backend if returned and if all metrics requested?
+			return nil, err
 		}

 		for _, e := range points {
-			if e.Bin != nil && rpoint.Bin != nil {
-				if *e.Bin == *rpoint.Bin {
-					e.Count = rpoint.Count
-					// Only Required For Debug: Check DB returned Min/Max against Backend Init above
-					// if rpoint.Min != nil {
-					// 	cclog.Warnf(">>>> Bin %d Min Set For %s to %d (Init'd with: %d)", *e.Bin, metric, *rpoint.Min, *e.Min)
-					// }
-					// if rpoint.Max != nil {
-					// 	cclog.Warnf(">>>> Bin %d Max Set For %s to %d (Init'd with: %d)", *e.Bin, metric, *rpoint.Max, *e.Max)
-					// }
-					break
-				}
+			if e.Bin != nil && rpoint.Bin != nil && *e.Bin == *rpoint.Bin {
+				e.Count = rpoint.Count
+				break
 			}
 		}
 	}
@@ -778,6 +1003,28 @@ func (r *JobRepository) jobsMetricStatisticsHistogram(
 	return &result, nil
 }

+// runningJobsMetricStatisticsHistogram generates metric histograms for running jobs using live data.
+//
+// Unlike completed jobs which use footprint data from the database, running jobs require
+// fetching current metric averages from the metric backend (via metricdispatch).
+//
+// Parameters:
+//   - metrics: List of metric names
+//   - filters: Job filters (should filter to running jobs only)
+//   - bins: Number of histogram bins
+//
+// Returns slice of MetricHistoPoints, one per metric.
+//
+// Limitations:
+//   - Maximum 5000 jobs (returns nil if more jobs match)
+//   - Requires metric backend availability
+//   - Bins based on metric peak values from cluster configuration
+//
+// Algorithm:
+//  1. Query first 5001 jobs to check count limit
+//  2. Load metric averages for all jobs via metricdispatch
+//  3. For each metric, create bins based on peak value
+//  4. Iterate averages and count jobs per bin
 func (r *JobRepository) runningJobsMetricStatisticsHistogram(
 	ctx context.Context,
 	metrics []string,
@@ -785,13 +1032,13 @@ func (r *JobRepository) runningJobsMetricStatisticsHistogram(
 	bins *int,
 ) []*model.MetricHistoPoints {
 	// Get Jobs
-	jobs, err := r.QueryJobs(ctx, filters, &model.PageRequest{Page: 1, ItemsPerPage: 500 + 1}, nil)
+	jobs, err := r.QueryJobs(ctx, filters, &model.PageRequest{Page: 1, ItemsPerPage: 5000 + 1}, nil)
 	if err != nil {
 		cclog.Errorf("Error while querying jobs for footprint: %s", err)
 		return nil
 	}
-	if len(jobs) > 500 {
-		cclog.Errorf("too many jobs matched (max: %d)", 500)
+	if len(jobs) > 5000 {
+		cclog.Errorf("too many jobs matched (max: %d)", 5000)
 		return nil
 	}

@@ -806,7 +1053,7 @@ func (r *JobRepository) runningJobsMetricStatisticsHistogram(
 			continue
 		}

-		if err := metricDataDispatcher.LoadAverages(job, metrics, avgs, ctx); err != nil {
+		if err := metricdispatch.LoadAverages(job, metrics, avgs, ctx); err != nil {
 			cclog.Errorf("Error while loading averages for histogram: %s", err)
 			return nil
 		}
--- a/internal/repository/stats_test.go
+++ b/internal/repository/stats_test.go
@@ -25,11 +25,14 @@ func TestBuildJobStatsQuery(t *testing.T) {
 func TestJobStats(t *testing.T) {
 	r := setup(t)

-	filter := &model.JobFilter{}
-	stats, err := r.JobsStats(getContext(t), []*model.JobFilter{filter})
+	var expectedCount int
+	err := r.DB.QueryRow(`SELECT COUNT(*) FROM job`).Scan(&expectedCount)
 	noErr(t, err)

-	if stats[0].TotalJobs != 544 {
-		t.Fatalf("Want 544, Got %d", stats[0].TotalJobs)
+	stats, err := r.JobsStats(getContext(t), []*model.JobFilter{})
+	noErr(t, err)
+
+	if stats[0].TotalJobs != expectedCount {
+		t.Fatalf("Want %d, Got %d", expectedCount, stats[0].TotalJobs)
 	}
 }
--- a/internal/repository/tags.go
+++ b/internal/repository/tags.go
@@ -2,6 +2,35 @@
 // All rights reserved. This file is part of cc-backend.
 // Use of this source code is governed by a MIT-style
 // license that can be found in the LICENSE file.
+
+// Package repository provides the data access and persistence layer for ClusterCockpit.
+//
+// This file implements tag management functionality for job categorization and classification.
+// Tags support both manual assignment (via REST/GraphQL APIs) and automatic detection
+// (via tagger plugins). The implementation includes role-based access control through
+// tag scopes and maintains bidirectional consistency between the SQL database and
+// the file-based job archive.
+//
+// Database Schema:
+//
+//	CREATE TABLE tag (
+//	    id INTEGER PRIMARY KEY AUTOINCREMENT,
+//	    tag_type VARCHAR(255) NOT NULL,
+//	    tag_name VARCHAR(255) NOT NULL,
+//	    tag_scope VARCHAR(255) NOT NULL DEFAULT "global",
+//	    CONSTRAINT tag_unique UNIQUE (tag_type, tag_name, tag_scope)
+//	);
+//
+//	CREATE TABLE jobtag (
+//	    job_id INTEGER,
+//	    tag_id INTEGER,
+//	    PRIMARY KEY (job_id, tag_id),
+//	    FOREIGN KEY (job_id) REFERENCES job(id) ON DELETE CASCADE,
+//	    FOREIGN KEY (tag_id) REFERENCES tag(id) ON DELETE CASCADE
+//	);
+//
+// The jobtag junction table enables many-to-many relationships between jobs and tags.
+// CASCADE deletion ensures referential integrity when jobs or tags are removed.
 package repository

 import (
@@ -10,15 +39,39 @@ import (
 	"strings"

 	"github.com/ClusterCockpit/cc-backend/pkg/archive"
-	cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
-	"github.com/ClusterCockpit/cc-lib/schema"
+	cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger"
+	"github.com/ClusterCockpit/cc-lib/v2/schema"
 	sq "github.com/Masterminds/squirrel"
 )

+// Tag Scope Rules:
+//
+// Tags in ClusterCockpit have three visibility scopes that control who can see and use them:
+//
+//  1. "global" - Visible to all users, can be used by anyone
+//     Example: System-generated tags like "energy-efficient", "failed", "short"
+//
+//  2. "private" - Only visible to the creating user
+//     Example: Personal notes like "needs-review", "interesting-case"
+//
+//  3. "admin" - Only visible to users with admin or support roles
+//     Example: Internal notes like "hardware-issue", "billing-problem"
+//
+// Authorization Rules:
+//   - Regular users can see "global" tags and their own private tags, and can only create private tags
+//   - Admin/Support can see all scopes; creating "admin" tags requires Admin or an API-only user (see checkScopeAuth)
+//   - Users can only add tags to jobs they have permission to view
+//   - Tag scope is enforced at query time in GetTags() and CountTags()
+
 // AddTag adds the tag with id `tagId` to the job with the database id `jobId`.
 // Requires user authentication for security checks.
+//
+// The user must have permission to view the job. Tag visibility is determined by scope:
+//   - "global" tags: visible to all users
+//   - "private" tags: only visible to the tag creator
+//   - "admin" tags: only visible to admin/support users
 func (r *JobRepository) AddTag(user *schema.User, job int64, tag int64) ([]*schema.Tag, error) {
-	j, err := r.FindByIdWithUser(user, job)
+	j, err := r.FindByIDWithUser(user, job)
 	if err != nil {
 		cclog.Warnf("Error finding job %d for user %s: %v", job, user.Username, err)
 		return nil, err
@@ -32,7 +85,7 @@ func (r *JobRepository) AddTag(user *schema.User, job int64, tag int64) ([]*sche
 // AddTagDirect adds a tag without user security checks.
 // Use only for internal/admin operations.
 func (r *JobRepository) AddTagDirect(job int64, tag int64) ([]*schema.Tag, error) {
-	j, err := r.FindByIdDirect(job)
+	j, err := r.FindByIDDirect(job)
 	if err != nil {
 		cclog.Warnf("Error finding job %d: %v", job, err)
 		return nil, err
@@ -43,12 +96,12 @@ func (r *JobRepository) AddTagDirect(job int64, tag int64) ([]*schema.Tag, error
 	})
 }

-// Removes a tag from a job by tag id.
-// Used by GraphQL API
+// RemoveTag removes the tag with the database id `tag` from the job with the database id `job`.
+// Requires user authentication for security checks. Used by GraphQL API.
 func (r *JobRepository) RemoveTag(user *schema.User, job, tag int64) ([]*schema.Tag, error) {
-	j, err := r.FindByIdWithUser(user, job)
+	j, err := r.FindByIDWithUser(user, job)
 	if err != nil {
-		cclog.Warn("Error while finding job by id")
+		cclog.Warnf("Error while finding job %d for user %s during tag removal: %v", job, user.Username, err)
 		return nil, err
 	}

@@ -68,27 +121,27 @@ func (r *JobRepository) RemoveTag(user *schema.User, job, tag int64) ([]*schema.

 	archiveTags, err := r.getArchiveTags(&job)
 	if err != nil {
-		cclog.Warn("Error while getting tags for job")
+		cclog.Warnf("Error while getting archive tags for job %d in RemoveTag: %v", job, err)
 		return nil, err
 	}

 	return tags, archive.UpdateTags(j, archiveTags)
 }

-// Removes a tag from a job by tag info
-// Used by REST API
+// RemoveJobTagByRequest removes a tag from the job with the database id `job` by tag type, name, and scope.
+// Requires user authentication for security checks. Used by REST API.
 func (r *JobRepository) RemoveJobTagByRequest(user *schema.User, job int64, tagType string, tagName string, tagScope string) ([]*schema.Tag, error) {
 	// Get Tag ID to delete
-	tagID, exists := r.TagId(tagType, tagName, tagScope)
+	tagID, exists := r.TagID(tagType, tagName, tagScope)
 	if !exists {
 		cclog.Warnf("Tag does not exist (name, type, scope): %s, %s, %s", tagName, tagType, tagScope)
 		return nil, fmt.Errorf("tag does not exist (name, type, scope): %s, %s, %s", tagName, tagType, tagScope)
 	}

 	// Get Job
-	j, err := r.FindByIdWithUser(user, job)
+	j, err := r.FindByIDWithUser(user, job)
 	if err != nil {
-		cclog.Warn("Error while finding job by id")
+		cclog.Warnf("Error while finding job %d for user %s during tag removal by request: %v", job, user.Username, err)
 		return nil, err
 	}

@@ -103,19 +156,30 @@ func (r *JobRepository) RemoveJobTagByRequest(user *schema.User, job int64, tagT

 	tags, err := r.GetTags(user, &job)
 	if err != nil {
-		cclog.Warn("Error while getting tags for job")
+		cclog.Warnf("Error while getting tags for job %d in RemoveJobTagByRequest: %v", job, err)
 		return nil, err
 	}

 	archiveTags, err := r.getArchiveTags(&job)
 	if err != nil {
-		cclog.Warn("Error while getting tags for job")
+		cclog.Warnf("Error while getting archive tags for job %d in RemoveJobTagByRequest: %v", job, err)
 		return nil, err
 	}

 	return tags, archive.UpdateTags(j, archiveTags)
 }

+// removeTagFromArchiveJobs updates the job archive for all affected jobs after a tag deletion.
+//
+// This function is called asynchronously (via goroutine) after removing a tag from the database
+// to synchronize the file-based job archive with the database state. Errors are logged but not
+// returned since this runs in the background.
+//
+// Parameters:
+//   - jobIds: Database IDs of all jobs that had the deleted tag
+//
+// Implementation note: Each job is processed individually to handle partial failures gracefully.
+// If one job fails to update, others will still be processed.
 func (r *JobRepository) removeTagFromArchiveJobs(jobIds []int64) {
 	for _, j := range jobIds {
 		tags, err := r.getArchiveTags(&j)
@@ -124,7 +188,7 @@ func (r *JobRepository) removeTagFromArchiveJobs(jobIds []int64) {
 			continue
 		}

-		job, err := r.FindByIdDirect(j)
+		job, err := r.FindByIDDirect(j)
 		if err != nil {
 			cclog.Warnf("Error while getting job %d", j)
 			continue
@@ -138,18 +202,18 @@ func (r *JobRepository) removeTagFromArchiveJobs(jobIds []int64) {
 // Used by REST API. Does not update tagged jobs in Job archive.
 func (r *JobRepository) RemoveTagByRequest(tagType string, tagName string, tagScope string) error {
 	// Get Tag ID to delete
-	tagID, exists := r.TagId(tagType, tagName, tagScope)
+	tagID, exists := r.TagID(tagType, tagName, tagScope)
 	if !exists {
 		cclog.Warnf("Tag does not exist (name, type, scope): %s, %s, %s", tagName, tagType, tagScope)
 		return fmt.Errorf("tag does not exist (name, type, scope): %s, %s, %s", tagName, tagType, tagScope)
 	}

-	return r.RemoveTagById(tagID)
+	return r.RemoveTagByID(tagID)
 }

 // Removes a tag from db by tag id
 // Used by GraphQL API.
-func (r *JobRepository) RemoveTagById(tagID int64) error {
+func (r *JobRepository) RemoveTagByID(tagID int64) error {
 	jobIds, err := r.FindJobIdsByTag(tagID)
 	if err != nil {
 		return err
@@ -179,8 +243,16 @@ func (r *JobRepository) RemoveTagById(tagID int64) error {
 	return nil
 }

-// CreateTag creates a new tag with the specified type and name and returns its database id.
-func (r *JobRepository) CreateTag(tagType string, tagName string, tagScope string) (tagId int64, err error) {
+// CreateTag creates a new tag with the specified type, name, and scope.
+// Returns the database ID of the newly created tag.
+//
+// Scope defaults to "global" if empty string is provided.
+// Valid scopes: "global", "admin", or a username (private tags use the owner's username as scope)
+//
+// Example:
+//
+//	tagID, err := repo.CreateTag("performance", "high-memory", "global")
+func (r *JobRepository) CreateTag(tagType string, tagName string, tagScope string) (tagID int64, err error) {
 	// Default to "Global" scope if none defined
 	if tagScope == "" {
 		tagScope = "global"
@@ -198,8 +270,14 @@ func (r *JobRepository) CreateTag(tagType string, tagName string, tagScope strin
 	return res.LastInsertId()
 }

+// CountTags returns all tags visible to the user and the count of jobs for each tag.
+// Applies scope-based filtering to respect tag visibility rules.
+//
+// Returns:
+//   - tags: slice of tags the user can see
+//   - counts: map from fmt.Sprint(tagType, tagName, tagID) to job count
+//   - err: any error encountered
 func (r *JobRepository) CountTags(user *schema.User) (tags []schema.Tag, counts map[string]int, err error) {
-	// Fetch all Tags in DB for Display in Frontend Tag-View
 	tags = make([]schema.Tag, 0, 100)
 	xrows, err := r.DB.Queryx("SELECT id, tag_type, tag_name, tag_scope FROM tag")
 	if err != nil {
@@ -228,10 +306,10 @@ func (r *JobRepository) CountTags(user *schema.User) (tags []schema.Tag, counts
 	}

 	// Query and Count Jobs with attached Tags
-	q := sq.Select("t.tag_name, t.id, count(jt.tag_id)").
+	q := sq.Select("t.tag_type, t.tag_name, t.id, count(jt.tag_id)").
 		From("tag t").
 		LeftJoin("jobtag jt ON t.id = jt.tag_id").
-		GroupBy("t.tag_name")
+		GroupBy("t.tag_type, t.tag_name")

 	// Build scope list for filtering
 	var scopeBuilder strings.Builder
@@ -265,14 +343,15 @@ func (r *JobRepository) CountTags(user *schema.User) (tags []schema.Tag, counts

 	counts = make(map[string]int)
 	for rows.Next() {
+		var tagType string
 		var tagName string
-		var tagId int
+		var tagID int
 		var count int
-		if err = rows.Scan(&tagName, &tagId, &count); err != nil {
+		if err = rows.Scan(&tagType, &tagName, &tagID, &count); err != nil {
 			return nil, nil, err
 		}
 		// Use tagId as second Map-Key component to differentiate tags with identical names
-		counts[fmt.Sprint(tagName, tagId)] = count
+		counts[fmt.Sprint(tagType, tagName, tagID)] = count
 	}
 	err = rows.Err()

@@ -280,18 +359,44 @@ func (r *JobRepository) CountTags(user *schema.User) (tags []schema.Tag, counts
 }

 var (
-	ErrTagNotFound        = errors.New("the tag does not exist")
-	ErrJobNotOwned        = errors.New("user is not owner of job")
-	ErrTagNoAccess        = errors.New("user not permitted to use that tag")
-	ErrTagPrivateScope    = errors.New("tag is private to another user")
-	ErrTagAdminScope      = errors.New("tag requires admin privileges")
+	// ErrTagNotFound is returned when a tag ID or tag identifier (type, name, scope) does not exist in the database.
+	ErrTagNotFound = errors.New("the tag does not exist")
+
+	// ErrJobNotOwned is returned when a user attempts to tag a job they do not have permission to access.
+	ErrJobNotOwned = errors.New("user is not owner of job")
+
+	// ErrTagNoAccess is returned when a user attempts to use a tag they cannot access due to scope restrictions.
+	ErrTagNoAccess = errors.New("user not permitted to use that tag")
+
+	// ErrTagPrivateScope is returned when a user attempts to access another user's private tag.
+	ErrTagPrivateScope = errors.New("tag is private to another user")
+
+	// ErrTagAdminScope is returned when a non-admin user attempts to use an admin-scoped tag.
+	ErrTagAdminScope = errors.New("tag requires admin privileges")
+
+	// ErrTagsIncompatScopes is returned when attempting to combine admin and non-admin scoped tags in a single operation.
 	ErrTagsIncompatScopes = errors.New("combining admin and non-admin scoped tags not allowed")
 )

 // addJobTag is a helper function that inserts a job-tag association and updates the archive.
-// Returns the updated tag list for the job.
-func (r *JobRepository) addJobTag(jobId int64, tagId int64, job *schema.Job, getTags func() ([]*schema.Tag, error)) ([]*schema.Tag, error) {
-	q := sq.Insert("jobtag").Columns("job_id", "tag_id").Values(jobId, tagId)
+//
+// This function performs three operations in sequence (not as a single atomic transaction):
+//  1. Inserts the job-tag association into the jobtag junction table
+//  2. Retrieves the updated tag list for the job (using the provided getTags callback)
+//  3. Updates the job archive with the new tags to maintain database-archive consistency
+//
+// Parameters:
+//   - jobID: Database ID of the job
+//   - tagID: Database ID of the tag to associate
+//   - job: Full job object needed for archive update
+//   - getTags: Callback function to retrieve updated tags (allows different security contexts)
+//
+// Returns the complete updated tag list for the job or an error.
+//
+// Note: This function does NOT validate tag scope permissions - callers must perform
+// authorization checks before invoking this helper.
+func (r *JobRepository) addJobTag(jobID int64, tagID int64, job *schema.Job, getTags func() ([]*schema.Tag, error)) ([]*schema.Tag, error) {
+	q := sq.Insert("jobtag").Columns("job_id", "tag_id").Values(jobID, tagID)

 	if _, err := q.RunWith(r.stmtCache).Exec(); err != nil {
 		s, _, _ := q.ToSql()
@@ -301,13 +406,13 @@ func (r *JobRepository) addJobTag(jobId int64, tagId int64, job *schema.Job, get

 	tags, err := getTags()
 	if err != nil {
-		cclog.Warnf("Error getting tags for job %d: %v", jobId, err)
+		cclog.Warnf("Error getting tags for job %d: %v", jobID, err)
 		return nil, err
 	}

-	archiveTags, err := r.getArchiveTags(&jobId)
+	archiveTags, err := r.getArchiveTags(&jobID)
 	if err != nil {
-		cclog.Warnf("Error getting archive tags for job %d: %v", jobId, err)
+		cclog.Warnf("Error getting archive tags for job %d: %v", jobID, err)
 		return nil, err
 	}

@@ -316,7 +421,7 @@ func (r *JobRepository) addJobTag(jobId int64, tagId int64, job *schema.Job, get

 // AddTagOrCreate adds the tag with the specified type and name to the job with the database id `jobId`.
 // If such a tag does not yet exist, it is created.
-func (r *JobRepository) AddTagOrCreate(user *schema.User, jobId int64, tagType string, tagName string, tagScope string) (tagId int64, err error) {
+func (r *JobRepository) AddTagOrCreate(user *schema.User, jobID int64, tagType string, tagName string, tagScope string) (tagID int64, err error) {
 	// Default to "Global" scope if none defined
 	if tagScope == "" {
 		tagScope = "global"
@@ -330,44 +435,45 @@ func (r *JobRepository) AddTagOrCreate(user *schema.User, jobId int64, tagType s
 		return 0, fmt.Errorf("cannot write tag scope with current authorization")
 	}

-	tagId, exists := r.TagId(tagType, tagName, tagScope)
+	tagID, exists := r.TagID(tagType, tagName, tagScope)
 	if !exists {
-		tagId, err = r.CreateTag(tagType, tagName, tagScope)
+		tagID, err = r.CreateTag(tagType, tagName, tagScope)
 		if err != nil {
 			return 0, err
 		}
 	}

-	if _, err := r.AddTag(user, jobId, tagId); err != nil {
+	if _, err := r.AddTag(user, jobID, tagID); err != nil {
 		return 0, err
 	}

-	return tagId, nil
+	return tagID, nil
 }

-// used in auto tagger plugins
-func (r *JobRepository) AddTagOrCreateDirect(jobId int64, tagType string, tagName string) (tagId int64, err error) {
+func (r *JobRepository) AddTagOrCreateDirect(jobID int64, tagType string, tagName string) (tagID int64, err error) {
 	tagScope := "global"

-	tagId, exists := r.TagId(tagType, tagName, tagScope)
+	tagID, exists := r.TagID(tagType, tagName, tagScope)
 	if !exists {
-		tagId, err = r.CreateTag(tagType, tagName, tagScope)
+		tagID, err = r.CreateTag(tagType, tagName, tagScope)
 		if err != nil {
 			return 0, err
 		}
 	}

-	if _, err := r.AddTagDirect(jobId, tagId); err != nil {
+	cclog.Infof("Adding tag %s:%s:%s (direct)", tagType, tagName, tagScope)
+
+	if _, err := r.AddTagDirect(jobID, tagID); err != nil {
 		return 0, err
 	}

-	return tagId, nil
+	return tagID, nil
 }

-func (r *JobRepository) HasTag(jobId int64, tagType string, tagName string) bool {
+func (r *JobRepository) HasTag(jobID int64, tagType string, tagName string) bool {
 	var id int64
 	q := sq.Select("id").From("tag").Join("jobtag ON jobtag.tag_id = tag.id").
-		Where("jobtag.job_id = ?", jobId).Where("tag.tag_type = ?", tagType).
+		Where("jobtag.job_id = ?", jobID).Where("tag.tag_type = ?", tagType).
 		Where("tag.tag_name = ?", tagName)
 	err := q.RunWith(r.stmtCache).QueryRow().Scan(&id)
 	if err != nil {
@@ -377,21 +483,21 @@ func (r *JobRepository) HasTag(jobId int64, tagType string, tagName string) bool
 	}
 }

-// TagId returns the database id of the tag with the specified type and name.
-func (r *JobRepository) TagId(tagType string, tagName string, tagScope string) (tagId int64, exists bool) {
+// TagID returns the database id of the tag with the specified type, name and scope.
+func (r *JobRepository) TagID(tagType string, tagName string, tagScope string) (tagID int64, exists bool) {
 	exists = true
 	if err := sq.Select("id").From("tag").
 		Where("tag.tag_type = ?", tagType).Where("tag.tag_name = ?", tagName).Where("tag.tag_scope = ?", tagScope).
-		RunWith(r.stmtCache).QueryRow().Scan(&tagId); err != nil {
+		RunWith(r.stmtCache).QueryRow().Scan(&tagID); err != nil {
 		exists = false
 	}
 	return
 }

 // TagInfo returns the database infos of the tag with the specified id.
-func (r *JobRepository) TagInfo(tagId int64) (tagType string, tagName string, tagScope string, exists bool) {
+func (r *JobRepository) TagInfo(tagID int64) (tagType string, tagName string, tagScope string, exists bool) {
 	exists = true
-	if err := sq.Select("tag.tag_type", "tag.tag_name", "tag.tag_scope").From("tag").Where("tag.id = ?", tagId).
+	if err := sq.Select("tag.tag_type", "tag.tag_name", "tag.tag_scope").From("tag").Where("tag.id = ?", tagID).
 		RunWith(r.stmtCache).QueryRow().Scan(&tagType, &tagName, &tagScope); err != nil {
 		exists = false
 	}
@@ -417,7 +523,7 @@ func (r *JobRepository) GetTags(user *schema.User, job *int64) ([]*schema.Tag, e
 	for rows.Next() {
 		tag := &schema.Tag{}
 		if err := rows.Scan(&tag.ID, &tag.Type, &tag.Name, &tag.Scope); err != nil {
-			cclog.Warn("Error while scanning rows")
+			cclog.Warnf("Error while scanning tag rows in GetTags: %v", err)
 			return nil, err
 		}
 		// Handle Scope Filtering: Tag Scope is Global, Private (== Username) or User is auth'd to view Admin Tags
@@ -455,7 +561,7 @@ func (r *JobRepository) GetTagsDirect(job *int64) ([]*schema.Tag, error) {
 	for rows.Next() {
 		tag := &schema.Tag{}
 		if err := rows.Scan(&tag.ID, &tag.Type, &tag.Name, &tag.Scope); err != nil {
-			cclog.Warn("Error while scanning rows")
+			cclog.Warnf("Error while scanning tag rows in GetTagsDirect: %v", err)
 			return nil, err
 		}
 		tags = append(tags, tag)
@@ -468,7 +574,18 @@ func (r *JobRepository) GetTagsDirect(job *int64) ([]*schema.Tag, error) {
 	return tags, nil
 }

-// GetArchiveTags returns a list of all tags *regardless of scope* for archiving if job is nil or of the tags that the job with that database ID has.
+// getArchiveTags returns all tags for a job WITHOUT applying scope-based filtering.
+//
+// This internal function is used exclusively for job archive synchronization where we need
+// to store all tags regardless of the current user's permissions. Unlike GetTags() which
+// filters by scope, this returns the complete unfiltered tag list.
+//
+// Parameters:
+//   - job: Pointer to job database ID, or nil to return all tags in the system
+//
+// Returns all tags without scope filtering, used only for archive operations.
+//
+// WARNING: Do NOT expose this function to user-facing APIs as it bypasses authorization.
 func (r *JobRepository) getArchiveTags(job *int64) ([]*schema.Tag, error) {
 	q := sq.Select("id", "tag_type", "tag_name", "tag_scope").From("tag")
 	if job != nil {
@@ -487,7 +604,7 @@ func (r *JobRepository) getArchiveTags(job *int64) ([]*schema.Tag, error) {
 	for rows.Next() {
 		tag := &schema.Tag{}
 		if err := rows.Scan(&tag.ID, &tag.Type, &tag.Name, &tag.Scope); err != nil {
-			cclog.Warn("Error while scanning rows")
+			cclog.Warnf("Error while scanning tag rows in getArchiveTags: %v", err)
 			return nil, err
 		}
 		tags = append(tags, tag)
@@ -500,18 +617,18 @@ func (r *JobRepository) getArchiveTags(job *int64) ([]*schema.Tag, error) {
 	return tags, nil
 }

-func (r *JobRepository) ImportTag(jobId int64, tagType string, tagName string, tagScope string) (err error) {
+func (r *JobRepository) ImportTag(jobID int64, tagType string, tagName string, tagScope string) (err error) {
 	// Import has no scope ctx, only import from metafile to DB (No recursive archive update required), only returns err

-	tagId, exists := r.TagId(tagType, tagName, tagScope)
+	tagID, exists := r.TagID(tagType, tagName, tagScope)
 	if !exists {
-		tagId, err = r.CreateTag(tagType, tagName, tagScope)
+		tagID, err = r.CreateTag(tagType, tagName, tagScope)
 		if err != nil {
 			return err
 		}
 	}

-	q := sq.Insert("jobtag").Columns("job_id", "tag_id").Values(jobId, tagId)
+	q := sq.Insert("jobtag").Columns("job_id", "tag_id").Values(jobID, tagID)

 	if _, err := q.RunWith(r.stmtCache).Exec(); err != nil {
 		s, _, _ := q.ToSql()
@@ -522,16 +639,38 @@ func (r *JobRepository) ImportTag(jobId int64, tagType string, tagName string, t
 	return nil
 }

+// checkScopeAuth validates whether a user is authorized to perform an operation on a tag with the given scope.
+//
+// This function implements the tag scope authorization matrix:
+//
+//	Scope        | Read Access                      | Write Access
+//	-------------|----------------------------------|----------------------------------
+//	"global"     | All users                        | Admin, Support, API-only
+//	"admin"      | Admin, Support                   | Admin, API-only
+//	<username>   | Owner only                       | Owner only (private tags)
+//
+// Parameters:
+//   - user: User attempting the operation (must not be nil)
+//   - operation: Either "read" or "write"
+//   - scope: Tag scope value ("global", "admin", or username for private tags)
+//
+// Returns:
+//   - pass: true if authorized, false if denied
+//   - err: error only if operation is invalid or user is nil
+//
+// Special cases:
+//   - API-only users (single role: RoleApi) can write to admin and global scopes for automation
+//   - Private tags use the username as scope, granting exclusive access to that user
 func (r *JobRepository) checkScopeAuth(user *schema.User, operation string, scope string) (pass bool, err error) {
 	if user != nil {
 		switch {
 		case operation == "write" && scope == "admin":
-			if user.HasRole(schema.RoleAdmin) || (len(user.Roles) == 1 && user.HasRole(schema.RoleApi)) {
+			if user.HasRole(schema.RoleAdmin) || (len(user.Roles) == 1 && user.HasRole(schema.RoleAPI)) {
 				return true, nil
 			}
 			return false, nil
 		case operation == "write" && scope == "global":
-			if user.HasAnyRole([]schema.Role{schema.RoleAdmin, schema.RoleSupport}) || (len(user.Roles) == 1 && user.HasRole(schema.RoleApi)) {
+			if user.HasAnyRole([]schema.Role{schema.RoleAdmin, schema.RoleSupport}) || (len(user.Roles) == 1 && user.HasRole(schema.RoleAPI)) {
 				return true, nil
 			}
 			return false, nil
--- a/internal/repository/testdata/job.db
+++ b/internal/repository/testdata/job.db
--- a/internal/repository/transaction.go
+++ b/internal/repository/transaction.go
@@ -62,7 +62,7 @@ func (r *JobRepository) TransactionEnd(t *Transaction) error {
 func (r *JobRepository) TransactionAddNamed(
 	t *Transaction,
 	query string,
-	args ...interface{},
+	args ...any,
 ) (int64, error) {
 	if t.tx == nil {
 		return 0, fmt.Errorf("transaction is nil or already completed")
@@ -82,7 +82,7 @@ func (r *JobRepository) TransactionAddNamed(
 }

 // TransactionAdd executes a query within the transaction.
-func (r *JobRepository) TransactionAdd(t *Transaction, query string, args ...interface{}) (int64, error) {
+func (r *JobRepository) TransactionAdd(t *Transaction, query string, args ...any) (int64, error) {
 	if t.tx == nil {
 		return 0, fmt.Errorf("transaction is nil or already completed")
 	}
--- a/internal/repository/transaction_test.go
+++ b/internal/repository/transaction_test.go
@@ -0,0 +1,311 @@
+// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
+// All rights reserved. This file is part of cc-backend.
+// Use of this source code is governed by a MIT-style
+// license that can be found in the LICENSE file.
+package repository
+
+import (
+	"testing"
+
+	_ "github.com/mattn/go-sqlite3"
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+)
+
+func TestTransactionInit(t *testing.T) {
+	r := setup(t)
+
+	t.Run("successful transaction init", func(t *testing.T) {
+		tx, err := r.TransactionInit()
+		require.NoError(t, err, "TransactionInit should succeed")
+		require.NotNil(t, tx, "Transaction should not be nil")
+		require.NotNil(t, tx.tx, "Transaction.tx should not be nil")
+
+		// Clean up
+		err = tx.Rollback()
+		require.NoError(t, err, "Rollback should succeed")
+	})
+}
+
+func TestTransactionCommit(t *testing.T) {
+	r := setup(t)
+
+	t.Run("commit after successful operations", func(t *testing.T) {
+		tx, err := r.TransactionInit()
+		require.NoError(t, err)
+
+		// Insert a test tag
+		_, err = r.TransactionAdd(tx, "INSERT INTO tag (tag_type, tag_name, tag_scope) VALUES (?, ?, ?)",
+			"test_type", "test_tag_commit", "global")
+		require.NoError(t, err, "TransactionAdd should succeed")
+
+		// Commit the transaction
+		err = tx.Commit()
+		require.NoError(t, err, "Commit should succeed")
+
+		// Verify the tag was inserted
+		var count int
+		err = r.DB.QueryRow("SELECT COUNT(*) FROM tag WHERE tag_name = ?", "test_tag_commit").Scan(&count)
+		require.NoError(t, err)
+		assert.Equal(t, 1, count, "Tag should be committed to database")
+
+		// Clean up
+		_, err = r.DB.Exec("DELETE FROM tag WHERE tag_name = ?", "test_tag_commit")
+		require.NoError(t, err)
+	})
+
+	t.Run("commit on already committed transaction", func(t *testing.T) {
+		tx, err := r.TransactionInit()
+		require.NoError(t, err)
+
+		err = tx.Commit()
+		require.NoError(t, err, "First commit should succeed")
+
+		err = tx.Commit()
+		assert.Error(t, err, "Second commit should fail")
+		assert.Contains(t, err.Error(), "transaction already committed or rolled back")
+	})
+}
+
+func TestTransactionRollback(t *testing.T) {
+	r := setup(t)
+
+	t.Run("rollback after operations", func(t *testing.T) {
+		tx, err := r.TransactionInit()
+		require.NoError(t, err)
+
+		// Insert a test tag
+		_, err = r.TransactionAdd(tx, "INSERT INTO tag (tag_type, tag_name, tag_scope) VALUES (?, ?, ?)",
+			"test_type", "test_tag_rollback", "global")
+		require.NoError(t, err, "TransactionAdd should succeed")
+
+		// Rollback the transaction
+		err = tx.Rollback()
+		require.NoError(t, err, "Rollback should succeed")
+
+		// Verify the tag was NOT inserted
+		var count int
+		err = r.DB.QueryRow("SELECT COUNT(*) FROM tag WHERE tag_name = ?", "test_tag_rollback").Scan(&count)
+		require.NoError(t, err)
+		assert.Equal(t, 0, count, "Tag should not be in database after rollback")
+	})
+
+	t.Run("rollback on already rolled back transaction", func(t *testing.T) {
+		tx, err := r.TransactionInit()
+		require.NoError(t, err)
+
+		err = tx.Rollback()
+		require.NoError(t, err, "First rollback should succeed")
+
+		err = tx.Rollback()
+		assert.NoError(t, err, "Second rollback should be safe (no-op)")
+	})
+
+	t.Run("rollback on committed transaction", func(t *testing.T) {
+		tx, err := r.TransactionInit()
+		require.NoError(t, err)
+
+		err = tx.Commit()
+		require.NoError(t, err)
+
+		err = tx.Rollback()
+		assert.NoError(t, err, "Rollback after commit should be safe (no-op)")
+	})
+}
+
+func TestTransactionAdd(t *testing.T) {
+	r := setup(t)
+
+	t.Run("insert with TransactionAdd", func(t *testing.T) {
+		tx, err := r.TransactionInit()
+		require.NoError(t, err)
+		defer tx.Rollback()
+
+		id, err := r.TransactionAdd(tx, "INSERT INTO tag (tag_type, tag_name, tag_scope) VALUES (?, ?, ?)",
+			"test_type", "test_add", "global")
+		require.NoError(t, err, "TransactionAdd should succeed")
+		assert.Greater(t, id, int64(0), "Should return valid insert ID")
+	})
+
+	t.Run("error on nil transaction", func(t *testing.T) {
+		tx := &Transaction{tx: nil}
+
+		_, err := r.TransactionAdd(tx, "INSERT INTO tag (tag_type, tag_name, tag_scope) VALUES (?, ?, ?)",
+			"test_type", "test_nil", "global")
+		assert.Error(t, err, "Should error on nil transaction")
+		assert.Contains(t, err.Error(), "transaction is nil or already completed")
+	})
+
+	t.Run("error on invalid SQL", func(t *testing.T) {
+		tx, err := r.TransactionInit()
+		require.NoError(t, err)
+		defer tx.Rollback()
+
+		_, err = r.TransactionAdd(tx, "INVALID SQL STATEMENT")
+		assert.Error(t, err, "Should error on invalid SQL")
+	})
+
+	t.Run("error after transaction committed", func(t *testing.T) {
+		tx, err := r.TransactionInit()
+		require.NoError(t, err)
+
+		err = tx.Commit()
+		require.NoError(t, err)
+
+		_, err = r.TransactionAdd(tx, "INSERT INTO tag (tag_type, tag_name, tag_scope) VALUES (?, ?, ?)",
+			"test_type", "test_after_commit", "global")
+		assert.Error(t, err, "Should error when transaction is already committed")
+	})
+}
+
+func TestTransactionAddNamed(t *testing.T) {
+	r := setup(t)
+
+	t.Run("insert with TransactionAddNamed", func(t *testing.T) {
+		tx, err := r.TransactionInit()
+		require.NoError(t, err)
+		defer tx.Rollback()
+
+		type TagArgs struct {
+			Type  string `db:"type"`
+			Name  string `db:"name"`
+			Scope string `db:"scope"`
+		}
+
+		args := TagArgs{
+			Type:  "test_type",
+			Name:  "test_named",
+			Scope: "global",
+		}
+
+		id, err := r.TransactionAddNamed(tx,
+			"INSERT INTO tag (tag_type, tag_name, tag_scope) VALUES (:type, :name, :scope)",
+			args)
+		require.NoError(t, err, "TransactionAddNamed should succeed")
+		assert.Greater(t, id, int64(0), "Should return valid insert ID")
+	})
+
+	t.Run("error on nil transaction", func(t *testing.T) {
+		tx := &Transaction{tx: nil}
+
+		_, err := r.TransactionAddNamed(tx, "INSERT INTO tag (tag_type, tag_name, tag_scope) VALUES (:type, :name, :scope)",
+			map[string]any{"type": "test", "name": "test", "scope": "global"})
+		assert.Error(t, err, "Should error on nil transaction")
+		assert.Contains(t, err.Error(), "transaction is nil or already completed")
+	})
+}
+
+func TestTransactionMultipleOperations(t *testing.T) {
+	r := setup(t)
+
+	t.Run("multiple inserts in single transaction", func(t *testing.T) {
+		tx, err := r.TransactionInit()
+		require.NoError(t, err)
+		defer tx.Rollback()
+
+		// Insert multiple tags
+		for i := range 5 {
+			_, err = r.TransactionAdd(tx,
+				"INSERT INTO tag (tag_type, tag_name, tag_scope) VALUES (?, ?, ?)",
+				"test_type", "test_multi_"+string(rune('a'+i)), "global")
+			require.NoError(t, err, "Insert %d should succeed", i)
+		}
+
+		err = tx.Commit()
+		require.NoError(t, err, "Commit should succeed")
+
+		// Verify all tags were inserted
+		var count int
+		err = r.DB.QueryRow("SELECT COUNT(*) FROM tag WHERE tag_name LIKE 'test_multi_%'").Scan(&count)
+		require.NoError(t, err)
+		assert.Equal(t, 5, count, "All 5 tags should be committed")
+
+		// Clean up
+		_, err = r.DB.Exec("DELETE FROM tag WHERE tag_name LIKE 'test_multi_%'")
+		require.NoError(t, err)
+	})
+
+	t.Run("rollback undoes all operations", func(t *testing.T) {
+		tx, err := r.TransactionInit()
+		require.NoError(t, err)
+
+		// Insert multiple tags
+		for i := range 3 {
+			_, err = r.TransactionAdd(tx,
+				"INSERT INTO tag (tag_type, tag_name, tag_scope) VALUES (?, ?, ?)",
+				"test_type", "test_rollback_"+string(rune('a'+i)), "global")
+			require.NoError(t, err)
+		}
+
+		err = tx.Rollback()
+		require.NoError(t, err, "Rollback should succeed")
+
+		// Verify no tags were inserted
+		var count int
+		err = r.DB.QueryRow("SELECT COUNT(*) FROM tag WHERE tag_name LIKE 'test_rollback_%'").Scan(&count)
+		require.NoError(t, err)
+		assert.Equal(t, 0, count, "No tags should be in database after rollback")
+	})
+}
+
+func TestTransactionEnd(t *testing.T) {
+	r := setup(t)
+
+	t.Run("deprecated TransactionEnd calls Commit", func(t *testing.T) {
+		tx, err := r.TransactionInit()
+		require.NoError(t, err)
+
+		_, err = r.TransactionAdd(tx, "INSERT INTO tag (tag_type, tag_name, tag_scope) VALUES (?, ?, ?)",
+			"test_type", "test_end", "global")
+		require.NoError(t, err)
+
+		// Use deprecated method
+		err = r.TransactionEnd(tx)
+		require.NoError(t, err, "TransactionEnd should succeed")
+
+		// Verify the tag was committed
+		var count int
+		err = r.DB.QueryRow("SELECT COUNT(*) FROM tag WHERE tag_name = ?", "test_end").Scan(&count)
+		require.NoError(t, err)
+		assert.Equal(t, 1, count, "Tag should be committed")
+
+		// Clean up
+		_, err = r.DB.Exec("DELETE FROM tag WHERE tag_name = ?", "test_end")
+		require.NoError(t, err)
+	})
+}
+
+func TestTransactionDeferPattern(t *testing.T) {
+	r := setup(t)
+
+	t.Run("defer rollback pattern", func(t *testing.T) {
+		insertTag := func() error {
+			tx, err := r.TransactionInit()
+			if err != nil {
+				return err
+			}
+			defer tx.Rollback() // Safe to call even after commit
+
+			_, err = r.TransactionAdd(tx, "INSERT INTO tag (tag_type, tag_name, tag_scope) VALUES (?, ?, ?)",
+				"test_type", "test_defer", "global")
+			if err != nil {
+				return err
+			}
+
+			return tx.Commit()
+		}
+
+		err := insertTag()
+		require.NoError(t, err, "Function should succeed")
+
+		// Verify the tag was committed
+		var count int
+		err = r.DB.QueryRow("SELECT COUNT(*) FROM tag WHERE tag_name = ?", "test_defer").Scan(&count)
+		require.NoError(t, err)
+		assert.Equal(t, 1, count, "Tag should be committed despite defer rollback")
+
+		// Clean up
+		_, err = r.DB.Exec("DELETE FROM tag WHERE tag_name = ?", "test_defer")
+		require.NoError(t, err)
+	})
+}
--- a/internal/repository/user.go
+++ b/internal/repository/user.go
@@ -10,18 +10,38 @@ import (
 	"encoding/json"
 	"errors"
 	"fmt"
+	"reflect"
 	"strings"
 	"sync"

 	"github.com/ClusterCockpit/cc-backend/internal/config"
 	"github.com/ClusterCockpit/cc-backend/internal/graph/model"
-	cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
-	"github.com/ClusterCockpit/cc-lib/schema"
+	cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger"
+	"github.com/ClusterCockpit/cc-lib/v2/schema"
 	sq "github.com/Masterminds/squirrel"
 	"github.com/jmoiron/sqlx"
 	"golang.org/x/crypto/bcrypt"
 )

+// Authentication and Role System:
+//
+// ClusterCockpit supports multiple authentication sources:
+//   - Local: Username/password stored in database (password hashed with bcrypt)
+//   - LDAP: External LDAP/Active Directory authentication
+//   - JWT: Token-based authentication for API access
+//
+// Role Hierarchy (from highest to lowest privilege):
+//   1. "admin" - Full system access, can manage all users and jobs
+//   2. "support" - Can view all jobs but limited management capabilities
+//   3. "manager" - Can manage specific projects and their users
+//   4. "api" - Programmatic access for job submission/management
+//   5. "user" - Default role, can only view own jobs
+//
+// Project Association:
+//   - Managers have a list of projects they oversee
+//   - Regular users' project membership is determined by job data
+//   - Managers can view/manage all jobs within their projects
+
 var (
 	userRepoOnce     sync.Once
 	userRepoInstance *UserRepository
@@ -44,6 +64,9 @@ func GetUserRepository() *UserRepository {
 	return userRepoInstance
 }

+// GetUser retrieves a user by username from the database.
+// Returns the complete user record including hashed password, roles, and projects.
+// Password field contains bcrypt hash for local auth users, empty for LDAP users.
 func (r *UserRepository) GetUser(username string) (*schema.User, error) {
 	user := &schema.User{Username: username}
 	var hashedPassword, name, rawRoles, email, rawProjects sql.NullString
@@ -93,12 +116,18 @@ func (r *UserRepository) GetLdapUsernames() ([]string, error) {
 	return users, nil
 }

+// AddUser creates a new user in the database.
+// Passwords are automatically hashed with bcrypt before storage.
+// Auth source determines authentication method (local, LDAP, etc.).
+//
+// Required fields: Username, Roles
+// Optional fields: Name, Email, Password, Projects, AuthSource
 func (r *UserRepository) AddUser(user *schema.User) error {
 	rolesJson, _ := json.Marshal(user.Roles)
 	projectsJson, _ := json.Marshal(user.Projects)

 	cols := []string{"username", "roles", "projects"}
-	vals := []interface{}{user.Username, string(rolesJson), string(projectsJson)}
+	vals := []any{user.Username, string(rolesJson), string(projectsJson)}

 	if user.Name != "" {
 		cols = append(cols, "name")
@@ -159,8 +188,8 @@ func (r *UserRepository) AddUser(user *schema.User) error {
 }

 func (r *UserRepository) UpdateUser(dbUser *schema.User, user *schema.User) error {
-	// user contains updated info, apply to dbuser
-	// TODO: Discuss updatable fields
+	// user contains updated info -> Apply to dbUser
+	// --- Simple Name Update ---
 	if dbUser.Name != user.Name {
 		if _, err := sq.Update("hpc_user").Set("name", user.Name).Where("hpc_user.username = ?", dbUser.Username).RunWith(r.DB).Exec(); err != nil {
 			cclog.Errorf("error while updating name of user '%s'", user.Username)
@@ -168,13 +197,64 @@ func (r *UserRepository) UpdateUser(dbUser *schema.User, user *schema.User) erro
 		}
 	}

-	// Toggled until greenlit
-	// if dbUser.HasRole(schema.RoleManager) && !reflect.DeepEqual(dbUser.Projects, user.Projects) {
-	// 	projects, _ := json.Marshal(user.Projects)
-	// 	if _, err := sq.Update("hpc_user").Set("projects", projects).Where("hpc_user.username = ?", dbUser.Username).RunWith(r.DB).Exec(); err != nil {
-	// 		return err
-	// 	}
-	// }
+	// --- Def Helpers ---
+	// Helper to update roles.
+	// The encoded JSON is written as a string, matching how AddUser stores the
+	// roles column; passing the raw []byte may be bound differently by the
+	// database driver (e.g. as a BLOB) — NOTE(review): confirm for the driver in use.
+	updateRoles := func(roles []string) error {
+		rolesJSON, err := json.Marshal(roles)
+		if err != nil {
+			return err
+		}
+		_, err = sq.Update("hpc_user").Set("roles", string(rolesJSON)).Where("hpc_user.username = ?", dbUser.Username).RunWith(r.DB).Exec()
+		return err
+	}
+
+	// Helper to update projects.
+	// The encoded JSON is written as a string, matching how AddUser stores the
+	// projects column; passing the raw []byte may be bound differently by the
+	// database driver (e.g. as a BLOB) — NOTE(review): confirm for the driver in use.
+	updateProjects := func(projects []string) error {
+		projectsJSON, err := json.Marshal(projects)
+		if err != nil {
+			return err
+		}
+		_, err = sq.Update("hpc_user").Set("projects", string(projectsJSON)).Where("hpc_user.username = ?", dbUser.Username).RunWith(r.DB).Exec()
+		return err
+	}
+
+	// Helper to clear projects
+	clearProjects := func() error {
+		_, err := sq.Update("hpc_user").Set("projects", "[]").Where("hpc_user.username = ?", dbUser.Username).RunWith(r.DB).Exec()
+		return err
+	}
+
+	// --- Manager Role Handling ---
+	if dbUser.HasRole(schema.RoleManager) && user.HasRole(schema.RoleManager) && !reflect.DeepEqual(dbUser.Projects, user.Projects) {
+		// Existing Manager: update projects
+		if err := updateProjects(user.Projects); err != nil {
+			return err
+		}
+	} else if dbUser.HasRole(schema.RoleUser) && user.HasRole(schema.RoleManager) && user.HasNotRoles([]schema.Role{schema.RoleAdmin}) {
+		// New Manager: update roles and projects
+		if err := updateRoles(user.Roles); err != nil {
+			return err
+		}
+		if err := updateProjects(user.Projects); err != nil {
+			return err
+		}
+	} else if dbUser.HasRole(schema.RoleManager) && user.HasNotRoles([]schema.Role{schema.RoleAdmin, schema.RoleManager}) {
+		// Remove Manager: update roles and clear projects
+		if err := updateRoles(user.Roles); err != nil {
+			return err
+		}
+		if err := clearProjects(); err != nil {
+			return err
+		}
+	}
+
+	// --- Support Role Handling ---
+	if dbUser.HasRole(schema.RoleUser) && dbUser.HasNotRoles([]schema.Role{schema.RoleSupport}) &&
+		user.HasRole(schema.RoleSupport) && user.HasNotRoles([]schema.Role{schema.RoleAdmin}) {
+		// New Support: update roles
+		if err := updateRoles(user.Roles); err != nil {
+			return err
+		}
+	} else if dbUser.HasRole(schema.RoleSupport) && user.HasNotRoles([]schema.Role{schema.RoleAdmin, schema.RoleSupport}) {
+		// Remove Support: update roles
+		if err := updateRoles(user.Roles); err != nil {
+			return err
+		}
+	}

 	return nil
 }
@@ -229,6 +309,14 @@ func (r *UserRepository) ListUsers(specialsOnly bool) ([]*schema.User, error) {
 	return users, nil
 }

+// AddRole adds a role to a user's role list.
+// Role string is automatically lowercased.
+// Valid roles: admin, support, manager, api, user
+//
+// Returns error if:
+//   - User doesn't exist
+//   - Role is invalid
+//   - User already has the role
 func (r *UserRepository) AddRole(
 	ctx context.Context,
 	username string,
@@ -258,6 +346,11 @@ func (r *UserRepository) AddRole(
 	return nil
 }

+// RemoveRole removes a role from a user's role list.
+//
+// Special rules:
+//   - Cannot remove "manager" role while user has assigned projects
+//   - Must remove all projects first before removing manager role
 func (r *UserRepository) RemoveRole(ctx context.Context, username string, queryrole string) error {
 	oldRole := strings.ToLower(queryrole)
 	user, err := r.GetUser(username)
@@ -294,6 +387,12 @@ func (r *UserRepository) RemoveRole(ctx context.Context, username string, queryr
 	return nil
 }

+// AddProject assigns a project to a manager user.
+// Only users with the "manager" role can have assigned projects.
+//
+// Returns error if:
+//   - User doesn't have manager role
+//   - User already manages the project
 func (r *UserRepository) AddProject(
 	ctx context.Context,
 	username string,
@@ -345,7 +444,7 @@ func (r *UserRepository) RemoveProject(ctx context.Context, username string, pro
 	}

 	if exists {
-		var result interface{}
+		var result any
 		if len(newprojects) == 0 {
 			result = "[]"
 		} else {
--- a/internal/repository/userConfig.go
+++ b/internal/repository/userConfig.go
@@ -12,9 +12,9 @@ import (

 	"github.com/ClusterCockpit/cc-backend/internal/config"
 	"github.com/ClusterCockpit/cc-backend/web"
-	cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
-	"github.com/ClusterCockpit/cc-lib/lrucache"
-	"github.com/ClusterCockpit/cc-lib/schema"
+	cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger"
+	"github.com/ClusterCockpit/cc-lib/v2/lrucache"
+	"github.com/ClusterCockpit/cc-lib/v2/schema"
 	"github.com/jmoiron/sqlx"
 )

--- a/internal/repository/userConfig_test.go
+++ b/internal/repository/userConfig_test.go
@@ -10,9 +10,9 @@ import (
 	"testing"

 	"github.com/ClusterCockpit/cc-backend/internal/config"
-	ccconf "github.com/ClusterCockpit/cc-lib/ccConfig"
-	cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
-	"github.com/ClusterCockpit/cc-lib/schema"
+	ccconf "github.com/ClusterCockpit/cc-lib/v2/ccConfig"
+	cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger"
+	"github.com/ClusterCockpit/cc-lib/v2/schema"
 	_ "github.com/mattn/go-sqlite3"
 )

@@ -20,33 +20,40 @@ func setupUserTest(t *testing.T) *UserCfgRepo {
 	const testconfig = `{
 	"main": {
 	 "addr":   "0.0.0.0:8080",
-   "apiAllowedIPs": [
+   "api-allowed-ips": [
     "*"
   ]
  },
 	"archive": {
 		"kind": "file",
 		"path": "./var/job-archive"
-	},
-	"clusters": [
-	{
-	   "name": "testcluster",
-	   "metricDataRepository": {"kind": "test", "url": "bla:8081"},
-	   "filterRanges": {
-		"numNodes": { "from": 1, "to": 64 },
-		"duration": { "from": 0, "to": 86400 },
-		"startTime": { "from": "2022-01-01T00:00:00Z", "to": null }
 	}
-	}]
 }`

 	cclog.Init("info", true)
-	dbfilepath := "testdata/job.db"
-	err := MigrateDB("sqlite3", dbfilepath)
+
+	// Copy test DB to a temp file for test isolation
+	srcData, err := os.ReadFile("testdata/job.db")
 	if err != nil {
 		t.Fatal(err)
 	}
-	Connect("sqlite3", dbfilepath)
+	dbfilepath := filepath.Join(t.TempDir(), "job.db")
+	if err := os.WriteFile(dbfilepath, srcData, 0o644); err != nil {
+		t.Fatal(err)
+	}
+
+	if err := ResetConnection(); err != nil {
+		t.Fatal(err)
+	}
+	t.Cleanup(func() {
+		ResetConnection()
+	})
+
+	err = MigrateDB(dbfilepath)
+	if err != nil {
+		t.Fatal(err)
+	}
+	Connect(dbfilepath)

 	tmpdir := t.TempDir()
 	cfgFilePath := filepath.Join(tmpdir, "config.json")
@@ -58,11 +65,7 @@ func setupUserTest(t *testing.T) *UserCfgRepo {

 	// Load and check main configuration
 	if cfg := ccconf.GetPackageConfig("main"); cfg != nil {
-		if clustercfg := ccconf.GetPackageConfig("clusters"); clustercfg != nil {
-			config.Init(cfg, clustercfg)
-		} else {
-			t.Fatal("Cluster configuration must be present")
-		}
+		config.Init(cfg)
 	} else {
 		t.Fatal("Main configuration must be present")
 	}
--- a/internal/repository/user_test.go
+++ b/internal/repository/user_test.go
@@ -0,0 +1,596 @@
+// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
+// All rights reserved. This file is part of cc-backend.
+// Use of this source code is governed by a MIT-style
+// license that can be found in the LICENSE file.
+package repository
+
+import (
+	"context"
+	"testing"
+
+	"github.com/ClusterCockpit/cc-lib/v2/schema"
+	_ "github.com/mattn/go-sqlite3"
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+	"golang.org/x/crypto/bcrypt"
+)
+
+func TestAddUser(t *testing.T) {
+	_ = setup(t)
+	r := GetUserRepository()
+
+	t.Run("add user with all fields", func(t *testing.T) {
+		user := &schema.User{
+			Username:   "testuser1",
+			Name:       "Test User One",
+			Email:      "test1@example.com",
+			Password:   "testpassword123",
+			Roles:      []string{"user"},
+			Projects:   []string{"project1", "project2"},
+			AuthSource: schema.AuthViaLocalPassword,
+		}
+
+		err := r.AddUser(user)
+		require.NoError(t, err)
+
+		retrievedUser, err := r.GetUser("testuser1")
+		require.NoError(t, err)
+		assert.Equal(t, user.Username, retrievedUser.Username)
+		assert.Equal(t, user.Name, retrievedUser.Name)
+		assert.Equal(t, user.Email, retrievedUser.Email)
+		assert.Equal(t, user.Roles, retrievedUser.Roles)
+		assert.Equal(t, user.Projects, retrievedUser.Projects)
+		assert.NotEmpty(t, retrievedUser.Password)
+		err = bcrypt.CompareHashAndPassword([]byte(retrievedUser.Password), []byte("testpassword123"))
+		assert.NoError(t, err, "Password should be hashed correctly")
+
+		err = r.DelUser("testuser1")
+		require.NoError(t, err)
+	})
+
+	t.Run("add user with minimal fields", func(t *testing.T) {
+		user := &schema.User{
+			Username:   "testuser2",
+			Roles:      []string{"user"},
+			Projects:   []string{},
+			AuthSource: schema.AuthViaLDAP,
+		}
+
+		err := r.AddUser(user)
+		require.NoError(t, err)
+
+		retrievedUser, err := r.GetUser("testuser2")
+		require.NoError(t, err)
+		assert.Equal(t, user.Username, retrievedUser.Username)
+		assert.Equal(t, "", retrievedUser.Name)
+		assert.Equal(t, "", retrievedUser.Email)
+		assert.Equal(t, "", retrievedUser.Password)
+
+		err = r.DelUser("testuser2")
+		require.NoError(t, err)
+	})
+
+	t.Run("add duplicate user fails", func(t *testing.T) {
+		user := &schema.User{
+			Username:   "testuser3",
+			Roles:      []string{"user"},
+			Projects:   []string{},
+			AuthSource: schema.AuthViaLocalPassword,
+		}
+
+		err := r.AddUser(user)
+		require.NoError(t, err)
+
+		err = r.AddUser(user)
+		assert.Error(t, err, "Adding duplicate user should fail")
+
+		err = r.DelUser("testuser3")
+		require.NoError(t, err)
+	})
+}
+
+func TestGetUser(t *testing.T) {
+	_ = setup(t)
+	r := GetUserRepository()
+
+	t.Run("get existing user", func(t *testing.T) {
+		user := &schema.User{
+			Username:   "getuser1",
+			Name:       "Get User",
+			Email:      "getuser@example.com",
+			Roles:      []string{"user", "admin"},
+			Projects:   []string{"proj1"},
+			AuthSource: schema.AuthViaLocalPassword,
+		}
+
+		err := r.AddUser(user)
+		require.NoError(t, err)
+
+		retrieved, err := r.GetUser("getuser1")
+		require.NoError(t, err)
+		assert.Equal(t, user.Username, retrieved.Username)
+		assert.Equal(t, user.Name, retrieved.Name)
+		assert.Equal(t, user.Email, retrieved.Email)
+		assert.ElementsMatch(t, user.Roles, retrieved.Roles)
+		assert.ElementsMatch(t, user.Projects, retrieved.Projects)
+
+		err = r.DelUser("getuser1")
+		require.NoError(t, err)
+	})
+
+	t.Run("get non-existent user", func(t *testing.T) {
+		_, err := r.GetUser("nonexistent")
+		assert.Error(t, err)
+	})
+}
+
+func TestUpdateUser(t *testing.T) {
+	_ = setup(t)
+	r := GetUserRepository()
+
+	t.Run("update user name", func(t *testing.T) {
+		user := &schema.User{
+			Username:   "updateuser1",
+			Name:       "Original Name",
+			Roles:      []string{"user"},
+			Projects:   []string{},
+			AuthSource: schema.AuthViaLocalPassword,
+		}
+
+		err := r.AddUser(user)
+		require.NoError(t, err)
+
+		dbUser, err := r.GetUser("updateuser1")
+		require.NoError(t, err)
+
+		updatedUser := &schema.User{
+			Username: "updateuser1",
+			Name:     "Updated Name",
+		}
+
+		err = r.UpdateUser(dbUser, updatedUser)
+		require.NoError(t, err)
+
+		retrieved, err := r.GetUser("updateuser1")
+		require.NoError(t, err)
+		assert.Equal(t, "Updated Name", retrieved.Name)
+
+		err = r.DelUser("updateuser1")
+		require.NoError(t, err)
+	})
+
+	t.Run("update with no changes", func(t *testing.T) {
+		user := &schema.User{
+			Username:   "updateuser2",
+			Name:       "Same Name",
+			Roles:      []string{"user"},
+			Projects:   []string{},
+			AuthSource: schema.AuthViaLocalPassword,
+		}
+
+		err := r.AddUser(user)
+		require.NoError(t, err)
+
+		dbUser, err := r.GetUser("updateuser2")
+		require.NoError(t, err)
+
+		err = r.UpdateUser(dbUser, dbUser)
+		assert.NoError(t, err)
+
+		err = r.DelUser("updateuser2")
+		require.NoError(t, err)
+	})
+}
+
+func TestDelUser(t *testing.T) {
+	_ = setup(t)
+	r := GetUserRepository()
+
+	t.Run("delete existing user", func(t *testing.T) {
+		user := &schema.User{
+			Username:   "deluser1",
+			Roles:      []string{"user"},
+			Projects:   []string{},
+			AuthSource: schema.AuthViaLocalPassword,
+		}
+
+		err := r.AddUser(user)
+		require.NoError(t, err)
+
+		err = r.DelUser("deluser1")
+		require.NoError(t, err)
+
+		_, err = r.GetUser("deluser1")
+		assert.Error(t, err, "User should not exist after deletion")
+	})
+
+	t.Run("delete non-existent user", func(t *testing.T) {
+		err := r.DelUser("nonexistent")
+		assert.NoError(t, err, "Deleting non-existent user should not error")
+	})
+}
+
+// TestListUsers inserts three users with different roles and checks which of
+// them show up in the full listing and in the specials-only listing.
+func TestListUsers(t *testing.T) {
+	setup(t)
+	repo := GetUserRepository()
+
+	fixtures := []*schema.User{
+		{
+			Username:   "listuser1",
+			Roles:      []string{"user"},
+			Projects:   []string{},
+			AuthSource: schema.AuthViaLocalPassword,
+		},
+		{
+			Username:   "listuser2",
+			Roles:      []string{"admin"},
+			Projects:   []string{},
+			AuthSource: schema.AuthViaLocalPassword,
+		},
+		{
+			Username:   "listuser3",
+			Roles:      []string{"manager"},
+			Projects:   []string{"proj1"},
+			AuthSource: schema.AuthViaLocalPassword,
+		},
+	}
+	for _, u := range fixtures {
+		require.NoError(t, repo.AddUser(u))
+	}
+
+	// namesOf projects a user list onto its usernames, keeping the order.
+	namesOf := func(list []*schema.User) []string {
+		names := make([]string, len(list))
+		for idx := range list {
+			names[idx] = list[idx].Username
+		}
+		return names
+	}
+
+	t.Run("list all users", func(t *testing.T) {
+		all, err := repo.ListUsers(false)
+		require.NoError(t, err)
+		assert.GreaterOrEqual(t, len(all), 3)
+
+		names := namesOf(all)
+		assert.Contains(t, names, "listuser1")
+		assert.Contains(t, names, "listuser2")
+		assert.Contains(t, names, "listuser3")
+	})
+
+	t.Run("list special users only", func(t *testing.T) {
+		specials, err := repo.ListUsers(true)
+		require.NoError(t, err)
+
+		names := namesOf(specials)
+		assert.Contains(t, names, "listuser2")
+		assert.Contains(t, names, "listuser3")
+	})
+
+	// Remove the fixtures in insertion order.
+	for _, name := range []string{"listuser1", "listuser2", "listuser3"} {
+		require.NoError(t, repo.DelUser(name))
+	}
+}
+
+// TestGetLdapUsernames stores one LDAP user and one local user and checks
+// that only the LDAP user is reported by GetLdapUsernames.
+func TestGetLdapUsernames(t *testing.T) {
+	setup(t)
+	repo := GetUserRepository()
+
+	fixtures := []*schema.User{
+		{
+			Username:   "ldapuser1",
+			Roles:      []string{"user"},
+			Projects:   []string{},
+			AuthSource: schema.AuthViaLDAP,
+		},
+		{
+			Username:   "localuser1",
+			Roles:      []string{"user"},
+			Projects:   []string{},
+			AuthSource: schema.AuthViaLocalPassword,
+		},
+	}
+	for _, u := range fixtures {
+		require.NoError(t, repo.AddUser(u))
+	}
+
+	names, err := repo.GetLdapUsernames()
+	require.NoError(t, err)
+	assert.Contains(t, names, "ldapuser1")
+	assert.NotContains(t, names, "localuser1")
+
+	// Remove the fixtures in insertion order.
+	for _, name := range []string{"ldapuser1", "localuser1"} {
+		require.NoError(t, repo.DelUser(name))
+	}
+}
+
+// TestAddRole verifies AddRole for a valid new role, a role the user already
+// holds, and an unknown role name.
+func TestAddRole(t *testing.T) {
+	_ = setup(t)
+	r := GetUserRepository()
+	ctx := context.Background()
+
+	t.Run("add valid role", func(t *testing.T) {
+		user := &schema.User{
+			Username:   "roleuser1",
+			Roles:      []string{"user"},
+			Projects:   []string{},
+			AuthSource: schema.AuthViaLocalPassword,
+		}
+
+		err := r.AddUser(user)
+		require.NoError(t, err)
+
+		err = r.AddRole(ctx, "roleuser1", "admin")
+		require.NoError(t, err)
+
+		// Both the original and the newly added role must be present.
+		retrieved, err := r.GetUser("roleuser1")
+		require.NoError(t, err)
+		assert.Contains(t, retrieved.Roles, "admin")
+		assert.Contains(t, retrieved.Roles, "user")
+
+		err = r.DelUser("roleuser1")
+		require.NoError(t, err)
+	})
+
+	t.Run("add duplicate role", func(t *testing.T) {
+		user := &schema.User{
+			Username:   "roleuser2",
+			Roles:      []string{"user"},
+			Projects:   []string{},
+			AuthSource: schema.AuthViaLocalPassword,
+		}
+
+		err := r.AddUser(user)
+		require.NoError(t, err)
+
+		err = r.AddRole(ctx, "roleuser2", "user")
+		// require (not assert): err.Error() below would panic on a nil error.
+		require.Error(t, err, "Adding duplicate role should fail")
+		assert.Contains(t, err.Error(), "already has role")
+
+		err = r.DelUser("roleuser2")
+		require.NoError(t, err)
+	})
+
+	t.Run("add invalid role", func(t *testing.T) {
+		user := &schema.User{
+			Username:   "roleuser3",
+			Roles:      []string{"user"},
+			Projects:   []string{},
+			AuthSource: schema.AuthViaLocalPassword,
+		}
+
+		err := r.AddUser(user)
+		require.NoError(t, err)
+
+		err = r.AddRole(ctx, "roleuser3", "invalidrole")
+		// require (not assert): err.Error() below would panic on a nil error.
+		require.Error(t, err, "Adding invalid role should fail")
+		assert.Contains(t, err.Error(), "no valid option")
+
+		err = r.DelUser("roleuser3")
+		require.NoError(t, err)
+	})
+}
+
+// TestRemoveRole verifies RemoveRole for a role the user holds, a role the
+// user does not hold, and the manager role while projects are still assigned.
+func TestRemoveRole(t *testing.T) {
+	_ = setup(t)
+	r := GetUserRepository()
+	ctx := context.Background()
+
+	t.Run("remove existing role", func(t *testing.T) {
+		user := &schema.User{
+			Username:   "rmroleuser1",
+			Roles:      []string{"user", "admin"},
+			Projects:   []string{},
+			AuthSource: schema.AuthViaLocalPassword,
+		}
+
+		err := r.AddUser(user)
+		require.NoError(t, err)
+
+		err = r.RemoveRole(ctx, "rmroleuser1", "admin")
+		require.NoError(t, err)
+
+		// Only the removed role may be gone; the other one must remain.
+		retrieved, err := r.GetUser("rmroleuser1")
+		require.NoError(t, err)
+		assert.NotContains(t, retrieved.Roles, "admin")
+		assert.Contains(t, retrieved.Roles, "user")
+
+		err = r.DelUser("rmroleuser1")
+		require.NoError(t, err)
+	})
+
+	t.Run("remove non-existent role", func(t *testing.T) {
+		user := &schema.User{
+			Username:   "rmroleuser2",
+			Roles:      []string{"user"},
+			Projects:   []string{},
+			AuthSource: schema.AuthViaLocalPassword,
+		}
+
+		err := r.AddUser(user)
+		require.NoError(t, err)
+
+		err = r.RemoveRole(ctx, "rmroleuser2", "admin")
+		// require (not assert): err.Error() below would panic on a nil error.
+		require.Error(t, err, "Removing non-existent role should fail")
+		assert.Contains(t, err.Error(), "already deleted")
+
+		err = r.DelUser("rmroleuser2")
+		require.NoError(t, err)
+	})
+
+	t.Run("remove manager role with projects", func(t *testing.T) {
+		user := &schema.User{
+			Username:   "rmroleuser3",
+			Roles:      []string{"manager"},
+			Projects:   []string{"proj1", "proj2"},
+			AuthSource: schema.AuthViaLocalPassword,
+		}
+
+		err := r.AddUser(user)
+		require.NoError(t, err)
+
+		err = r.RemoveRole(ctx, "rmroleuser3", "manager")
+		// require (not assert): err.Error() below would panic on a nil error.
+		require.Error(t, err, "Removing manager role with projects should fail")
+		assert.Contains(t, err.Error(), "still has assigned project")
+
+		err = r.DelUser("rmroleuser3")
+		require.NoError(t, err)
+	})
+}
+
+// TestAddProject verifies AddProject for a manager, for a user without the
+// manager role, and for a project the manager already has.
+func TestAddProject(t *testing.T) {
+	_ = setup(t)
+	r := GetUserRepository()
+	ctx := context.Background()
+
+	t.Run("add project to manager", func(t *testing.T) {
+		user := &schema.User{
+			Username:   "projuser1",
+			Roles:      []string{"manager"},
+			Projects:   []string{},
+			AuthSource: schema.AuthViaLocalPassword,
+		}
+
+		err := r.AddUser(user)
+		require.NoError(t, err)
+
+		err = r.AddProject(ctx, "projuser1", "newproject")
+		require.NoError(t, err)
+
+		retrieved, err := r.GetUser("projuser1")
+		require.NoError(t, err)
+		assert.Contains(t, retrieved.Projects, "newproject")
+
+		err = r.DelUser("projuser1")
+		require.NoError(t, err)
+	})
+
+	t.Run("add project to non-manager", func(t *testing.T) {
+		user := &schema.User{
+			Username:   "projuser2",
+			Roles:      []string{"user"},
+			Projects:   []string{},
+			AuthSource: schema.AuthViaLocalPassword,
+		}
+
+		err := r.AddUser(user)
+		require.NoError(t, err)
+
+		err = r.AddProject(ctx, "projuser2", "newproject")
+		// require (not assert): err.Error() below would panic on a nil error.
+		require.Error(t, err, "Adding project to non-manager should fail")
+		assert.Contains(t, err.Error(), "not a manager")
+
+		err = r.DelUser("projuser2")
+		require.NoError(t, err)
+	})
+
+	t.Run("add duplicate project", func(t *testing.T) {
+		user := &schema.User{
+			Username:   "projuser3",
+			Roles:      []string{"manager"},
+			Projects:   []string{"existingproject"},
+			AuthSource: schema.AuthViaLocalPassword,
+		}
+
+		err := r.AddUser(user)
+		require.NoError(t, err)
+
+		err = r.AddProject(ctx, "projuser3", "existingproject")
+		// require (not assert): err.Error() below would panic on a nil error.
+		require.Error(t, err, "Adding duplicate project should fail")
+		assert.Contains(t, err.Error(), "already manages")
+
+		err = r.DelUser("projuser3")
+		require.NoError(t, err)
+	})
+}
+
+// TestRemoveProject verifies RemoveProject for a project the manager has, a
+// project the manager does not have, and a user without the manager role.
+func TestRemoveProject(t *testing.T) {
+	_ = setup(t)
+	r := GetUserRepository()
+	ctx := context.Background()
+
+	t.Run("remove existing project", func(t *testing.T) {
+		user := &schema.User{
+			Username:   "rmprojuser1",
+			Roles:      []string{"manager"},
+			Projects:   []string{"proj1", "proj2"},
+			AuthSource: schema.AuthViaLocalPassword,
+		}
+
+		err := r.AddUser(user)
+		require.NoError(t, err)
+
+		err = r.RemoveProject(ctx, "rmprojuser1", "proj1")
+		require.NoError(t, err)
+
+		// Only the removed project may be gone; the other one must remain.
+		retrieved, err := r.GetUser("rmprojuser1")
+		require.NoError(t, err)
+		assert.NotContains(t, retrieved.Projects, "proj1")
+		assert.Contains(t, retrieved.Projects, "proj2")
+
+		err = r.DelUser("rmprojuser1")
+		require.NoError(t, err)
+	})
+
+	t.Run("remove non-existent project", func(t *testing.T) {
+		user := &schema.User{
+			Username:   "rmprojuser2",
+			Roles:      []string{"manager"},
+			Projects:   []string{"proj1"},
+			AuthSource: schema.AuthViaLocalPassword,
+		}
+
+		err := r.AddUser(user)
+		require.NoError(t, err)
+
+		err = r.RemoveProject(ctx, "rmprojuser2", "nonexistent")
+		assert.Error(t, err, "Removing non-existent project should fail")
+
+		err = r.DelUser("rmprojuser2")
+		require.NoError(t, err)
+	})
+
+	t.Run("remove project from non-manager", func(t *testing.T) {
+		user := &schema.User{
+			Username:   "rmprojuser3",
+			Roles:      []string{"user"},
+			Projects:   []string{},
+			AuthSource: schema.AuthViaLocalPassword,
+		}
+
+		err := r.AddUser(user)
+		require.NoError(t, err)
+
+		err = r.RemoveProject(ctx, "rmprojuser3", "proj1")
+		// require (not assert): err.Error() below would panic on a nil error.
+		require.Error(t, err, "Removing project from non-manager should fail")
+		assert.Contains(t, err.Error(), "not a manager")
+
+		err = r.DelUser("rmprojuser3")
+		require.NoError(t, err)
+	})
+}
+
+// TestGetUserFromContext checks that GetUserFromContext returns the user
+// stored under ContextUserKey and nil when the context carries no user.
+func TestGetUserFromContext(t *testing.T) {
+	t.Run("get user from context", func(t *testing.T) {
+		want := &schema.User{
+			Username: "contextuser",
+			Roles:    []string{"user"},
+		}
+
+		withUser := context.WithValue(context.Background(), ContextUserKey, want)
+		got := GetUserFromContext(withUser)
+
+		require.NotNil(t, got)
+		assert.Equal(t, want.Username, got.Username)
+	})
+
+	t.Run("get user from empty context", func(t *testing.T) {
+		// A bare background context has no user attached.
+		got := GetUserFromContext(context.Background())
+
+		assert.Nil(t, got)
+	})
+}
				`@@ -1 +0,0 @@`
				`ALTER TABLE user ADD COLUMN projects varchar(255) NOT NULL DEFAULT "[]";`
				`@@ -1 +0,0 @@`
				`ALTER TABLE configuration MODIFY value VARCHAR(255);`