Merge branch 'hotfix' of github.com:ClusterCockpit/cc-backend into hotfix

This commit is contained in:
2026-03-11 05:06:26 +01:00
317 changed files with 32717 additions and 15040 deletions

View File

@@ -12,7 +12,7 @@ import (
"sync"
"time"
cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger"
"github.com/jmoiron/sqlx"
"github.com/mattn/go-sqlite3"
"github.com/qustavo/sqlhooks/v2"
@@ -51,7 +51,7 @@ func setupSqlite(db *sql.DB) error {
return nil
}
func Connect(driver string, db string) {
func Connect(db string) {
var err error
var dbHandle *sqlx.DB
@@ -64,39 +64,31 @@ func Connect(driver string, db string) {
ConnectionMaxIdleTime: repoConfig.ConnectionMaxIdleTime,
}
switch driver {
case "sqlite3":
// TODO: Have separate DB handles for Writes and Reads
// Optimize SQLite connection: https://kerkour.com/sqlite-for-servers
connectionURLParams := make(url.Values)
connectionURLParams.Add("_txlock", "immediate")
connectionURLParams.Add("_journal_mode", "WAL")
connectionURLParams.Add("_busy_timeout", "5000")
connectionURLParams.Add("_synchronous", "NORMAL")
connectionURLParams.Add("_cache_size", "1000000000")
connectionURLParams.Add("_foreign_keys", "true")
opts.URL = fmt.Sprintf("file:%s?%s", opts.URL, connectionURLParams.Encode())
// TODO: Have separate DB handles for Writes and Reads
// Optimize SQLite connection: https://kerkour.com/sqlite-for-servers
connectionURLParams := make(url.Values)
connectionURLParams.Add("_txlock", "immediate")
connectionURLParams.Add("_journal_mode", "WAL")
connectionURLParams.Add("_busy_timeout", "5000")
connectionURLParams.Add("_synchronous", "NORMAL")
connectionURLParams.Add("_cache_size", "1000000000")
connectionURLParams.Add("_foreign_keys", "true")
opts.URL = fmt.Sprintf("file:%s?%s", opts.URL, connectionURLParams.Encode())
if cclog.Loglevel() == "debug" {
sql.Register("sqlite3WithHooks", sqlhooks.Wrap(&sqlite3.SQLiteDriver{}, &Hooks{}))
dbHandle, err = sqlx.Open("sqlite3WithHooks", opts.URL)
} else {
dbHandle, err = sqlx.Open("sqlite3", opts.URL)
}
err = setupSqlite(dbHandle.DB)
if err != nil {
cclog.Abortf("Failed sqlite db setup.\nError: %s\n", err.Error())
}
case "mysql":
opts.URL += "?multiStatements=true"
dbHandle, err = sqlx.Open("mysql", opts.URL)
default:
cclog.Abortf("DB Connection: Unsupported database driver '%s'.\n", driver)
if cclog.Loglevel() == "debug" {
sql.Register("sqlite3WithHooks", sqlhooks.Wrap(&sqlite3.SQLiteDriver{}, &Hooks{}))
dbHandle, err = sqlx.Open("sqlite3WithHooks", opts.URL)
} else {
dbHandle, err = sqlx.Open("sqlite3", opts.URL)
}
if err != nil {
cclog.Abortf("DB Connection: Could not connect to '%s' database with sqlx.Open().\nError: %s\n", driver, err.Error())
cclog.Abortf("DB Connection: Could not connect to SQLite database with sqlx.Open().\nError: %s\n", err.Error())
}
err = setupSqlite(dbHandle.DB)
if err != nil {
cclog.Abortf("Failed sqlite db setup.\nError: %s\n", err.Error())
}
dbHandle.SetMaxOpenConns(opts.MaxOpenConnections)
@@ -104,8 +96,8 @@ func Connect(driver string, db string) {
dbHandle.SetConnMaxLifetime(opts.ConnectionMaxLifetime)
dbHandle.SetConnMaxIdleTime(opts.ConnectionMaxIdleTime)
dbConnInstance = &DBConnection{DB: dbHandle, Driver: driver}
err = checkDBVersion(driver, dbHandle.DB)
dbConnInstance = &DBConnection{DB: dbHandle}
err = checkDBVersion(dbHandle.DB)
if err != nil {
cclog.Abortf("DB Connection: Failed DB version check.\nError: %s\n", err.Error())
}
@@ -119,3 +111,26 @@ func GetConnection() *DBConnection {
return dbConnInstance
}
// ResetConnection closes the current database connection (if any) and clears
// the package-level connection and repository singletons together with their
// sync.Once guards.
// This function is intended for testing purposes only to allow test isolation.
//
// The package state is reset even when closing the database fails, so a failed
// Close never leaves the singletons pointing at a handle that has already been
// shut down. The close error, if any, is still returned to the caller.
func ResetConnection() error {
	var closeErr error
	if dbConnInstance != nil && dbConnInstance.DB != nil {
		if err := dbConnInstance.DB.Close(); err != nil {
			closeErr = fmt.Errorf("failed to close database connection: %w", err)
		}
	}

	// Drop the connection singleton and re-arm its guard.
	dbConnInstance = nil
	dbConnOnce = sync.Once{}

	// Drop every repository singleton and re-arm the matching guard.
	jobRepoInstance = nil
	jobRepoOnce = sync.Once{}
	nodeRepoInstance = nil
	nodeRepoOnce = sync.Once{}
	userRepoInstance = nil
	userRepoOnce = sync.Once{}
	userCfgRepoInstance = nil
	userCfgRepoOnce = sync.Once{}

	return closeErr
}

View File

@@ -2,13 +2,14 @@
// All rights reserved. This file is part of cc-backend.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package repository
import (
"context"
"time"
cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger"
)
// Hooks satisfies the sqlhook.Hooks interface

View File

@@ -0,0 +1,274 @@
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved. This file is part of cc-backend.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package repository
import (
"context"
"testing"
"time"
"github.com/ClusterCockpit/cc-lib/v2/schema"
_ "github.com/mattn/go-sqlite3"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
// MockJobHook records which job callbacks were invoked and the jobs that were
// passed to them, so tests can assert on hook dispatch.
type MockJobHook struct {
	startCalled, stopCalled bool
	startJobs, stopJobs     []*schema.Job
}

// JobStartCallback remembers job and flags that a start callback happened.
func (h *MockJobHook) JobStartCallback(job *schema.Job) {
	h.startJobs = append(h.startJobs, job)
	h.startCalled = true
}

// JobStopCallback remembers job and flags that a stop callback happened.
func (h *MockJobHook) JobStopCallback(job *schema.Job) {
	h.stopJobs = append(h.stopJobs, job)
	h.stopCalled = true
}
// TestRegisterJobHook checks that RegisterJobHook appends hooks to the
// package-level hooks list in registration order and that registering nil
// leaves the list empty. Every subtest starts from, and restores, an empty list.
func TestRegisterJobHook(t *testing.T) {
	t.Run("register single hook", func(t *testing.T) {
		hooks = nil
		defer func() { hooks = nil }()

		only := &MockJobHook{}
		RegisterJobHook(only)

		assert.NotNil(t, hooks)
		assert.Len(t, hooks, 1)
		assert.Equal(t, only, hooks[0])
	})

	t.Run("register multiple hooks", func(t *testing.T) {
		hooks = nil
		defer func() { hooks = nil }()

		first, second := &MockJobHook{}, &MockJobHook{}
		RegisterJobHook(first)
		RegisterJobHook(second)

		assert.Len(t, hooks, 2)
		assert.Equal(t, first, hooks[0])
		assert.Equal(t, second, hooks[1])
	})

	t.Run("register nil hook does not add to hooks", func(t *testing.T) {
		hooks = nil
		defer func() { hooks = nil }()

		RegisterJobHook(nil)

		// A nil list and an empty list are both acceptable outcomes.
		if len(hooks) != 0 {
			assert.Len(t, hooks, 0, "Nil hook should not be added")
		}
	})
}
// TestCallJobStartHooks checks that CallJobStartHooks forwards every job to the
// start callback of every registered hook, and that it copes with an empty
// hook list and an empty job list.
func TestCallJobStartHooks(t *testing.T) {
	t.Run("call start hooks with single job", func(t *testing.T) {
		hooks = nil
		defer func() { hooks = nil }()

		recorder := &MockJobHook{}
		RegisterJobHook(recorder)

		started := []*schema.Job{
			{JobID: 123, User: "testuser", Cluster: "testcluster"},
		}
		CallJobStartHooks(started)

		assert.True(t, recorder.startCalled)
		assert.False(t, recorder.stopCalled)
		assert.Len(t, recorder.startJobs, 1)
		assert.Equal(t, int64(123), recorder.startJobs[0].JobID)
	})

	t.Run("call start hooks with multiple jobs", func(t *testing.T) {
		hooks = nil
		defer func() { hooks = nil }()

		recorder := &MockJobHook{}
		RegisterJobHook(recorder)

		CallJobStartHooks([]*schema.Job{
			{JobID: 1, User: "user1", Cluster: "cluster1"},
			{JobID: 2, User: "user2", Cluster: "cluster2"},
			{JobID: 3, User: "user3", Cluster: "cluster3"},
		})

		assert.True(t, recorder.startCalled)
		assert.Len(t, recorder.startJobs, 3)
		// Jobs must arrive in the order they were passed in.
		for idx, wantID := range []int64{1, 2, 3} {
			assert.Equal(t, wantID, recorder.startJobs[idx].JobID)
		}
	})

	t.Run("call start hooks with multiple registered hooks", func(t *testing.T) {
		hooks = nil
		defer func() { hooks = nil }()

		first, second := &MockJobHook{}, &MockJobHook{}
		RegisterJobHook(first)
		RegisterJobHook(second)

		CallJobStartHooks([]*schema.Job{
			{JobID: 456, User: "testuser", Cluster: "testcluster"},
		})

		assert.True(t, first.startCalled)
		assert.True(t, second.startCalled)
		assert.Len(t, first.startJobs, 1)
		assert.Len(t, second.startJobs, 1)
	})

	t.Run("call start hooks with nil hooks", func(t *testing.T) {
		hooks = nil
		defer func() { hooks = nil }()

		// No assertions: the call only has to return without panicking.
		CallJobStartHooks([]*schema.Job{
			{JobID: 789, User: "testuser", Cluster: "testcluster"},
		})
	})

	t.Run("call start hooks with empty job list", func(t *testing.T) {
		hooks = nil
		defer func() { hooks = nil }()

		recorder := &MockJobHook{}
		RegisterJobHook(recorder)

		CallJobStartHooks([]*schema.Job{})

		assert.False(t, recorder.startCalled)
		assert.Len(t, recorder.startJobs, 0)
	})
}
// TestCallJobStopHooks checks that CallJobStopHooks hands the job to the stop
// callback of every registered hook and tolerates an empty hook list.
func TestCallJobStopHooks(t *testing.T) {
	t.Run("call stop hooks with single job", func(t *testing.T) {
		hooks = nil
		defer func() { hooks = nil }()

		recorder := &MockJobHook{}
		RegisterJobHook(recorder)

		CallJobStopHooks(&schema.Job{JobID: 123, User: "testuser", Cluster: "testcluster"})

		assert.True(t, recorder.stopCalled)
		assert.False(t, recorder.startCalled)
		assert.Len(t, recorder.stopJobs, 1)
		assert.Equal(t, int64(123), recorder.stopJobs[0].JobID)
	})

	t.Run("call stop hooks with multiple registered hooks", func(t *testing.T) {
		hooks = nil
		defer func() { hooks = nil }()

		first, second := &MockJobHook{}, &MockJobHook{}
		RegisterJobHook(first)
		RegisterJobHook(second)

		CallJobStopHooks(&schema.Job{JobID: 456, User: "testuser", Cluster: "testcluster"})

		assert.True(t, first.stopCalled)
		assert.True(t, second.stopCalled)
		assert.Len(t, first.stopJobs, 1)
		assert.Len(t, second.stopJobs, 1)
	})

	t.Run("call stop hooks with nil hooks", func(t *testing.T) {
		hooks = nil
		defer func() { hooks = nil }()

		// No assertions: the call only has to return without panicking.
		CallJobStopHooks(&schema.Job{JobID: 789, User: "testuser", Cluster: "testcluster"})
	})
}
// TestSQLHooks drives the Before/After pair of the SQL Hooks type by hand and
// checks that Before stores a time.Time under the "begin" context key and that
// both calls succeed and return a context.
func TestSQLHooks(t *testing.T) {
	_ = setup(t)

	t.Run("hooks log queries in debug mode", func(t *testing.T) {
		const query = "SELECT * FROM job WHERE job_id = ?"
		var (
			sqlHooks  = &Hooks{}
			queryArgs = []any{123}
		)

		startedCtx, err := sqlHooks.Before(context.Background(), query, queryArgs...)
		require.NoError(t, err)
		assert.NotNil(t, startedCtx)

		begin := startedCtx.Value("begin")
		require.NotNil(t, begin)
		_, isTime := begin.(time.Time)
		assert.True(t, isTime, "Begin time should be time.Time")

		// Let some time pass between Before and After.
		time.Sleep(10 * time.Millisecond)

		finishedCtx, err := sqlHooks.After(startedCtx, query, queryArgs...)
		require.NoError(t, err)
		assert.NotNil(t, finishedCtx)
	})
}
// TestHookIntegration sends one job through the start hooks and then the stop
// hooks and checks that a registered hook saw the same job in both callbacks.
func TestHookIntegration(t *testing.T) {
	t.Run("hooks are called during job lifecycle", func(t *testing.T) {
		hooks = nil
		defer func() { hooks = nil }()

		recorder := &MockJobHook{}
		RegisterJobHook(recorder)

		lifecycleJob := &schema.Job{
			JobID:   999,
			User:    "integrationuser",
			Cluster: "integrationcluster",
		}

		// Start phase.
		CallJobStartHooks([]*schema.Job{lifecycleJob})
		assert.True(t, recorder.startCalled)
		assert.Equal(t, 1, len(recorder.startJobs))

		// Stop phase.
		CallJobStopHooks(lifecycleJob)
		assert.True(t, recorder.stopCalled)
		assert.Equal(t, 1, len(recorder.stopJobs))

		startedID, stoppedID := recorder.startJobs[0].JobID, recorder.stopJobs[0].JobID
		assert.Equal(t, startedID, stoppedID)
	})
}

File diff suppressed because it is too large Load Diff

View File

@@ -2,14 +2,15 @@
// All rights reserved. This file is part of cc-backend.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package repository
import (
"encoding/json"
"fmt"
cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
"github.com/ClusterCockpit/cc-lib/schema"
cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger"
"github.com/ClusterCockpit/cc-lib/v2/schema"
sq "github.com/Masterminds/squirrel"
)
@@ -29,6 +30,27 @@ const NamedJobInsert string = `INSERT INTO job (
:shared, :monitoring_status, :smt, :job_state, :start_time, :duration, :walltime, :footprint, :energy, :energy_footprint, :resources, :meta_data
);`
// InsertJobDirect runs NamedJobInsert for job against the job table (not
// job_cache) while holding r.Mutex, and returns the row id reported by the
// database for the new row.
// Use this when the returned ID will be used for operations on the job table
// (e.g., adding tags), or for imported jobs that are already completed.
// On failure it logs a warning and returns 0 together with the error.
func (r *JobRepository) InsertJobDirect(job *schema.Job) (int64, error) {
	r.Mutex.Lock()
	defer r.Mutex.Unlock()

	result, insertErr := r.DB.NamedExec(NamedJobInsert, job)
	if insertErr != nil {
		cclog.Warn("Error while NamedJobInsert (direct)")
		return 0, insertErr
	}

	rowID, idErr := result.LastInsertId()
	if idErr != nil {
		cclog.Warn("Error while getting last insert ID (direct)")
		return 0, idErr
	}

	return rowID, nil
}
func (r *JobRepository) InsertJob(job *schema.Job) (int64, error) {
r.Mutex.Lock()
defer r.Mutex.Unlock()
@@ -70,8 +92,9 @@ func (r *JobRepository) SyncJobs() ([]*schema.Job, error) {
jobs = append(jobs, job)
}
// Use INSERT OR IGNORE to skip jobs already transferred by the stop path
_, err = r.DB.Exec(
"INSERT INTO job (job_id, cluster, subcluster, start_time, hpc_user, project, cluster_partition, array_job_id, num_nodes, num_hwthreads, num_acc, shared, monitoring_status, smt, job_state, duration, walltime, footprint, energy, energy_footprint, resources, meta_data) SELECT job_id, cluster, subcluster, start_time, hpc_user, project, cluster_partition, array_job_id, num_nodes, num_hwthreads, num_acc, shared, monitoring_status, smt, job_state, duration, walltime, footprint, energy, energy_footprint, resources, meta_data FROM job_cache")
"INSERT OR IGNORE INTO job (job_id, cluster, subcluster, start_time, hpc_user, project, cluster_partition, array_job_id, num_nodes, num_hwthreads, num_acc, shared, monitoring_status, smt, job_state, duration, walltime, footprint, energy, energy_footprint, resources, meta_data) SELECT job_id, cluster, subcluster, start_time, hpc_user, project, cluster_partition, array_job_id, num_nodes, num_hwthreads, num_acc, shared, monitoring_status, smt, job_state, duration, walltime, footprint, energy, energy_footprint, resources, meta_data FROM job_cache")
if err != nil {
cclog.Warnf("Error while Job sync: %v", err)
return nil, err
@@ -83,9 +106,48 @@ func (r *JobRepository) SyncJobs() ([]*schema.Job, error) {
return nil, err
}
// Resolve correct job.id from the job table. The IDs read from job_cache
// are from a different auto-increment sequence and must not be used to
// query the job table.
for _, job := range jobs {
var newID int64
if err := sq.Select("job.id").From("job").
Where("job.job_id = ? AND job.cluster = ? AND job.start_time = ?",
job.JobID, job.Cluster, job.StartTime).
RunWith(r.stmtCache).QueryRow().Scan(&newID); err != nil {
cclog.Warnf("SyncJobs: could not resolve job table id for job %d on %s: %v",
job.JobID, job.Cluster, err)
continue
}
job.ID = &newID
}
return jobs, nil
}
// TransferCachedJobToMain moves a job from job_cache to the job table.
// Caller must hold r.Mutex. Returns the new job table ID.
//
// The row with id cacheID is copied into job (without its id column, so the job
// table assigns a fresh one) and then deleted from job_cache. An error is
// returned if no job_cache row with that id exists, if the copy fails, or if
// the delete fails; in all error cases the returned ID is 0.
//
// NOTE(review): the INSERT and the DELETE are issued as two separate
// statements, not inside a transaction; a failing DELETE leaves the job in
// both tables.
func (r *JobRepository) TransferCachedJobToMain(cacheID int64) (int64, error) {
	res, err := r.DB.Exec(
		"INSERT INTO job (job_id, cluster, subcluster, start_time, hpc_user, project, cluster_partition, array_job_id, num_nodes, num_hwthreads, num_acc, shared, monitoring_status, smt, job_state, duration, walltime, footprint, energy, energy_footprint, resources, meta_data) SELECT job_id, cluster, subcluster, start_time, hpc_user, project, cluster_partition, array_job_id, num_nodes, num_hwthreads, num_acc, shared, monitoring_status, smt, job_state, duration, walltime, footprint, energy, energy_footprint, resources, meta_data FROM job_cache WHERE id = ?",
		cacheID)
	if err != nil {
		return 0, fmt.Errorf("transferring cached job %d to main table failed: %w", cacheID, err)
	}

	// INSERT ... SELECT succeeds with zero rows when cacheID does not exist.
	// In that case LastInsertId would not refer to a row created by this
	// statement, so treat it as an error instead of handing back a bogus ID.
	affected, err := res.RowsAffected()
	if err != nil {
		return 0, fmt.Errorf("checking transfer result for cached job %d failed: %w", cacheID, err)
	}
	if affected == 0 {
		return 0, fmt.Errorf("transferring cached job %d to main table failed: no such job in job_cache", cacheID)
	}

	newID, err := res.LastInsertId()
	if err != nil {
		return 0, fmt.Errorf("getting new job ID after transfer failed: %w", err)
	}

	_, err = r.DB.Exec("DELETE FROM job_cache WHERE id = ?", cacheID)
	if err != nil {
		return 0, fmt.Errorf("deleting cached job %d after transfer failed: %w", cacheID, err)
	}

	return newID, nil
}
// Start inserts a new job in the table, returning the unique job ID.
// Statistics are not transferred!
func (r *JobRepository) Start(job *schema.Job) (id int64, err error) {
@@ -107,41 +169,46 @@ func (r *JobRepository) Start(job *schema.Job) (id int64, err error) {
return r.InsertJob(job)
}
// StartDirect JSON-encodes the job's Footprint, Resources and MetaData into the
// matching Raw* fields and then hands the job to InsertJobDirect, which writes
// it to the job table (not job_cache).
// Use this when the returned ID will immediately be used for job table
// operations such as adding tags.
// If one of the encodings fails, -1 and a wrapped error are returned and
// nothing is inserted.
func (r *JobRepository) StartDirect(job *schema.Job) (id int64, err error) {
	if job.RawFootprint, err = json.Marshal(job.Footprint); err != nil {
		return -1, fmt.Errorf("REPOSITORY/JOB > encoding footprint field failed: %w", err)
	}

	if job.RawResources, err = json.Marshal(job.Resources); err != nil {
		return -1, fmt.Errorf("REPOSITORY/JOB > encoding resources field failed: %w", err)
	}

	if job.RawMetaData, err = json.Marshal(job.MetaData); err != nil {
		return -1, fmt.Errorf("REPOSITORY/JOB > encoding metaData field failed: %w", err)
	}

	return r.InsertJobDirect(job)
}
// Stop updates the job with the database id jobId using the provided arguments.
func (r *JobRepository) Stop(
jobId int64,
jobID int64,
duration int32,
state schema.JobState,
monitoringStatus int32,
) (err error) {
// Invalidate cache entries as job state is changing
r.cache.Del(fmt.Sprintf("metadata:%d", jobId))
r.cache.Del(fmt.Sprintf("energyFootprint:%d", jobId))
r.cache.Del(fmt.Sprintf("metadata:%d", jobID))
r.cache.Del(fmt.Sprintf("energyFootprint:%d", jobID))
stmt := sq.Update("job").
Set("job_state", state).
Set("duration", duration).
Set("monitoring_status", monitoringStatus).
Where("job.id = ?", jobId)
Where("job.id = ?", jobID)
_, err = stmt.RunWith(r.stmtCache).Exec()
return err
}
// StopCached sets job_state, duration and monitoring_status on the job_cache
// row whose id equals jobId and returns the error of that UPDATE, if any.
// It touches job_cache only, never the main job table, and does not invalidate
// any cache entries itself; per the original note, that happens when the job
// is synced to the main table.
func (r *JobRepository) StopCached(
	jobId int64,
	duration int32,
	state schema.JobState,
	monitoringStatus int32,
) (err error) {
	update := sq.Update("job_cache")
	update = update.Set("job_state", state)
	update = update.Set("duration", duration)
	update = update.Set("monitoring_status", monitoringStatus)
	update = update.Where("job_cache.id = ?", jobId)

	_, err = update.RunWith(r.stmtCache).Exec()
	return err
}

View File

@@ -0,0 +1,607 @@
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved. This file is part of cc-backend.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package repository
import (
"encoding/json"
"testing"
"github.com/ClusterCockpit/cc-lib/v2/schema"
_ "github.com/mattn/go-sqlite3"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
// createTestJob builds a running single-node job fixture for the given job id
// and cluster. All other fields carry fixed values, including one resource
// entry, a populated footprint map and a metadata map.
func createTestJob(jobID int64, cluster string) *schema.Job {
	nodes := []*schema.Resource{
		{
			Hostname:  "node01",
			HWThreads: []int{0, 1, 2, 3},
		},
	}

	footprint := map[string]float64{
		"cpu_load":      50.0,
		"mem_used":      8000.0,
		"flops_any":     0.5,
		"mem_bw":        10.0,
		"net_bw":        2.0,
		"file_bw":       1.0,
		"cpu_used":      2.0,
		"cpu_load_core": 12.5,
	}

	metaData := map[string]string{
		"jobName":     "test_job",
		"queue":       "normal",
		"qosName":     "default",
		"accountName": "testaccount",
	}

	fixture := &schema.Job{
		// Identity
		JobID:      jobID,
		Cluster:    cluster,
		SubCluster: "main",
		Partition:  "batch",
		User:       "testuser",
		Project:    "testproject",

		// Allocation
		NumNodes:     1,
		NumHWThreads: 4,
		NumAcc:       0,
		SMT:          1,
		Shared:       "none",
		Resources:    nodes,

		// State and timing
		State:            schema.JobStateRunning,
		MonitoringStatus: schema.MonitoringStatusRunningOrArchiving,
		StartTime:        1234567890,
		Duration:         0,
		Walltime:         3600,

		// JSON-encoded payloads
		Footprint: footprint,
		MetaData:  metaData,
	}
	return fixture
}
// TestInsertJob verifies that InsertJob stores a job in the job_cache table and
// that selected columns of the stored row match the inserted job.
func TestInsertJob(t *testing.T) {
	r := setup(t)

	// marshalRawFields fills the Raw* JSON fields by hand, because the subtests
	// call InsertJob directly. Encoding errors fail the test instead of being
	// silently discarded.
	marshalRawFields := func(t *testing.T, job *schema.Job) {
		t.Helper()
		var err error
		job.RawResources, err = json.Marshal(job.Resources)
		require.NoError(t, err, "encoding resources")
		job.RawFootprint, err = json.Marshal(job.Footprint)
		require.NoError(t, err, "encoding footprint")
		job.RawMetaData, err = json.Marshal(job.MetaData)
		require.NoError(t, err, "encoding metadata")
	}

	// removeFromCacheAfter registers a cleanup that deletes the job's rows from
	// job_cache when the subtest ends. Unlike a trailing DELETE, this also runs
	// when a require assertion aborts the subtest early.
	removeFromCacheAfter := func(t *testing.T, job *schema.Job) {
		t.Helper()
		t.Cleanup(func() {
			_, err := r.DB.Exec("DELETE FROM job_cache WHERE job_id = ? AND cluster = ?", job.JobID, job.Cluster)
			assert.NoError(t, err)
		})
	}

	t.Run("successful insertion", func(t *testing.T) {
		job := createTestJob(999001, "testcluster")
		marshalRawFields(t, job)
		removeFromCacheAfter(t, job)

		id, err := r.InsertJob(job)
		require.NoError(t, err, "InsertJob should succeed")
		assert.Greater(t, id, int64(0), "Should return valid insert ID")

		// Verify job was inserted into job_cache
		var count int
		err = r.DB.QueryRow("SELECT COUNT(*) FROM job_cache WHERE job_id = ? AND cluster = ?",
			job.JobID, job.Cluster).Scan(&count)
		require.NoError(t, err)
		assert.Equal(t, 1, count, "Job should be in job_cache table")
	})

	t.Run("insertion with all fields", func(t *testing.T) {
		job := createTestJob(999002, "testcluster")
		job.ArrayJobID = 5000
		job.Energy = 1500.5
		marshalRawFields(t, job)
		removeFromCacheAfter(t, job)

		id, err := r.InsertJob(job)
		require.NoError(t, err)
		assert.Greater(t, id, int64(0))

		// Verify all fields were stored correctly
		var retrievedJob schema.Job
		err = r.DB.QueryRow(`SELECT job_id, hpc_user, project, cluster, array_job_id, energy
			FROM job_cache WHERE id = ?`, id).Scan(
			&retrievedJob.JobID, &retrievedJob.User, &retrievedJob.Project,
			&retrievedJob.Cluster, &retrievedJob.ArrayJobID, &retrievedJob.Energy)
		require.NoError(t, err)
		assert.Equal(t, job.JobID, retrievedJob.JobID)
		assert.Equal(t, job.User, retrievedJob.User)
		assert.Equal(t, job.Project, retrievedJob.Project)
		assert.Equal(t, job.Cluster, retrievedJob.Cluster)
		assert.Equal(t, job.ArrayJobID, retrievedJob.ArrayJobID)
		assert.Equal(t, job.Energy, retrievedJob.Energy)
	})
}
// TestStart verifies that Start inserts a job into job_cache and that the
// resources, footprint and meta_data columns hold the JSON encoding of the
// job's corresponding fields.
func TestStart(t *testing.T) {
	r := setup(t)

	// startJob calls Start and registers a cleanup that removes the created
	// job_cache row when the subtest ends. Unlike a trailing DELETE, the
	// cleanup also runs when a require assertion aborts the subtest early.
	startJob := func(t *testing.T, job *schema.Job) int64 {
		t.Helper()
		id, err := r.Start(job)
		require.NoError(t, err, "Start should succeed")
		t.Cleanup(func() {
			_, err := r.DB.Exec("DELETE FROM job_cache WHERE id = ?", id)
			assert.NoError(t, err)
		})
		return id
	}

	t.Run("successful job start with JSON encoding", func(t *testing.T) {
		job := createTestJob(999003, "testcluster")

		id := startJob(t, job)
		assert.Greater(t, id, int64(0), "Should return valid insert ID")

		// Verify job was inserted and JSON fields were encoded
		var rawResources, rawFootprint, rawMetaData []byte
		err := r.DB.QueryRow(`SELECT resources, footprint, meta_data FROM job_cache WHERE id = ?`, id).Scan(
			&rawResources, &rawFootprint, &rawMetaData)
		require.NoError(t, err)

		// Verify resources JSON
		var resources []*schema.Resource
		err = json.Unmarshal(rawResources, &resources)
		require.NoError(t, err, "Resources should be valid JSON")
		// require: the index access below would panic on an empty slice.
		require.Len(t, resources, 1)
		assert.Equal(t, "node01", resources[0].Hostname)

		// Verify footprint JSON
		var footprint map[string]float64
		err = json.Unmarshal(rawFootprint, &footprint)
		require.NoError(t, err, "Footprint should be valid JSON")
		assert.Equal(t, 50.0, footprint["cpu_load"])
		assert.Equal(t, 8000.0, footprint["mem_used"])

		// Verify metadata JSON
		var metaData map[string]string
		err = json.Unmarshal(rawMetaData, &metaData)
		require.NoError(t, err, "MetaData should be valid JSON")
		assert.Equal(t, "test_job", metaData["jobName"])
	})

	t.Run("job start with empty footprint", func(t *testing.T) {
		job := createTestJob(999004, "testcluster")
		job.Footprint = map[string]float64{}

		id := startJob(t, job)
		assert.Greater(t, id, int64(0))

		// Verify empty footprint was encoded as empty JSON object
		var rawFootprint []byte
		err := r.DB.QueryRow(`SELECT footprint FROM job_cache WHERE id = ?`, id).Scan(&rawFootprint)
		require.NoError(t, err)
		assert.Equal(t, []byte("{}"), rawFootprint)
	})

	t.Run("job start with nil metadata", func(t *testing.T) {
		job := createTestJob(999005, "testcluster")
		job.MetaData = nil

		// Only checks that Start accepts a job without metadata.
		id := startJob(t, job)
		assert.Greater(t, id, int64(0))
	})
}
// TestStop verifies that Stop updates duration, job_state and
// monitoring_status of a row in the job table. Each subtest first creates a
// job via Start (which lands in job_cache) and then moves it into the job
// table by hand, because Stop operates on job table ids.
func TestStop(t *testing.T) {
	r := setup(t)

	// startInJobTable starts a job, moves its row from job_cache into the job
	// table (simulating SyncJobs; the id column is excluded so the job table
	// assigns its own) and returns the job and its job table id.
	// A cleanup removes whatever rows are left in either table when the
	// subtest ends, even if a require assertion aborts it early.
	startInJobTable := func(t *testing.T, jobID int64) (*schema.Job, int64) {
		t.Helper()

		job := createTestJob(jobID, "testcluster")
		cacheID, err := r.Start(job)
		require.NoError(t, err)

		t.Cleanup(func() {
			_, err := r.DB.Exec("DELETE FROM job_cache WHERE id = ?", cacheID)
			assert.NoError(t, err)
			_, err = r.DB.Exec("DELETE FROM job WHERE job_id = ? AND cluster = ? AND start_time = ?",
				job.JobID, job.Cluster, job.StartTime)
			assert.NoError(t, err)
		})

		_, err = r.DB.Exec(`INSERT INTO job (job_id, cluster, subcluster, submit_time, start_time, hpc_user, project,
			cluster_partition, array_job_id, duration, walltime, job_state, meta_data, resources, num_nodes,
			num_hwthreads, num_acc, smt, shared, monitoring_status, energy, energy_footprint, footprint)
			SELECT job_id, cluster, subcluster, submit_time, start_time, hpc_user, project,
			cluster_partition, array_job_id, duration, walltime, job_state, meta_data, resources, num_nodes,
			num_hwthreads, num_acc, smt, shared, monitoring_status, energy, energy_footprint, footprint
			FROM job_cache WHERE id = ?`, cacheID)
		require.NoError(t, err)

		_, err = r.DB.Exec("DELETE FROM job_cache WHERE id = ?", cacheID)
		require.NoError(t, err)

		// Get the new job id in the job table
		var id int64
		err = r.DB.QueryRow("SELECT id FROM job WHERE job_id = ? AND cluster = ? AND start_time = ?",
			job.JobID, job.Cluster, job.StartTime).Scan(&id)
		require.NoError(t, err)

		return job, id
	}

	t.Run("successful job stop", func(t *testing.T) {
		_, id := startInJobTable(t, 999106)

		// Stop the job
		duration := int32(3600)
		state := schema.JobStateCompleted
		monitoringStatus := int32(schema.MonitoringStatusArchivingSuccessful)

		err := r.Stop(id, duration, state, monitoringStatus)
		require.NoError(t, err, "Stop should succeed")

		// Verify job was updated
		var retrievedDuration int32
		var retrievedState string
		var retrievedMonStatus int32
		err = r.DB.QueryRow(`SELECT duration, job_state, monitoring_status FROM job WHERE id = ?`, id).Scan(
			&retrievedDuration, &retrievedState, &retrievedMonStatus)
		require.NoError(t, err)
		assert.Equal(t, duration, retrievedDuration)
		assert.Equal(t, string(state), retrievedState)
		assert.Equal(t, monitoringStatus, retrievedMonStatus)
	})

	t.Run("stop updates job state transitions", func(t *testing.T) {
		_, id := startInJobTable(t, 999107)

		// Stop the job with different duration
		err := r.Stop(id, 7200, schema.JobStateCompleted, int32(schema.MonitoringStatusArchivingSuccessful))
		require.NoError(t, err)

		// Verify the duration was updated correctly
		var duration int32
		err = r.DB.QueryRow(`SELECT duration FROM job WHERE id = ?`, id).Scan(&duration)
		require.NoError(t, err)
		assert.Equal(t, int32(7200), duration, "Duration should be updated to 7200")
	})

	t.Run("stop with different states", func(t *testing.T) {
		testCases := []struct {
			name             string
			jobID            int64
			state            schema.JobState
			monitoringStatus int32
		}{
			{"completed", 999108, schema.JobStateCompleted, int32(schema.MonitoringStatusArchivingSuccessful)},
			{"failed", 999118, schema.JobStateFailed, int32(schema.MonitoringStatusArchivingSuccessful)},
			{"cancelled", 999119, schema.JobStateCancelled, int32(schema.MonitoringStatusArchivingSuccessful)},
			{"timeout", 999120, schema.JobStateTimeout, int32(schema.MonitoringStatusArchivingSuccessful)},
		}

		for _, tc := range testCases {
			t.Run(tc.name, func(t *testing.T) {
				_, id := startInJobTable(t, tc.jobID)

				// Stop with specific state
				err := r.Stop(id, 1800, tc.state, tc.monitoringStatus)
				require.NoError(t, err)

				// Verify state was set correctly
				var retrievedState string
				err = r.DB.QueryRow(`SELECT job_state FROM job WHERE id = ?`, id).Scan(&retrievedState)
				require.NoError(t, err)
				assert.Equal(t, string(tc.state), retrievedState)
			})
		}
	})
}
// TestTransferCachedJobToMain verifies that TransferCachedJobToMain creates a
// row in the job table carrying the cached job's data and removes the row from
// job_cache.
func TestTransferCachedJobToMain(t *testing.T) {
	r := setup(t)

	// startCachedJob inserts a job into job_cache via Start and registers a
	// cleanup that removes the job from both job_cache and job when the
	// subtest ends, so no row leaks regardless of where the subtest stops.
	startCachedJob := func(t *testing.T, jobID int64) (*schema.Job, int64) {
		t.Helper()

		job := createTestJob(jobID, "testcluster")
		cacheID, err := r.Start(job)
		require.NoError(t, err)

		t.Cleanup(func() {
			_, err := r.DB.Exec("DELETE FROM job_cache WHERE id = ?", cacheID)
			assert.NoError(t, err)
			_, err = r.DB.Exec("DELETE FROM job WHERE job_id = ? AND cluster = ? AND start_time = ?",
				job.JobID, job.Cluster, job.StartTime)
			assert.NoError(t, err)
		})

		return job, cacheID
	}

	// transfer calls TransferCachedJobToMain while holding r.Mutex, as its
	// contract requires of the caller.
	transfer := func(cacheID int64) (int64, error) {
		r.Mutex.Lock()
		defer r.Mutex.Unlock()
		return r.TransferCachedJobToMain(cacheID)
	}

	t.Run("successful transfer from cache to main", func(t *testing.T) {
		_, cacheID := startCachedJob(t, 999009)

		newID, err := transfer(cacheID)
		require.NoError(t, err, "TransferCachedJobToMain should succeed")
		// NOTE(review): this relies on the id sequences of job and job_cache not
		// lining up in the test database — confirm against the fixture.
		assert.NotEqual(t, cacheID, newID, "New ID should differ from cache ID")

		// Verify job exists in job table
		var count int
		err = r.DB.QueryRow(`SELECT COUNT(*) FROM job WHERE id = ?`, newID).Scan(&count)
		require.NoError(t, err)
		assert.Equal(t, 1, count, "Job should exist in main table")

		// Verify job was removed from job_cache
		err = r.DB.QueryRow(`SELECT COUNT(*) FROM job_cache WHERE id = ?`, cacheID).Scan(&count)
		require.NoError(t, err)
		assert.Equal(t, 0, count, "Job should be removed from cache")
	})

	t.Run("transfer preserves job data", func(t *testing.T) {
		job, cacheID := startCachedJob(t, 999010)

		newID, err := transfer(cacheID)
		require.NoError(t, err)

		// Verify the transferred job has the correct data
		var jobID int64
		var cluster string
		err = r.DB.QueryRow(`SELECT job_id, cluster FROM job WHERE id = ?`, newID).Scan(&jobID, &cluster)
		require.NoError(t, err)
		assert.Equal(t, job.JobID, jobID)
		assert.Equal(t, job.Cluster, cluster)
	})
}
// TestSyncJobs verifies the behavior of JobRepository.SyncJobs against the
// job_cache and job tables. Each subtest empties job_cache first so that the
// number of jobs returned by SyncJobs is predictable, and removes the rows it
// created in the job table afterwards.
func TestSyncJobs(t *testing.T) {
	r := setup(t)

	t.Run("sync jobs from cache to main table", func(t *testing.T) {
		// Ensure cache is empty first
		_, err := r.DB.Exec("DELETE FROM job_cache")
		require.NoError(t, err)

		// Insert multiple jobs in job_cache
		job1 := createTestJob(999011, "testcluster")
		job2 := createTestJob(999012, "testcluster")
		job3 := createTestJob(999013, "testcluster")

		_, err = r.Start(job1)
		require.NoError(t, err)
		_, err = r.Start(job2)
		require.NoError(t, err)
		_, err = r.Start(job3)
		require.NoError(t, err)

		// Verify jobs are in job_cache
		var cacheCount int
		err = r.DB.QueryRow("SELECT COUNT(*) FROM job_cache WHERE job_id IN (?, ?, ?)",
			job1.JobID, job2.JobID, job3.JobID).Scan(&cacheCount)
		require.NoError(t, err)
		assert.Equal(t, 3, cacheCount, "All jobs should be in job_cache")

		// Sync jobs
		jobs, err := r.SyncJobs()
		require.NoError(t, err, "SyncJobs should succeed")
		assert.Equal(t, 3, len(jobs), "Should return 3 synced jobs")

		// Verify jobs were moved to job table
		var jobCount int
		err = r.DB.QueryRow("SELECT COUNT(*) FROM job WHERE job_id IN (?, ?, ?)",
			job1.JobID, job2.JobID, job3.JobID).Scan(&jobCount)
		require.NoError(t, err)
		assert.Equal(t, 3, jobCount, "All jobs should be in job table")

		// Verify job_cache was cleared
		err = r.DB.QueryRow("SELECT COUNT(*) FROM job_cache WHERE job_id IN (?, ?, ?)",
			job1.JobID, job2.JobID, job3.JobID).Scan(&cacheCount)
		require.NoError(t, err)
		assert.Equal(t, 0, cacheCount, "job_cache should be empty after sync")

		// Clean up
		_, err = r.DB.Exec("DELETE FROM job WHERE job_id IN (?, ?, ?)", job1.JobID, job2.JobID, job3.JobID)
		require.NoError(t, err)
	})

	t.Run("sync preserves job data", func(t *testing.T) {
		// Ensure cache is empty first
		_, err := r.DB.Exec("DELETE FROM job_cache")
		require.NoError(t, err)

		// Insert a job with specific data
		job := createTestJob(999014, "testcluster")
		job.ArrayJobID = 7777
		job.Energy = 2500.75
		job.Duration = 1800

		id, err := r.Start(job)
		require.NoError(t, err)

		// Update some fields to simulate job progress
		result, err := r.DB.Exec(`UPDATE job_cache SET duration = ?, energy = ? WHERE id = ?`,
			3600, 3000.5, id)
		require.NoError(t, err)

		// Check the RowsAffected error as well; a driver error here would
		// otherwise show up as a misleading "0 rows affected" failure.
		rowsAffected, err := result.RowsAffected()
		require.NoError(t, err, "RowsAffected should be available for the UPDATE")
		require.Equal(t, int64(1), rowsAffected, "UPDATE should affect exactly 1 row")

		// Verify the update worked
		var checkDuration int32
		var checkEnergy float64
		err = r.DB.QueryRow(`SELECT duration, energy FROM job_cache WHERE id = ?`, id).Scan(&checkDuration, &checkEnergy)
		require.NoError(t, err)
		require.Equal(t, int32(3600), checkDuration, "Duration should be updated to 3600 before sync")
		require.Equal(t, 3000.5, checkEnergy, "Energy should be updated to 3000.5 before sync")

		// Sync jobs
		jobs, err := r.SyncJobs()
		require.NoError(t, err)
		require.Equal(t, 1, len(jobs), "Should return exactly 1 synced job")

		// Verify in database
		var dbJob schema.Job
		err = r.DB.QueryRow(`SELECT job_id, hpc_user, project, cluster, array_job_id, duration, energy
			FROM job WHERE job_id = ? AND cluster = ?`, job.JobID, job.Cluster).Scan(
			&dbJob.JobID, &dbJob.User, &dbJob.Project, &dbJob.Cluster,
			&dbJob.ArrayJobID, &dbJob.Duration, &dbJob.Energy)
		require.NoError(t, err)
		assert.Equal(t, job.JobID, dbJob.JobID)
		assert.Equal(t, int32(3600), dbJob.Duration)
		assert.Equal(t, 3000.5, dbJob.Energy)

		// Clean up
		_, err = r.DB.Exec("DELETE FROM job WHERE job_id = ? AND cluster = ?", job.JobID, job.Cluster)
		require.NoError(t, err)
	})

	t.Run("sync returns job table IDs not cache IDs", func(t *testing.T) {
		// Ensure cache is empty first
		_, err := r.DB.Exec("DELETE FROM job_cache")
		require.NoError(t, err)

		// Insert a job into job_cache
		job := createTestJob(999015, "testcluster")
		cacheID, err := r.Start(job)
		require.NoError(t, err)

		// Sync jobs
		jobs, err := r.SyncJobs()
		require.NoError(t, err)
		require.Equal(t, 1, len(jobs))

		// The returned ID must refer to the job table, not job_cache
		var jobTableID int64
		err = r.DB.QueryRow("SELECT id FROM job WHERE job_id = ? AND cluster = ? AND start_time = ?",
			jobs[0].JobID, jobs[0].Cluster, jobs[0].StartTime).Scan(&jobTableID)
		require.NoError(t, err)

		// Guard the dereference below: a nil ID should fail this subtest
		// cleanly instead of panicking the whole test binary.
		require.NotNil(t, jobs[0].ID, "synced job should carry a database ID")
		assert.Equal(t, jobTableID, *jobs[0].ID,
			"returned ID should match the job table row, not the cache ID (%d)", cacheID)

		// Clean up
		_, err = r.DB.Exec("DELETE FROM job WHERE job_id = ? AND cluster = ?", job.JobID, job.Cluster)
		require.NoError(t, err)
	})

	t.Run("sync with empty cache returns empty list", func(t *testing.T) {
		// Ensure cache is empty
		_, err := r.DB.Exec("DELETE FROM job_cache")
		require.NoError(t, err)

		// Sync should return empty list
		jobs, err := r.SyncJobs()
		require.NoError(t, err)
		assert.Equal(t, 0, len(jobs), "Should return empty list when cache is empty")
	})
}
// TestInsertJobDirect verifies that JobRepository.InsertJobDirect writes the
// job into the job table (and not into job_cache) and that the ID it returns
// can be used for tag operations that reference the job table.
//
// The raw JSON columns are encoded by the test itself before the insert; any
// encoding failure aborts the subtest immediately.
func TestInsertJobDirect(t *testing.T) {
	r := setup(t)

	t.Run("inserts into job table not cache", func(t *testing.T) {
		job := createTestJob(999020, "testcluster")

		// Encode the raw JSON fields and fail fast on any marshalling error
		// rather than inserting incomplete data.
		var err error
		job.RawResources, err = json.Marshal(job.Resources)
		require.NoError(t, err, "Resources should marshal to JSON")
		job.RawFootprint, err = json.Marshal(job.Footprint)
		require.NoError(t, err, "Footprint should marshal to JSON")
		job.RawMetaData, err = json.Marshal(job.MetaData)
		require.NoError(t, err, "MetaData should marshal to JSON")

		id, err := r.InsertJobDirect(job)
		require.NoError(t, err, "InsertJobDirect should succeed")
		assert.Greater(t, id, int64(0), "Should return valid insert ID")

		// Verify job is in job table
		var count int
		err = r.DB.QueryRow("SELECT COUNT(*) FROM job WHERE id = ?", id).Scan(&count)
		require.NoError(t, err)
		assert.Equal(t, 1, count, "Job should be in job table")

		// Verify job is NOT in job_cache
		err = r.DB.QueryRow("SELECT COUNT(*) FROM job_cache WHERE job_id = ? AND cluster = ?",
			job.JobID, job.Cluster).Scan(&count)
		require.NoError(t, err)
		assert.Equal(t, 0, count, "Job should NOT be in job_cache")

		// Clean up
		_, err = r.DB.Exec("DELETE FROM job WHERE id = ?", id)
		require.NoError(t, err)
	})

	t.Run("returned ID works for tag operations", func(t *testing.T) {
		job := createTestJob(999021, "testcluster")

		// Encode the raw JSON fields and fail fast on any marshalling error.
		var err error
		job.RawResources, err = json.Marshal(job.Resources)
		require.NoError(t, err, "Resources should marshal to JSON")
		job.RawFootprint, err = json.Marshal(job.Footprint)
		require.NoError(t, err, "Footprint should marshal to JSON")
		job.RawMetaData, err = json.Marshal(job.MetaData)
		require.NoError(t, err, "MetaData should marshal to JSON")

		id, err := r.InsertJobDirect(job)
		require.NoError(t, err)

		// Adding a tag using the returned ID should succeed (FK constraint on jobtag)
		err = r.ImportTag(id, "test_type", "test_name", "global")
		require.NoError(t, err, "ImportTag should succeed with direct insert ID")

		// Clean up: remove the tag link first, then the job row it points to.
		_, err = r.DB.Exec("DELETE FROM jobtag WHERE job_id = ?", id)
		require.NoError(t, err)
		_, err = r.DB.Exec("DELETE FROM job WHERE id = ?", id)
		require.NoError(t, err)
	})
}
// TestStartDirect verifies that JobRepository.StartDirect inserts the job into
// the job table and stores the resources column as valid JSON that decodes
// back into a list of schema.Resource entries.
func TestStartDirect(t *testing.T) {
	r := setup(t)

	t.Run("inserts into job table with JSON encoding", func(t *testing.T) {
		job := createTestJob(999022, "testcluster")

		id, err := r.StartDirect(job)
		require.NoError(t, err, "StartDirect should succeed")
		assert.Greater(t, id, int64(0))

		// Verify job is in job table with encoded JSON
		var rawResources []byte
		err = r.DB.QueryRow("SELECT resources FROM job WHERE id = ?", id).Scan(&rawResources)
		require.NoError(t, err)

		var resources []*schema.Resource
		err = json.Unmarshal(rawResources, &resources)
		require.NoError(t, err, "Resources should be valid JSON")

		// Guard the index access below: an empty (or JSON null) resources
		// column should fail the test cleanly instead of panicking.
		require.NotEmpty(t, resources, "Resources should contain at least one entry")
		assert.Equal(t, "node01", resources[0].Hostname)

		// Clean up
		_, err = r.DB.Exec("DELETE FROM job WHERE id = ?", id)
		require.NoError(t, err)
	})
}

View File

@@ -2,6 +2,7 @@
// All rights reserved. This file is part of cc-backend.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package repository
import (
@@ -11,8 +12,8 @@ import (
"time"
"github.com/ClusterCockpit/cc-backend/internal/graph/model"
cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
"github.com/ClusterCockpit/cc-lib/schema"
cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger"
"github.com/ClusterCockpit/cc-lib/v2/schema"
sq "github.com/Masterminds/squirrel"
)
@@ -22,13 +23,17 @@ import (
// It returns a pointer to a schema.Job data structure and an error variable.
// To check if no job was found test err == sql.ErrNoRows
func (r *JobRepository) Find(
jobId *int64,
jobID *int64,
cluster *string,
startTime *int64,
) (*schema.Job, error) {
if jobID == nil {
return nil, fmt.Errorf("jobID cannot be nil")
}
start := time.Now()
q := sq.Select(jobColumns...).From("job").
Where("job.job_id = ?", *jobId)
Where("job.job_id = ?", *jobID)
if cluster != nil {
q = q.Where("job.cluster = ?", *cluster)
@@ -37,19 +42,29 @@ func (r *JobRepository) Find(
q = q.Where("job.start_time = ?", *startTime)
}
q = q.OrderBy("job.id DESC") // always use newest matching job by db id if more than one match
q = q.OrderBy("job.id DESC").Limit(1) // always use newest matching job by db id if more than one match
cclog.Debugf("Timer Find %s", time.Since(start))
return scanJob(q.RunWith(r.stmtCache).QueryRow())
}
// FindCached executes a SQL query to find a specific batch job from the job_cache table.
// The job is queried using the batch job id, and optionally filtered by cluster name
// and start time (UNIX epoch time seconds). This method uses cached job data which
// may be stale but provides faster access than Find().
// It returns a pointer to a schema.Job data structure and an error variable.
// To check if no job was found test err == sql.ErrNoRows
func (r *JobRepository) FindCached(
jobId *int64,
jobID *int64,
cluster *string,
startTime *int64,
) (*schema.Job, error) {
if jobID == nil {
return nil, fmt.Errorf("jobID cannot be nil")
}
q := sq.Select(jobCacheColumns...).From("job_cache").
Where("job_cache.job_id = ?", *jobId)
Where("job_cache.job_id = ?", *jobID)
if cluster != nil {
q = q.Where("job_cache.cluster = ?", *cluster)
@@ -58,24 +73,28 @@ func (r *JobRepository) FindCached(
q = q.Where("job_cache.start_time = ?", *startTime)
}
q = q.OrderBy("job_cache.id DESC") // always use newest matching job by db id if more than one match
q = q.OrderBy("job_cache.id DESC").Limit(1) // always use newest matching job by db id if more than one match
return scanJob(q.RunWith(r.stmtCache).QueryRow())
}
// Find executes a SQL query to find a specific batch job.
// The job is queried using the batch job id, the cluster name,
// and the start time of the job in UNIX epoch time seconds.
// It returns a pointer to a schema.Job data structure and an error variable.
// To check if no job was found test err == sql.ErrNoRows
// FindAll executes a SQL query to find all batch jobs matching the given criteria.
// Jobs are queried using the batch job id, and optionally filtered by cluster name
// and start time (UNIX epoch time seconds).
// It returns a slice of pointers to schema.Job data structures and an error variable.
// An empty slice is returned if no matching jobs are found.
func (r *JobRepository) FindAll(
jobId *int64,
jobID *int64,
cluster *string,
startTime *int64,
) ([]*schema.Job, error) {
if jobID == nil {
return nil, fmt.Errorf("jobID cannot be nil")
}
start := time.Now()
q := sq.Select(jobColumns...).From("job").
Where("job.job_id = ?", *jobId)
Where("job.job_id = ?", *jobID)
if cluster != nil {
q = q.Where("job.cluster = ?", *cluster)
@@ -86,8 +105,8 @@ func (r *JobRepository) FindAll(
rows, err := q.RunWith(r.stmtCache).Query()
if err != nil {
cclog.Error("Error while running query")
return nil, err
cclog.Errorf("Error while running FindAll query for jobID=%d: %v", *jobID, err)
return nil, fmt.Errorf("failed to execute FindAll query: %w", err)
}
defer rows.Close()
@@ -95,8 +114,8 @@ func (r *JobRepository) FindAll(
for rows.Next() {
job, err := scanJob(rows)
if err != nil {
cclog.Warn("Error while scanning rows")
return nil, err
cclog.Warnf("Error while scanning rows in FindAll: %v", err)
return nil, fmt.Errorf("failed to scan job row: %w", err)
}
jobs = append(jobs, job)
}
@@ -119,8 +138,8 @@ func (r *JobRepository) GetJobList(limit int, offset int) ([]int64, error) {
rows, err := query.RunWith(r.stmtCache).Query()
if err != nil {
cclog.Error("Error while running query")
return nil, err
cclog.Errorf("Error while running GetJobList query (limit=%d, offset=%d): %v", limit, offset, err)
return nil, fmt.Errorf("failed to execute GetJobList query: %w", err)
}
defer rows.Close()
@@ -129,23 +148,23 @@ func (r *JobRepository) GetJobList(limit int, offset int) ([]int64, error) {
var id int64
err := rows.Scan(&id)
if err != nil {
cclog.Warn("Error while scanning rows")
return nil, err
cclog.Warnf("Error while scanning rows in GetJobList: %v", err)
return nil, fmt.Errorf("failed to scan job ID: %w", err)
}
jl = append(jl, id)
}
cclog.Infof("Return job count %d", len(jl))
cclog.Debugf("JobRepository.GetJobList(): Return job count %d", len(jl))
return jl, nil
}
// FindById executes a SQL query to find a specific batch job.
// FindByID executes a SQL query to find a specific batch job.
// The job is queried using the database id.
// It returns a pointer to a schema.Job data structure and an error variable.
// To check if no job was found test err == sql.ErrNoRows
func (r *JobRepository) FindById(ctx context.Context, jobId int64) (*schema.Job, error) {
func (r *JobRepository) FindByID(ctx context.Context, jobID int64) (*schema.Job, error) {
q := sq.Select(jobColumns...).
From("job").Where("job.id = ?", jobId)
From("job").Where("job.id = ?", jobID)
q, qerr := SecurityCheck(ctx, q)
if qerr != nil {
@@ -155,14 +174,14 @@ func (r *JobRepository) FindById(ctx context.Context, jobId int64) (*schema.Job,
return scanJob(q.RunWith(r.stmtCache).QueryRow())
}
// FindByIdWithUser executes a SQL query to find a specific batch job.
// FindByIDWithUser executes a SQL query to find a specific batch job.
// The job is queried using the database id. The user is passed directly,
// instead of as part of the context.
// It returns a pointer to a schema.Job data structure and an error variable.
// To check if no job was found test err == sql.ErrNoRows
func (r *JobRepository) FindByIdWithUser(user *schema.User, jobId int64) (*schema.Job, error) {
func (r *JobRepository) FindByIDWithUser(user *schema.User, jobID int64) (*schema.Job, error) {
q := sq.Select(jobColumns...).
From("job").Where("job.id = ?", jobId)
From("job").Where("job.id = ?", jobID)
q, qerr := SecurityCheckWithUser(user, q)
if qerr != nil {
@@ -172,24 +191,24 @@ func (r *JobRepository) FindByIdWithUser(user *schema.User, jobId int64) (*schem
return scanJob(q.RunWith(r.stmtCache).QueryRow())
}
// FindByIdDirect executes a SQL query to find a specific batch job.
// FindByIDDirect executes a SQL query to find a specific batch job.
// The job is queried using the database id.
// It returns a pointer to a schema.Job data structure and an error variable.
// To check if no job was found test err == sql.ErrNoRows
func (r *JobRepository) FindByIdDirect(jobId int64) (*schema.Job, error) {
func (r *JobRepository) FindByIDDirect(jobID int64) (*schema.Job, error) {
q := sq.Select(jobColumns...).
From("job").Where("job.id = ?", jobId)
From("job").Where("job.id = ?", jobID)
return scanJob(q.RunWith(r.stmtCache).QueryRow())
}
// FindByJobId executes a SQL query to find a specific batch job.
// FindByJobID executes a SQL query to find a specific batch job.
// The job is queried using the slurm id, the start time and the cluster name.
// It returns a pointer to a schema.Job data structure and an error variable.
// To check if no job was found test err == sql.ErrNoRows
func (r *JobRepository) FindByJobId(ctx context.Context, jobId int64, startTime int64, cluster string) (*schema.Job, error) {
func (r *JobRepository) FindByJobID(ctx context.Context, jobID int64, startTime int64, cluster string) (*schema.Job, error) {
q := sq.Select(jobColumns...).
From("job").
Where("job.job_id = ?", jobId).
Where("job.job_id = ?", jobID).
Where("job.cluster = ?", cluster).
Where("job.start_time = ?", startTime)
@@ -201,19 +220,22 @@ func (r *JobRepository) FindByJobId(ctx context.Context, jobId int64, startTime
return scanJob(q.RunWith(r.stmtCache).QueryRow())
}
// IsJobOwner executes a SQL query to find a specific batch job.
// The job is queried using the slurm id,a username and the cluster.
// It returns a bool.
// If job was found, user is owner: test err != sql.ErrNoRows
func (r *JobRepository) IsJobOwner(jobId int64, startTime int64, user string, cluster string) bool {
// IsJobOwner checks if the specified user owns the batch job identified by jobID,
// startTime, and cluster. Returns true if the user is the owner, false otherwise.
// This method does not return errors; it returns false for both non-existent jobs
// and jobs owned by other users.
func (r *JobRepository) IsJobOwner(jobID int64, startTime int64, user string, cluster string) bool {
q := sq.Select("id").
From("job").
Where("job.job_id = ?", jobId).
Where("job.job_id = ?", jobID).
Where("job.hpc_user = ?", user).
Where("job.cluster = ?", cluster).
Where("job.start_time = ?", startTime)
_, err := scanJob(q.RunWith(r.stmtCache).QueryRow())
if err != nil && err != sql.ErrNoRows {
cclog.Warnf("IsJobOwner: unexpected error for jobID=%d, user=%s, cluster=%s: %v", jobID, user, cluster, err)
}
return err != sql.ErrNoRows
}
@@ -231,6 +253,11 @@ func (r *JobRepository) FindConcurrentJobs(
}
query = query.Where("cluster = ?", job.Cluster)
if len(job.Resources) == 0 {
return nil, fmt.Errorf("job has no resources defined")
}
var startTime int64
var stopTime int64
@@ -243,25 +270,28 @@ func (r *JobRepository) FindConcurrentJobs(
stopTime = startTime + int64(job.Duration)
}
// Add 200s overlap for jobs start time at the end
startTimeTail := startTime + 10
stopTimeTail := stopTime - 200
startTimeFront := startTime + 200
// Time buffer constant for finding overlapping jobs
// overlapBufferEnd: 200s buffer at job end to account for scheduling/cleanup overlap
const overlapBufferEnd = 200
queryRunning := query.Where("job.job_state = ?").Where("(job.start_time BETWEEN ? AND ? OR job.start_time < ?)",
"running", startTimeTail, stopTimeTail, startTime)
stopTimeTail := stopTime - overlapBufferEnd
startTimeFront := startTime + overlapBufferEnd
queryRunning := query.Where("job.job_state = ?", "running").
Where("job.start_time <= ?", stopTimeTail)
// Get At Least One Exact Hostname Match from JSON Resources Array in Database
queryRunning = queryRunning.Where("EXISTS (SELECT 1 FROM json_each(job.resources) WHERE json_extract(value, '$.hostname') = ?)", hostname)
query = query.Where("job.job_state != ?").Where("((job.start_time BETWEEN ? AND ?) OR (job.start_time + job.duration) BETWEEN ? AND ? OR (job.start_time < ?) AND (job.start_time + job.duration) > ?)",
"running", startTimeTail, stopTimeTail, startTimeFront, stopTimeTail, startTime, stopTime)
query = query.Where("job.job_state != ?", "running").
Where("job.start_time < ?", stopTimeTail).
Where("(job.start_time + job.duration) > ?", startTimeFront)
// Get At Least One Exact Hostname Match from JSON Resources Array in Database
query = query.Where("EXISTS (SELECT 1 FROM json_each(job.resources) WHERE json_extract(value, '$.hostname') = ?)", hostname)
rows, err := query.RunWith(r.stmtCache).Query()
if err != nil {
cclog.Errorf("Error while running query: %v", err)
return nil, err
cclog.Errorf("Error while running concurrent jobs query: %v", err)
return nil, fmt.Errorf("failed to execute concurrent jobs query: %w", err)
}
defer rows.Close()
@@ -269,44 +299,44 @@ func (r *JobRepository) FindConcurrentJobs(
queryString := fmt.Sprintf("cluster=%s", job.Cluster)
for rows.Next() {
var id, jobId, startTime sql.NullInt64
var id, jobID, startTime sql.NullInt64
if err = rows.Scan(&id, &jobId, &startTime); err != nil {
cclog.Warn("Error while scanning rows")
return nil, err
if err = rows.Scan(&id, &jobID, &startTime); err != nil {
cclog.Warnf("Error while scanning concurrent job rows: %v", err)
return nil, fmt.Errorf("failed to scan concurrent job row: %w", err)
}
if id.Valid {
queryString += fmt.Sprintf("&jobId=%d", int(jobId.Int64))
queryString += fmt.Sprintf("&jobId=%d", int(jobID.Int64))
items = append(items,
&model.JobLink{
ID: fmt.Sprint(id.Int64),
JobID: int(jobId.Int64),
JobID: int(jobID.Int64),
})
}
}
rows, err = queryRunning.RunWith(r.stmtCache).Query()
if err != nil {
cclog.Errorf("Error while running query: %v", err)
return nil, err
cclog.Errorf("Error while running concurrent running jobs query: %v", err)
return nil, fmt.Errorf("failed to execute concurrent running jobs query: %w", err)
}
defer rows.Close()
for rows.Next() {
var id, jobId, startTime sql.NullInt64
var id, jobID, startTime sql.NullInt64
if err := rows.Scan(&id, &jobId, &startTime); err != nil {
cclog.Warn("Error while scanning rows")
return nil, err
if err := rows.Scan(&id, &jobID, &startTime); err != nil {
cclog.Warnf("Error while scanning running concurrent job rows: %v", err)
return nil, fmt.Errorf("failed to scan running concurrent job row: %w", err)
}
if id.Valid {
queryString += fmt.Sprintf("&jobId=%d", int(jobId.Int64))
queryString += fmt.Sprintf("&jobId=%d", int(jobID.Int64))
items = append(items,
&model.JobLink{
ID: fmt.Sprint(id.Int64),
JobID: int(jobId.Int64),
JobID: int(jobID.Int64),
})
}
}

View File

@@ -2,16 +2,45 @@
// All rights reserved. This file is part of cc-backend.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package repository
import (
"sync"
"github.com/ClusterCockpit/cc-lib/schema"
"github.com/ClusterCockpit/cc-lib/v2/schema"
)
// JobHook interface allows external components to hook into job lifecycle events.
// Implementations can perform actions when jobs start or stop, such as tagging,
// logging, notifications, or triggering external workflows.
//
// Example implementation:
//
// type MyJobTagger struct{}
//
// func (t *MyJobTagger) JobStartCallback(job *schema.Job) {
// if job.NumNodes > 100 {
// // Tag large jobs automatically
// }
// }
//
// func (t *MyJobTagger) JobStopCallback(job *schema.Job) {
// if job.State == schema.JobStateFailed {
// // Log or alert on failed jobs
// }
// }
//
// Register hooks during application initialization:
//
// repository.RegisterJobHook(&MyJobTagger{})
type JobHook interface {
// JobStartCallback is invoked when one or more jobs start.
// This is called synchronously, so implementations should be fast.
JobStartCallback(job *schema.Job)
// JobStopCallback is invoked when a job completes.
// This is called synchronously, so implementations should be fast.
JobStopCallback(job *schema.Job)
}
@@ -20,7 +49,13 @@ var (
hooks []JobHook
)
func RegisterJobJook(hook JobHook) {
// RegisterJobHook registers a JobHook to receive job lifecycle callbacks.
// Multiple hooks can be registered and will be called in registration order.
// This function is safe to call multiple times and is typically called during
// application initialization.
//
// Nil hooks are silently ignored to simplify conditional registration.
func RegisterJobHook(hook JobHook) {
initOnce.Do(func() {
hooks = make([]JobHook, 0)
})
@@ -30,6 +65,12 @@ func RegisterJobJook(hook JobHook) {
}
}
// CallJobStartHooks invokes all registered JobHook.JobStartCallback methods
// for each job in the provided slice. This is called internally by the repository
// when jobs are started (e.g., via StartJob or batch job imports).
//
// Hooks are called synchronously in registration order. If a hook panics,
// the panic will propagate to the caller.
func CallJobStartHooks(jobs []*schema.Job) {
if hooks == nil {
return
@@ -44,6 +85,12 @@ func CallJobStartHooks(jobs []*schema.Job) {
}
}
// CallJobStopHooks invokes all registered JobHook.JobStopCallback methods
// for the provided job. This is called internally by the repository when a
// job completes (e.g., via StopJob or job state updates).
//
// Hooks are called synchronously in registration order. If a hook panics,
// the panic will propagate to the caller.
func CallJobStopHooks(job *schema.Job) {
if hooks == nil {
return

View File

@@ -2,6 +2,10 @@
// All rights reserved. This file is part of cc-backend.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
// Package repository provides job query functionality with filtering, pagination,
// and security controls. This file contains the main query builders and security
// checks for job retrieval operations.
package repository
import (
@@ -14,11 +18,27 @@ import (
"github.com/ClusterCockpit/cc-backend/internal/config"
"github.com/ClusterCockpit/cc-backend/internal/graph/model"
cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
"github.com/ClusterCockpit/cc-lib/schema"
cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger"
"github.com/ClusterCockpit/cc-lib/v2/schema"
sq "github.com/Masterminds/squirrel"
)
const (
// Default initial capacity for job result slices
defaultJobsCapacity = 50
)
// QueryJobs retrieves jobs from the database with optional filtering, pagination,
// and sorting. Security controls are automatically applied based on the user context.
//
// Parameters:
// - ctx: Context containing user authentication information
// - filters: Optional job filters (cluster, state, user, time ranges, etc.)
// - page: Optional pagination parameters (page number and items per page)
// - order: Optional sorting specification (column or footprint field)
//
// Returns a slice of jobs matching the criteria, or an error if the query fails.
// The function enforces role-based access control through SecurityCheck.
func (r *JobRepository) QueryJobs(
ctx context.Context,
filters []*model.JobFilter,
@@ -33,26 +53,24 @@ func (r *JobRepository) QueryJobs(
if order != nil {
field := toSnakeCase(order.Field)
if order.Type == "col" {
// "col": Fixed column name query
switch order.Order {
case model.SortDirectionEnumAsc:
query = query.OrderBy(fmt.Sprintf("job.%s ASC", field))
case model.SortDirectionEnumDesc:
query = query.OrderBy(fmt.Sprintf("job.%s DESC", field))
default:
return nil, errors.New("REPOSITORY/QUERY > invalid sorting order for column")
return nil, errors.New("invalid sorting order for column")
}
} else {
// "foot": Order by footprint JSON field values
// Verify and Search Only in Valid Jsons
query = query.Where("JSON_VALID(meta_data)")
// Order by footprint JSON field values
query = query.Where("JSON_VALID(footprint)")
switch order.Order {
case model.SortDirectionEnumAsc:
query = query.OrderBy(fmt.Sprintf("JSON_EXTRACT(footprint, \"$.%s\") ASC", field))
case model.SortDirectionEnumDesc:
query = query.OrderBy(fmt.Sprintf("JSON_EXTRACT(footprint, \"$.%s\") DESC", field))
default:
return nil, errors.New("REPOSITORY/QUERY > invalid sorting order for footprint")
return nil, errors.New("invalid sorting order for footprint")
}
}
}
@@ -69,29 +87,35 @@ func (r *JobRepository) QueryJobs(
rows, err := query.RunWith(r.stmtCache).Query()
if err != nil {
queryString, queryVars, _ := query.ToSql()
cclog.Errorf("Error while running query '%s' %v: %v", queryString, queryVars, err)
return nil, err
return nil, fmt.Errorf("query failed [%s] %v: %w", queryString, queryVars, err)
}
defer rows.Close()
jobs := make([]*schema.Job, 0, 50)
jobs := make([]*schema.Job, 0, defaultJobsCapacity)
for rows.Next() {
job, err := scanJob(rows)
if err != nil {
rows.Close()
cclog.Warn("Error while scanning rows (Jobs)")
return nil, err
cclog.Warnf("Error scanning job row: %v", err)
return nil, fmt.Errorf("failed to scan job row: %w", err)
}
jobs = append(jobs, job)
}
if err := rows.Err(); err != nil {
return nil, fmt.Errorf("error iterating job rows: %w", err)
}
return jobs, nil
}
// CountJobs returns the total number of jobs matching the given filters.
// Security controls are automatically applied based on the user context.
// Uses DISTINCT count to handle tag filters correctly (jobs may appear multiple
// times when joined with the tag table).
func (r *JobRepository) CountJobs(
ctx context.Context,
filters []*model.JobFilter,
) (int, error) {
// DISTICT count for tags filters, does not affect other queries
query, qerr := SecurityCheck(ctx, sq.Select("count(DISTINCT job.id)").From("job"))
if qerr != nil {
return 0, qerr
@@ -103,12 +127,22 @@ func (r *JobRepository) CountJobs(
var count int
if err := query.RunWith(r.DB).Scan(&count); err != nil {
return 0, err
return 0, fmt.Errorf("failed to count jobs: %w", err)
}
return count, nil
}
// SecurityCheckWithUser applies role-based access control filters to a job query
// based on the provided user's roles and permissions.
//
// Access rules by role:
// - API role (exclusive): Full access to all jobs
// - Admin/Support roles: Full access to all jobs
// - Manager role: Access to jobs in managed projects plus own jobs
// - User role: Access only to own jobs
//
// Returns an error if the user is nil or has no recognized roles.
func SecurityCheckWithUser(user *schema.User, query sq.SelectBuilder) (sq.SelectBuilder, error) {
if user == nil {
var qnil sq.SelectBuilder
@@ -116,84 +150,68 @@ func SecurityCheckWithUser(user *schema.User, query sq.SelectBuilder) (sq.Select
}
switch {
case len(user.Roles) == 1 && user.HasRole(schema.RoleApi): // API-User : All jobs
case len(user.Roles) == 1 && user.HasRole(schema.RoleAPI):
return query, nil
case user.HasAnyRole([]schema.Role{schema.RoleAdmin, schema.RoleSupport}): // Admin & Support : All jobs
case user.HasAnyRole([]schema.Role{schema.RoleAdmin, schema.RoleSupport}):
return query, nil
case user.HasRole(schema.RoleManager): // Manager : Add filter for managed projects' jobs only + personal jobs
case user.HasRole(schema.RoleManager):
if len(user.Projects) != 0 {
return query.Where(sq.Or{sq.Eq{"job.project": user.Projects}, sq.Eq{"job.hpc_user": user.Username}}), nil
} else {
cclog.Debugf("Manager-User '%s' has no defined projects to lookup! Query only personal jobs ...", user.Username)
return query.Where("job.hpc_user = ?", user.Username), nil
}
case user.HasRole(schema.RoleUser): // User : Only personal jobs
cclog.Debugf("Manager '%s' has no assigned projects, restricting to personal jobs", user.Username)
return query.Where("job.hpc_user = ?", user.Username), nil
default: // No known Role, return error
case user.HasRole(schema.RoleUser):
return query.Where("job.hpc_user = ?", user.Username), nil
default:
var qnil sq.SelectBuilder
return qnil, fmt.Errorf("user has no or unknown roles")
}
}
// SecurityCheck extracts the user from the context and applies role-based access
// control filters to the query. This is a convenience wrapper around SecurityCheckWithUser.
func SecurityCheck(ctx context.Context, query sq.SelectBuilder) (sq.SelectBuilder, error) {
user := GetUserFromContext(ctx)
return SecurityCheckWithUser(user, query)
}
// Build a sq.SelectBuilder out of a schema.JobFilter.
// BuildWhereClause constructs SQL WHERE conditions from a JobFilter and applies
// them to the query. Supports filtering by job properties (cluster, state, user),
// time ranges, resource usage, tags, and JSON field searches in meta_data,
// footprint, and resources columns.
func BuildWhereClause(filter *model.JobFilter, query sq.SelectBuilder) sq.SelectBuilder {
if filter.Tags != nil {
// This is an OR-Logic query: Returns all distinct jobs with at least one of the requested tags; TODO: AND-Logic query?
query = query.Join("jobtag ON jobtag.job_id = job.id").Where(sq.Eq{"jobtag.tag_id": filter.Tags}).Distinct()
}
// Primary Key
if filter.DbID != nil {
dbIDs := make([]string, len(filter.DbID))
copy(dbIDs, filter.DbID)
query = query.Where(sq.Eq{"job.id": dbIDs})
}
if filter.JobID != nil {
query = buildStringCondition("job.job_id", filter.JobID, query)
}
if filter.ArrayJobID != nil {
query = query.Where("job.array_job_id = ?", *filter.ArrayJobID)
}
if filter.User != nil {
query = buildStringCondition("job.hpc_user", filter.User, query)
}
if filter.Project != nil {
query = buildStringCondition("job.project", filter.Project, query)
}
if filter.JobName != nil {
query = buildMetaJsonCondition("jobName", filter.JobName, query)
}
// Explicit indices
if filter.Cluster != nil {
query = buildStringCondition("job.cluster", filter.Cluster, query)
}
if filter.SubCluster != nil {
query = buildStringCondition("job.subcluster", filter.SubCluster, query)
}
if filter.Partition != nil {
query = buildStringCondition("job.cluster_partition", filter.Partition, query)
}
if filter.StartTime != nil {
query = buildTimeCondition("job.start_time", filter.StartTime, query)
}
if filter.Duration != nil {
query = buildIntCondition("job.duration", filter.Duration, query)
}
if filter.MinRunningFor != nil {
now := time.Now().Unix() // There does not seam to be a portable way to get the current unix timestamp accross different DBs.
query = query.Where("(job.job_state != 'running' OR (? - job.start_time) > ?)", now, *filter.MinRunningFor)
}
if filter.Shared != nil {
query = query.Where("job.shared = ?", *filter.Shared)
}
if filter.State != nil {
states := make([]string, len(filter.State))
for i, val := range filter.State {
states[i] = string(val)
}
query = query.Where(sq.Eq{"job.job_state": states})
}
if filter.Shared != nil {
query = query.Where("job.shared = ?", *filter.Shared)
}
if filter.Project != nil {
query = buildStringCondition("job.project", filter.Project, query)
}
if filter.User != nil {
query = buildStringCondition("job.hpc_user", filter.User, query)
}
if filter.NumNodes != nil {
query = buildIntCondition("job.num_nodes", filter.NumNodes, query)
}
@@ -203,33 +221,95 @@ func BuildWhereClause(filter *model.JobFilter, query sq.SelectBuilder) sq.Select
if filter.NumHWThreads != nil {
query = buildIntCondition("job.num_hwthreads", filter.NumHWThreads, query)
}
if filter.Node != nil {
query = buildResourceJsonCondition("hostname", filter.Node, query)
if filter.ArrayJobID != nil {
query = query.Where("job.array_job_id = ?", *filter.ArrayJobID)
}
if filter.StartTime != nil {
query = buildTimeCondition("job.start_time", filter.StartTime, query)
}
if filter.Duration != nil {
query = buildIntCondition("job.duration", filter.Duration, query)
}
if filter.Energy != nil {
query = buildFloatCondition("job.energy", filter.Energy, query)
}
// Indices on Tag Table
if filter.Tags != nil {
// This is an OR-Logic query: Returns all distinct jobs with at least one of the requested tags; TODO: AND-Logic query?
query = query.Join("jobtag ON jobtag.job_id = job.id").Where(sq.Eq{"jobtag.tag_id": filter.Tags}).Distinct()
}
// No explicit Indices
if filter.JobID != nil {
query = buildStringCondition("job.job_id", filter.JobID, query)
}
// Queries Within JSONs
if filter.MetricStats != nil {
for _, ms := range filter.MetricStats {
query = buildFloatJsonCondition(ms.MetricName, ms.Range, query)
query = buildFloatJSONCondition(ms.MetricName, ms.Range, query)
}
}
if filter.Node != nil {
query = buildResourceJSONCondition("hostname", filter.Node, query)
}
if filter.JobName != nil {
query = buildMetaJSONCondition("jobName", filter.JobName, query)
}
if filter.Schedule != nil {
interactiveJobname := "interactive"
switch *filter.Schedule {
case "interactive":
iFilter := model.StringInput{Eq: &interactiveJobname}
query = buildMetaJSONCondition("jobName", &iFilter, query)
case "batch":
sFilter := model.StringInput{Neq: &interactiveJobname}
query = buildMetaJSONCondition("jobName", &sFilter, query)
}
}
// Configurable Filter to exclude recently started jobs, see config.go: ShortRunningJobsDuration
if filter.MinRunningFor != nil {
now := time.Now().Unix()
// Only jobs whose start timestamp is more than MinRunningFor seconds in the past
// If a job completed within the configured timeframe, it will still show up after the start_time matches the condition!
query = query.Where(sq.Lt{"job.start_time": (now - int64(*filter.MinRunningFor))})
}
return query
}
// buildIntCondition adds a WHERE clause that restricts the integer column
// field to the range described by cond.
//
// A From value of 1 and a To value of 0 are treated as "bound not set":
//   - both bounds set: field BETWEEN From AND To
//   - only From set:   field >= From
//   - only To set:     field <= To
//   - neither set:     query is returned unchanged
//
// BETWEEN is only emitted when both bounds are required.
func buildIntCondition(field string, cond *config.IntRange, query sq.SelectBuilder) sq.SelectBuilder {
	hasFrom := cond.From != 1
	hasTo := cond.To != 0

	switch {
	case hasFrom && hasTo:
		return query.Where(field+" BETWEEN ? AND ?", cond.From, cond.To)
	case hasFrom:
		return query.Where(field+" >= ?", cond.From)
	case hasTo:
		return query.Where(field+" <= ?", cond.To)
	default:
		return query
	}
}
// buildFloatCondition adds a WHERE clause that restricts the float column
// field to the range described by cond.
//
// A From value of 1.0 and a To value of 0.0 are treated as "bound not set":
//   - both bounds set: field BETWEEN From AND To
//   - only From set:   field >= From
//   - only To set:     field <= To
//   - neither set:     query is returned unchanged
//
// BETWEEN is only emitted when both bounds are required.
func buildFloatCondition(field string, cond *model.FloatRange, query sq.SelectBuilder) sq.SelectBuilder {
	hasFrom := cond.From != 1.0
	hasTo := cond.To != 0.0

	switch {
	case hasFrom && hasTo:
		return query.Where(field+" BETWEEN ? AND ?", cond.From, cond.To)
	case hasFrom:
		return query.Where(field+" >= ?", cond.From)
	case hasTo:
		return query.Where(field+" <= ?", cond.To)
	default:
		return query
	}
}
// buildTimeCondition creates time range filters supporting absolute timestamps,
// relative time ranges (last6h, last24h, last7d, last30d), or open-ended ranges.
// Reminder: BETWEEN queries are slower and don't use indices as frequently; only use them if both bounds are required.
func buildTimeCondition(field string, cond *config.TimeRange, query sq.SelectBuilder) sq.SelectBuilder {
if cond.From != nil && cond.To != nil {
return query.Where(field+" BETWEEN ? AND ?", cond.From.Unix(), cond.To.Unix())
} else if cond.From != nil {
return query.Where("? <= "+field, cond.From.Unix())
return query.Where(field+" >= ?", cond.From.Unix())
} else if cond.To != nil {
return query.Where(field+" <= ?", cond.To.Unix())
} else if cond.Range != "" {
@@ -248,18 +328,28 @@ func buildTimeCondition(field string, cond *config.TimeRange, query sq.SelectBui
cclog.Debugf("No known named timeRange: startTime.range = %s", cond.Range)
return query
}
return query.Where(field+" BETWEEN ? AND ?", then, now)
return query.Where(field+" >= ?", then)
} else {
return query
}
}
func buildFloatJsonCondition(condName string, condRange *model.FloatRange, query sq.SelectBuilder) sq.SelectBuilder {
// Verify and Search Only in Valid Jsons
// buildFloatJSONCondition creates a filter on a numeric field within the footprint JSON column, using BETWEEN only if required.
func buildFloatJSONCondition(jsonField string, cond *model.FloatRange, query sq.SelectBuilder) sq.SelectBuilder {
query = query.Where("JSON_VALID(footprint)")
return query.Where("JSON_EXTRACT(footprint, \"$."+condName+"\") BETWEEN ? AND ?", condRange.From, condRange.To)
if cond.From != 1.0 && cond.To != 0.0 {
return query.Where("JSON_EXTRACT(footprint, \"$."+jsonField+"\") BETWEEN ? AND ?", cond.From, cond.To)
} else if cond.From != 1.0 && cond.To == 0.0 {
return query.Where("JSON_EXTRACT(footprint, \"$."+jsonField+"\") >= ?", cond.From)
} else if cond.From == 1.0 && cond.To != 0.0 {
return query.Where("JSON_EXTRACT(footprint, \"$."+jsonField+"\") <= ?", cond.To)
} else {
return query
}
}
// buildStringCondition creates filters for string fields supporting equality,
// inequality, prefix, suffix, substring, and IN list matching.
func buildStringCondition(field string, cond *model.StringInput, query sq.SelectBuilder) sq.SelectBuilder {
if cond.Eq != nil {
return query.Where(field+" = ?", *cond.Eq)
@@ -284,10 +374,9 @@ func buildStringCondition(field string, cond *model.StringInput, query sq.Select
return query
}
func buildMetaJsonCondition(jsonField string, cond *model.StringInput, query sq.SelectBuilder) sq.SelectBuilder {
// Verify and Search Only in Valid Jsons
// buildMetaJSONCondition creates filters on fields within the meta_data JSON column.
func buildMetaJSONCondition(jsonField string, cond *model.StringInput, query sq.SelectBuilder) sq.SelectBuilder {
query = query.Where("JSON_VALID(meta_data)")
// add "AND" Sql query Block for field match
if cond.Eq != nil {
return query.Where("JSON_EXTRACT(meta_data, \"$."+jsonField+"\") = ?", *cond.Eq)
}
@@ -306,10 +395,10 @@ func buildMetaJsonCondition(jsonField string, cond *model.StringInput, query sq.
return query
}
func buildResourceJsonCondition(jsonField string, cond *model.StringInput, query sq.SelectBuilder) sq.SelectBuilder {
// Verify and Search Only in Valid Jsons
// buildResourceJSONCondition creates filters on fields within the resources JSON array column.
// Uses json_each to search within array elements.
func buildResourceJSONCondition(jsonField string, cond *model.StringInput, query sq.SelectBuilder) sq.SelectBuilder {
query = query.Where("JSON_VALID(resources)")
// add "AND" Sql query Block for field match
if cond.Eq != nil {
return query.Where("EXISTS (SELECT 1 FROM json_each(job.resources) WHERE json_extract(value, \"$."+jsonField+"\") = ?)", *cond.Eq)
}
@@ -333,15 +422,16 @@ var (
matchAllCap = regexp.MustCompile("([a-z0-9])([A-Z])")
)
// toSnakeCase converts camelCase strings to snake_case for SQL column names.
// Includes security checks to prevent SQL injection attempts.
// Panics if potentially dangerous characters are detected.
func toSnakeCase(str string) string {
for _, c := range str {
if c == '\'' || c == '\\' {
cclog.Panic("toSnakeCase() attack vector!")
if c == '\'' || c == '\\' || c == '"' || c == ';' || c == '-' || c == ' ' {
cclog.Panicf("toSnakeCase: potentially dangerous character detected in input: %q", str)
}
}
str = strings.ReplaceAll(str, "'", "")
str = strings.ReplaceAll(str, "\\", "")
snake := matchFirstCap.ReplaceAllString(str, "${1}_${2}")
snake = matchAllCap.ReplaceAllString(snake, "${1}_${2}")
return strings.ToLower(snake)

View File

@@ -10,7 +10,7 @@ import (
"testing"
"time"
"github.com/ClusterCockpit/cc-lib/schema"
"github.com/ClusterCockpit/cc-lib/v2/schema"
_ "github.com/mattn/go-sqlite3"
)
@@ -33,7 +33,7 @@ func TestFind(t *testing.T) {
func TestFindById(t *testing.T) {
r := setup(t)
job, err := r.FindById(getContext(t), 338)
job, err := r.FindByID(getContext(t), 338)
if err != nil {
t.Fatal(err)
}
@@ -78,7 +78,7 @@ func TestFindJobsBetween(t *testing.T) {
// 1. Find a job to use (Find all jobs)
// We use a large time range to ensure we get something if it exists
jobs, err := r.FindJobsBetween(0, 9999999999, false)
jobs, err := r.FindJobsBetween(0, 9999999999, "none")
if err != nil {
t.Fatal(err)
}
@@ -88,21 +88,21 @@ func TestFindJobsBetween(t *testing.T) {
targetJob := jobs[0]
// 2. Create a tag
tagName := fmt.Sprintf("testtag_%d", time.Now().UnixNano())
tagId, err := r.CreateTag("testtype", tagName, "global")
// 2. Create an auto-tagger tag (type "app")
appTagName := fmt.Sprintf("apptag_%d", time.Now().UnixNano())
appTagID, err := r.CreateTag("app", appTagName, "global")
if err != nil {
t.Fatal(err)
}
// 3. Link Tag (Manually to avoid archive dependency side-effects in unit test)
_, err = r.DB.Exec("INSERT INTO jobtag (job_id, tag_id) VALUES (?, ?)", *targetJob.ID, tagId)
// 3. Link auto-tagger tag to job
_, err = r.DB.Exec("INSERT INTO jobtag (job_id, tag_id) VALUES (?, ?)", *targetJob.ID, appTagID)
if err != nil {
t.Fatal(err)
}
// 4. Search with omitTagged = false (Should find the job)
jobsFound, err := r.FindJobsBetween(0, 9999999999, false)
// 4. Search with omitTagged = "none" (Should find the job)
jobsFound, err := r.FindJobsBetween(0, 9999999999, "none")
if err != nil {
t.Fatal(err)
}
@@ -115,18 +115,58 @@ func TestFindJobsBetween(t *testing.T) {
}
}
if !found {
t.Errorf("Target job %d should be found when omitTagged=false", *targetJob.ID)
t.Errorf("Target job %d should be found when omitTagged=none", *targetJob.ID)
}
// 5. Search with omitTagged = true (Should NOT find the job)
jobsFiltered, err := r.FindJobsBetween(0, 9999999999, true)
// 5. Search with omitTagged = "all" (Should NOT find the job — it has a tag)
jobsFiltered, err := r.FindJobsBetween(0, 9999999999, "all")
if err != nil {
t.Fatal(err)
}
for _, j := range jobsFiltered {
if *j.ID == *targetJob.ID {
t.Errorf("Target job %d should NOT be found when omitTagged=true", *targetJob.ID)
t.Errorf("Target job %d should NOT be found when omitTagged=all", *targetJob.ID)
}
}
// 6. Search with omitTagged = "user": auto-tagger tag ("app") should NOT exclude the job
jobsUserFilter, err := r.FindJobsBetween(0, 9999999999, "user")
if err != nil {
t.Fatal(err)
}
found = false
for _, j := range jobsUserFilter {
if *j.ID == *targetJob.ID {
found = true
break
}
}
if !found {
t.Errorf("Target job %d should be found when omitTagged=user (only has auto-tagger tag)", *targetJob.ID)
}
// 7. Add a user-created tag (type "testtype") to the same job
userTagName := fmt.Sprintf("usertag_%d", time.Now().UnixNano())
userTagID, err := r.CreateTag("testtype", userTagName, "global")
if err != nil {
t.Fatal(err)
}
_, err = r.DB.Exec("INSERT INTO jobtag (job_id, tag_id) VALUES (?, ?)", *targetJob.ID, userTagID)
if err != nil {
t.Fatal(err)
}
// 8. Now omitTagged = "user" should exclude the job (has a user-created tag)
jobsUserFilter2, err := r.FindJobsBetween(0, 9999999999, "user")
if err != nil {
t.Fatal(err)
}
for _, j := range jobsUserFilter2 {
if *j.ID == *targetJob.ID {
t.Errorf("Target job %d should NOT be found when omitTagged=user (has user-created tag)", *targetJob.ID)
}
}
}

View File

@@ -10,52 +10,48 @@ import (
"embed"
"fmt"
cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger"
"github.com/golang-migrate/migrate/v4"
"github.com/golang-migrate/migrate/v4/database/mysql"
"github.com/golang-migrate/migrate/v4/database/sqlite3"
"github.com/golang-migrate/migrate/v4/source/iofs"
)
// Version is the current database schema version required by this version of cc-backend.
// When the database schema changes, this version is incremented and a new migration file
// is added to internal/repository/migrations/sqlite3/.
//
// Version history:
// - Version 10: Current version
//
// Migration files are embedded at build time from the migrations directory.
const Version uint = 10
//go:embed migrations/*
var migrationFiles embed.FS
func checkDBVersion(backend string, db *sql.DB) error {
var m *migrate.Migrate
// checkDBVersion verifies that the database schema version matches the expected version.
// This is called automatically during Connect() to ensure schema compatibility.
//
// Returns an error if:
// - Database version is older than expected (needs migration)
// - Database version is newer than expected (needs app upgrade)
// - Database is in a dirty state (failed migration)
//
// A "dirty" database indicates a migration was started but not completed successfully.
// This requires manual intervention to fix the database and force the version.
func checkDBVersion(db *sql.DB) error {
driver, err := sqlite3.WithInstance(db, &sqlite3.Config{})
if err != nil {
return err
}
d, err := iofs.New(migrationFiles, "migrations/sqlite3")
if err != nil {
return err
}
switch backend {
case "sqlite3":
driver, err := sqlite3.WithInstance(db, &sqlite3.Config{})
if err != nil {
return err
}
d, err := iofs.New(migrationFiles, "migrations/sqlite3")
if err != nil {
return err
}
m, err = migrate.NewWithInstance("iofs", d, "sqlite3", driver)
if err != nil {
return err
}
case "mysql":
driver, err := mysql.WithInstance(db, &mysql.Config{})
if err != nil {
return err
}
d, err := iofs.New(migrationFiles, "migrations/mysql")
if err != nil {
return err
}
m, err = migrate.NewWithInstance("iofs", d, "mysql", driver)
if err != nil {
return err
}
default:
cclog.Abortf("Migration: Unsupported database backend '%s'.\n", backend)
m, err := migrate.NewWithInstance("iofs", d, "sqlite3", driver)
if err != nil {
return err
}
v, dirty, err := m.Version()
@@ -80,37 +76,41 @@ func checkDBVersion(backend string, db *sql.DB) error {
return nil
}
func getMigrateInstance(backend string, db string) (m *migrate.Migrate, err error) {
switch backend {
case "sqlite3":
d, err := iofs.New(migrationFiles, "migrations/sqlite3")
if err != nil {
cclog.Fatal(err)
}
// getMigrateInstance creates a new migration instance for the given database file.
// This is used internally by MigrateDB, RevertDB, and ForceDB.
func getMigrateInstance(db string) (m *migrate.Migrate, err error) {
d, err := iofs.New(migrationFiles, "migrations/sqlite3")
if err != nil {
return nil, err
}
m, err = migrate.NewWithSourceInstance("iofs", d, fmt.Sprintf("sqlite3://%s?_foreign_keys=on", db))
if err != nil {
return m, err
}
case "mysql":
d, err := iofs.New(migrationFiles, "migrations/mysql")
if err != nil {
return m, err
}
m, err = migrate.NewWithSourceInstance("iofs", d, fmt.Sprintf("mysql://%s?multiStatements=true", db))
if err != nil {
return m, err
}
default:
cclog.Abortf("Migration: Unsupported database backend '%s'.\n", backend)
m, err = migrate.NewWithSourceInstance("iofs", d, fmt.Sprintf("sqlite3://%s?_foreign_keys=on", db))
if err != nil {
return nil, err
}
return m, nil
}
func MigrateDB(backend string, db string) error {
m, err := getMigrateInstance(backend, db)
// MigrateDB applies all pending database migrations to bring the schema up to date.
// This should be run with the -migrate-db flag before starting the application
// after upgrading to a new version that requires schema changes.
//
// Process:
// 1. Checks current database version
// 2. Applies all migrations from current version to target Version
// 3. Updates schema_migrations table to track applied migrations
//
// Important:
// - Always backup your database before running migrations
// - Migrations are irreversible without manual intervention
// - If a migration fails, the database is marked "dirty" and requires manual fix
//
// Usage:
//
// cc-backend -migrate-db
func MigrateDB(db string) error {
m, err := getMigrateInstance(db)
if err != nil {
return err
}
@@ -118,7 +118,7 @@ func MigrateDB(backend string, db string) error {
v, dirty, err := m.Version()
if err != nil {
if err == migrate.ErrNilVersion {
cclog.Warn("Legacy database without version or missing database file!")
cclog.Info("Legacy database without version or missing database file!")
} else {
return err
}
@@ -144,8 +144,19 @@ func MigrateDB(backend string, db string) error {
return nil
}
func RevertDB(backend string, db string) error {
m, err := getMigrateInstance(backend, db)
// RevertDB rolls back the database schema to the previous version (Version - 1).
// This is primarily used for testing or emergency rollback scenarios.
//
// Warning:
// - This may cause data loss if newer schema added columns/tables
// - Always backup before reverting
// - Not all migrations are safely reversible
//
// Usage:
//
// cc-backend -revert-db
func RevertDB(db string) error {
m, err := getMigrateInstance(db)
if err != nil {
return err
}
@@ -162,8 +173,23 @@ func RevertDB(backend string, db string) error {
return nil
}
func ForceDB(backend string, db string) error {
m, err := getMigrateInstance(backend, db)
// ForceDB forces the database schema version to the current Version without running migrations.
// This is only used to recover from failed migrations that left the database in a "dirty" state.
//
// When to use:
// - After manually fixing a failed migration
// - When you've manually applied schema changes and need to update the version marker
//
// Warning:
// - This does NOT apply any schema changes
// - Only use after manually verifying the schema is correct
// - Improper use can cause schema/version mismatch
//
// Usage:
//
// cc-backend -force-db
func ForceDB(db string) error {
m, err := getMigrateInstance(db)
if err != nil {
return err
}

View File

@@ -1,5 +0,0 @@
-- Revert the initial schema.
-- Tables holding foreign keys (jobtag, configuration) are dropped before the
-- tables they reference (job, tag, user).
DROP TABLE IF EXISTS jobtag;
DROP TABLE IF EXISTS configuration;
DROP TABLE IF EXISTS tag;
DROP TABLE IF EXISTS job;
DROP TABLE IF EXISTS user;

View File

@@ -1,66 +0,0 @@
CREATE TABLE IF NOT EXISTS job (
id INTEGER AUTO_INCREMENT PRIMARY KEY ,
job_id BIGINT NOT NULL,
cluster VARCHAR(255) NOT NULL,
subcluster VARCHAR(255) NOT NULL,
start_time BIGINT NOT NULL, -- Unix timestamp
user VARCHAR(255) NOT NULL,
project VARCHAR(255) NOT NULL,
`partition` VARCHAR(255) NOT NULL,
array_job_id BIGINT NOT NULL,
duration INT NOT NULL DEFAULT 0,
walltime INT NOT NULL DEFAULT 0,
job_state VARCHAR(255) NOT NULL
CHECK(job_state IN ('running', 'completed', 'failed', 'cancelled',
'stopped', 'timeout', 'preempted', 'out_of_memory')),
meta_data TEXT, -- JSON
resources TEXT NOT NULL, -- JSON
num_nodes INT NOT NULL,
num_hwthreads INT NOT NULL,
num_acc INT NOT NULL,
smt TINYINT NOT NULL DEFAULT 1 CHECK(smt IN (0, 1 )),
exclusive TINYINT NOT NULL DEFAULT 1 CHECK(exclusive IN (0, 1, 2)),
monitoring_status TINYINT NOT NULL DEFAULT 1 CHECK(monitoring_status IN (0, 1, 2, 3)),
mem_used_max REAL NOT NULL DEFAULT 0.0,
flops_any_avg REAL NOT NULL DEFAULT 0.0,
mem_bw_avg REAL NOT NULL DEFAULT 0.0,
load_avg REAL NOT NULL DEFAULT 0.0,
net_bw_avg REAL NOT NULL DEFAULT 0.0,
net_data_vol_total REAL NOT NULL DEFAULT 0.0,
file_bw_avg REAL NOT NULL DEFAULT 0.0,
file_data_vol_total REAL NOT NULL DEFAULT 0.0,
UNIQUE (job_id, cluster, start_time)
);
CREATE TABLE IF NOT EXISTS tag (
id INTEGER PRIMARY KEY,
tag_type VARCHAR(255) NOT NULL,
tag_name VARCHAR(255) NOT NULL,
UNIQUE (tag_type, tag_name));
CREATE TABLE IF NOT EXISTS jobtag (
job_id INTEGER,
tag_id INTEGER,
PRIMARY KEY (job_id, tag_id),
FOREIGN KEY (job_id) REFERENCES job (id) ON DELETE CASCADE,
FOREIGN KEY (tag_id) REFERENCES tag (id) ON DELETE CASCADE);
CREATE TABLE IF NOT EXISTS user (
username varchar(255) PRIMARY KEY NOT NULL,
password varchar(255) DEFAULT NULL,
ldap tinyint NOT NULL DEFAULT 0, /* col called "ldap" for historic reasons, fills the "AuthSource" */
name varchar(255) DEFAULT NULL,
roles varchar(255) NOT NULL DEFAULT "[]",
email varchar(255) DEFAULT NULL);
CREATE TABLE IF NOT EXISTS configuration (
username varchar(255),
confkey varchar(255),
value varchar(255),
PRIMARY KEY (username, confkey),
FOREIGN KEY (username) REFERENCES user (username) ON DELETE CASCADE ON UPDATE NO ACTION);

View File

@@ -1,8 +0,0 @@
DROP INDEX IF EXISTS job_stats;
DROP INDEX IF EXISTS job_by_user;
DROP INDEX IF EXISTS job_by_starttime;
DROP INDEX IF EXISTS job_by_job_id;
DROP INDEX IF EXISTS job_list;
DROP INDEX IF EXISTS job_list_user;
DROP INDEX IF EXISTS job_list_users;
DROP INDEX IF EXISTS job_list_users_start;

View File

@@ -1,8 +0,0 @@
CREATE INDEX IF NOT EXISTS job_stats ON job (cluster,subcluster,user);
CREATE INDEX IF NOT EXISTS job_by_user ON job (user);
CREATE INDEX IF NOT EXISTS job_by_starttime ON job (start_time);
CREATE INDEX IF NOT EXISTS job_by_job_id ON job (job_id);
CREATE INDEX IF NOT EXISTS job_list ON job (cluster, job_state);
CREATE INDEX IF NOT EXISTS job_list_user ON job (user, cluster, job_state);
CREATE INDEX IF NOT EXISTS job_list_users ON job (user, job_state);
CREATE INDEX IF NOT EXISTS job_list_users_start ON job (start_time, user, job_state);

View File

@@ -1 +0,0 @@
ALTER TABLE user DROP COLUMN projects;

View File

@@ -1 +0,0 @@
ALTER TABLE user ADD COLUMN projects varchar(255) NOT NULL DEFAULT "[]";

View File

@@ -1,5 +0,0 @@
ALTER TABLE job
MODIFY `partition` VARCHAR(255) NOT NULL,
MODIFY array_job_id BIGINT NOT NULL,
MODIFY num_hwthreads INT NOT NULL,
MODIFY num_acc INT NOT NULL;

View File

@@ -1,5 +0,0 @@
ALTER TABLE job
MODIFY `partition` VARCHAR(255),
MODIFY array_job_id BIGINT,
MODIFY num_hwthreads INT,
MODIFY num_acc INT;

View File

@@ -1,2 +0,0 @@
ALTER TABLE tag DROP COLUMN insert_time;
ALTER TABLE jobtag DROP COLUMN insert_time;

View File

@@ -1,2 +0,0 @@
ALTER TABLE tag ADD COLUMN insert_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP;
ALTER TABLE jobtag ADD COLUMN insert_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP;

View File

@@ -1 +0,0 @@
ALTER TABLE configuration MODIFY value VARCHAR(255);

View File

@@ -1 +0,0 @@
ALTER TABLE configuration MODIFY value TEXT;

View File

@@ -1,3 +0,0 @@
SET FOREIGN_KEY_CHECKS = 0;
ALTER TABLE tag MODIFY id INTEGER;
SET FOREIGN_KEY_CHECKS = 1;

View File

@@ -1,3 +0,0 @@
SET FOREIGN_KEY_CHECKS = 0;
ALTER TABLE tag MODIFY id INTEGER AUTO_INCREMENT;
SET FOREIGN_KEY_CHECKS = 1;

View File

@@ -1,83 +0,0 @@
ALTER TABLE job DROP energy;
ALTER TABLE job DROP energy_footprint;
ALTER TABLE job ADD COLUMN flops_any_avg;
ALTER TABLE job ADD COLUMN mem_bw_avg;
ALTER TABLE job ADD COLUMN mem_used_max;
ALTER TABLE job ADD COLUMN load_avg;
ALTER TABLE job ADD COLUMN net_bw_avg;
ALTER TABLE job ADD COLUMN net_data_vol_total;
ALTER TABLE job ADD COLUMN file_bw_avg;
ALTER TABLE job ADD COLUMN file_data_vol_total;
UPDATE job SET flops_any_avg = json_extract(footprint, '$.flops_any_avg');
UPDATE job SET mem_bw_avg = json_extract(footprint, '$.mem_bw_avg');
UPDATE job SET mem_used_max = json_extract(footprint, '$.mem_used_max');
UPDATE job SET load_avg = json_extract(footprint, '$.cpu_load_avg');
UPDATE job SET net_bw_avg = json_extract(footprint, '$.net_bw_avg');
UPDATE job SET net_data_vol_total = json_extract(footprint, '$.net_data_vol_total');
UPDATE job SET file_bw_avg = json_extract(footprint, '$.file_bw_avg');
UPDATE job SET file_data_vol_total = json_extract(footprint, '$.file_data_vol_total');
ALTER TABLE job DROP footprint;
-- Do not use reserved keywords anymore
RENAME TABLE hpc_user TO `user`;
ALTER TABLE job RENAME COLUMN hpc_user TO `user`;
ALTER TABLE job RENAME COLUMN cluster_partition TO `partition`;
DROP INDEX IF EXISTS jobs_cluster;
DROP INDEX IF EXISTS jobs_cluster_user;
DROP INDEX IF EXISTS jobs_cluster_project;
DROP INDEX IF EXISTS jobs_cluster_subcluster;
DROP INDEX IF EXISTS jobs_cluster_starttime;
DROP INDEX IF EXISTS jobs_cluster_duration;
DROP INDEX IF EXISTS jobs_cluster_numnodes;
DROP INDEX IF EXISTS jobs_cluster_partition;
DROP INDEX IF EXISTS jobs_cluster_partition_starttime;
DROP INDEX IF EXISTS jobs_cluster_partition_duration;
DROP INDEX IF EXISTS jobs_cluster_partition_numnodes;
DROP INDEX IF EXISTS jobs_cluster_partition_jobstate;
DROP INDEX IF EXISTS jobs_cluster_partition_jobstate_user;
DROP INDEX IF EXISTS jobs_cluster_partition_jobstate_project;
DROP INDEX IF EXISTS jobs_cluster_partition_jobstate_starttime;
DROP INDEX IF EXISTS jobs_cluster_partition_jobstate_duration;
DROP INDEX IF EXISTS jobs_cluster_partition_jobstate_numnodes;
DROP INDEX IF EXISTS jobs_cluster_jobstate;
DROP INDEX IF EXISTS jobs_cluster_jobstate_user;
DROP INDEX IF EXISTS jobs_cluster_jobstate_project;
DROP INDEX IF EXISTS jobs_cluster_jobstate_starttime;
DROP INDEX IF EXISTS jobs_cluster_jobstate_duration;
DROP INDEX IF EXISTS jobs_cluster_jobstate_numnodes;
DROP INDEX IF EXISTS jobs_user;
DROP INDEX IF EXISTS jobs_user_starttime;
DROP INDEX IF EXISTS jobs_user_duration;
DROP INDEX IF EXISTS jobs_user_numnodes;
DROP INDEX IF EXISTS jobs_project;
DROP INDEX IF EXISTS jobs_project_user;
DROP INDEX IF EXISTS jobs_project_starttime;
DROP INDEX IF EXISTS jobs_project_duration;
DROP INDEX IF EXISTS jobs_project_numnodes;
DROP INDEX IF EXISTS jobs_jobstate;
DROP INDEX IF EXISTS jobs_jobstate_user;
DROP INDEX IF EXISTS jobs_jobstate_project;
DROP INDEX IF EXISTS jobs_jobstate_starttime;
DROP INDEX IF EXISTS jobs_jobstate_duration;
DROP INDEX IF EXISTS jobs_jobstate_numnodes;
DROP INDEX IF EXISTS jobs_arrayjobid_starttime;
DROP INDEX IF EXISTS jobs_cluster_arrayjobid_starttime;
DROP INDEX IF EXISTS jobs_starttime;
DROP INDEX IF EXISTS jobs_duration;
DROP INDEX IF EXISTS jobs_numnodes;
DROP INDEX IF EXISTS jobs_duration_starttime;
DROP INDEX IF EXISTS jobs_numnodes_starttime;
DROP INDEX IF EXISTS jobs_numacc_starttime;
DROP INDEX IF EXISTS jobs_energy_starttime;

View File

@@ -1,123 +0,0 @@
DROP INDEX IF EXISTS job_stats ON job;
DROP INDEX IF EXISTS job_by_user ON job;
DROP INDEX IF EXISTS job_by_starttime ON job;
DROP INDEX IF EXISTS job_by_job_id ON job;
DROP INDEX IF EXISTS job_list ON job;
DROP INDEX IF EXISTS job_list_user ON job;
DROP INDEX IF EXISTS job_list_users ON job;
DROP INDEX IF EXISTS job_list_users_start ON job;
ALTER TABLE job ADD COLUMN energy REAL NOT NULL DEFAULT 0.0;
ALTER TABLE job ADD COLUMN energy_footprint JSON;
ALTER TABLE job ADD COLUMN footprint JSON;
ALTER TABLE tag ADD COLUMN tag_scope TEXT NOT NULL DEFAULT 'global';
-- Do not use reserved keywords anymore
RENAME TABLE `user` TO hpc_user;
ALTER TABLE job RENAME COLUMN `user` TO hpc_user;
ALTER TABLE job RENAME COLUMN `partition` TO cluster_partition;
ALTER TABLE job MODIFY COLUMN cluster VARCHAR(50);
ALTER TABLE job MODIFY COLUMN hpc_user VARCHAR(50);
ALTER TABLE job MODIFY COLUMN subcluster VARCHAR(50);
ALTER TABLE job MODIFY COLUMN project VARCHAR(50);
ALTER TABLE job MODIFY COLUMN cluster_partition VARCHAR(50);
ALTER TABLE job MODIFY COLUMN job_state VARCHAR(25);
UPDATE job SET footprint = '{"flops_any_avg": 0.0}';
UPDATE job SET footprint = json_replace(footprint, '$.flops_any_avg', job.flops_any_avg);
UPDATE job SET footprint = json_insert(footprint, '$.mem_bw_avg', job.mem_bw_avg);
UPDATE job SET footprint = json_insert(footprint, '$.mem_used_max', job.mem_used_max);
UPDATE job SET footprint = json_insert(footprint, '$.cpu_load_avg', job.load_avg);
UPDATE job SET footprint = json_insert(footprint, '$.net_bw_avg', job.net_bw_avg) WHERE job.net_bw_avg != 0;
UPDATE job SET footprint = json_insert(footprint, '$.net_data_vol_total', job.net_data_vol_total) WHERE job.net_data_vol_total != 0;
UPDATE job SET footprint = json_insert(footprint, '$.file_bw_avg', job.file_bw_avg) WHERE job.file_bw_avg != 0;
UPDATE job SET footprint = json_insert(footprint, '$.file_data_vol_total', job.file_data_vol_total) WHERE job.file_data_vol_total != 0;
ALTER TABLE job DROP flops_any_avg;
ALTER TABLE job DROP mem_bw_avg;
ALTER TABLE job DROP mem_used_max;
ALTER TABLE job DROP load_avg;
ALTER TABLE job DROP net_bw_avg;
ALTER TABLE job DROP net_data_vol_total;
ALTER TABLE job DROP file_bw_avg;
ALTER TABLE job DROP file_data_vol_total;
-- Indices for: Single filters, combined filters, sorting, sorting with filters
-- Cluster Filter
CREATE INDEX IF NOT EXISTS jobs_cluster ON job (cluster);
CREATE INDEX IF NOT EXISTS jobs_cluster_user ON job (cluster, hpc_user);
CREATE INDEX IF NOT EXISTS jobs_cluster_project ON job (cluster, project);
CREATE INDEX IF NOT EXISTS jobs_cluster_subcluster ON job (cluster, subcluster);
-- Cluster Filter Sorting
CREATE INDEX IF NOT EXISTS jobs_cluster_starttime ON job (cluster, start_time);
CREATE INDEX IF NOT EXISTS jobs_cluster_duration ON job (cluster, duration);
CREATE INDEX IF NOT EXISTS jobs_cluster_numnodes ON job (cluster, num_nodes);
-- Cluster+Partition Filter
CREATE INDEX IF NOT EXISTS jobs_cluster_partition ON job (cluster, cluster_partition);
-- Cluster+Partition Filter Sorting
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_starttime ON job (cluster, cluster_partition, start_time);
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_duration ON job (cluster, cluster_partition, duration);
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_numnodes ON job (cluster, cluster_partition, num_nodes);
-- Cluster+Partition+Jobstate Filter
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_jobstate ON job (cluster, cluster_partition, job_state);
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_jobstate_user ON job (cluster, cluster_partition, job_state, hpc_user);
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_jobstate_project ON job (cluster, cluster_partition, job_state, project);
-- Cluster+Partition+Jobstate Filter Sorting
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_jobstate_starttime ON job (cluster, cluster_partition, job_state, start_time);
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_jobstate_duration ON job (cluster, cluster_partition, job_state, duration);
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_jobstate_numnodes ON job (cluster, cluster_partition, job_state, num_nodes);
-- Cluster+JobState Filter
CREATE INDEX IF NOT EXISTS jobs_cluster_jobstate ON job (cluster, job_state);
CREATE INDEX IF NOT EXISTS jobs_cluster_jobstate_user ON job (cluster, job_state, hpc_user);
CREATE INDEX IF NOT EXISTS jobs_cluster_jobstate_project ON job (cluster, job_state, project);
-- Cluster+JobState Filter Sorting
CREATE INDEX IF NOT EXISTS jobs_cluster_jobstate_starttime ON job (cluster, job_state, start_time);
CREATE INDEX IF NOT EXISTS jobs_cluster_jobstate_duration ON job (cluster, job_state, duration);
CREATE INDEX IF NOT EXISTS jobs_cluster_jobstate_numnodes ON job (cluster, job_state, num_nodes);
-- User Filter
CREATE INDEX IF NOT EXISTS jobs_user ON job (hpc_user);
-- User Filter Sorting
CREATE INDEX IF NOT EXISTS jobs_user_starttime ON job (hpc_user, start_time);
CREATE INDEX IF NOT EXISTS jobs_user_duration ON job (hpc_user, duration);
CREATE INDEX IF NOT EXISTS jobs_user_numnodes ON job (hpc_user, num_nodes);
-- Project Filter
CREATE INDEX IF NOT EXISTS jobs_project ON job (project);
CREATE INDEX IF NOT EXISTS jobs_project_user ON job (project, hpc_user);
-- Project Filter Sorting
CREATE INDEX IF NOT EXISTS jobs_project_starttime ON job (project, start_time);
CREATE INDEX IF NOT EXISTS jobs_project_duration ON job (project, duration);
CREATE INDEX IF NOT EXISTS jobs_project_numnodes ON job (project, num_nodes);
-- JobState Filter
CREATE INDEX IF NOT EXISTS jobs_jobstate ON job (job_state);
CREATE INDEX IF NOT EXISTS jobs_jobstate_user ON job (job_state, hpc_user);
CREATE INDEX IF NOT EXISTS jobs_jobstate_project ON job (job_state, project);
CREATE INDEX IF NOT EXISTS jobs_jobstate_cluster ON job (job_state, cluster);
-- JobState Filter Sorting
CREATE INDEX IF NOT EXISTS jobs_jobstate_starttime ON job (job_state, start_time);
CREATE INDEX IF NOT EXISTS jobs_jobstate_duration ON job (job_state, duration);
CREATE INDEX IF NOT EXISTS jobs_jobstate_numnodes ON job (job_state, num_nodes);
-- ArrayJob Filter
CREATE INDEX IF NOT EXISTS jobs_arrayjobid_starttime ON job (array_job_id, start_time);
CREATE INDEX IF NOT EXISTS jobs_cluster_arrayjobid_starttime ON job (cluster, array_job_id, start_time);
-- Sorting without active filters
CREATE INDEX IF NOT EXISTS jobs_starttime ON job (start_time);
CREATE INDEX IF NOT EXISTS jobs_duration ON job (duration);
CREATE INDEX IF NOT EXISTS jobs_numnodes ON job (num_nodes);
-- Single filters with default starttime sorting
CREATE INDEX IF NOT EXISTS jobs_duration_starttime ON job (duration, start_time);
CREATE INDEX IF NOT EXISTS jobs_numnodes_starttime ON job (num_nodes, start_time);
CREATE INDEX IF NOT EXISTS jobs_numacc_starttime ON job (num_acc, start_time);
CREATE INDEX IF NOT EXISTS jobs_energy_starttime ON job (energy, start_time);
-- Optimize DB index usage

View File

@@ -118,104 +118,116 @@ DROP TABLE lookup_exclusive;
DROP TABLE job; -- Deletes All Existing 'job' Indices; Recreate after Renaming
ALTER TABLE job_new RENAME TO job;
-- Recreate Indices from 08_add-footprint, include new submit_time indices
-- Recreate Indices from 08_add-footprint; include new 'shared' column
-- Cluster Filter
CREATE INDEX IF NOT EXISTS jobs_cluster ON job (cluster);
CREATE INDEX IF NOT EXISTS jobs_cluster_user ON job (cluster, hpc_user);
CREATE INDEX IF NOT EXISTS jobs_cluster_project ON job (cluster, project);
CREATE INDEX IF NOT EXISTS jobs_cluster_subcluster ON job (cluster, subcluster);
-- Cluster Filter Sorting
CREATE INDEX IF NOT EXISTS jobs_cluster_starttime ON job (cluster, start_time);
CREATE INDEX IF NOT EXISTS jobs_cluster_submittime ON job (cluster, submit_time);
CREATE INDEX IF NOT EXISTS jobs_cluster_duration ON job (cluster, duration);
CREATE INDEX IF NOT EXISTS jobs_cluster_numnodes ON job (cluster, num_nodes);
CREATE INDEX IF NOT EXISTS jobs_cluster_numhwthreads ON job (cluster, num_hwthreads);
CREATE INDEX IF NOT EXISTS jobs_cluster_numacc ON job (cluster, num_acc);
CREATE INDEX IF NOT EXISTS jobs_cluster_energy ON job (cluster, energy);
-- Cluster Time Filter Sorting
CREATE INDEX IF NOT EXISTS jobs_cluster_duration_starttime ON job (cluster, duration, start_time);
CREATE INDEX IF NOT EXISTS jobs_cluster_starttime_duration ON job (cluster, start_time, duration);
-- Cluster+Partition Filter
CREATE INDEX IF NOT EXISTS jobs_cluster_partition ON job (cluster, cluster_partition);
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_user ON job (cluster, cluster_partition, hpc_user);
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_project ON job (cluster, cluster_partition, project);
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_jobstate ON job (cluster, cluster_partition, job_state);
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_shared ON job (cluster, cluster_partition, shared);
-- Cluster+Partition Filter Sorting
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_starttime ON job (cluster, cluster_partition, start_time);
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_submittime ON job (cluster, cluster_partition, submit_time);
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_duration ON job (cluster, cluster_partition, duration);
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_numnodes ON job (cluster, cluster_partition, num_nodes);
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_numhwthreads ON job (cluster, cluster_partition, num_hwthreads);
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_numacc ON job (cluster, cluster_partition, num_acc);
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_energy ON job (cluster, cluster_partition, energy);
-- Cluster+Partition+Jobstate Filter
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_jobstate ON job (cluster, cluster_partition, job_state);
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_jobstate_user ON job (cluster, cluster_partition, job_state, hpc_user);
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_jobstate_project ON job (cluster, cluster_partition, job_state, project);
-- Cluster+Partition+Jobstate Filter Sorting
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_jobstate_starttime ON job (cluster, cluster_partition, job_state, start_time);
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_jobstate_submittime ON job (cluster, cluster_partition, job_state, submit_time);
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_jobstate_duration ON job (cluster, cluster_partition, job_state, duration);
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_jobstate_numnodes ON job (cluster, cluster_partition, job_state, num_nodes);
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_jobstate_numhwthreads ON job (cluster, cluster_partition, job_state, num_hwthreads);
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_jobstate_numacc ON job (cluster, cluster_partition, job_state, num_acc);
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_jobstate_energy ON job (cluster, cluster_partition, job_state, energy);
-- Cluster+Partition Time Filter Sorting
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_duration_starttime ON job (cluster, cluster_partition, duration, start_time);
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_starttime_duration ON job (cluster, cluster_partition, start_time, duration);
-- Cluster+JobState Filter
CREATE INDEX IF NOT EXISTS jobs_cluster_jobstate ON job (cluster, job_state);
CREATE INDEX IF NOT EXISTS jobs_cluster_jobstate_user ON job (cluster, job_state, hpc_user);
CREATE INDEX IF NOT EXISTS jobs_cluster_jobstate_project ON job (cluster, job_state, project);
-- Cluster+JobState Filter Sorting
CREATE INDEX IF NOT EXISTS jobs_cluster_jobstate_starttime ON job (cluster, job_state, start_time);
CREATE INDEX IF NOT EXISTS jobs_cluster_jobstate_submittime ON job (cluster, job_state, submit_time);
CREATE INDEX IF NOT EXISTS jobs_cluster_jobstate_duration ON job (cluster, job_state, duration);
CREATE INDEX IF NOT EXISTS jobs_cluster_jobstate_numnodes ON job (cluster, job_state, num_nodes);
CREATE INDEX IF NOT EXISTS jobs_cluster_jobstate_numhwthreads ON job (cluster, job_state, num_hwthreads);
CREATE INDEX IF NOT EXISTS jobs_cluster_jobstate_numacc ON job (cluster, job_state, num_acc);
CREATE INDEX IF NOT EXISTS jobs_cluster_jobstate_energy ON job (cluster, job_state, energy);
-- Cluster+JobState Time Filter Sorting
CREATE INDEX IF NOT EXISTS jobs_cluster_jobstate_starttime_duration ON job (cluster, job_state, start_time, duration);
CREATE INDEX IF NOT EXISTS jobs_cluster_jobstate_duration_starttime ON job (cluster, job_state, duration, start_time);
-- Cluster+Shared Filter
CREATE INDEX IF NOT EXISTS jobs_cluster_shared_user ON job (cluster, shared, hpc_user);
CREATE INDEX IF NOT EXISTS jobs_cluster_shared_project ON job (cluster, shared, project);
-- Cluster+Shared Filter Sorting
CREATE INDEX IF NOT EXISTS jobs_cluster_shared_numnodes ON job (cluster, shared, num_nodes);
CREATE INDEX IF NOT EXISTS jobs_cluster_shared_numhwthreads ON job (cluster, shared, num_hwthreads);
CREATE INDEX IF NOT EXISTS jobs_cluster_shared_numacc ON job (cluster, shared, num_acc);
CREATE INDEX IF NOT EXISTS jobs_cluster_shared_energy ON job (cluster, shared, energy);
-- Cluster+Shared Time Filter Sorting
CREATE INDEX IF NOT EXISTS jobs_cluster_shared_starttime_duration ON job (cluster, shared, start_time, duration);
CREATE INDEX IF NOT EXISTS jobs_cluster_shared_duration_starttime ON job (cluster, shared, duration, start_time);
-- User Filter
CREATE INDEX IF NOT EXISTS jobs_user ON job (hpc_user);
-- User Filter Sorting
CREATE INDEX IF NOT EXISTS jobs_user_starttime ON job (hpc_user, start_time);
CREATE INDEX IF NOT EXISTS jobs_user_duration ON job (hpc_user, duration);
CREATE INDEX IF NOT EXISTS jobs_user_numnodes ON job (hpc_user, num_nodes);
CREATE INDEX IF NOT EXISTS jobs_user_numhwthreads ON job (hpc_user, num_hwthreads);
CREATE INDEX IF NOT EXISTS jobs_user_numacc ON job (hpc_user, num_acc);
CREATE INDEX IF NOT EXISTS jobs_user_energy ON job (hpc_user, energy);
-- User Time Filter Sorting
CREATE INDEX IF NOT EXISTS jobs_user_starttime_duration ON job (hpc_user, start_time, duration);
CREATE INDEX IF NOT EXISTS jobs_user_duration_starttime ON job (hpc_user, duration, start_time);
-- Project Filter
CREATE INDEX IF NOT EXISTS jobs_project ON job (project);
CREATE INDEX IF NOT EXISTS jobs_project_user ON job (project, hpc_user);
-- Project Filter Sorting
CREATE INDEX IF NOT EXISTS jobs_project_starttime ON job (project, start_time);
CREATE INDEX IF NOT EXISTS jobs_project_duration ON job (project, duration);
CREATE INDEX IF NOT EXISTS jobs_project_numnodes ON job (project, num_nodes);
CREATE INDEX IF NOT EXISTS jobs_project_numhwthreads ON job (project, num_hwthreads);
CREATE INDEX IF NOT EXISTS jobs_project_numacc ON job (project, num_acc);
CREATE INDEX IF NOT EXISTS jobs_project_energy ON job (project, energy);
-- Project Time Filter Sorting
CREATE INDEX IF NOT EXISTS jobs_project_starttime_duration ON job (project, start_time, duration);
CREATE INDEX IF NOT EXISTS jobs_project_duration_starttime ON job (project, duration, start_time);
-- JobState Filter
CREATE INDEX IF NOT EXISTS jobs_jobstate ON job (job_state);
CREATE INDEX IF NOT EXISTS jobs_jobstate_user ON job (job_state, hpc_user);
CREATE INDEX IF NOT EXISTS jobs_jobstate_project ON job (job_state, project);
CREATE INDEX IF NOT EXISTS jobs_jobstate_cluster ON job (job_state, cluster);
-- JobState Filter Sorting
CREATE INDEX IF NOT EXISTS jobs_jobstate_starttime ON job (job_state, start_time);
CREATE INDEX IF NOT EXISTS jobs_jobstate_duration ON job (job_state, duration);
CREATE INDEX IF NOT EXISTS jobs_jobstate_numnodes ON job (job_state, num_nodes);
CREATE INDEX IF NOT EXISTS jobs_jobstate_numhwthreads ON job (job_state, num_hwthreads);
CREATE INDEX IF NOT EXISTS jobs_jobstate_numacc ON job (job_state, num_acc);
CREATE INDEX IF NOT EXISTS jobs_jobstate_energy ON job (job_state, energy);
-- JobState Time Filter Sorting
CREATE INDEX IF NOT EXISTS jobs_jobstate_starttime_duration ON job (job_state, start_time, duration);
CREATE INDEX IF NOT EXISTS jobs_jobstate_duration_starttime ON job (job_state, duration, start_time);
-- Shared Filter
CREATE INDEX IF NOT EXISTS jobs_shared_user ON job (shared, hpc_user);
CREATE INDEX IF NOT EXISTS jobs_shared_project ON job (shared, project);
-- Shared Filter Sorting
CREATE INDEX IF NOT EXISTS jobs_shared_numnodes ON job (shared, num_nodes);
CREATE INDEX IF NOT EXISTS jobs_shared_numhwthreads ON job (shared, num_hwthreads);
CREATE INDEX IF NOT EXISTS jobs_shared_numacc ON job (shared, num_acc);
CREATE INDEX IF NOT EXISTS jobs_shared_energy ON job (shared, energy);
-- Shared Time Filter Sorting
CREATE INDEX IF NOT EXISTS jobs_shared_starttime_duration ON job (shared, start_time, duration);
CREATE INDEX IF NOT EXISTS jobs_shared_duration_starttime ON job (shared, duration, start_time);
-- ArrayJob Filter
CREATE INDEX IF NOT EXISTS jobs_arrayjobid_starttime ON job (array_job_id, start_time);
CREATE INDEX IF NOT EXISTS jobs_cluster_arrayjobid_starttime ON job (cluster, array_job_id, start_time);
-- Sorting without active filters
CREATE INDEX IF NOT EXISTS jobs_starttime ON job (start_time);
CREATE INDEX IF NOT EXISTS jobs_duration ON job (duration);
CREATE INDEX IF NOT EXISTS jobs_numnodes ON job (num_nodes);
CREATE INDEX IF NOT EXISTS jobs_numhwthreads ON job (num_hwthreads);
CREATE INDEX IF NOT EXISTS jobs_numacc ON job (num_acc);
CREATE INDEX IF NOT EXISTS jobs_energy ON job (energy);
-- Single filters with default starttime sorting
CREATE INDEX IF NOT EXISTS jobs_duration_starttime ON job (duration, start_time);
CREATE INDEX IF NOT EXISTS jobs_numnodes_starttime ON job (num_nodes, start_time);
@@ -223,6 +235,22 @@ CREATE INDEX IF NOT EXISTS jobs_numhwthreads_starttime ON job (num_hwthreads, st
CREATE INDEX IF NOT EXISTS jobs_numacc_starttime ON job (num_acc, start_time);
CREATE INDEX IF NOT EXISTS jobs_energy_starttime ON job (energy, start_time);
-- Single filters with duration sorting
CREATE INDEX IF NOT EXISTS jobs_starttime_duration ON job (start_time, duration);
CREATE INDEX IF NOT EXISTS jobs_numnodes_duration ON job (num_nodes, duration);
CREATE INDEX IF NOT EXISTS jobs_numhwthreads_duration ON job (num_hwthreads, duration);
CREATE INDEX IF NOT EXISTS jobs_numacc_duration ON job (num_acc, duration);
CREATE INDEX IF NOT EXISTS jobs_energy_duration ON job (energy, duration);
-- Backup Indices For High Variety Columns
CREATE INDEX IF NOT EXISTS jobs_starttime ON job (start_time);
CREATE INDEX IF NOT EXISTS jobs_duration ON job (duration);
-- Notes:
-- Cluster+Partition+Jobstate Filter: Tested -> Full Array Of Combinations Not Required
-- Cluster+JobState+Shared Filter: Tested -> No further timing improvement
-- JobState+Shared Filter: Tested -> No further timing improvement
-- Optimize DB index usage
PRAGMA optimize;

View File

@@ -23,6 +23,7 @@ CREATE TABLE "node_state" (
CHECK (health_state IN (
'full', 'partial', 'failed'
)),
health_metrics TEXT, -- JSON array of strings
node_id INTEGER,
FOREIGN KEY (node_id) REFERENCES node (id)
);
@@ -33,12 +34,11 @@ CREATE INDEX IF NOT EXISTS nodes_cluster_subcluster ON node (cluster, subcluster
-- Add NEW Indices For New Node_State Table Fields
CREATE INDEX IF NOT EXISTS nodestates_timestamp ON node_state (time_stamp);
CREATE INDEX IF NOT EXISTS nodestates_state ON node_state (node_state);
CREATE INDEX IF NOT EXISTS nodestates_health ON node_state (health_state);
CREATE INDEX IF NOT EXISTS nodestates_state_timestamp ON node_state (node_state, time_stamp);
CREATE INDEX IF NOT EXISTS nodestates_health_timestamp ON node_state (health_state, time_stamp);
CREATE INDEX IF NOT EXISTS nodestates_nodeid_state ON node_state (node_id, node_state);
CREATE INDEX IF NOT EXISTS nodestates_nodeid_health ON node_state (node_id, health_state);
CREATE INDEX IF NOT EXISTS nodestates_nodeid_timestamp ON node_state (node_id, time_stamp DESC);
-- Add NEW Indices For Increased Amounts of Tags
CREATE INDEX IF NOT EXISTS tags_jobid ON jobtag (job_id);

View File

@@ -10,14 +10,17 @@ import (
"database/sql"
"encoding/json"
"fmt"
"slices"
"sort"
"strings"
"sync"
"time"
"github.com/ClusterCockpit/cc-backend/internal/graph/model"
"github.com/ClusterCockpit/cc-backend/pkg/archive"
cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
"github.com/ClusterCockpit/cc-lib/lrucache"
"github.com/ClusterCockpit/cc-lib/schema"
cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger"
"github.com/ClusterCockpit/cc-lib/v2/lrucache"
"github.com/ClusterCockpit/cc-lib/v2/schema"
sq "github.com/Masterminds/squirrel"
"github.com/jmoiron/sqlx"
)
@@ -49,6 +52,38 @@ func GetNodeRepository() *NodeRepository {
return nodeRepoInstance
}
// latestStateCondition builds the WHERE fragment that keeps, for every node,
// only the node_state row picked by a correlated subquery ordered by
// time_stamp descending (i.e. the most recent state of that node).
// The surrounding query must reference both the node and node_state tables,
// because the fragment compares node_state.id against a lookup on node.id.
func latestStateCondition() sq.Sqlizer {
	const latestPerNode = "node_state.id = (SELECT ns2.id FROM node_state ns2 WHERE ns2.node_id = node.id ORDER BY ns2.time_stamp DESC LIMIT 1)"

	return sq.Expr(latestPerNode)
}
// applyNodeFilters adds the conditions described by filters to query and
// returns the extended builder.
//
// All columns are referenced with their table prefix (node.* / node_state.*),
// so query must join the node and node_state tables; it is meant to be used
// together with latestStateCondition so that state filters only match the
// current state of a node.
//
// For every filter, each field that is set contributes one condition:
// Cluster, SubCluster and Hostname go through buildStringCondition, while
// SchedulerState and HealthState are compared for equality. Nil entries in
// filters are skipped.
func applyNodeFilters(query sq.SelectBuilder, filters []*model.NodeFilter) sq.SelectBuilder {
	for _, f := range filters {
		if f == nil {
			// Skip empty entries instead of dereferencing a nil pointer.
			continue
		}
		if f.Cluster != nil {
			query = buildStringCondition("node.cluster", f.Cluster, query)
		}
		if f.SubCluster != nil {
			query = buildStringCondition("node.subcluster", f.SubCluster, query)
		}
		if f.Hostname != nil {
			query = buildStringCondition("node.hostname", f.Hostname, query)
		}
		if f.SchedulerState != nil {
			query = query.Where("node_state.node_state = ?", f.SchedulerState)
		}
		if f.HealthState != nil {
			query = query.Where("node_state.health_state = ?", f.HealthState)
		}
	}

	return query
}
func (r *NodeRepository) FetchMetadata(hostname string, cluster string) (map[string]string, error) {
start := time.Now()
@@ -79,17 +114,16 @@ func (r *NodeRepository) FetchMetadata(hostname string, cluster string) (map[str
func (r *NodeRepository) GetNode(hostname string, cluster string, withMeta bool) (*schema.Node, error) {
node := &schema.Node{}
var timestamp int
if err := sq.Select("node.hostname", "node.cluster", "node.subcluster", "node_state.node_state",
"node_state.health_state", "MAX(node_state.time_stamp) as time").
From("node_state").
Join("node ON node_state.node_id = node.id").
if err := sq.Select("node.hostname", "node.cluster", "node.subcluster",
"node_state.node_state", "node_state.health_state").
From("node").
Join("node_state ON node_state.node_id = node.id").
Where(latestStateCondition()).
Where("node.hostname = ?", hostname).
Where("node.cluster = ?", cluster).
GroupBy("node_state.node_id").
RunWith(r.DB).
QueryRow().Scan(&node.Hostname, &node.Cluster, &node.SubCluster, &node.NodeState, &node.HealthState, &timestamp); err != nil {
cclog.Warnf("Error while querying node '%s' at time '%d' from database: %v", hostname, timestamp, err)
QueryRow().Scan(&node.Hostname, &node.Cluster, &node.SubCluster, &node.NodeState, &node.HealthState); err != nil {
cclog.Warnf("Error while querying node '%s' from database: %v", hostname, err)
return nil, err
}
@@ -106,31 +140,28 @@ func (r *NodeRepository) GetNode(hostname string, cluster string, withMeta bool)
return node, nil
}
func (r *NodeRepository) GetNodeById(id int64, withMeta bool) (*schema.Node, error) {
func (r *NodeRepository) GetNodeByID(id int64, withMeta bool) (*schema.Node, error) {
node := &schema.Node{}
var timestamp int
if err := sq.Select("node.hostname", "node.cluster", "node.subcluster", "node_state.node_state",
"node_state.health_state", "MAX(node_state.time_stamp) as time").
From("node_state").
Join("node ON node_state.node_id = node.id").
if err := sq.Select("node.hostname", "node.cluster", "node.subcluster",
"node_state.node_state", "node_state.health_state").
From("node").
Join("node_state ON node_state.node_id = node.id").
Where(latestStateCondition()).
Where("node.id = ?", id).
GroupBy("node_state.node_id").
RunWith(r.DB).
QueryRow().Scan(&node.Hostname, &node.Cluster, &node.SubCluster, &node.NodeState, &node.HealthState, &timestamp); err != nil {
cclog.Warnf("Error while querying node ID '%d' at time '%d' from database: %v", id, timestamp, err)
QueryRow().Scan(&node.Hostname, &node.Cluster, &node.SubCluster, &node.NodeState, &node.HealthState); err != nil {
cclog.Warnf("Error while querying node ID '%d' from database: %v", id, err)
return nil, err
}
// NEEDS METADATA BY ID
// if withMeta {
// var err error
// var meta map[string]string
// if meta, err = r.FetchMetadata(hostname, cluster); err != nil {
// cclog.Warnf("Error while fetching metadata for node '%s'", hostname)
// return nil, err
// }
// node.MetaData = meta
// }
if withMeta {
meta, metaErr := r.FetchMetadata(node.Hostname, node.Cluster)
if metaErr != nil {
cclog.Warnf("Error while fetching metadata for node ID '%d': %v", id, metaErr)
return nil, metaErr
}
node.MetaData = meta
}
return node, nil
}
@@ -166,9 +197,10 @@ func (r *NodeRepository) AddNode(node *schema.NodeDB) (int64, error) {
}
const NamedNodeStateInsert string = `
INSERT INTO node_state (time_stamp, node_state, health_state, cpus_allocated,
memory_allocated, gpus_allocated, jobs_running, node_id)
VALUES (:time_stamp, :node_state, :health_state, :cpus_allocated, :memory_allocated, :gpus_allocated, :jobs_running, :node_id);`
INSERT INTO node_state (time_stamp, node_state, health_state, health_metrics,
cpus_allocated, memory_allocated, gpus_allocated, jobs_running, node_id)
VALUES (:time_stamp, :node_state, :health_state, :health_metrics,
:cpus_allocated, :memory_allocated, :gpus_allocated, :jobs_running, :node_id);`
// TODO: Add real Monitoring Health State
@@ -194,8 +226,7 @@ func (r *NodeRepository) UpdateNodeState(hostname string, cluster string, nodeSt
return err
}
cclog.Infof("Added node '%s' to database", hostname)
return nil
cclog.Debugf("Added node '%s' to database", hostname)
} else {
cclog.Warnf("Error while querying node '%v' from database", id)
return err
@@ -209,7 +240,7 @@ func (r *NodeRepository) UpdateNodeState(hostname string, cluster string, nodeSt
cclog.Errorf("Error while adding node state for '%v' to database", hostname)
return err
}
cclog.Infof("Updated node state for '%s' in database", hostname)
cclog.Debugf("Updated node state for '%s' in database", hostname)
return nil
}
@@ -222,6 +253,77 @@ func (r *NodeRepository) UpdateNodeState(hostname string, cluster string, nodeSt
// return nil
// }
// NodeStateWithNode combines a node state row with denormalized node info.
// It carries the node_state columns together with the hostname, cluster and
// subcluster of the owning node, so a single value describes one state record
// without a further lookup in the node table.
type NodeStateWithNode struct {
// Columns of the node_state table.
ID int64 `db:"id"`
TimeStamp int64 `db:"time_stamp"`
NodeState string `db:"node_state"`
HealthState string `db:"health_state"`
// Raw text of the health_metrics column; FindNodeStatesBefore leaves it
// empty when the column is NULL.
HealthMetrics string `db:"health_metrics"`
CpusAllocated int `db:"cpus_allocated"`
MemoryAllocated int64 `db:"memory_allocated"`
GpusAllocated int `db:"gpus_allocated"`
JobsRunning int `db:"jobs_running"`
// Columns of the joined node table.
Hostname string `db:"hostname"`
Cluster string `db:"cluster"`
SubCluster string `db:"subcluster"`
}
// FindNodeStatesBefore returns the node_state rows with time_stamp < cutoff,
// joined with node info for denormalized archiving.
//
// Rows that carry the latest time_stamp of their node are never returned, even
// when they are older than cutoff; this mirrors the rows that
// DeleteNodeStatesBefore preserves. The result is ordered by cluster,
// subcluster, hostname and time_stamp (all ascending). A NULL health_metrics
// column is reported as an empty string.
//
// The returned slice is nil when no row matches. Any query, scan or iteration
// error is returned as-is together with a nil slice.
func (r *NodeRepository) FindNodeStatesBefore(cutoff int64) ([]NodeStateWithNode, error) {
	rows, err := sq.Select(
		"node_state.id", "node_state.time_stamp", "node_state.node_state",
		"node_state.health_state", "node_state.health_metrics",
		"node_state.cpus_allocated", "node_state.memory_allocated",
		"node_state.gpus_allocated", "node_state.jobs_running",
		"node.hostname", "node.cluster", "node.subcluster",
	).
		From("node_state").
		Join("node ON node_state.node_id = node.id").
		Where(sq.Lt{"node_state.time_stamp": cutoff}).
		Where("node_state.id NOT IN (SELECT ns2.id FROM node_state ns2 WHERE ns2.time_stamp = (SELECT MAX(ns3.time_stamp) FROM node_state ns3 WHERE ns3.node_id = ns2.node_id))").
		OrderBy("node.cluster ASC", "node.subcluster ASC", "node.hostname ASC", "node_state.time_stamp ASC").
		RunWith(r.DB).Query()
	if err != nil {
		return nil, err
	}
	defer rows.Close()

	var result []NodeStateWithNode
	for rows.Next() {
		var ns NodeStateWithNode
		// health_metrics is nullable, so scan it through NullString.
		var healthMetrics sql.NullString
		if err := rows.Scan(&ns.ID, &ns.TimeStamp, &ns.NodeState,
			&ns.HealthState, &healthMetrics,
			&ns.CpusAllocated, &ns.MemoryAllocated,
			&ns.GpusAllocated, &ns.JobsRunning,
			&ns.Hostname, &ns.Cluster, &ns.SubCluster); err != nil {
			return nil, err
		}
		ns.HealthMetrics = healthMetrics.String
		result = append(result, ns)
	}
	// rows.Next also returns false when iteration fails; without this check a
	// truncated result would be returned as if it were complete.
	if err := rows.Err(); err != nil {
		return nil, err
	}

	return result, nil
}
// DeleteNodeStatesBefore deletes the node_state rows whose time_stamp lies
// before cutoff, except rows holding the latest time_stamp of their node_id,
// which are always kept. It returns the number of rows removed as reported by
// the database driver.
func (r *NodeRepository) DeleteNodeStatesBefore(cutoff int64) (int64, error) {
	const stmt = `DELETE FROM node_state WHERE time_stamp < ?
AND id NOT IN (
SELECT id FROM node_state ns2
WHERE ns2.time_stamp = (SELECT MAX(ns3.time_stamp) FROM node_state ns3 WHERE ns3.node_id = ns2.node_id)
)`

	outcome, execErr := r.DB.Exec(stmt, cutoff)
	if execErr != nil {
		return 0, execErr
	}

	return outcome.RowsAffected()
}
func (r *NodeRepository) DeleteNode(id int64) error {
_, err := r.DB.Exec(`DELETE FROM node WHERE node.id = ?`, id)
if err != nil {
@@ -241,38 +343,17 @@ func (r *NodeRepository) QueryNodes(
order *model.OrderByInput, // Currently unused!
) ([]*schema.Node, error) {
query, qerr := AccessCheck(ctx,
sq.Select("hostname", "cluster", "subcluster", "node_state", "health_state", "MAX(time_stamp) as time").
sq.Select("node.hostname", "node.cluster", "node.subcluster",
"node_state.node_state", "node_state.health_state").
From("node").
Join("node_state ON node_state.node_id = node.id"))
Join("node_state ON node_state.node_id = node.id").
Where(latestStateCondition()))
if qerr != nil {
return nil, qerr
}
for _, f := range filters {
if f.Cluster != nil {
query = buildStringCondition("cluster", f.Cluster, query)
}
if f.Subcluster != nil {
query = buildStringCondition("subcluster", f.Subcluster, query)
}
if f.Hostname != nil {
query = buildStringCondition("hostname", f.Hostname, query)
}
if f.SchedulerState != nil {
query = query.Where("node_state = ?", f.SchedulerState)
// Requires Additional time_stamp Filter: Else the last (past!) time_stamp with queried state will be returned
now := time.Now().Unix()
query = query.Where(sq.Gt{"time_stamp": (now - 60)})
}
if f.HealthState != nil {
query = query.Where("health_state = ?", f.HealthState)
// Requires Additional time_stamp Filter: Else the last (past!) time_stamp with queried state will be returned
now := time.Now().Unix()
query = query.Where(sq.Gt{"time_stamp": (now - 60)})
}
}
query = query.GroupBy("node_id").OrderBy("hostname ASC")
query = applyNodeFilters(query, filters)
query = query.OrderBy("node.hostname ASC")
if page != nil && page.ItemsPerPage != -1 {
limit := uint64(page.ItemsPerPage)
@@ -290,11 +371,10 @@ func (r *NodeRepository) QueryNodes(
nodes := make([]*schema.Node, 0)
for rows.Next() {
node := schema.Node{}
var timestamp int
if err := rows.Scan(&node.Hostname, &node.Cluster, &node.SubCluster,
&node.NodeState, &node.HealthState, &timestamp); err != nil {
&node.NodeState, &node.HealthState); err != nil {
rows.Close()
cclog.Warnf("Error while scanning rows (QueryNodes) at time '%d'", timestamp)
cclog.Warn("Error while scanning rows (QueryNodes)")
return nil, err
}
nodes = append(nodes, &node)
@@ -386,73 +466,115 @@ func (r *NodeRepository) QueryNodesWithMeta(
return nodes, nil
}
// CountNodes returns the total matched nodes based on a node filter. It always operates
// on the last state (largest timestamp).
func (r *NodeRepository) CountNodes(
// QueryNodesWithMeta returns a list of nodes based on a node filter. It always operates
// on the last state (largest timestamp). It includes both (!) optional JSON column data
func (r *NodeRepository) QueryNodesWithMeta(
ctx context.Context,
filters []*model.NodeFilter,
) (int, error) {
page *model.PageRequest,
order *model.OrderByInput, // Currently unused!
) ([]*schema.Node, error) {
query, qerr := AccessCheck(ctx,
sq.Select("time_stamp", "count(*) as countRes").
sq.Select("node.hostname", "node.cluster", "node.subcluster",
"node_state.node_state", "node_state.health_state",
"node.meta_data", "node_state.health_metrics").
From("node").
Join("node_state ON node_state.node_id = node.id"))
Join("node_state ON node_state.node_id = node.id").
Where(latestStateCondition()))
if qerr != nil {
return 0, qerr
return nil, qerr
}
for _, f := range filters {
if f.Cluster != nil {
query = buildStringCondition("cluster", f.Cluster, query)
}
if f.Subcluster != nil {
query = buildStringCondition("subcluster", f.Subcluster, query)
}
if f.Hostname != nil {
query = buildStringCondition("hostname", f.Hostname, query)
}
if f.SchedulerState != nil {
query = query.Where("node_state = ?", f.SchedulerState)
// Requires Additional time_stamp Filter: Else the last (past!) time_stamp with queried state will be returned
now := time.Now().Unix()
query = query.Where(sq.Gt{"time_stamp": (now - 60)})
}
if f.HealthState != nil {
query = query.Where("health_state = ?", f.HealthState)
// Requires Additional time_stamp Filter: Else the last (past!) time_stamp with queried state will be returned
now := time.Now().Unix()
query = query.Where(sq.Gt{"time_stamp": (now - 60)})
}
}
query = applyNodeFilters(query, filters)
query = query.OrderBy("node.hostname ASC")
query = query.GroupBy("time_stamp").OrderBy("time_stamp DESC").Limit(1)
if page != nil && page.ItemsPerPage != -1 {
limit := uint64(page.ItemsPerPage)
query = query.Offset((uint64(page.Page) - 1) * limit).Limit(limit)
}
rows, err := query.RunWith(r.stmtCache).Query()
if err != nil {
queryString, queryVars, _ := query.ToSql()
cclog.Errorf("Error while running query '%s' %v: %v", queryString, queryVars, err)
return nil, err
}
nodes := make([]*schema.Node, 0)
for rows.Next() {
node := schema.Node{}
RawMetaData := make([]byte, 0)
RawMetricHealth := make([]byte, 0)
if err := rows.Scan(&node.Hostname, &node.Cluster, &node.SubCluster,
&node.NodeState, &node.HealthState, &RawMetaData, &RawMetricHealth); err != nil {
rows.Close()
cclog.Warn("Error while scanning rows (QueryNodes)")
return nil, err
}
if len(RawMetaData) == 0 {
node.MetaData = nil
} else {
metaData := make(map[string]string)
if err := json.Unmarshal(RawMetaData, &metaData); err != nil {
cclog.Warn("Error while unmarshaling raw metadata json")
return nil, err
}
node.MetaData = metaData
}
if len(RawMetricHealth) == 0 {
node.HealthData = nil
} else {
healthData := make(map[string][]string)
if err := json.Unmarshal(RawMetricHealth, &healthData); err != nil {
cclog.Warn("Error while unmarshaling raw healthdata json")
return nil, err
}
node.HealthData = healthData
}
nodes = append(nodes, &node)
}
return nodes, nil
}
// CountNodes returns the total matched nodes based on a node filter. It always operates
// on the last state (largest timestamp) per node.
func (r *NodeRepository) CountNodes(
ctx context.Context,
filters []*model.NodeFilter,
) (int, error) {
query, qerr := AccessCheck(ctx,
sq.Select("COUNT(*)").
From("node").
Join("node_state ON node_state.node_id = node.id").
Where(latestStateCondition()))
if qerr != nil {
return 0, qerr
}
query = applyNodeFilters(query, filters)
var count int
if err := query.RunWith(r.stmtCache).QueryRow().Scan(&count); err != nil {
queryString, queryVars, _ := query.ToSql()
cclog.Errorf("Error while running query '%s' %v: %v", queryString, queryVars, err)
return 0, err
}
var totalNodes int
for rows.Next() {
var timestamp int
if err := rows.Scan(&timestamp, &totalNodes); err != nil {
rows.Close()
cclog.Warnf("Error while scanning rows (CountNodes) at time '%d'", timestamp)
return 0, err
}
}
return totalNodes, nil
return count, nil
}
func (r *NodeRepository) ListNodes(cluster string) ([]*schema.Node, error) {
q := sq.Select("node.hostname", "node.cluster", "node.subcluster", "node_state.node_state",
"node_state.health_state", "MAX(node_state.time_stamp) as time").
q := sq.Select("node.hostname", "node.cluster", "node.subcluster",
"node_state.node_state", "node_state.health_state").
From("node").
Join("node_state ON node_state.node_id = node.id").
Where(latestStateCondition()).
Where("node.cluster = ?", cluster).
GroupBy("node_state.node_id").
OrderBy("node.hostname ASC")
rows, err := q.RunWith(r.DB).Query()
@@ -464,10 +586,9 @@ func (r *NodeRepository) ListNodes(cluster string) ([]*schema.Node, error) {
defer rows.Close()
for rows.Next() {
node := &schema.Node{}
var timestamp int
if err := rows.Scan(&node.Hostname, &node.Cluster,
&node.SubCluster, &node.NodeState, &node.HealthState, &timestamp); err != nil {
cclog.Warnf("Error while scanning node list (ListNodes) at time '%d'", timestamp)
&node.SubCluster, &node.NodeState, &node.HealthState); err != nil {
cclog.Warn("Error while scanning node list (ListNodes)")
return nil, err
}
@@ -478,11 +599,11 @@ func (r *NodeRepository) ListNodes(cluster string) ([]*schema.Node, error) {
}
func (r *NodeRepository) MapNodes(cluster string) (map[string]string, error) {
q := sq.Select("node.hostname", "node_state.node_state", "MAX(node_state.time_stamp) as time").
q := sq.Select("node.hostname", "node_state.node_state").
From("node").
Join("node_state ON node_state.node_id = node.id").
Where(latestStateCondition()).
Where("node.cluster = ?", cluster).
GroupBy("node_state.node_id").
OrderBy("node.hostname ASC")
rows, err := q.RunWith(r.DB).Query()
@@ -495,9 +616,8 @@ func (r *NodeRepository) MapNodes(cluster string) (map[string]string, error) {
defer rows.Close()
for rows.Next() {
var hostname, nodestate string
var timestamp int
if err := rows.Scan(&hostname, &nodestate, &timestamp); err != nil {
cclog.Warnf("Error while scanning node list (MapNodes) at time '%d'", timestamp)
if err := rows.Scan(&hostname, &nodestate); err != nil {
cclog.Warn("Error while scanning node list (MapNodes)")
return nil, err
}
@@ -509,37 +629,15 @@ func (r *NodeRepository) MapNodes(cluster string) (map[string]string, error) {
func (r *NodeRepository) CountStates(ctx context.Context, filters []*model.NodeFilter, column string) ([]*model.NodeStates, error) {
query, qerr := AccessCheck(ctx,
sq.Select(column, "COUNT(*) as count").
sq.Select(column).
From("node").
Join("node_state ON node_state.node_id = node.id").
Where(latestStateCondition()).
GroupBy(column))
Where(latestStateCondition()))
if qerr != nil {
return nil, qerr
}
query = query.Join("node_state ON node_state.node_id = node.id")
for _, f := range filters {
if f.Hostname != nil {
query = buildStringCondition("hostname", f.Hostname, query)
}
if f.Cluster != nil {
query = buildStringCondition("cluster", f.Cluster, query)
}
if f.Subcluster != nil {
query = buildStringCondition("subcluster", f.Subcluster, query)
}
if f.SchedulerState != nil {
query = query.Where("node_state = ?", f.SchedulerState)
}
if f.HealthState != nil {
query = query.Where("health_state = ?", f.HealthState)
}
}
// Add Group and Order
query = query.GroupBy("hostname").OrderBy("hostname DESC")
query = applyNodeFilters(query, filters)
rows, err := query.RunWith(r.stmtCache).Query()
if err != nil {
@@ -549,6 +647,18 @@ func (r *NodeRepository) CountStates(ctx context.Context, filters []*model.NodeF
}
defer rows.Close()
stateMap := map[string]int{}
for rows.Next() {
var state string
if err := rows.Scan(&state); err != nil {
rows.Close()
cclog.Warn("Error while scanning rows (CountStates)")
return nil, err
}
stateMap[state] += 1
}
nodes := make([]*model.NodeStates, 0)
for rows.Next() {
var state string
@@ -587,8 +697,8 @@ func (r *NodeRepository) CountStatesTimed(ctx context.Context, filters []*model.
if f.Cluster != nil {
query = buildStringCondition("cluster", f.Cluster, query)
}
if f.Subcluster != nil {
query = buildStringCondition("subcluster", f.Subcluster, query)
if f.SubCluster != nil {
query = buildStringCondition("subcluster", f.SubCluster, query)
}
if f.SchedulerState != nil {
query = query.Where("node_state = ?", f.SchedulerState)
@@ -640,6 +750,132 @@ func (r *NodeRepository) CountStatesTimed(ctx context.Context, filters []*model.
return timedStates, nil
}
// GetNodesForList resolves the hostnames to show on one page of the node list.
//
// Parameters:
//   - ctx: Context forwarded to QueryNodes and CountNodes
//   - cluster, subCluster: Restrict the result to this cluster / subcluster; empty means no restriction
//   - stateFilter: "all" applies no state filter, "notindb" lists nodes that are present in the
//     archive topology but have no row in the node table, any other value is used as scheduler state
//   - nodeFilter: Substring the hostname has to contain; empty means no restriction
//   - page: Requested page; must not be nil. It is temporarily modified while querying for the
//     "notindb" case and always restored before returning.
//
// Returns the hostnames of the requested page, a hostname -> scheduler state map of all nodes
// returned by the database query, the total number of matching nodes, whether a further page
// exists, and an error if one of the database queries failed.
//
// A page that lies beyond the last entry yields an empty hostname list instead of a panic.
func (r *NodeRepository) GetNodesForList(
	ctx context.Context,
	cluster string,
	subCluster string,
	stateFilter string,
	nodeFilter string,
	page *model.PageRequest,
) ([]string, map[string]string, int, bool, error) {
	// Init Return Vars
	nodes := make([]string, 0)
	stateMap := make(map[string]string)
	countNodes := 0
	hasNextPage := false

	notInDB := stateFilter == "notindb"

	// Build Filters
	queryFilters := make([]*model.NodeFilter, 0)
	if cluster != "" {
		queryFilters = append(queryFilters, &model.NodeFilter{Cluster: &model.StringInput{Eq: &cluster}})
	}
	if subCluster != "" {
		queryFilters = append(queryFilters, &model.NodeFilter{SubCluster: &model.StringInput{Eq: &subCluster}})
	}
	if nodeFilter != "" && !notInDB {
		queryFilters = append(queryFilters, &model.NodeFilter{Hostname: &model.StringInput{Contains: &nodeFilter}})
	}
	if stateFilter != "all" && !notInDB {
		queryState := schema.SchedulerState(stateFilter)
		queryFilters = append(queryFilters, &model.NodeFilter{SchedulerState: &queryState})
	}
	// if healthFilter != "all" {
	// 	filters = append(filters, &model.NodeFilter{HealthState: &healthFilter})
	// }

	// Special Case: Disable Paging for missing nodes filter, save IPP for later
	backupItems := page.ItemsPerPage
	if notInDB {
		page.ItemsPerPage = -1
	}

	// Query Nodes From DB
	rawNodes, serr := r.QueryNodes(ctx, queryFilters, page, nil) // Order not Used

	// Reapply Original Paging right away, so the caller's page is intact on the error path as well
	page.ItemsPerPage = backupItems

	if serr != nil {
		cclog.Warn("error while loading node database data (Resolver.NodeMetricsList)")
		return nil, nil, 0, false, serr
	}

	// Intermediate Node Result Info
	for _, node := range rawNodes {
		if node == nil {
			continue
		}
		nodes = append(nodes, node.Hostname)
		stateMap[node.Hostname] = string(node.NodeState)
	}

	if notInDB {
		// Special Case: Find Nodes not in DB node table but in metricStore only

		// Get Nodes From Topology
		var topoNodes []string
		if subCluster != "" {
			scNodes := archive.NodeLists[cluster][subCluster]
			topoNodes = scNodes.PrintList()
		} else {
			for _, nodeList := range archive.NodeLists[cluster] {
				topoNodes = append(topoNodes, nodeList.PrintList()...)
			}
		}

		// Keep topology nodes that are unknown to the DB and match the name filter.
		// stateMap holds exactly the hostnames collected above, so it serves as lookup set.
		missingNodes := make([]string, 0, len(topoNodes))
		for _, scanNode := range topoNodes {
			if _, inDB := stateMap[scanNode]; inDB {
				continue
			}
			if nodeFilter != "" && !strings.Contains(scanNode, nodeFilter) {
				continue
			}
			missingNodes = append(missingNodes, scanNode)
		}

		// Sort Missing Nodes Alphanumerically
		slices.Sort(missingNodes)

		// Total Missing
		countNodes = len(missingNodes)

		// Apply paging; a non-positive page size means "no paging"
		nodes = missingNodes
		if page.ItemsPerPage > 0 && countNodes > page.ItemsPerPage {
			// Clamp the window so out-of-range pages give an empty slice instead of a panic
			start := (page.Page - 1) * page.ItemsPerPage
			if start < 0 {
				start = 0
			}
			if start > countNodes {
				start = countNodes
			}
			end := start + page.ItemsPerPage
			if end > countNodes {
				end = countNodes
			}
			// Another page exists only if entries remain behind the window
			hasNextPage = end < countNodes
			nodes = missingNodes[start:end]
		}
	} else {
		// DB Nodes: Count and derive hasNextPage from count
		var cerr error
		countNodes, cerr = r.CountNodes(ctx, queryFilters)
		if cerr != nil {
			cclog.Warn("error while counting node database data (Resolver.NodeMetricsList)")
			return nil, nil, 0, false, cerr
		}
		hasNextPage = page.Page*page.ItemsPerPage < countNodes
	}

	// Fallback for non-init'd node table in DB; Ignores stateFilter
	if stateFilter == "all" && countNodes == 0 {
		nodes, countNodes, hasNextPage = getNodesFromTopol(cluster, subCluster, nodeFilter, page)
	}

	return nodes, stateMap, countNodes, hasNextPage, nil
}
func AccessCheck(ctx context.Context, query sq.SelectBuilder) (sq.SelectBuilder, error) {
user := GetUserFromContext(ctx)
return AccessCheckWithUser(user, query)
@@ -661,3 +897,51 @@ func AccessCheckWithUser(user *schema.User, query sq.SelectBuilder) (sq.SelectBu
return qnil, fmt.Errorf("user has no or unknown roles")
}
}
// getNodesFromTopol lists node hostnames from the archive topology (archive.NodeLists)
// instead of the database.
//
// Parameters:
//   - cluster, subCluster: Cluster to list; an empty subCluster collects the nodes of all
//     subclusters of the cluster
//   - nodeFilter: Substring the hostname has to contain; empty means no restriction
//   - page: Requested page; must not be nil. A non-positive ItemsPerPage disables paging.
//
// Returns the sorted hostnames of the requested page, the total number of matching nodes and
// whether a further page exists. A page beyond the last entry yields an empty slice.
func getNodesFromTopol(cluster string, subCluster string, nodeFilter string, page *model.PageRequest) ([]string, int, bool) {
	// 0) Init additional vars
	hasNextPage := false
	totalNodes := 0

	// 1) Get list of all nodes
	var topolNodes []string
	if subCluster != "" {
		scNodes := archive.NodeLists[cluster][subCluster]
		topolNodes = scNodes.PrintList()
	} else {
		subClusterNodeLists := archive.NodeLists[cluster]
		for _, nodeList := range subClusterNodeLists {
			topolNodes = append(topolNodes, nodeList.PrintList()...)
		}
	}

	// 2) Filter nodes
	if nodeFilter != "" {
		filteredNodes := []string{}
		for _, node := range topolNodes {
			if strings.Contains(node, nodeFilter) {
				filteredNodes = append(filteredNodes, node)
			}
		}
		topolNodes = filteredNodes
	}

	// 2.1) Count total nodes && Sort nodes -> Sorting invalidated after ccms return ...
	totalNodes = len(topolNodes)
	sort.Strings(topolNodes)

	// 3) Apply paging; skipped when paging is disabled or everything fits on one page
	if page.ItemsPerPage > 0 && totalNodes > page.ItemsPerPage {
		// Clamp the window so out-of-range pages give an empty slice instead of a panic
		start := (page.Page - 1) * page.ItemsPerPage
		if start < 0 {
			start = 0
		}
		if start > totalNodes {
			start = totalNodes
		}
		end := start + page.ItemsPerPage
		if end > totalNodes {
			end = totalNodes
		}
		// Another page exists only if entries remain behind the window
		hasNextPage = end < totalNodes
		topolNodes = topolNodes[start:end]
	}

	return topolNodes, totalNodes, hasNextPage
}

View File

@@ -15,9 +15,9 @@ import (
"github.com/ClusterCockpit/cc-backend/internal/config"
"github.com/ClusterCockpit/cc-backend/pkg/archive"
ccconf "github.com/ClusterCockpit/cc-lib/ccConfig"
cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
"github.com/ClusterCockpit/cc-lib/schema"
ccconf "github.com/ClusterCockpit/cc-lib/v2/ccConfig"
cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger"
"github.com/ClusterCockpit/cc-lib/v2/schema"
_ "github.com/mattn/go-sqlite3"
)
@@ -26,7 +26,7 @@ func nodeTestSetup(t *testing.T) {
"main": {
"addr": "0.0.0.0:8080",
"validate": false,
"apiAllowedIPs": [
"api-allowed-ips": [
"*"
]
},
@@ -38,18 +38,7 @@ func nodeTestSetup(t *testing.T) {
"jwts": {
"max-age": "2m"
}
},
"clusters": [
{
"name": "testcluster",
"metricDataRepository": {"kind": "test", "url": "bla:8081"},
"filterRanges": {
"numNodes": { "from": 1, "to": 64 },
"duration": { "from": 0, "to": 86400 },
"startTime": { "from": "2022-01-01T00:00:00Z", "to": null }
}
}
]
}`
const testclusterJSON = `{
"name": "testcluster",
@@ -130,7 +119,7 @@ func nodeTestSetup(t *testing.T) {
}
dbfilepath := filepath.Join(tmpdir, "test.db")
err := MigrateDB("sqlite3", dbfilepath)
err := MigrateDB(dbfilepath)
if err != nil {
t.Fatal(err)
}
@@ -144,19 +133,22 @@ func nodeTestSetup(t *testing.T) {
// Load and check main configuration
if cfg := ccconf.GetPackageConfig("main"); cfg != nil {
if clustercfg := ccconf.GetPackageConfig("clusters"); clustercfg != nil {
config.Init(cfg, clustercfg)
} else {
cclog.Abort("Cluster configuration must be present")
}
config.Init(cfg)
} else {
cclog.Abort("Main configuration must be present")
}
archiveCfg := fmt.Sprintf("{\"kind\": \"file\",\"path\": \"%s\"}", jobarchive)
Connect("sqlite3", dbfilepath)
if err := ResetConnection(); err != nil {
t.Fatal(err)
}
t.Cleanup(func() {
ResetConnection()
})
if err := archive.Init(json.RawMessage(archiveCfg), config.Keys.DisableArchive); err != nil {
Connect(dbfilepath)
if err := archive.Init(json.RawMessage(archiveCfg)); err != nil {
t.Fatal(err)
}
}
@@ -164,8 +156,12 @@ func nodeTestSetup(t *testing.T) {
func TestUpdateNodeState(t *testing.T) {
nodeTestSetup(t)
repo := GetNodeRepository()
now := time.Now().Unix()
nodeState := schema.NodeStateDB{
TimeStamp: time.Now().Unix(), NodeState: "allocated",
TimeStamp: now,
NodeState: "allocated",
CpusAllocated: 72,
MemoryAllocated: 480,
GpusAllocated: 0,
@@ -173,18 +169,152 @@ func TestUpdateNodeState(t *testing.T) {
JobsRunning: 1,
}
repo := GetNodeRepository()
err := repo.UpdateNodeState("host124", "testcluster", &nodeState)
if err != nil {
return
t.Fatal(err)
}
node, err := repo.GetNode("host124", "testcluster", false)
if err != nil {
return
t.Fatal(err)
}
if node.NodeState != "allocated" {
t.Errorf("wrong node state\ngot: %s \nwant: allocated ", node.NodeState)
}
t.Run("FindBeforeEmpty", func(t *testing.T) {
// Only the current-timestamp row exists, so nothing should be found before now
rows, err := repo.FindNodeStatesBefore(now)
if err != nil {
t.Fatal(err)
}
if len(rows) != 0 {
t.Errorf("expected 0 rows, got %d", len(rows))
}
})
t.Run("DeleteOldRows", func(t *testing.T) {
// Insert 2 more old rows for host124
for i, ts := range []int64{now - 7200, now - 3600} {
ns := schema.NodeStateDB{
TimeStamp: ts,
NodeState: "allocated",
HealthState: schema.MonitoringStateFull,
CpusAllocated: 72,
MemoryAllocated: 480,
JobsRunning: i,
}
if err := repo.UpdateNodeState("host124", "testcluster", &ns); err != nil {
t.Fatal(err)
}
}
// Delete rows older than 30 minutes
cutoff := now - 1800
cnt, err := repo.DeleteNodeStatesBefore(cutoff)
if err != nil {
t.Fatal(err)
}
// Should delete the 2 old rows
if cnt != 2 {
t.Errorf("expected 2 deleted rows, got %d", cnt)
}
// Latest row should still exist
node, err := repo.GetNode("host124", "testcluster", false)
if err != nil {
t.Fatal(err)
}
if node.NodeState != "allocated" {
t.Errorf("expected node state 'allocated', got %s", node.NodeState)
}
})
t.Run("PreservesLatestPerNode", func(t *testing.T) {
// Insert a single old row for host125 — it's the latest per node so it must survive
ns := schema.NodeStateDB{
TimeStamp: now - 7200,
NodeState: "idle",
HealthState: schema.MonitoringStateFull,
CpusAllocated: 0,
MemoryAllocated: 0,
JobsRunning: 0,
}
if err := repo.UpdateNodeState("host125", "testcluster", &ns); err != nil {
t.Fatal(err)
}
// Delete everything older than now — the latest per node should be preserved
_, err := repo.DeleteNodeStatesBefore(now)
if err != nil {
t.Fatal(err)
}
// The latest row for host125 must still exist
node, err := repo.GetNode("host125", "testcluster", false)
if err != nil {
t.Fatal(err)
}
if node.NodeState != "idle" {
t.Errorf("expected node state 'idle', got %s", node.NodeState)
}
// Verify exactly 1 row remains for host125
var countAfter int
if err := repo.DB.QueryRow(
"SELECT COUNT(*) FROM node_state WHERE node_id = (SELECT id FROM node WHERE hostname = 'host125')").
Scan(&countAfter); err != nil {
t.Fatal(err)
}
if countAfter != 1 {
t.Errorf("expected 1 row remaining for host125, got %d", countAfter)
}
})
t.Run("FindBeforeWithJoin", func(t *testing.T) {
// Insert old and current rows for host123
for _, ts := range []int64{now - 7200, now} {
ns := schema.NodeStateDB{
TimeStamp: ts,
NodeState: "allocated",
HealthState: schema.MonitoringStateFull,
CpusAllocated: 8,
MemoryAllocated: 1024,
GpusAllocated: 1,
JobsRunning: 1,
}
if err := repo.UpdateNodeState("host123", "testcluster", &ns); err != nil {
t.Fatal(err)
}
}
// Find rows older than 30 minutes, excluding latest per node
cutoff := now - 1800
rows, err := repo.FindNodeStatesBefore(cutoff)
if err != nil {
t.Fatal(err)
}
// Should find the old host123 row
found := false
for _, row := range rows {
if row.Hostname == "host123" && row.TimeStamp == now-7200 {
found = true
if row.Cluster != "testcluster" {
t.Errorf("expected cluster 'testcluster', got %s", row.Cluster)
}
if row.SubCluster != "sc1" {
t.Errorf("expected subcluster 'sc1', got %s", row.SubCluster)
}
if row.CpusAllocated != 8 {
t.Errorf("expected cpus_allocated 8, got %d", row.CpusAllocated)
}
}
}
if !found {
t.Errorf("expected to find old host123 row among %d results", len(rows))
}
})
}

View File

@@ -6,11 +6,13 @@ package repository
import (
"context"
"os"
"path/filepath"
"testing"
"github.com/ClusterCockpit/cc-backend/internal/graph/model"
cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
"github.com/ClusterCockpit/cc-lib/schema"
cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger"
"github.com/ClusterCockpit/cc-lib/v2/schema"
_ "github.com/mattn/go-sqlite3"
)
@@ -46,7 +48,7 @@ func BenchmarkSelect1(b *testing.B) {
}
func BenchmarkDB_FindJobById(b *testing.B) {
var jobId int64 = 1677322
var jobID int64 = 1677322
b.Run("FindJobById", func(b *testing.B) {
db := setup(b)
@@ -55,7 +57,7 @@ func BenchmarkDB_FindJobById(b *testing.B) {
b.RunParallel(func(pb *testing.PB) {
for pb.Next() {
_, err := db.FindById(getContext(b), jobId)
_, err := db.FindByID(getContext(b), jobID)
noErr(b, err)
}
})
@@ -63,7 +65,7 @@ func BenchmarkDB_FindJobById(b *testing.B) {
}
func BenchmarkDB_FindJob(b *testing.B) {
var jobId int64 = 107266
var jobID int64 = 107266
var startTime int64 = 1657557241
cluster := "fritz"
@@ -74,7 +76,7 @@ func BenchmarkDB_FindJob(b *testing.B) {
b.RunParallel(func(pb *testing.PB) {
for pb.Next() {
_, err := db.Find(&jobId, &cluster, &startTime)
_, err := db.Find(&jobID, &cluster, &startTime)
noErr(b, err)
}
})
@@ -148,10 +150,24 @@ func getContext(tb testing.TB) context.Context {
// setup returns the JobRepository used by the benchmarks in this file.
// It initialises logging at "warn" level, migrates the SQLite database file,
// connects to it and hands back the result of GetJobRepository. Errors of the
// individual steps are checked with noErr.
// NOTE(review): this span interleaves the pre- and post-change lines of a diff
// (fixed testdata/job.db path vs. a per-test temporary copy) - confirm which
// variant is the one in the tree before relying on the details.
func setup(tb testing.TB) *JobRepository {
tb.Helper()
cclog.Init("warn", true)
dbfile := "testdata/job.db"
err := MigrateDB("sqlite3", dbfile)
// Copy test DB to a temp file for test isolation
srcData, err := os.ReadFile("testdata/job.db")
noErr(tb, err)
Connect("sqlite3", dbfile)
dbfile := filepath.Join(tb.TempDir(), "job.db")
err = os.WriteFile(dbfile, srcData, 0o644)
noErr(tb, err)
// Reset singletons so Connect uses the new temp DB
err = ResetConnection()
noErr(tb, err)
tb.Cleanup(func() {
ResetConnection()
})
err = MigrateDB(dbfile)
noErr(tb, err)
Connect(dbfile)
return GetJobRepository()
}

View File

@@ -2,6 +2,44 @@
// All rights reserved. This file is part of cc-backend.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
// This file contains job statistics and histogram generation functionality for the JobRepository.
//
// # Job Statistics
//
// The statistics methods provide aggregated metrics about jobs including total jobs, users,
// walltime, and resource usage (nodes, cores, accelerators). Statistics can be computed:
// - Overall (JobsStats): Single aggregate across all matching jobs
// - Grouped (JobsStatsGrouped): Aggregated by user, project, cluster, or subcluster
// - Counts (JobCountGrouped, AddJobCount): Simple job counts with optional filtering
//
// All statistics methods support filtering via JobFilter and respect security contexts.
//
// # Histograms
//
// Histogram methods generate distribution data for visualization:
// - Duration, nodes, cores, accelerators (AddHistograms)
// - Job metrics like CPU load, memory usage (AddMetricHistograms)
//
// Histograms use intelligent binning:
// - Duration: Variable bin sizes (1m, 10m, 1h, 6h, 12h, 24h) with zero-padding
// - Resources: Natural value-based bins
// - Metrics: Normalized to peak values with configurable bin counts
//
// # Running vs. Completed Jobs
//
// Statistics handle running jobs specially:
// - Duration calculated as (now - start_time) for running jobs
// - Metric histograms for running jobs load data from metric backend instead of footprint
// - Job state filtering distinguishes running/completed jobs
//
// # Performance Considerations
//
// - All queries use prepared statements via stmtCache
// - Complex aggregations use SQL for efficiency
// - Histogram pre-initialization ensures consistent bin ranges
// - Metric histogram queries limited to 5000 jobs for running job analysis
package repository
import (
@@ -12,14 +50,16 @@ import (
"github.com/ClusterCockpit/cc-backend/internal/config"
"github.com/ClusterCockpit/cc-backend/internal/graph/model"
"github.com/ClusterCockpit/cc-backend/internal/metricDataDispatcher"
"github.com/ClusterCockpit/cc-backend/internal/metricdispatch"
"github.com/ClusterCockpit/cc-backend/pkg/archive"
cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
"github.com/ClusterCockpit/cc-lib/schema"
cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger"
"github.com/ClusterCockpit/cc-lib/v2/schema"
sq "github.com/Masterminds/squirrel"
)
// GraphQL validation should make sure that no unknown values can be specified.
// groupBy2column maps GraphQL Aggregate enum values to their corresponding database column names.
// Used by JobsStatsGrouped and JobCountGrouped to translate user-facing grouping dimensions
// into SQL GROUP BY clauses. GraphQL validation ensures only valid enum values are accepted.
var groupBy2column = map[model.Aggregate]string{
model.AggregateUser: "job.hpc_user",
model.AggregateProject: "job.project",
@@ -27,6 +67,9 @@ var groupBy2column = map[model.Aggregate]string{
model.AggregateSubcluster: "job.subcluster",
}
// sortBy2column maps GraphQL SortByAggregate enum values to their corresponding computed column names.
// Used by JobsStatsGrouped to translate sort preferences into SQL ORDER BY clauses.
// Column names match the AS aliases used in buildStatsQuery.
var sortBy2column = map[model.SortByAggregate]string{
model.SortByAggregateTotaljobs: "totalJobs",
model.SortByAggregateTotalusers: "totalUsers",
@@ -39,6 +82,21 @@ var sortBy2column = map[model.SortByAggregate]string{
model.SortByAggregateTotalacchours: "totalAccHours",
}
// buildCountQuery constructs a SQL query to count jobs with optional grouping and filtering.
//
// Parameters:
// - filter: Job filters to apply (cluster, user, time range, etc.)
// - kind: Special filter - "running" for running jobs only, "short" for jobs under threshold
// - col: Column name to GROUP BY; empty string for total count without grouping
//
// Returns a SelectBuilder that produces either:
// - Single count: COUNT(job.id) when col is empty
// - Grouped counts: col, COUNT(job.id) when col is specified
//
// The kind parameter enables counting specific job categories:
// - "running": Only jobs with job_state = 'running'
// - "short": Only jobs with duration < ShortRunningJobsDuration config value
// - empty: All jobs matching filters
func (r *JobRepository) buildCountQuery(
filter []*model.JobFilter,
kind string,
@@ -47,10 +105,8 @@ func (r *JobRepository) buildCountQuery(
var query sq.SelectBuilder
if col != "" {
// Scan columns: id, cnt
query = sq.Select(col, "COUNT(job.id)").From("job").GroupBy(col)
} else {
// Scan columns: cnt
query = sq.Select("COUNT(job.id)").From("job")
}
@@ -68,42 +124,58 @@ func (r *JobRepository) buildCountQuery(
return query
}
// buildStatsQuery constructs a SQL query to compute comprehensive job statistics with optional grouping.
//
// Parameters:
// - filter: Job filters to apply (cluster, user, time range, etc.)
// - col: Column name to GROUP BY; empty string for overall statistics without grouping
//
// Returns a SelectBuilder that produces comprehensive statistics:
// - totalJobs: Count of jobs
// - totalUsers: Count of distinct users (always 0 when grouping by user)
// - totalWalltime: Sum of job durations in hours
// - totalNodes: Sum of nodes used across all jobs
// - totalNodeHours: Sum of (duration × num_nodes) in hours
// - totalCores: Sum of hardware threads used across all jobs
// - totalCoreHours: Sum of (duration × num_hwthreads) in hours
// - totalAccs: Sum of accelerators used across all jobs
// - totalAccHours: Sum of (duration × num_acc) in hours
//
// Special handling:
// - Running jobs: Duration calculated as (now - start_time) instead of stored duration
// - Grouped queries: Also select grouping column and user's display name from hpc_user table
// - All time values converted from seconds to hours (÷ 3600) and rounded
func (r *JobRepository) buildStatsQuery(
filter []*model.JobFilter,
col string,
) sq.SelectBuilder {
var query sq.SelectBuilder
castType := r.getCastType()
// fmt.Sprintf(`CAST(ROUND((CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END) / 3600) as %s) as value`, time.Now().Unix(), castType)
if col != "" {
// Scan columns: id, name, totalJobs, totalUsers, totalWalltime, totalNodes, totalNodeHours, totalCores, totalCoreHours, totalAccs, totalAccHours
query = sq.Select(
col,
"name",
"COUNT(job.id) as totalJobs",
"COUNT(DISTINCT job.hpc_user) AS totalUsers",
fmt.Sprintf(`CAST(ROUND(SUM((CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END)) / 3600) as %s) as totalWalltime`, time.Now().Unix(), castType),
fmt.Sprintf(`CAST(SUM(job.num_nodes) as %s) as totalNodes`, castType),
fmt.Sprintf(`CAST(ROUND(SUM((CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END) * job.num_nodes) / 3600) as %s) as totalNodeHours`, time.Now().Unix(), castType),
fmt.Sprintf(`CAST(SUM(job.num_hwthreads) as %s) as totalCores`, castType),
fmt.Sprintf(`CAST(ROUND(SUM((CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END) * job.num_hwthreads) / 3600) as %s) as totalCoreHours`, time.Now().Unix(), castType),
fmt.Sprintf(`CAST(SUM(job.num_acc) as %s) as totalAccs`, castType),
fmt.Sprintf(`CAST(ROUND(SUM((CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END) * job.num_acc) / 3600) as %s) as totalAccHours`, time.Now().Unix(), castType),
fmt.Sprintf(`CAST(ROUND(SUM((CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END)) / 3600) as int) as totalWalltime`, time.Now().Unix()),
`CAST(SUM(job.num_nodes) as int) as totalNodes`,
fmt.Sprintf(`CAST(ROUND(SUM((CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END) * job.num_nodes) / 3600) as int) as totalNodeHours`, time.Now().Unix()),
`CAST(SUM(job.num_hwthreads) as int) as totalCores`,
fmt.Sprintf(`CAST(ROUND(SUM((CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END) * job.num_hwthreads) / 3600) as int) as totalCoreHours`, time.Now().Unix()),
`CAST(SUM(job.num_acc) as int) as totalAccs`,
fmt.Sprintf(`CAST(ROUND(SUM((CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END) * job.num_acc) / 3600) as int) as totalAccHours`, time.Now().Unix()),
).From("job").LeftJoin("hpc_user ON hpc_user.username = job.hpc_user").GroupBy(col)
} else {
// Scan columns: totalJobs, totalUsers, totalWalltime, totalNodes, totalNodeHours, totalCores, totalCoreHours, totalAccs, totalAccHours
query = sq.Select(
"COUNT(job.id) as totalJobs",
"COUNT(DISTINCT job.hpc_user) AS totalUsers",
fmt.Sprintf(`CAST(ROUND(SUM((CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END)) / 3600) as %s)`, time.Now().Unix(), castType),
fmt.Sprintf(`CAST(SUM(job.num_nodes) as %s)`, castType),
fmt.Sprintf(`CAST(ROUND(SUM((CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END) * job.num_nodes) / 3600) as %s)`, time.Now().Unix(), castType),
fmt.Sprintf(`CAST(SUM(job.num_hwthreads) as %s)`, castType),
fmt.Sprintf(`CAST(ROUND(SUM((CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END) * job.num_hwthreads) / 3600) as %s)`, time.Now().Unix(), castType),
fmt.Sprintf(`CAST(SUM(job.num_acc) as %s)`, castType),
fmt.Sprintf(`CAST(ROUND(SUM((CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END) * job.num_acc) / 3600) as %s)`, time.Now().Unix(), castType),
fmt.Sprintf(`CAST(ROUND(SUM((CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END)) / 3600) as int)`, time.Now().Unix()),
`CAST(SUM(job.num_nodes) as int)`,
fmt.Sprintf(`CAST(ROUND(SUM((CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END) * job.num_nodes) / 3600) as int)`, time.Now().Unix()),
`CAST(SUM(job.num_hwthreads) as int)`,
fmt.Sprintf(`CAST(ROUND(SUM((CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END) * job.num_hwthreads) / 3600) as int)`, time.Now().Unix()),
`CAST(SUM(job.num_acc) as int)`,
fmt.Sprintf(`CAST(ROUND(SUM((CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END) * job.num_acc) / 3600) as int)`, time.Now().Unix()),
).From("job")
}
@@ -114,21 +186,25 @@ func (r *JobRepository) buildStatsQuery(
return query
}
// getCastType reports the SQL type name to use in CAST expressions for the
// repository's driver: "int" for sqlite3, "unsigned" for mysql and an empty
// string for every other driver value.
func (r *JobRepository) getCastType() string {
	if r.driver == "sqlite3" {
		return "int"
	}
	if r.driver == "mysql" {
		return "unsigned"
	}
	return ""
}
// JobsStatsGrouped computes comprehensive job statistics grouped by a dimension (user, project, cluster, or subcluster).
//
// This is the primary method for generating aggregated statistics views in the UI, providing
// metrics like total jobs, walltime, and resource usage broken down by the specified grouping.
//
// Parameters:
// - ctx: Context for security checks and cancellation
// - filter: Filters to apply (time range, cluster, job state, etc.)
// - page: Optional pagination (ItemsPerPage: -1 disables pagination)
// - sortBy: Optional sort column (totalJobs, totalWalltime, totalCoreHours, etc.)
// - groupBy: Required grouping dimension (User, Project, Cluster, or SubCluster)
//
// Returns a slice of JobsStatistics, one per group, with:
// - ID: The group identifier (username, project name, cluster name, etc.)
// - Name: Display name (for users, from hpc_user.name; empty for other groups)
// - Statistics: totalJobs, totalUsers, totalWalltime, resource usage metrics
//
// Security: Respects user roles via SecurityCheck - users see only their own data unless admin/support.
// Performance: Results are sorted in SQL and pagination applied before scanning rows.
func (r *JobRepository) JobsStatsGrouped(
ctx context.Context,
filter []*model.JobFilter,
@@ -253,6 +329,21 @@ func (r *JobRepository) JobsStatsGrouped(
return stats, nil
}
// JobsStats computes overall job statistics across all matching jobs without grouping.
//
// This method provides a single aggregate view of job metrics, useful for dashboard
// summaries and overall system utilization reports.
//
// Parameters:
// - ctx: Context for security checks and cancellation
// - filter: Filters to apply (time range, cluster, job state, etc.)
//
// Returns a single-element slice containing aggregate statistics:
// - totalJobs, totalUsers, totalWalltime
// - totalNodeHours, totalCoreHours, totalAccHours
//
// Unlike JobsStatsGrouped, this returns overall totals without breaking down by dimension.
// Security checks are applied via SecurityCheck to respect user access levels.
func (r *JobRepository) JobsStats(
ctx context.Context,
filter []*model.JobFilter,
@@ -300,6 +391,15 @@ func (r *JobRepository) JobsStats(
return stats, nil
}
// LoadJobStat retrieves a specific statistic for a metric from a job's statistics.
// Returns 0.0 if the metric is not found or statType is invalid.
//
// Parameters:
// - job: Job struct with populated Statistics field
// - metric: Name of the metric to query (e.g., "cpu_load", "mem_used")
// - statType: Type of statistic: "avg", "min", or "max"
//
// Returns the requested statistic value or 0.0 if not found.
func LoadJobStat(job *schema.Job, metric string, statType string) float64 {
if stats, ok := job.Statistics[metric]; ok {
switch statType {
@@ -317,6 +417,17 @@ func LoadJobStat(job *schema.Job, metric string, statType string) float64 {
return 0.0
}
// JobCountGrouped counts jobs grouped by a dimension without computing detailed statistics.
//
// This is a lightweight alternative to JobsStatsGrouped when only job counts are needed,
// avoiding the overhead of calculating walltime and resource usage metrics.
//
// Parameters:
// - ctx: Context for security checks
// - filter: Filters to apply
// - groupBy: Grouping dimension (User, Project, Cluster, or SubCluster)
//
// Returns JobsStatistics with only ID and TotalJobs populated for each group.
func (r *JobRepository) JobCountGrouped(
ctx context.Context,
filter []*model.JobFilter,
@@ -362,6 +473,20 @@ func (r *JobRepository) JobCountGrouped(
return stats, nil
}
// AddJobCountGrouped augments existing statistics with additional job counts by category.
//
// This method enriches JobsStatistics returned by JobsStatsGrouped or JobCountGrouped
// with counts of running or short-running (based on ShortRunningJobsDuration) jobs, matched by group ID.
//
// Parameters:
// - ctx: Context for security checks
// - filter: Filters to apply
// - groupBy: Grouping dimension (must match the dimension used for stats parameter)
// - stats: Existing statistics to augment (modified in-place by ID matching)
// - kind: "running" to add RunningJobs count, "short" to add ShortJobs count
//
// Returns the same stats slice with RunningJobs or ShortJobs fields populated per group.
// Groups without matching jobs will have 0 for the added field.
func (r *JobRepository) AddJobCountGrouped(
ctx context.Context,
filter []*model.JobFilter,
@@ -416,6 +541,18 @@ func (r *JobRepository) AddJobCountGrouped(
return stats, nil
}
// AddJobCount augments existing overall statistics with additional job counts by category.
//
// Similar to AddJobCountGrouped but for ungrouped statistics. Applies the same count
// to all statistics entries (typically just one).
//
// Parameters:
// - ctx: Context for security checks
// - filter: Filters to apply
// - stats: Existing statistics to augment (modified in-place)
// - kind: "running" to add RunningJobs count, "short" to add ShortJobs count
//
// Returns the same stats slice with RunningJobs or ShortJobs fields set to the total count.
func (r *JobRepository) AddJobCount(
ctx context.Context,
filter []*model.JobFilter,
@@ -451,6 +588,26 @@ func (r *JobRepository) AddJobCount(
return stats, nil
}
// AddHistograms augments statistics with distribution histograms for job properties.
//
// Generates histogram data for visualization of job duration, node count, core count,
// and accelerator count distributions. Duration histogram uses intelligent binning based
// on the requested resolution.
//
// Parameters:
// - ctx: Context for security checks
// - filter: Filters to apply to jobs included in histograms
// - stat: Statistics struct to augment (modified in-place)
// - durationBins: Bin size - "1m", "10m", "1h", "6h" or "12h"; any other value (e.g. "24h") selects the default, which uses 1-hour (3600 s) bins
//
// Populates these fields in stat:
// - HistDuration: Job duration distribution (zero-padded bins)
// - HistNumNodes: Node count distribution
// - HistNumCores: Core (hwthread) count distribution
// - HistNumAccs: Accelerator count distribution
//
// Duration bins are pre-initialized with zeros to ensure consistent ranges for visualization.
// Bin size determines both the width and maximum duration displayed (e.g., "1h" = 48 bins × 1h = 48h max).
func (r *JobRepository) AddHistograms(
ctx context.Context,
filter []*model.JobFilter,
@@ -461,20 +618,20 @@ func (r *JobRepository) AddHistograms(
var targetBinCount int
var targetBinSize int
switch {
case *durationBins == "1m": // 1 Minute Bins + Max 60 Bins -> Max 60 Minutes
switch *durationBins {
case "1m": // 1 Minute Bins + Max 60 Bins -> Max 60 Minutes
targetBinCount = 60
targetBinSize = 60
case *durationBins == "10m": // 10 Minute Bins + Max 72 Bins -> Max 12 Hours
case "10m": // 10 Minute Bins + Max 72 Bins -> Max 12 Hours
targetBinCount = 72
targetBinSize = 600
case *durationBins == "1h": // 1 Hour Bins + Max 48 Bins -> Max 48 Hours
case "1h": // 1 Hour Bins + Max 48 Bins -> Max 48 Hours
targetBinCount = 48
targetBinSize = 3600
case *durationBins == "6h": // 6 Hour Bins + Max 12 Bins -> Max 3 Days
case "6h": // 6 Hour Bins + Max 12 Bins -> Max 3 Days
targetBinCount = 12
targetBinSize = 21600
case *durationBins == "12h": // 12 hour Bins + Max 14 Bins -> Max 7 Days
case "12h": // 12 hour Bins + Max 14 Bins -> Max 7 Days
targetBinCount = 14
targetBinSize = 43200
default: // 24h
@@ -482,10 +639,9 @@ func (r *JobRepository) AddHistograms(
targetBinSize = 3600
}
castType := r.getCastType()
var err error
// Return X-Values always as seconds, will be formatted into minutes and hours in frontend
value := fmt.Sprintf(`CAST(ROUND(((CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END) / %d) + 1) as %s) as value`, time.Now().Unix(), targetBinSize, castType)
value := fmt.Sprintf(`CAST(ROUND(((CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END) / %d) + 1) as int) as value`, time.Now().Unix(), targetBinSize)
stat.HistDuration, err = r.jobsDurationStatisticsHistogram(ctx, value, filter, targetBinSize, &targetBinCount)
if err != nil {
cclog.Warn("Error while loading job statistics histogram: job duration")
@@ -514,7 +670,30 @@ func (r *JobRepository) AddHistograms(
return stat, nil
}
// Requires thresholds for metric from config for cluster? Of all clusters and use largest? split to 10 + 1 for artifacts?
// AddMetricHistograms augments statistics with distribution histograms for job metrics.
//
// Generates histogram data for metrics like CPU load, memory usage, etc. Handles running
// and completed jobs differently: running jobs load data from metric backend, completed jobs
// use footprint data from database.
//
// Parameters:
// - ctx: Context for security checks
// - filter: Filters to apply (MUST contain State filter for running jobs)
// - metrics: List of metric names to histogram (e.g., ["cpu_load", "mem_used"])
// - stat: Statistics struct to augment (modified in-place)
// - targetBinCount: Number of histogram bins (default: 10)
//
// Populates HistMetrics field in stat with MetricHistoPoints for each metric.
//
// Binning algorithm:
// - Values normalized to metric's peak value from cluster configuration
// - Bins evenly distributed from 0 to peak
// - Pre-initialized with zeros for consistent visualization
//
// Limitations:
// - Running jobs: Limited to 5000 jobs for performance
// - Requires valid cluster configuration with metric peak values
// - Uses footprint statistic (avg/max/min) configured per metric
func (r *JobRepository) AddMetricHistograms(
ctx context.Context,
filter []*model.JobFilter,
@@ -549,7 +728,16 @@ func (r *JobRepository) AddMetricHistograms(
return stat, nil
}
// `value` must be the column grouped by, but renamed to "value"
// jobsStatisticsHistogram generates a simple histogram by grouping on a column value.
//
// Used for histograms where the column value directly represents the bin (e.g., node count, core count).
// Unlike duration/metric histograms, this doesn't pre-initialize bins with zeros.
//
// Parameters:
// - value: SQL expression that produces the histogram value, aliased as "value"
// - filters: Job filters to apply
//
// Returns histogram points with Value (from column) and Count (number of jobs).
func (r *JobRepository) jobsStatisticsHistogram(
ctx context.Context,
value string,
@@ -594,6 +782,26 @@ func (r *JobRepository) jobsStatisticsHistogram(
return points, nil
}
// jobsDurationStatisticsHistogram generates a duration histogram with pre-initialized bins.
//
// Bins are zero-padded to provide consistent ranges for visualization, unlike simple
// histograms which only return bins with data. The value parameter should compute
// the bin number from job duration.
//
// Parameters:
// - value: SQL expression computing bin number from duration, aliased as "value"
// - filters: Job filters to apply
// - binSizeSeconds: Width of each bin in seconds
// - targetBinCount: Number of bins to pre-initialize
//
// Returns histogram points with Value (bin_number × binSizeSeconds) and Count.
// All bins from 1 to targetBinCount are returned, with Count=0 for empty bins.
//
// Algorithm:
// 1. Pre-initialize targetBinCount bins with zero counts
// 2. Query database for actual counts per bin
// 3. Match query results to pre-initialized bins by value
// 4. Bins without matches remain at zero
func (r *JobRepository) jobsDurationStatisticsHistogram(
ctx context.Context,
value string,
@@ -609,7 +817,8 @@ func (r *JobRepository) jobsDurationStatisticsHistogram(
return nil, qerr
}
// Setup Array
// Each bin represents a duration range: bin N = [(N-1)*binSizeSeconds, N*binSizeSeconds)
// Example: binSizeSeconds=3600 (1 hour), bin 1 = 0-1h, bin 2 = 1-2h, etc.
// The stored point Value is the bin's upper edge (N*binSizeSeconds).
points := make([]*model.HistoPoint, 0)
for i := 1; i <= *targetBinCount; i++ {
point := model.HistoPoint{Value: i * binSizeSeconds, Count: 0}
@@ -627,7 +836,8 @@ func (r *JobRepository) jobsDurationStatisticsHistogram(
}
defer rows.Close()
// Fill Array at matching $Value
// Match query results to pre-initialized bins.
// point.Value from query is the bin number; multiply by binSizeSeconds to match bin.Value.
for rows.Next() {
point := model.HistoPoint{}
if err := rows.Scan(&point.Value, &point.Count); err != nil {
@@ -637,9 +847,6 @@ func (r *JobRepository) jobsDurationStatisticsHistogram(
for _, e := range points {
if e.Value == (point.Value * binSizeSeconds) {
// Note:
// Matching on unmodified integer value (and multiplying point.Value by binSizeSeconds after match)
// causes frontend to loop into highest targetBinCount, due to zoom condition instantly being fulfilled (cause unknown)
e.Count = point.Count
break
}
@@ -654,18 +861,43 @@ func (r *JobRepository) jobsDurationStatisticsHistogram(
return points, nil
}
// jobsMetricStatisticsHistogram generates a metric histogram using footprint data from completed jobs.
//
// Values are normalized to the metric's peak value and distributed into bins. The algorithm
// is based on SQL histogram generation techniques, extracting metric values from JSON footprint
// and computing bin assignments in SQL.
//
// Parameters:
// - metric: Metric name (e.g., "cpu_load", "mem_used")
// - filters: Job filters to apply
// - bins: Number of bins to generate
//
// Returns MetricHistoPoints with metric name, unit, footprint stat type, and binned data.
//
// Algorithm:
// 1. Determine peak value from cluster configuration (filtered cluster or max across all)
// 2. Generate SQL that extracts footprint value, normalizes to [0,1], multiplies by bin count
// 3. Pre-initialize bins with min/max ranges based on peak value
// 4. Query database for counts per bin
// 5. Match results to pre-initialized bins
//
// Special handling: Values exactly equal to peak are forced into the last bin by multiplying
// peak by 0.999999999 to avoid creating an extra bin.
func (r *JobRepository) jobsMetricStatisticsHistogram(
ctx context.Context,
metric string,
filters []*model.JobFilter,
bins *int,
) (*model.MetricHistoPoints, error) {
// Get specific Peak or largest Peak
// Peak value defines the upper bound for binning: values are distributed across
// bins from 0 to peak. First try to get peak from filtered cluster, otherwise
// scan all clusters to find the maximum peak value.
var metricConfig *schema.MetricConfig
var peak float64
var unit string
var footprintStat string
// Try to get metric config from filtered cluster
for _, f := range filters {
if f.Cluster != nil {
metricConfig = archive.GetMetricConfig(*f.Cluster.Eq, metric)
@@ -676,6 +908,8 @@ func (r *JobRepository) jobsMetricStatisticsHistogram(
}
}
// If no cluster filter or peak not found, find largest peak across all clusters
// This ensures histogram can accommodate all possible values
if peak == 0.0 {
for _, c := range archive.Clusters {
for _, m := range c.MetricConfig {
@@ -694,11 +928,14 @@ func (r *JobRepository) jobsMetricStatisticsHistogram(
}
}
// cclog.Debugf("Metric %s, Peak %f, Unit %s", metric, peak, unit)
// Make bins, see https://jereze.com/code/sql-histogram/ (Modified here)
// Construct SQL histogram bins using normalized values.
// Algorithm based on: https://jereze.com/code/sql-histogram/ (modified)
start := time.Now()
// Find Jobs' Value Bin Number: Divide Value by Peak, Multiply by RequestedBins, then CAST to INT: Gets Bin-Number of Job
// Bin calculation formula:
// bin_number = CAST( (value / peak) * num_bins AS INTEGER ) + 1
// Special case: value == peak would create bin N+1, so we test for equality
// and multiply peak by 0.999999999 to force it into bin N.
binQuery := fmt.Sprintf(`CAST(
((case when json_extract(footprint, "$.%s") = %f then %f*0.999999999 else json_extract(footprint, "$.%s") end) / %f)
* %v as INTEGER )`,
@@ -707,24 +944,19 @@ func (r *JobRepository) jobsMetricStatisticsHistogram(
mainQuery := sq.Select(
fmt.Sprintf(`%s + 1 as bin`, binQuery),
`count(*) as count`,
// For Debug: // fmt.Sprintf(`CAST((%f / %d) as INTEGER ) * %s as min`, peak, *bins, binQuery),
// For Debug: // fmt.Sprintf(`CAST((%f / %d) as INTEGER ) * (%s + 1) as max`, peak, *bins, binQuery),
).From("job").Where(
"JSON_VALID(footprint)",
).Where(fmt.Sprintf(`json_extract(footprint, "$.%s") is not null and json_extract(footprint, "$.%s") <= %f`, (metric + "_" + footprintStat), (metric + "_" + footprintStat), peak))
// Only accessible Jobs...
mainQuery, qerr := SecurityCheck(ctx, mainQuery)
if qerr != nil {
return nil, qerr
}
// Filters...
for _, f := range filters {
mainQuery = BuildWhereClause(f, mainQuery)
}
// Finalize query with Grouping and Ordering
mainQuery = mainQuery.GroupBy("bin").OrderBy("bin")
rows, err := mainQuery.RunWith(r.DB).Query()
@@ -734,7 +966,8 @@ func (r *JobRepository) jobsMetricStatisticsHistogram(
}
defer rows.Close()
// Setup Return Array With Bin-Numbers for Match and Min/Max based on Peak
// Pre-initialize bins with calculated min/max ranges.
// Example: peak=1000, bins=10 -> bin 1=[0,100), bin 2=[100,200), ..., bin 10=[900,1000]
points := make([]*model.MetricHistoPoint, 0)
binStep := int(peak) / *bins
for i := 1; i <= *bins; i++ {
@@ -744,26 +977,18 @@ func (r *JobRepository) jobsMetricStatisticsHistogram(
points = append(points, &epoint)
}
for rows.Next() { // Fill Count if Bin-No. Matches (Not every Bin exists in DB!)
// Match query results to pre-initialized bins.
for rows.Next() {
rpoint := model.MetricHistoPoint{}
if err := rows.Scan(&rpoint.Bin, &rpoint.Count); err != nil { // Required for Debug: &rpoint.Min, &rpoint.Max
if err := rows.Scan(&rpoint.Bin, &rpoint.Count); err != nil {
cclog.Warnf("Error while scanning rows for %s", metric)
return nil, err // FIXME: Totally bricks cc-backend if returned and if all metrics requested?
return nil, err
}
for _, e := range points {
if e.Bin != nil && rpoint.Bin != nil {
if *e.Bin == *rpoint.Bin {
e.Count = rpoint.Count
// Only Required For Debug: Check DB returned Min/Max against Backend Init above
// if rpoint.Min != nil {
// cclog.Warnf(">>>> Bin %d Min Set For %s to %d (Init'd with: %d)", *e.Bin, metric, *rpoint.Min, *e.Min)
// }
// if rpoint.Max != nil {
// cclog.Warnf(">>>> Bin %d Max Set For %s to %d (Init'd with: %d)", *e.Bin, metric, *rpoint.Max, *e.Max)
// }
break
}
if e.Bin != nil && rpoint.Bin != nil && *e.Bin == *rpoint.Bin {
e.Count = rpoint.Count
break
}
}
}
@@ -778,6 +1003,28 @@ func (r *JobRepository) jobsMetricStatisticsHistogram(
return &result, nil
}
// runningJobsMetricStatisticsHistogram generates metric histograms for running jobs using live data.
//
// Unlike completed jobs which use footprint data from the database, running jobs require
// fetching current metric averages from the metric backend (via metricdispatch).
//
// Parameters:
// - metrics: List of metric names
// - filters: Job filters (should filter to running jobs only)
// - bins: Number of histogram bins
//
// Returns slice of MetricHistoPoints, one per metric.
//
// Limitations:
// - Maximum 5000 jobs (returns nil if more jobs match)
// - Requires metric backend availability
// - Bins based on metric peak values from cluster configuration
//
// Algorithm:
// 1. Query first 5001 jobs to check count limit
// 2. Load metric averages for all jobs via metricdispatch
// 3. For each metric, create bins based on peak value
// 4. Iterate averages and count jobs per bin
func (r *JobRepository) runningJobsMetricStatisticsHistogram(
ctx context.Context,
metrics []string,
@@ -785,13 +1032,13 @@ func (r *JobRepository) runningJobsMetricStatisticsHistogram(
bins *int,
) []*model.MetricHistoPoints {
// Get Jobs
jobs, err := r.QueryJobs(ctx, filters, &model.PageRequest{Page: 1, ItemsPerPage: 500 + 1}, nil)
jobs, err := r.QueryJobs(ctx, filters, &model.PageRequest{Page: 1, ItemsPerPage: 5000 + 1}, nil)
if err != nil {
cclog.Errorf("Error while querying jobs for footprint: %s", err)
return nil
}
if len(jobs) > 500 {
cclog.Errorf("too many jobs matched (max: %d)", 500)
if len(jobs) > 5000 {
cclog.Errorf("too many jobs matched (max: %d)", 5000)
return nil
}
@@ -806,7 +1053,7 @@ func (r *JobRepository) runningJobsMetricStatisticsHistogram(
continue
}
if err := metricDataDispatcher.LoadAverages(job, metrics, avgs, ctx); err != nil {
if err := metricdispatch.LoadAverages(job, metrics, avgs, ctx); err != nil {
cclog.Errorf("Error while loading averages for histogram: %s", err)
return nil
}

View File

@@ -25,11 +25,14 @@ func TestBuildJobStatsQuery(t *testing.T) {
func TestJobStats(t *testing.T) {
r := setup(t)
filter := &model.JobFilter{}
stats, err := r.JobsStats(getContext(t), []*model.JobFilter{filter})
var expectedCount int
err := r.DB.QueryRow(`SELECT COUNT(*) FROM job`).Scan(&expectedCount)
noErr(t, err)
if stats[0].TotalJobs != 544 {
t.Fatalf("Want 544, Got %d", stats[0].TotalJobs)
stats, err := r.JobsStats(getContext(t), []*model.JobFilter{})
noErr(t, err)
if stats[0].TotalJobs != expectedCount {
t.Fatalf("Want %d, Got %d", expectedCount, stats[0].TotalJobs)
}
}

View File

@@ -2,6 +2,35 @@
// All rights reserved. This file is part of cc-backend.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
// Package repository provides the data access and persistence layer for ClusterCockpit.
//
// This file implements tag management functionality for job categorization and classification.
// Tags support both manual assignment (via REST/GraphQL APIs) and automatic detection
// (via tagger plugins). The implementation includes role-based access control through
// tag scopes and maintains bidirectional consistency between the SQL database and
// the file-based job archive.
//
// Database Schema:
//
// CREATE TABLE tag (
// id INTEGER PRIMARY KEY AUTOINCREMENT,
// tag_type VARCHAR(255) NOT NULL,
// tag_name VARCHAR(255) NOT NULL,
// tag_scope VARCHAR(255) NOT NULL DEFAULT "global",
// CONSTRAINT tag_unique UNIQUE (tag_type, tag_name, tag_scope)
// );
//
// CREATE TABLE jobtag (
// job_id INTEGER,
// tag_id INTEGER,
// PRIMARY KEY (job_id, tag_id),
// FOREIGN KEY (job_id) REFERENCES job(id) ON DELETE CASCADE,
// FOREIGN KEY (tag_id) REFERENCES tag(id) ON DELETE CASCADE
// );
//
// The jobtag junction table enables many-to-many relationships between jobs and tags.
// CASCADE deletion ensures referential integrity when jobs or tags are removed.
package repository
import (
@@ -10,15 +39,39 @@ import (
"strings"
"github.com/ClusterCockpit/cc-backend/pkg/archive"
cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
"github.com/ClusterCockpit/cc-lib/schema"
cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger"
"github.com/ClusterCockpit/cc-lib/v2/schema"
sq "github.com/Masterminds/squirrel"
)
// Tag Scope Rules:
//
// Tags in ClusterCockpit have three visibility scopes that control who can see and use them:
//
// 1. "global" - Visible to all users, can be used by anyone
// Example: System-generated tags like "energy-efficient", "failed", "short"
//
// 2. "private" - Only visible to the creating user
// Example: Personal notes like "needs-review", "interesting-case"
//
// 3. "admin" - Only visible to users with admin or support roles
// Example: Internal notes like "hardware-issue", "billing-problem"
//
// Authorization Rules:
// - Regular users can only create/see "global" and their own "private" tags
// - Admin/Support can create/see all scopes including "admin" tags
// - Users can only add tags to jobs they have permission to view
// - Tag scope is enforced at query time in GetTags() and CountTags()
// AddTag adds the tag with the database id `tag` to the job with the database id `job`.
// Requires user authentication for security checks.
//
// The user must have permission to view the job. Tag visibility is determined by scope:
// - "global" tags: visible to all users
// - "private" tags: only visible to the tag creator
// - "admin" tags: only visible to admin/support users
func (r *JobRepository) AddTag(user *schema.User, job int64, tag int64) ([]*schema.Tag, error) {
j, err := r.FindByIdWithUser(user, job)
j, err := r.FindByIDWithUser(user, job)
if err != nil {
cclog.Warnf("Error finding job %d for user %s: %v", job, user.Username, err)
return nil, err
@@ -32,7 +85,7 @@ func (r *JobRepository) AddTag(user *schema.User, job int64, tag int64) ([]*sche
// AddTagDirect adds a tag without user security checks.
// Use only for internal/admin operations.
func (r *JobRepository) AddTagDirect(job int64, tag int64) ([]*schema.Tag, error) {
j, err := r.FindByIdDirect(job)
j, err := r.FindByIDDirect(job)
if err != nil {
cclog.Warnf("Error finding job %d: %v", job, err)
return nil, err
@@ -43,12 +96,12 @@ func (r *JobRepository) AddTagDirect(job int64, tag int64) ([]*schema.Tag, error
})
}
// Removes a tag from a job by tag id.
// Used by GraphQL API
// RemoveTag removes the tag with the database id `tag` from the job with the database id `job`.
// Requires user authentication for security checks. Used by GraphQL API.
func (r *JobRepository) RemoveTag(user *schema.User, job, tag int64) ([]*schema.Tag, error) {
j, err := r.FindByIdWithUser(user, job)
j, err := r.FindByIDWithUser(user, job)
if err != nil {
cclog.Warn("Error while finding job by id")
cclog.Warnf("Error while finding job %d for user %s during tag removal: %v", job, user.Username, err)
return nil, err
}
@@ -68,27 +121,27 @@ func (r *JobRepository) RemoveTag(user *schema.User, job, tag int64) ([]*schema.
archiveTags, err := r.getArchiveTags(&job)
if err != nil {
cclog.Warn("Error while getting tags for job")
cclog.Warnf("Error while getting archive tags for job %d in RemoveTag: %v", job, err)
return nil, err
}
return tags, archive.UpdateTags(j, archiveTags)
}
// Removes a tag from a job by tag info
// Used by REST API
// RemoveJobTagByRequest removes a tag from the job with the database id `job` by tag type, name, and scope.
// Requires user authentication for security checks. Used by REST API.
func (r *JobRepository) RemoveJobTagByRequest(user *schema.User, job int64, tagType string, tagName string, tagScope string) ([]*schema.Tag, error) {
// Get Tag ID to delete
tagID, exists := r.TagId(tagType, tagName, tagScope)
tagID, exists := r.TagID(tagType, tagName, tagScope)
if !exists {
cclog.Warnf("Tag does not exist (name, type, scope): %s, %s, %s", tagName, tagType, tagScope)
return nil, fmt.Errorf("tag does not exist (name, type, scope): %s, %s, %s", tagName, tagType, tagScope)
}
// Get Job
j, err := r.FindByIdWithUser(user, job)
j, err := r.FindByIDWithUser(user, job)
if err != nil {
cclog.Warn("Error while finding job by id")
cclog.Warnf("Error while finding job %d for user %s during tag removal by request: %v", job, user.Username, err)
return nil, err
}
@@ -103,19 +156,30 @@ func (r *JobRepository) RemoveJobTagByRequest(user *schema.User, job int64, tagT
tags, err := r.GetTags(user, &job)
if err != nil {
cclog.Warn("Error while getting tags for job")
cclog.Warnf("Error while getting tags for job %d in RemoveJobTagByRequest: %v", job, err)
return nil, err
}
archiveTags, err := r.getArchiveTags(&job)
if err != nil {
cclog.Warn("Error while getting tags for job")
cclog.Warnf("Error while getting archive tags for job %d in RemoveJobTagByRequest: %v", job, err)
return nil, err
}
return tags, archive.UpdateTags(j, archiveTags)
}
// removeTagFromArchiveJobs updates the job archive for all affected jobs after a tag deletion.
//
// This function is called asynchronously (via goroutine) after removing a tag from the database
// to synchronize the file-based job archive with the database state. Errors are logged but not
// returned since this runs in the background.
//
// Parameters:
// - jobIds: Database IDs of all jobs that had the deleted tag
//
// Implementation note: Each job is processed individually to handle partial failures gracefully.
// If one job fails to update, others will still be processed.
func (r *JobRepository) removeTagFromArchiveJobs(jobIds []int64) {
for _, j := range jobIds {
tags, err := r.getArchiveTags(&j)
@@ -124,7 +188,7 @@ func (r *JobRepository) removeTagFromArchiveJobs(jobIds []int64) {
continue
}
job, err := r.FindByIdDirect(j)
job, err := r.FindByIDDirect(j)
if err != nil {
cclog.Warnf("Error while getting job %d", j)
continue
@@ -138,18 +202,18 @@ func (r *JobRepository) removeTagFromArchiveJobs(jobIds []int64) {
// Used by REST API. Does not update tagged jobs in Job archive.
func (r *JobRepository) RemoveTagByRequest(tagType string, tagName string, tagScope string) error {
// Get Tag ID to delete
tagID, exists := r.TagId(tagType, tagName, tagScope)
tagID, exists := r.TagID(tagType, tagName, tagScope)
if !exists {
cclog.Warnf("Tag does not exist (name, type, scope): %s, %s, %s", tagName, tagType, tagScope)
return fmt.Errorf("tag does not exist (name, type, scope): %s, %s, %s", tagName, tagType, tagScope)
}
return r.RemoveTagById(tagID)
return r.RemoveTagByID(tagID)
}
// RemoveTagByID removes a tag from the database by tag id.
// Used by GraphQL API.
func (r *JobRepository) RemoveTagById(tagID int64) error {
func (r *JobRepository) RemoveTagByID(tagID int64) error {
jobIds, err := r.FindJobIdsByTag(tagID)
if err != nil {
return err
@@ -179,8 +243,16 @@ func (r *JobRepository) RemoveTagById(tagID int64) error {
return nil
}
// CreateTag creates a new tag with the specified type and name and returns its database id.
func (r *JobRepository) CreateTag(tagType string, tagName string, tagScope string) (tagId int64, err error) {
// CreateTag creates a new tag with the specified type, name, and scope.
// Returns the database ID of the newly created tag.
//
// Scope defaults to "global" if empty string is provided.
// Valid scopes: "global", "private", "admin"
//
// Example:
//
// tagID, err := repo.CreateTag("performance", "high-memory", "global")
func (r *JobRepository) CreateTag(tagType string, tagName string, tagScope string) (tagID int64, err error) {
// Default to "Global" scope if none defined
if tagScope == "" {
tagScope = "global"
@@ -198,8 +270,14 @@ func (r *JobRepository) CreateTag(tagType string, tagName string, tagScope strin
return res.LastInsertId()
}
// CountTags returns all tags visible to the user and the count of jobs for each tag.
// Applies scope-based filtering to respect tag visibility rules.
//
// Returns:
// - tags: slice of tags the user can see
// - counts: map from a key built with fmt.Sprint(tagType, tagName, tagID) to the job count of that tag
// - err: any error encountered
func (r *JobRepository) CountTags(user *schema.User) (tags []schema.Tag, counts map[string]int, err error) {
// Fetch all Tags in DB for Display in Frontend Tag-View
tags = make([]schema.Tag, 0, 100)
xrows, err := r.DB.Queryx("SELECT id, tag_type, tag_name, tag_scope FROM tag")
if err != nil {
@@ -228,10 +306,10 @@ func (r *JobRepository) CountTags(user *schema.User) (tags []schema.Tag, counts
}
// Query and Count Jobs with attached Tags
q := sq.Select("t.tag_name, t.id, count(jt.tag_id)").
q := sq.Select("t.tag_type, t.tag_name, t.id, count(jt.tag_id)").
From("tag t").
LeftJoin("jobtag jt ON t.id = jt.tag_id").
GroupBy("t.tag_name")
GroupBy("t.tag_type, t.tag_name")
// Build scope list for filtering
var scopeBuilder strings.Builder
@@ -265,14 +343,15 @@ func (r *JobRepository) CountTags(user *schema.User) (tags []schema.Tag, counts
counts = make(map[string]int)
for rows.Next() {
var tagType string
var tagName string
var tagId int
var tagID int
var count int
if err = rows.Scan(&tagName, &tagId, &count); err != nil {
if err = rows.Scan(&tagType, &tagName, &tagID, &count); err != nil {
return nil, nil, err
}
// Use tagType and tagID as additional map-key components to differentiate tags with identical names
counts[fmt.Sprint(tagName, tagId)] = count
counts[fmt.Sprint(tagType, tagName, tagID)] = count
}
err = rows.Err()
@@ -280,18 +359,44 @@ func (r *JobRepository) CountTags(user *schema.User) (tags []schema.Tag, counts
}
var (
ErrTagNotFound = errors.New("the tag does not exist")
ErrJobNotOwned = errors.New("user is not owner of job")
ErrTagNoAccess = errors.New("user not permitted to use that tag")
ErrTagPrivateScope = errors.New("tag is private to another user")
ErrTagAdminScope = errors.New("tag requires admin privileges")
// ErrTagNotFound is returned when a tag ID or tag identifier (type, name, scope) does not exist in the database.
ErrTagNotFound = errors.New("the tag does not exist")
// ErrJobNotOwned is returned when a user attempts to tag a job they do not have permission to access.
ErrJobNotOwned = errors.New("user is not owner of job")
// ErrTagNoAccess is returned when a user attempts to use a tag they cannot access due to scope restrictions.
ErrTagNoAccess = errors.New("user not permitted to use that tag")
// ErrTagPrivateScope is returned when a user attempts to access another user's private tag.
ErrTagPrivateScope = errors.New("tag is private to another user")
// ErrTagAdminScope is returned when a non-admin user attempts to use an admin-scoped tag.
ErrTagAdminScope = errors.New("tag requires admin privileges")
// ErrTagsIncompatScopes is returned when attempting to combine admin and non-admin scoped tags in a single operation.
ErrTagsIncompatScopes = errors.New("combining admin and non-admin scoped tags not allowed")
)
// addJobTag is a helper function that inserts a job-tag association and updates the archive.
// Returns the updated tag list for the job.
func (r *JobRepository) addJobTag(jobId int64, tagId int64, job *schema.Job, getTags func() ([]*schema.Tag, error)) ([]*schema.Tag, error) {
q := sq.Insert("jobtag").Columns("job_id", "tag_id").Values(jobId, tagId)
//
// This function performs three operations in sequence (no enclosing transaction is visible here, so they are not atomic as a unit):
// 1. Inserts the job-tag association into the jobtag junction table
// 2. Retrieves the updated tag list for the job (using the provided getTags callback)
// 3. Updates the job archive with the new tags to maintain database-archive consistency
//
// Parameters:
// - jobID: Database ID of the job
// - tagID: Database ID of the tag to associate
// - job: Full job object needed for archive update
// - getTags: Callback function to retrieve updated tags (allows different security contexts)
//
// Returns the complete updated tag list for the job or an error.
//
// Note: This function does NOT validate tag scope permissions - callers must perform
// authorization checks before invoking this helper.
func (r *JobRepository) addJobTag(jobID int64, tagID int64, job *schema.Job, getTags func() ([]*schema.Tag, error)) ([]*schema.Tag, error) {
q := sq.Insert("jobtag").Columns("job_id", "tag_id").Values(jobID, tagID)
if _, err := q.RunWith(r.stmtCache).Exec(); err != nil {
s, _, _ := q.ToSql()
@@ -301,13 +406,13 @@ func (r *JobRepository) addJobTag(jobId int64, tagId int64, job *schema.Job, get
tags, err := getTags()
if err != nil {
cclog.Warnf("Error getting tags for job %d: %v", jobId, err)
cclog.Warnf("Error getting tags for job %d: %v", jobID, err)
return nil, err
}
archiveTags, err := r.getArchiveTags(&jobId)
archiveTags, err := r.getArchiveTags(&jobID)
if err != nil {
cclog.Warnf("Error getting archive tags for job %d: %v", jobId, err)
cclog.Warnf("Error getting archive tags for job %d: %v", jobID, err)
return nil, err
}
@@ -316,7 +421,7 @@ func (r *JobRepository) addJobTag(jobId int64, tagId int64, job *schema.Job, get
// AddTagOrCreate adds the tag with the specified type, name, and scope to the job with the database id `jobID`.
// If such a tag does not yet exist, it is created.
func (r *JobRepository) AddTagOrCreate(user *schema.User, jobId int64, tagType string, tagName string, tagScope string) (tagId int64, err error) {
func (r *JobRepository) AddTagOrCreate(user *schema.User, jobID int64, tagType string, tagName string, tagScope string) (tagID int64, err error) {
// Default to "Global" scope if none defined
if tagScope == "" {
tagScope = "global"
@@ -330,44 +435,45 @@ func (r *JobRepository) AddTagOrCreate(user *schema.User, jobId int64, tagType s
return 0, fmt.Errorf("cannot write tag scope with current authorization")
}
tagId, exists := r.TagId(tagType, tagName, tagScope)
tagID, exists := r.TagID(tagType, tagName, tagScope)
if !exists {
tagId, err = r.CreateTag(tagType, tagName, tagScope)
tagID, err = r.CreateTag(tagType, tagName, tagScope)
if err != nil {
return 0, err
}
}
if _, err := r.AddTag(user, jobId, tagId); err != nil {
if _, err := r.AddTag(user, jobID, tagID); err != nil {
return 0, err
}
return tagId, nil
return tagID, nil
}
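// AddTagOrCreateDirect adds the "global"-scoped tag with the specified type and name to the job
// with the database id `jobID`, creating the tag if it does not yet exist.
// Used in auto tagger plugins.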
func (r *JobRepository) AddTagOrCreateDirect(jobId int64, tagType string, tagName string) (tagId int64, err error) {
func (r *JobRepository) AddTagOrCreateDirect(jobID int64, tagType string, tagName string) (tagID int64, err error) {
tagScope := "global"
tagId, exists := r.TagId(tagType, tagName, tagScope)
tagID, exists := r.TagID(tagType, tagName, tagScope)
if !exists {
tagId, err = r.CreateTag(tagType, tagName, tagScope)
tagID, err = r.CreateTag(tagType, tagName, tagScope)
if err != nil {
return 0, err
}
}
if _, err := r.AddTagDirect(jobId, tagId); err != nil {
cclog.Infof("Adding tag %s:%s:%s (direct)", tagType, tagName, tagScope)
if _, err := r.AddTagDirect(jobID, tagID); err != nil {
return 0, err
}
return tagId, nil
return tagID, nil
}
func (r *JobRepository) HasTag(jobId int64, tagType string, tagName string) bool {
func (r *JobRepository) HasTag(jobID int64, tagType string, tagName string) bool {
var id int64
q := sq.Select("id").From("tag").Join("jobtag ON jobtag.tag_id = tag.id").
Where("jobtag.job_id = ?", jobId).Where("tag.tag_type = ?", tagType).
Where("jobtag.job_id = ?", jobID).Where("tag.tag_type = ?", tagType).
Where("tag.tag_name = ?", tagName)
err := q.RunWith(r.stmtCache).QueryRow().Scan(&id)
if err != nil {
@@ -377,21 +483,21 @@ func (r *JobRepository) HasTag(jobId int64, tagType string, tagName string) bool
}
}
// TagId returns the database id of the tag with the specified type and name.
func (r *JobRepository) TagId(tagType string, tagName string, tagScope string) (tagId int64, exists bool) {
// TagID returns the database id of the tag with the specified type, name, and scope.
func (r *JobRepository) TagID(tagType string, tagName string, tagScope string) (tagID int64, exists bool) {
exists = true
if err := sq.Select("id").From("tag").
Where("tag.tag_type = ?", tagType).Where("tag.tag_name = ?", tagName).Where("tag.tag_scope = ?", tagScope).
RunWith(r.stmtCache).QueryRow().Scan(&tagId); err != nil {
RunWith(r.stmtCache).QueryRow().Scan(&tagID); err != nil {
exists = false
}
return
}
// TagInfo returns the type, name, and scope of the tag with the specified id.
func (r *JobRepository) TagInfo(tagId int64) (tagType string, tagName string, tagScope string, exists bool) {
func (r *JobRepository) TagInfo(tagID int64) (tagType string, tagName string, tagScope string, exists bool) {
exists = true
if err := sq.Select("tag.tag_type", "tag.tag_name", "tag.tag_scope").From("tag").Where("tag.id = ?", tagId).
if err := sq.Select("tag.tag_type", "tag.tag_name", "tag.tag_scope").From("tag").Where("tag.id = ?", tagID).
RunWith(r.stmtCache).QueryRow().Scan(&tagType, &tagName, &tagScope); err != nil {
exists = false
}
@@ -417,7 +523,7 @@ func (r *JobRepository) GetTags(user *schema.User, job *int64) ([]*schema.Tag, e
for rows.Next() {
tag := &schema.Tag{}
if err := rows.Scan(&tag.ID, &tag.Type, &tag.Name, &tag.Scope); err != nil {
cclog.Warn("Error while scanning rows")
cclog.Warnf("Error while scanning tag rows in GetTags: %v", err)
return nil, err
}
// Handle Scope Filtering: Tag Scope is Global, Private (== Username) or User is auth'd to view Admin Tags
@@ -455,7 +561,7 @@ func (r *JobRepository) GetTagsDirect(job *int64) ([]*schema.Tag, error) {
for rows.Next() {
tag := &schema.Tag{}
if err := rows.Scan(&tag.ID, &tag.Type, &tag.Name, &tag.Scope); err != nil {
cclog.Warn("Error while scanning rows")
cclog.Warnf("Error while scanning tag rows in GetTagsDirect: %v", err)
return nil, err
}
tags = append(tags, tag)
@@ -468,7 +574,18 @@ func (r *JobRepository) GetTagsDirect(job *int64) ([]*schema.Tag, error) {
return tags, nil
}
// GetArchiveTags returns a list of all tags *regardless of scope* for archiving if job is nil or of the tags that the job with that database ID has.
// getArchiveTags returns all tags for a job WITHOUT applying scope-based filtering.
//
// This internal function is used exclusively for job archive synchronization where we need
// to store all tags regardless of the current user's permissions. Unlike GetTags() which
// filters by scope, this returns the complete unfiltered tag list.
//
// Parameters:
// - job: Pointer to job database ID, or nil to return all tags in the system
//
// Returns all tags without scope filtering, used only for archive operations.
//
// WARNING: Do NOT expose this function to user-facing APIs as it bypasses authorization.
func (r *JobRepository) getArchiveTags(job *int64) ([]*schema.Tag, error) {
q := sq.Select("id", "tag_type", "tag_name", "tag_scope").From("tag")
if job != nil {
@@ -487,7 +604,7 @@ func (r *JobRepository) getArchiveTags(job *int64) ([]*schema.Tag, error) {
for rows.Next() {
tag := &schema.Tag{}
if err := rows.Scan(&tag.ID, &tag.Type, &tag.Name, &tag.Scope); err != nil {
cclog.Warn("Error while scanning rows")
cclog.Warnf("Error while scanning tag rows in getArchiveTags: %v", err)
return nil, err
}
tags = append(tags, tag)
@@ -500,18 +617,18 @@ func (r *JobRepository) getArchiveTags(job *int64) ([]*schema.Tag, error) {
return tags, nil
}
func (r *JobRepository) ImportTag(jobId int64, tagType string, tagName string, tagScope string) (err error) {
func (r *JobRepository) ImportTag(jobID int64, tagType string, tagName string, tagScope string) (err error) {
// Import has no scope context: the tag is only imported from the metafile into the DB (no recursive archive update required), so only an error is returned.
tagId, exists := r.TagId(tagType, tagName, tagScope)
tagID, exists := r.TagID(tagType, tagName, tagScope)
if !exists {
tagId, err = r.CreateTag(tagType, tagName, tagScope)
tagID, err = r.CreateTag(tagType, tagName, tagScope)
if err != nil {
return err
}
}
q := sq.Insert("jobtag").Columns("job_id", "tag_id").Values(jobId, tagId)
q := sq.Insert("jobtag").Columns("job_id", "tag_id").Values(jobID, tagID)
if _, err := q.RunWith(r.stmtCache).Exec(); err != nil {
s, _, _ := q.ToSql()
@@ -522,16 +639,38 @@ func (r *JobRepository) ImportTag(jobId int64, tagType string, tagName string, t
return nil
}
// checkScopeAuth validates whether a user is authorized to perform an operation on a tag with the given scope.
//
// This function implements the tag scope authorization matrix:
//
// Scope | Read Access | Write Access
// -------------|----------------------------------|----------------------------------
// "global" | All users | Admin, Support, API-only
// "admin" | Admin, Support | Admin, API-only
// <username> | Owner only | Owner only (private tags)
//
// Parameters:
// - user: User attempting the operation (must not be nil)
// - operation: Either "read" or "write"
// - scope: Tag scope value ("global", "admin", or username for private tags)
//
// Returns:
// - pass: true if authorized, false if denied
// - err: error only if operation is invalid or user is nil
//
// Special cases:
// - API-only users (single role: RoleApi) can write to admin and global scopes for automation
// - Private tags use the username as scope, granting exclusive access to that user
func (r *JobRepository) checkScopeAuth(user *schema.User, operation string, scope string) (pass bool, err error) {
if user != nil {
switch {
case operation == "write" && scope == "admin":
if user.HasRole(schema.RoleAdmin) || (len(user.Roles) == 1 && user.HasRole(schema.RoleApi)) {
if user.HasRole(schema.RoleAdmin) || (len(user.Roles) == 1 && user.HasRole(schema.RoleAPI)) {
return true, nil
}
return false, nil
case operation == "write" && scope == "global":
if user.HasAnyRole([]schema.Role{schema.RoleAdmin, schema.RoleSupport}) || (len(user.Roles) == 1 && user.HasRole(schema.RoleApi)) {
if user.HasAnyRole([]schema.Role{schema.RoleAdmin, schema.RoleSupport}) || (len(user.Roles) == 1 && user.HasRole(schema.RoleAPI)) {
return true, nil
}
return false, nil

Binary file not shown.

View File

@@ -62,7 +62,7 @@ func (r *JobRepository) TransactionEnd(t *Transaction) error {
func (r *JobRepository) TransactionAddNamed(
t *Transaction,
query string,
args ...interface{},
args ...any,
) (int64, error) {
if t.tx == nil {
return 0, fmt.Errorf("transaction is nil or already completed")
@@ -82,7 +82,7 @@ func (r *JobRepository) TransactionAddNamed(
}
// TransactionAdd executes a query within the transaction.
func (r *JobRepository) TransactionAdd(t *Transaction, query string, args ...interface{}) (int64, error) {
func (r *JobRepository) TransactionAdd(t *Transaction, query string, args ...any) (int64, error) {
if t.tx == nil {
return 0, fmt.Errorf("transaction is nil or already completed")
}

View File

@@ -0,0 +1,311 @@
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved. This file is part of cc-backend.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package repository
import (
"testing"
_ "github.com/mattn/go-sqlite3"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
// TestTransactionInit verifies that TransactionInit returns a non-nil
// transaction wrapper whose inner tx handle is set.
func TestTransactionInit(t *testing.T) {
	repo := setup(t)

	t.Run("successful transaction init", func(t *testing.T) {
		txn, initErr := repo.TransactionInit()
		require.NoError(t, initErr, "TransactionInit should succeed")
		require.NotNil(t, txn, "Transaction should not be nil")
		require.NotNil(t, txn.tx, "Transaction.tx should not be nil")

		// Clean up: nothing was written, so simply roll back.
		require.NoError(t, txn.Rollback(), "Rollback should succeed")
	})
}
// TestTransactionCommit covers Commit: a committed insert must be visible
// through the plain DB handle, and committing the same transaction a second
// time must return an error with the expected message.
func TestTransactionCommit(t *testing.T) {
	r := setup(t)

	t.Run("commit after successful operations", func(t *testing.T) {
		tx, err := r.TransactionInit()
		require.NoError(t, err)

		// Insert a test tag
		_, err = r.TransactionAdd(tx, "INSERT INTO tag (tag_type, tag_name, tag_scope) VALUES (?, ?, ?)",
			"test_type", "test_tag_commit", "global")
		require.NoError(t, err, "TransactionAdd should succeed")

		// Commit the transaction
		err = tx.Commit()
		require.NoError(t, err, "Commit should succeed")

		// Verify the tag was inserted
		var count int
		err = r.DB.QueryRow("SELECT COUNT(*) FROM tag WHERE tag_name = ?", "test_tag_commit").Scan(&count)
		require.NoError(t, err)
		assert.Equal(t, 1, count, "Tag should be committed to database")

		// Clean up
		_, err = r.DB.Exec("DELETE FROM tag WHERE tag_name = ?", "test_tag_commit")
		require.NoError(t, err)
	})

	t.Run("commit on already committed transaction", func(t *testing.T) {
		tx, err := r.TransactionInit()
		require.NoError(t, err)

		err = tx.Commit()
		require.NoError(t, err, "First commit should succeed")

		err = tx.Commit()
		// require instead of assert: the message check below calls err.Error(),
		// which would panic on a nil error instead of failing the test cleanly.
		require.Error(t, err, "Second commit should fail")
		assert.Contains(t, err.Error(), "transaction already committed or rolled back")
	})
}
// TestTransactionRollback checks that Rollback discards pending writes and
// that calling it on an already finished transaction (rolled back or
// committed) reports no error.
func TestTransactionRollback(t *testing.T) {
	repo := setup(t)

	t.Run("rollback after operations", func(t *testing.T) {
		txn, initErr := repo.TransactionInit()
		require.NoError(t, initErr)

		// Stage a tag inside the transaction ...
		_, addErr := repo.TransactionAdd(txn, "INSERT INTO tag (tag_type, tag_name, tag_scope) VALUES (?, ?, ?)",
			"test_type", "test_tag_rollback", "global")
		require.NoError(t, addErr, "TransactionAdd should succeed")

		// ... and discard it again.
		require.NoError(t, txn.Rollback(), "Rollback should succeed")

		// The staged tag must not show up when counting through the DB handle.
		var found int
		row := repo.DB.QueryRow("SELECT COUNT(*) FROM tag WHERE tag_name = ?", "test_tag_rollback")
		require.NoError(t, row.Scan(&found))
		assert.Equal(t, 0, found, "Tag should not be in database after rollback")
	})

	t.Run("rollback on already rolled back transaction", func(t *testing.T) {
		txn, initErr := repo.TransactionInit()
		require.NoError(t, initErr)

		require.NoError(t, txn.Rollback(), "First rollback should succeed")
		assert.NoError(t, txn.Rollback(), "Second rollback should be safe (no-op)")
	})

	t.Run("rollback on committed transaction", func(t *testing.T) {
		txn, initErr := repo.TransactionInit()
		require.NoError(t, initErr)

		require.NoError(t, txn.Commit())
		assert.NoError(t, txn.Rollback(), "Rollback after commit should be safe (no-op)")
	})
}
// TestTransactionAdd exercises TransactionAdd: a successful insert returns a
// positive id, while a nil inner transaction, invalid SQL and an already
// committed transaction must all yield an error.
func TestTransactionAdd(t *testing.T) {
	r := setup(t)

	t.Run("insert with TransactionAdd", func(t *testing.T) {
		tx, err := r.TransactionInit()
		require.NoError(t, err)
		defer tx.Rollback()

		id, err := r.TransactionAdd(tx, "INSERT INTO tag (tag_type, tag_name, tag_scope) VALUES (?, ?, ?)",
			"test_type", "test_add", "global")
		require.NoError(t, err, "TransactionAdd should succeed")
		assert.Greater(t, id, int64(0), "Should return valid insert ID")
	})

	t.Run("error on nil transaction", func(t *testing.T) {
		tx := &Transaction{tx: nil}

		_, err := r.TransactionAdd(tx, "INSERT INTO tag (tag_type, tag_name, tag_scope) VALUES (?, ?, ?)",
			"test_type", "test_nil", "global")
		// require instead of assert: the message check below calls err.Error(),
		// which would panic on a nil error instead of failing the test cleanly.
		require.Error(t, err, "Should error on nil transaction")
		assert.Contains(t, err.Error(), "transaction is nil or already completed")
	})

	t.Run("error on invalid SQL", func(t *testing.T) {
		tx, err := r.TransactionInit()
		require.NoError(t, err)
		defer tx.Rollback()

		_, err = r.TransactionAdd(tx, "INVALID SQL STATEMENT")
		assert.Error(t, err, "Should error on invalid SQL")
	})

	t.Run("error after transaction committed", func(t *testing.T) {
		tx, err := r.TransactionInit()
		require.NoError(t, err)

		err = tx.Commit()
		require.NoError(t, err)

		_, err = r.TransactionAdd(tx, "INSERT INTO tag (tag_type, tag_name, tag_scope) VALUES (?, ?, ?)",
			"test_type", "test_after_commit", "global")
		assert.Error(t, err, "Should error when transaction is already committed")
	})
}
// TestTransactionAddNamed exercises TransactionAddNamed with a struct carrying
// `db` tags as named arguments, and checks that a nil inner transaction is
// rejected with the expected error message.
func TestTransactionAddNamed(t *testing.T) {
	r := setup(t)

	t.Run("insert with TransactionAddNamed", func(t *testing.T) {
		tx, err := r.TransactionInit()
		require.NoError(t, err)
		defer tx.Rollback()

		// Field tags match the :type, :name and :scope placeholders below.
		type TagArgs struct {
			Type  string `db:"type"`
			Name  string `db:"name"`
			Scope string `db:"scope"`
		}

		args := TagArgs{
			Type:  "test_type",
			Name:  "test_named",
			Scope: "global",
		}

		id, err := r.TransactionAddNamed(tx,
			"INSERT INTO tag (tag_type, tag_name, tag_scope) VALUES (:type, :name, :scope)",
			args)
		require.NoError(t, err, "TransactionAddNamed should succeed")
		assert.Greater(t, id, int64(0), "Should return valid insert ID")
	})

	t.Run("error on nil transaction", func(t *testing.T) {
		tx := &Transaction{tx: nil}

		_, err := r.TransactionAddNamed(tx, "INSERT INTO tag (tag_type, tag_name, tag_scope) VALUES (:type, :name, :scope)",
			map[string]any{"type": "test", "name": "test", "scope": "global"})
		// require instead of assert: the message check below calls err.Error(),
		// which would panic on a nil error instead of failing the test cleanly.
		require.Error(t, err, "Should error on nil transaction")
		assert.Contains(t, err.Error(), "transaction is nil or already completed")
	})
}
// TestTransactionMultipleOperations runs several inserts through a single
// transaction and checks that Commit persists all of them while Rollback
// persists none.
func TestTransactionMultipleOperations(t *testing.T) {
	repo := setup(t)

	const insertTag = "INSERT INTO tag (tag_type, tag_name, tag_scope) VALUES (?, ?, ?)"

	t.Run("multiple inserts in single transaction", func(t *testing.T) {
		txn, initErr := repo.TransactionInit()
		require.NoError(t, initErr)
		defer txn.Rollback()

		// Five tags with suffixes a..e; idx is the position of the suffix.
		for idx, suffix := range "abcde" {
			_, addErr := repo.TransactionAdd(txn, insertTag,
				"test_type", "test_multi_"+string(suffix), "global")
			require.NoError(t, addErr, "Insert %d should succeed", idx)
		}

		require.NoError(t, txn.Commit(), "Commit should succeed")

		// Every tag inserted above must be counted.
		var found int
		row := repo.DB.QueryRow("SELECT COUNT(*) FROM tag WHERE tag_name LIKE 'test_multi_%'")
		require.NoError(t, row.Scan(&found))
		assert.Equal(t, 5, found, "All 5 tags should be committed")

		// Clean up
		_, delErr := repo.DB.Exec("DELETE FROM tag WHERE tag_name LIKE 'test_multi_%'")
		require.NoError(t, delErr)
	})

	t.Run("rollback undoes all operations", func(t *testing.T) {
		txn, initErr := repo.TransactionInit()
		require.NoError(t, initErr)

		// Three tags with suffixes a..c.
		for _, suffix := range "abc" {
			_, addErr := repo.TransactionAdd(txn, insertTag,
				"test_type", "test_rollback_"+string(suffix), "global")
			require.NoError(t, addErr)
		}

		require.NoError(t, txn.Rollback(), "Rollback should succeed")

		// None of the tags may be counted after the rollback.
		var found int
		row := repo.DB.QueryRow("SELECT COUNT(*) FROM tag WHERE tag_name LIKE 'test_rollback_%'")
		require.NoError(t, row.Scan(&found))
		assert.Equal(t, 0, found, "No tags should be in database after rollback")
	})
}
// TestTransactionEnd checks that the deprecated TransactionEnd persists the
// pending insert, i.e. the row is visible through the DB handle afterwards.
func TestTransactionEnd(t *testing.T) {
	repo := setup(t)

	t.Run("deprecated TransactionEnd calls Commit", func(t *testing.T) {
		txn, initErr := repo.TransactionInit()
		require.NoError(t, initErr)

		_, addErr := repo.TransactionAdd(txn, "INSERT INTO tag (tag_type, tag_name, tag_scope) VALUES (?, ?, ?)",
			"test_type", "test_end", "global")
		require.NoError(t, addErr)

		// Finish the transaction through the deprecated entry point.
		require.NoError(t, repo.TransactionEnd(txn), "TransactionEnd should succeed")

		// The tag must now be stored.
		var found int
		row := repo.DB.QueryRow("SELECT COUNT(*) FROM tag WHERE tag_name = ?", "test_end")
		require.NoError(t, row.Scan(&found))
		assert.Equal(t, 1, found, "Tag should be committed")

		// Clean up
		_, delErr := repo.DB.Exec("DELETE FROM tag WHERE tag_name = ?", "test_end")
		require.NoError(t, delErr)
	})
}
// TestTransactionDeferPattern exercises the "defer Rollback, then Commit"
// idiom: the deferred Rollback runs after a successful Commit and the
// inserted row must still be present.
func TestTransactionDeferPattern(t *testing.T) {
	repo := setup(t)

	t.Run("defer rollback pattern", func(t *testing.T) {
		// commitTag mirrors how callers are expected to use a transaction.
		commitTag := func() error {
			txn, initErr := repo.TransactionInit()
			if initErr != nil {
				return initErr
			}
			defer txn.Rollback() // Safe to call even after commit

			if _, addErr := repo.TransactionAdd(txn, "INSERT INTO tag (tag_type, tag_name, tag_scope) VALUES (?, ?, ?)",
				"test_type", "test_defer", "global"); addErr != nil {
				return addErr
			}

			return txn.Commit()
		}

		require.NoError(t, commitTag(), "Function should succeed")

		// The tag must be stored even though Rollback ran afterwards.
		var found int
		row := repo.DB.QueryRow("SELECT COUNT(*) FROM tag WHERE tag_name = ?", "test_defer")
		require.NoError(t, row.Scan(&found))
		assert.Equal(t, 1, found, "Tag should be committed despite defer rollback")

		// Clean up
		_, delErr := repo.DB.Exec("DELETE FROM tag WHERE tag_name = ?", "test_defer")
		require.NoError(t, delErr)
	})
}

View File

@@ -10,18 +10,38 @@ import (
"encoding/json"
"errors"
"fmt"
"reflect"
"strings"
"sync"
"github.com/ClusterCockpit/cc-backend/internal/config"
"github.com/ClusterCockpit/cc-backend/internal/graph/model"
cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
"github.com/ClusterCockpit/cc-lib/schema"
cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger"
"github.com/ClusterCockpit/cc-lib/v2/schema"
sq "github.com/Masterminds/squirrel"
"github.com/jmoiron/sqlx"
"golang.org/x/crypto/bcrypt"
)
// Authentication and Role System:
//
// ClusterCockpit supports multiple authentication sources:
// - Local: Username/password stored in database (password hashed with bcrypt)
// - LDAP: External LDAP/Active Directory authentication
// - JWT: Token-based authentication for API access
//
// Role Hierarchy (from highest to lowest privilege):
// 1. "admin" - Full system access, can manage all users and jobs
// 2. "support" - Can view all jobs but limited management capabilities
// 3. "manager" - Can manage specific projects and their users
// 4. "api" - Programmatic access for job submission/management
// 5. "user" - Default role, can only view own jobs
//
// Project Association:
// - Managers have a list of projects they oversee
// - Regular users' project membership is determined by job data
// - Managers can view/manage all jobs within their projects
var (
userRepoOnce sync.Once
userRepoInstance *UserRepository
@@ -44,6 +64,9 @@ func GetUserRepository() *UserRepository {
return userRepoInstance
}
// GetUser retrieves a user by username from the database.
// Returns the complete user record including hashed password, roles, and projects.
// Password field contains bcrypt hash for local auth users, empty for LDAP users.
func (r *UserRepository) GetUser(username string) (*schema.User, error) {
user := &schema.User{Username: username}
var hashedPassword, name, rawRoles, email, rawProjects sql.NullString
@@ -93,12 +116,18 @@ func (r *UserRepository) GetLdapUsernames() ([]string, error) {
return users, nil
}
// AddUser creates a new user in the database.
// Passwords are automatically hashed with bcrypt before storage.
// Auth source determines authentication method (local, LDAP, etc.).
//
// Required fields: Username, Roles
// Optional fields: Name, Email, Password, Projects, AuthSource
func (r *UserRepository) AddUser(user *schema.User) error {
rolesJson, _ := json.Marshal(user.Roles)
projectsJson, _ := json.Marshal(user.Projects)
cols := []string{"username", "roles", "projects"}
vals := []interface{}{user.Username, string(rolesJson), string(projectsJson)}
vals := []any{user.Username, string(rolesJson), string(projectsJson)}
if user.Name != "" {
cols = append(cols, "name")
@@ -159,8 +188,8 @@ func (r *UserRepository) AddUser(user *schema.User) error {
}
func (r *UserRepository) UpdateUser(dbUser *schema.User, user *schema.User) error {
// user contains updated info, apply to dbuser
// TODO: Discuss updatable fields
// user contains updated info -> Apply to dbUser
// --- Simple Name Update ---
if dbUser.Name != user.Name {
if _, err := sq.Update("hpc_user").Set("name", user.Name).Where("hpc_user.username = ?", dbUser.Username).RunWith(r.DB).Exec(); err != nil {
cclog.Errorf("error while updating name of user '%s'", user.Username)
@@ -168,13 +197,64 @@ func (r *UserRepository) UpdateUser(dbUser *schema.User, user *schema.User) erro
}
}
// Toggled until greenlit
// if dbUser.HasRole(schema.RoleManager) && !reflect.DeepEqual(dbUser.Projects, user.Projects) {
// projects, _ := json.Marshal(user.Projects)
// if _, err := sq.Update("hpc_user").Set("projects", projects).Where("hpc_user.username = ?", dbUser.Username).RunWith(r.DB).Exec(); err != nil {
// return err
// }
// }
// --- Def Helpers ---
// Helper to update roles
updateRoles := func(roles []string) error {
rolesJSON, _ := json.Marshal(roles)
_, err := sq.Update("hpc_user").Set("roles", rolesJSON).Where("hpc_user.username = ?", dbUser.Username).RunWith(r.DB).Exec()
return err
}
// Helper to update projects
updateProjects := func(projects []string) error {
projectsJSON, _ := json.Marshal(projects)
_, err := sq.Update("hpc_user").Set("projects", projectsJSON).Where("hpc_user.username = ?", dbUser.Username).RunWith(r.DB).Exec()
return err
}
// Helper to clear projects
clearProjects := func() error {
_, err := sq.Update("hpc_user").Set("projects", "[]").Where("hpc_user.username = ?", dbUser.Username).RunWith(r.DB).Exec()
return err
}
// --- Manager Role Handling ---
if dbUser.HasRole(schema.RoleManager) && user.HasRole(schema.RoleManager) && !reflect.DeepEqual(dbUser.Projects, user.Projects) {
// Existing Manager: update projects
if err := updateProjects(user.Projects); err != nil {
return err
}
} else if dbUser.HasRole(schema.RoleUser) && user.HasRole(schema.RoleManager) && user.HasNotRoles([]schema.Role{schema.RoleAdmin}) {
// New Manager: update roles and projects
if err := updateRoles(user.Roles); err != nil {
return err
}
if err := updateProjects(user.Projects); err != nil {
return err
}
} else if dbUser.HasRole(schema.RoleManager) && user.HasNotRoles([]schema.Role{schema.RoleAdmin, schema.RoleManager}) {
// Remove Manager: update roles and clear projects
if err := updateRoles(user.Roles); err != nil {
return err
}
if err := clearProjects(); err != nil {
return err
}
}
// --- Support Role Handling ---
if dbUser.HasRole(schema.RoleUser) && dbUser.HasNotRoles([]schema.Role{schema.RoleSupport}) &&
user.HasRole(schema.RoleSupport) && user.HasNotRoles([]schema.Role{schema.RoleAdmin}) {
// New Support: update roles
if err := updateRoles(user.Roles); err != nil {
return err
}
} else if dbUser.HasRole(schema.RoleSupport) && user.HasNotRoles([]schema.Role{schema.RoleAdmin, schema.RoleSupport}) {
// Remove Support: update roles
if err := updateRoles(user.Roles); err != nil {
return err
}
}
return nil
}
@@ -229,6 +309,14 @@ func (r *UserRepository) ListUsers(specialsOnly bool) ([]*schema.User, error) {
return users, nil
}
// AddRole adds a role to a user's role list.
// Role string is automatically lowercased.
// Valid roles: admin, support, manager, api, user
//
// Returns error if:
// - User doesn't exist
// - Role is invalid
// - User already has the role
func (r *UserRepository) AddRole(
ctx context.Context,
username string,
@@ -258,6 +346,11 @@ func (r *UserRepository) AddRole(
return nil
}
// RemoveRole removes a role from a user's role list.
//
// Special rules:
// - Cannot remove "manager" role while user has assigned projects
// - Must remove all projects first before removing manager role
func (r *UserRepository) RemoveRole(ctx context.Context, username string, queryrole string) error {
oldRole := strings.ToLower(queryrole)
user, err := r.GetUser(username)
@@ -294,6 +387,12 @@ func (r *UserRepository) RemoveRole(ctx context.Context, username string, queryr
return nil
}
// AddProject assigns a project to a manager user.
// Only users with the "manager" role can have assigned projects.
//
// Returns error if:
// - User doesn't have manager role
// - User already manages the project
func (r *UserRepository) AddProject(
ctx context.Context,
username string,
@@ -345,7 +444,7 @@ func (r *UserRepository) RemoveProject(ctx context.Context, username string, pro
}
if exists {
var result interface{}
var result any
if len(newprojects) == 0 {
result = "[]"
} else {

View File

@@ -12,9 +12,9 @@ import (
"github.com/ClusterCockpit/cc-backend/internal/config"
"github.com/ClusterCockpit/cc-backend/web"
cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
"github.com/ClusterCockpit/cc-lib/lrucache"
"github.com/ClusterCockpit/cc-lib/schema"
cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger"
"github.com/ClusterCockpit/cc-lib/v2/lrucache"
"github.com/ClusterCockpit/cc-lib/v2/schema"
"github.com/jmoiron/sqlx"
)

View File

@@ -10,9 +10,9 @@ import (
"testing"
"github.com/ClusterCockpit/cc-backend/internal/config"
ccconf "github.com/ClusterCockpit/cc-lib/ccConfig"
cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
"github.com/ClusterCockpit/cc-lib/schema"
ccconf "github.com/ClusterCockpit/cc-lib/v2/ccConfig"
cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger"
"github.com/ClusterCockpit/cc-lib/v2/schema"
_ "github.com/mattn/go-sqlite3"
)
@@ -20,33 +20,40 @@ func setupUserTest(t *testing.T) *UserCfgRepo {
const testconfig = `{
"main": {
"addr": "0.0.0.0:8080",
"apiAllowedIPs": [
"api-allowed-ips": [
"*"
]
},
"archive": {
"kind": "file",
"path": "./var/job-archive"
},
"clusters": [
{
"name": "testcluster",
"metricDataRepository": {"kind": "test", "url": "bla:8081"},
"filterRanges": {
"numNodes": { "from": 1, "to": 64 },
"duration": { "from": 0, "to": 86400 },
"startTime": { "from": "2022-01-01T00:00:00Z", "to": null }
}
}]
}`
cclog.Init("info", true)
dbfilepath := "testdata/job.db"
err := MigrateDB("sqlite3", dbfilepath)
// Copy test DB to a temp file for test isolation
srcData, err := os.ReadFile("testdata/job.db")
if err != nil {
t.Fatal(err)
}
Connect("sqlite3", dbfilepath)
dbfilepath := filepath.Join(t.TempDir(), "job.db")
if err := os.WriteFile(dbfilepath, srcData, 0o644); err != nil {
t.Fatal(err)
}
if err := ResetConnection(); err != nil {
t.Fatal(err)
}
t.Cleanup(func() {
ResetConnection()
})
err = MigrateDB(dbfilepath)
if err != nil {
t.Fatal(err)
}
Connect(dbfilepath)
tmpdir := t.TempDir()
cfgFilePath := filepath.Join(tmpdir, "config.json")
@@ -58,11 +65,7 @@ func setupUserTest(t *testing.T) *UserCfgRepo {
// Load and check main configuration
if cfg := ccconf.GetPackageConfig("main"); cfg != nil {
if clustercfg := ccconf.GetPackageConfig("clusters"); clustercfg != nil {
config.Init(cfg, clustercfg)
} else {
t.Fatal("Cluster configuration must be present")
}
config.Init(cfg)
} else {
t.Fatal("Main configuration must be present")
}

View File

@@ -0,0 +1,596 @@
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved. This file is part of cc-backend.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package repository
import (
"context"
"testing"
"github.com/ClusterCockpit/cc-lib/v2/schema"
_ "github.com/mattn/go-sqlite3"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"golang.org/x/crypto/bcrypt"
)
// TestAddUser covers AddUser: a fully populated user round-trips through
// GetUser with a bcrypt-verifiable password, a minimal user is stored with
// empty optional fields, and adding the same username twice fails.
func TestAddUser(t *testing.T) {
	_ = setup(t)
	repo := GetUserRepository()

	t.Run("add user with all fields", func(t *testing.T) {
		want := &schema.User{
			Username:   "testuser1",
			Name:       "Test User One",
			Email:      "test1@example.com",
			Password:   "testpassword123",
			Roles:      []string{"user"},
			Projects:   []string{"project1", "project2"},
			AuthSource: schema.AuthViaLocalPassword,
		}
		require.NoError(t, repo.AddUser(want))

		got, getErr := repo.GetUser("testuser1")
		require.NoError(t, getErr)

		assert.Equal(t, want.Username, got.Username)
		assert.Equal(t, want.Name, got.Name)
		assert.Equal(t, want.Email, got.Email)
		assert.Equal(t, want.Roles, got.Roles)
		assert.Equal(t, want.Projects, got.Projects)
		assert.NotEmpty(t, got.Password)

		// The stored password must verify against the plain text via bcrypt.
		hashErr := bcrypt.CompareHashAndPassword([]byte(got.Password), []byte("testpassword123"))
		assert.NoError(t, hashErr, "Password should be hashed correctly")

		require.NoError(t, repo.DelUser("testuser1"))
	})

	t.Run("add user with minimal fields", func(t *testing.T) {
		want := &schema.User{
			Username:   "testuser2",
			Roles:      []string{"user"},
			Projects:   []string{},
			AuthSource: schema.AuthViaLDAP,
		}
		require.NoError(t, repo.AddUser(want))

		got, getErr := repo.GetUser("testuser2")
		require.NoError(t, getErr)

		// Optional fields that were never set must come back empty.
		assert.Equal(t, want.Username, got.Username)
		assert.Equal(t, "", got.Name)
		assert.Equal(t, "", got.Email)
		assert.Equal(t, "", got.Password)

		require.NoError(t, repo.DelUser("testuser2"))
	})

	t.Run("add duplicate user fails", func(t *testing.T) {
		dup := &schema.User{
			Username:   "testuser3",
			Roles:      []string{"user"},
			Projects:   []string{},
			AuthSource: schema.AuthViaLocalPassword,
		}
		require.NoError(t, repo.AddUser(dup))

		assert.Error(t, repo.AddUser(dup), "Adding duplicate user should fail")

		require.NoError(t, repo.DelUser("testuser3"))
	})
}
// TestGetUser checks that GetUser returns the stored fields of an existing
// user and an error for an unknown username.
func TestGetUser(t *testing.T) {
	_ = setup(t)
	repo := GetUserRepository()

	t.Run("get existing user", func(t *testing.T) {
		want := &schema.User{
			Username:   "getuser1",
			Name:       "Get User",
			Email:      "getuser@example.com",
			Roles:      []string{"user", "admin"},
			Projects:   []string{"proj1"},
			AuthSource: schema.AuthViaLocalPassword,
		}
		require.NoError(t, repo.AddUser(want))

		got, getErr := repo.GetUser("getuser1")
		require.NoError(t, getErr)

		assert.Equal(t, want.Username, got.Username)
		assert.Equal(t, want.Name, got.Name)
		assert.Equal(t, want.Email, got.Email)
		// Roles and projects are compared without regard to order.
		assert.ElementsMatch(t, want.Roles, got.Roles)
		assert.ElementsMatch(t, want.Projects, got.Projects)

		require.NoError(t, repo.DelUser("getuser1"))
	})

	t.Run("get non-existent user", func(t *testing.T) {
		_, getErr := repo.GetUser("nonexistent")
		assert.Error(t, getErr)
	})
}
// TestUpdateUser checks that UpdateUser persists a changed name and that
// passing the same record as both current and updated state succeeds.
func TestUpdateUser(t *testing.T) {
	_ = setup(t)
	repo := GetUserRepository()

	t.Run("update user name", func(t *testing.T) {
		initial := &schema.User{
			Username:   "updateuser1",
			Name:       "Original Name",
			Roles:      []string{"user"},
			Projects:   []string{},
			AuthSource: schema.AuthViaLocalPassword,
		}
		require.NoError(t, repo.AddUser(initial))

		stored, getErr := repo.GetUser("updateuser1")
		require.NoError(t, getErr)

		// Only the name differs from the stored record.
		changed := &schema.User{
			Username: "updateuser1",
			Name:     "Updated Name",
		}
		require.NoError(t, repo.UpdateUser(stored, changed))

		reloaded, reloadErr := repo.GetUser("updateuser1")
		require.NoError(t, reloadErr)
		assert.Equal(t, "Updated Name", reloaded.Name)

		require.NoError(t, repo.DelUser("updateuser1"))
	})

	t.Run("update with no changes", func(t *testing.T) {
		initial := &schema.User{
			Username:   "updateuser2",
			Name:       "Same Name",
			Roles:      []string{"user"},
			Projects:   []string{},
			AuthSource: schema.AuthViaLocalPassword,
		}
		require.NoError(t, repo.AddUser(initial))

		stored, getErr := repo.GetUser("updateuser2")
		require.NoError(t, getErr)

		// Current and updated state are the very same record.
		assert.NoError(t, repo.UpdateUser(stored, stored))

		require.NoError(t, repo.DelUser("updateuser2"))
	})
}
// TestDelUser checks that a deleted user can no longer be fetched and that
// deleting an unknown username does not return an error.
func TestDelUser(t *testing.T) {
	_ = setup(t)
	repo := GetUserRepository()

	t.Run("delete existing user", func(t *testing.T) {
		victim := &schema.User{
			Username:   "deluser1",
			Roles:      []string{"user"},
			Projects:   []string{},
			AuthSource: schema.AuthViaLocalPassword,
		}
		require.NoError(t, repo.AddUser(victim))
		require.NoError(t, repo.DelUser("deluser1"))

		_, getErr := repo.GetUser("deluser1")
		assert.Error(t, getErr, "User should not exist after deletion")
	})

	t.Run("delete non-existent user", func(t *testing.T) {
		assert.NoError(t, repo.DelUser("nonexistent"), "Deleting non-existent user should not error")
	})
}
// TestListUsers adds three users with the roles user, admin and manager and
// checks which of them ListUsers reports, both for the full listing and for
// the specials-only listing.
func TestListUsers(t *testing.T) {
	_ = setup(t)
	repo := GetUserRepository()

	fixtures := []*schema.User{
		{
			Username:   "listuser1",
			Roles:      []string{"user"},
			Projects:   []string{},
			AuthSource: schema.AuthViaLocalPassword,
		},
		{
			Username:   "listuser2",
			Roles:      []string{"admin"},
			Projects:   []string{},
			AuthSource: schema.AuthViaLocalPassword,
		},
		{
			Username:   "listuser3",
			Roles:      []string{"manager"},
			Projects:   []string{"proj1"},
			AuthSource: schema.AuthViaLocalPassword,
		},
	}
	for _, fixture := range fixtures {
		require.NoError(t, repo.AddUser(fixture))
	}

	// namesOf collects the usernames of a listing in order.
	namesOf := func(listed []*schema.User) []string {
		names := make([]string, 0, len(listed))
		for _, entry := range listed {
			names = append(names, entry.Username)
		}
		return names
	}

	t.Run("list all users", func(t *testing.T) {
		listed, listErr := repo.ListUsers(false)
		require.NoError(t, listErr)
		assert.GreaterOrEqual(t, len(listed), 3)

		usernames := namesOf(listed)
		assert.Contains(t, usernames, "listuser1")
		assert.Contains(t, usernames, "listuser2")
		assert.Contains(t, usernames, "listuser3")
	})

	t.Run("list special users only", func(t *testing.T) {
		listed, listErr := repo.ListUsers(true)
		require.NoError(t, listErr)

		usernames := namesOf(listed)
		assert.Contains(t, usernames, "listuser2")
		assert.Contains(t, usernames, "listuser3")
	})

	// Remove the fixtures again, in the order they were added.
	for _, name := range []string{"listuser1", "listuser2", "listuser3"} {
		require.NoError(t, repo.DelUser(name))
	}
}
// TestGetLdapUsernames adds one LDAP-sourced and one locally authenticated
// user and checks that GetLdapUsernames reports only the former.
func TestGetLdapUsernames(t *testing.T) {
	_ = setup(t)
	repo := GetUserRepository()

	fixtures := []*schema.User{
		{
			Username:   "ldapuser1",
			Roles:      []string{"user"},
			Projects:   []string{},
			AuthSource: schema.AuthViaLDAP,
		},
		{
			Username:   "localuser1",
			Roles:      []string{"user"},
			Projects:   []string{},
			AuthSource: schema.AuthViaLocalPassword,
		},
	}
	for _, fixture := range fixtures {
		require.NoError(t, repo.AddUser(fixture))
	}

	names, listErr := repo.GetLdapUsernames()
	require.NoError(t, listErr)
	assert.Contains(t, names, "ldapuser1")
	assert.NotContains(t, names, "localuser1")

	// Remove the fixtures again, in the order they were added.
	for _, name := range []string{"ldapuser1", "localuser1"} {
		require.NoError(t, repo.DelUser(name))
	}
}
// TestAddRole covers AddRole: a valid new role is appended to the user's
// roles, while a role the user already holds and an unknown role name are
// both rejected with the expected error messages.
func TestAddRole(t *testing.T) {
	_ = setup(t)
	r := GetUserRepository()
	ctx := context.Background()

	t.Run("add valid role", func(t *testing.T) {
		user := &schema.User{
			Username:   "roleuser1",
			Roles:      []string{"user"},
			Projects:   []string{},
			AuthSource: schema.AuthViaLocalPassword,
		}

		err := r.AddUser(user)
		require.NoError(t, err)

		err = r.AddRole(ctx, "roleuser1", "admin")
		require.NoError(t, err)

		// Both the original and the newly added role must be present.
		retrieved, err := r.GetUser("roleuser1")
		require.NoError(t, err)
		assert.Contains(t, retrieved.Roles, "admin")
		assert.Contains(t, retrieved.Roles, "user")

		err = r.DelUser("roleuser1")
		require.NoError(t, err)
	})

	t.Run("add duplicate role", func(t *testing.T) {
		user := &schema.User{
			Username:   "roleuser2",
			Roles:      []string{"user"},
			Projects:   []string{},
			AuthSource: schema.AuthViaLocalPassword,
		}

		err := r.AddUser(user)
		require.NoError(t, err)

		err = r.AddRole(ctx, "roleuser2", "user")
		// require instead of assert: the message check below calls err.Error(),
		// which would panic on a nil error instead of failing the test cleanly.
		require.Error(t, err, "Adding duplicate role should fail")
		assert.Contains(t, err.Error(), "already has role")

		err = r.DelUser("roleuser2")
		require.NoError(t, err)
	})

	t.Run("add invalid role", func(t *testing.T) {
		user := &schema.User{
			Username:   "roleuser3",
			Roles:      []string{"user"},
			Projects:   []string{},
			AuthSource: schema.AuthViaLocalPassword,
		}

		err := r.AddUser(user)
		require.NoError(t, err)

		err = r.AddRole(ctx, "roleuser3", "invalidrole")
		// require instead of assert: see the duplicate-role subtest.
		require.Error(t, err, "Adding invalid role should fail")
		assert.Contains(t, err.Error(), "no valid option")

		err = r.DelUser("roleuser3")
		require.NoError(t, err)
	})
}
// TestRemoveRole exercises UserRepository.RemoveRole for three cases:
// removing a role the user holds, removing a role the user does not hold, and
// removing the manager role while projects are still assigned.
func TestRemoveRole(t *testing.T) {
	_ = setup(t)
	r := GetUserRepository()
	ctx := context.Background()

	// addTempUser stores the fixture user and registers its removal as a
	// cleanup, so the user is deleted even when a later require aborts the
	// subtest and would otherwise leave it behind for other tests.
	addTempUser := func(t *testing.T, user *schema.User) {
		t.Helper()
		name := user.Username
		require.NoError(t, r.AddUser(user))
		t.Cleanup(func() {
			assert.NoError(t, r.DelUser(name))
		})
	}

	t.Run("remove existing role", func(t *testing.T) {
		addTempUser(t, &schema.User{
			Username:   "rmroleuser1",
			Roles:      []string{"user", "admin"},
			Projects:   []string{},
			AuthSource: schema.AuthViaLocalPassword,
		})

		err := r.RemoveRole(ctx, "rmroleuser1", "admin")
		require.NoError(t, err)

		retrieved, err := r.GetUser("rmroleuser1")
		require.NoError(t, err)
		assert.NotContains(t, retrieved.Roles, "admin")
		assert.Contains(t, retrieved.Roles, "user")
	})

	t.Run("remove non-existent role", func(t *testing.T) {
		addTempUser(t, &schema.User{
			Username:   "rmroleuser2",
			Roles:      []string{"user"},
			Projects:   []string{},
			AuthSource: schema.AuthViaLocalPassword,
		})

		err := r.RemoveRole(ctx, "rmroleuser2", "admin")
		// require (not assert) so a nil error stops here instead of
		// panicking on err.Error() below.
		require.Error(t, err, "Removing non-existent role should fail")
		assert.Contains(t, err.Error(), "already deleted")
	})

	t.Run("remove manager role with projects", func(t *testing.T) {
		addTempUser(t, &schema.User{
			Username:   "rmroleuser3",
			Roles:      []string{"manager"},
			Projects:   []string{"proj1", "proj2"},
			AuthSource: schema.AuthViaLocalPassword,
		})

		err := r.RemoveRole(ctx, "rmroleuser3", "manager")
		// require (not assert) so a nil error stops here instead of
		// panicking on err.Error() below.
		require.Error(t, err, "Removing manager role with projects should fail")
		assert.Contains(t, err.Error(), "still has assigned project")
	})
}
// TestAddProject exercises UserRepository.AddProject for three cases: adding
// a project to a manager, adding a project to a user without the manager
// role, and adding a project the manager already has.
func TestAddProject(t *testing.T) {
	_ = setup(t)
	r := GetUserRepository()
	ctx := context.Background()

	// addTempUser stores the fixture user and registers its removal as a
	// cleanup, so the user is deleted even when a later require aborts the
	// subtest and would otherwise leave it behind for other tests.
	addTempUser := func(t *testing.T, user *schema.User) {
		t.Helper()
		name := user.Username
		require.NoError(t, r.AddUser(user))
		t.Cleanup(func() {
			assert.NoError(t, r.DelUser(name))
		})
	}

	t.Run("add project to manager", func(t *testing.T) {
		addTempUser(t, &schema.User{
			Username:   "projuser1",
			Roles:      []string{"manager"},
			Projects:   []string{},
			AuthSource: schema.AuthViaLocalPassword,
		})

		err := r.AddProject(ctx, "projuser1", "newproject")
		require.NoError(t, err)

		retrieved, err := r.GetUser("projuser1")
		require.NoError(t, err)
		assert.Contains(t, retrieved.Projects, "newproject")
	})

	t.Run("add project to non-manager", func(t *testing.T) {
		addTempUser(t, &schema.User{
			Username:   "projuser2",
			Roles:      []string{"user"},
			Projects:   []string{},
			AuthSource: schema.AuthViaLocalPassword,
		})

		err := r.AddProject(ctx, "projuser2", "newproject")
		// require (not assert) so a nil error stops here instead of
		// panicking on err.Error() below.
		require.Error(t, err, "Adding project to non-manager should fail")
		assert.Contains(t, err.Error(), "not a manager")
	})

	t.Run("add duplicate project", func(t *testing.T) {
		addTempUser(t, &schema.User{
			Username:   "projuser3",
			Roles:      []string{"manager"},
			Projects:   []string{"existingproject"},
			AuthSource: schema.AuthViaLocalPassword,
		})

		err := r.AddProject(ctx, "projuser3", "existingproject")
		// require (not assert) so a nil error stops here instead of
		// panicking on err.Error() below.
		require.Error(t, err, "Adding duplicate project should fail")
		assert.Contains(t, err.Error(), "already manages")
	})
}
// TestRemoveProject exercises UserRepository.RemoveProject for three cases:
// removing a project the manager has, removing a project the manager does not
// have, and removing a project from a user without the manager role.
func TestRemoveProject(t *testing.T) {
	_ = setup(t)
	r := GetUserRepository()
	ctx := context.Background()

	// addTempUser stores the fixture user and registers its removal as a
	// cleanup, so the user is deleted even when a later require aborts the
	// subtest and would otherwise leave it behind for other tests.
	addTempUser := func(t *testing.T, user *schema.User) {
		t.Helper()
		name := user.Username
		require.NoError(t, r.AddUser(user))
		t.Cleanup(func() {
			assert.NoError(t, r.DelUser(name))
		})
	}

	t.Run("remove existing project", func(t *testing.T) {
		addTempUser(t, &schema.User{
			Username:   "rmprojuser1",
			Roles:      []string{"manager"},
			Projects:   []string{"proj1", "proj2"},
			AuthSource: schema.AuthViaLocalPassword,
		})

		err := r.RemoveProject(ctx, "rmprojuser1", "proj1")
		require.NoError(t, err)

		retrieved, err := r.GetUser("rmprojuser1")
		require.NoError(t, err)
		assert.NotContains(t, retrieved.Projects, "proj1")
		assert.Contains(t, retrieved.Projects, "proj2")
	})

	t.Run("remove non-existent project", func(t *testing.T) {
		addTempUser(t, &schema.User{
			Username:   "rmprojuser2",
			Roles:      []string{"manager"},
			Projects:   []string{"proj1"},
			AuthSource: schema.AuthViaLocalPassword,
		})

		err := r.RemoveProject(ctx, "rmprojuser2", "nonexistent")
		assert.Error(t, err, "Removing non-existent project should fail")
	})

	t.Run("remove project from non-manager", func(t *testing.T) {
		addTempUser(t, &schema.User{
			Username:   "rmprojuser3",
			Roles:      []string{"user"},
			Projects:   []string{},
			AuthSource: schema.AuthViaLocalPassword,
		})

		err := r.RemoveProject(ctx, "rmprojuser3", "proj1")
		// require (not assert) so a nil error stops here instead of
		// panicking on err.Error() below.
		require.Error(t, err, "Removing project from non-manager should fail")
		assert.Contains(t, err.Error(), "not a manager")
	})
}
// TestGetUserFromContext checks that GetUserFromContext returns the user
// stored under ContextUserKey, and nil for a context that carries no user.
func TestGetUserFromContext(t *testing.T) {
	t.Run("get user from context", func(t *testing.T) {
		want := &schema.User{
			Username: "contextuser",
			Roles:    []string{"user"},
		}

		got := GetUserFromContext(
			context.WithValue(context.Background(), ContextUserKey, want),
		)

		// Stop before dereferencing got if nothing was found.
		require.NotNil(t, got)
		assert.Equal(t, want.Username, got.Username)
	})

	t.Run("get user from empty context", func(t *testing.T) {
		assert.Nil(t, GetUserFromContext(context.Background()))
	})
}